From 0a51e95d1a2ecbb74b4902be4a4f4e595e192272 Mon Sep 17 00:00:00 2001
From: Thulinma <jaron@vietors.com>
Date: Sat, 18 Jul 2020 19:31:04 +0200
Subject: [PATCH] Improved EBML B-frame detection/handling

---
 src/input/input_ebml.cpp |  29 ++++++--
 src/input/input_ebml.h   | 149 +++++++++++++++++----------------------
 2 files changed, 86 insertions(+), 92 deletions(-)

diff --git a/src/input/input_ebml.cpp b/src/input/input_ebml.cpp
index c480f7d8..4367bbdb 100644
--- a/src/input/input_ebml.cpp
+++ b/src/input/input_ebml.cpp
@@ -170,6 +170,10 @@ namespace Mist{
     if (M.inputLocalVars.isMember("timescale")){
       timeScale = ((double)M.inputLocalVars["timescale"].asInt()) / 1000000.0;
     }
+    if (!M.inputLocalVars.isMember("version") || M.inputLocalVars["version"].asInt() < 2){
+      INFO_MSG("Header needs update, regenerating");
+      return false;
+    }
     return true;
   }
 
@@ -177,9 +181,15 @@ namespace Mist{
     if (!inFile){return false;}
     // Create header file from file
     uint64_t bench = Util::getMicros();
-    if (!meta){meta.reInit(streamName);}
+    if (!meta || (needsLock() && isSingular())){
+      meta.reInit(streamName);
+    }
 
     while (readElement()){
+      if (!config->is_active){
+        WARN_MSG("Aborting header generation due to shutdown: %s", Util::exitReason);
+        return false;
+      }
       EBML::Element E(ptr, readingMinimal);
       if (E.getID() == EBML::EID_TRACKENTRY){
         EBML::Element tmpElem = E.findChild(EBML::EID_TRACKNUMBER);
@@ -364,7 +374,7 @@ namespace Mist{
         if (isVideo && B.isKeyframe()){
           while (TP.hasPackets(true)){
             packetData &C = TP.getPacketData(true);
-            meta.update(C.time, C.offset, C.track, C.dsize, C.bpos, C.key);
+            meta.update(C.time, C.offset, idx, C.dsize, C.bpos, C.key);
             TP.remove();
           }
           TP.flush();
@@ -394,12 +404,12 @@ namespace Mist{
             frameSize = assStr.size();
           }
           if (frameSize){
-            TP.add(newTime * timeScale, 0, tNum, frameSize, lastClusterBPos, B.isKeyframe() && !isAudio, isVideo);
+            TP.add(newTime * timeScale, tNum, frameSize, lastClusterBPos, B.isKeyframe() && !isAudio, isVideo);
           }
         }
         while (TP.hasPackets()){
           packetData &C = TP.getPacketData(isVideo);
-          meta.update(C.time, C.offset, M.trackIDToIndex(C.track, getpid()), C.dsize, C.bpos, C.key);
+          meta.update(C.time, C.offset, idx, C.dsize, C.bpos, C.key);
           TP.remove();
         }
       }
@@ -417,6 +427,7 @@ namespace Mist{
       }
     }
 
+    meta.inputLocalVars["version"] = 2;
     bench = Util::getMicros(bench);
     INFO_MSG("Header generated in %" PRIu64 " ms", bench / 1000);
     clearPredictors();
@@ -473,6 +484,8 @@ namespace Mist{
   }
 
   void InputEBML::getNext(size_t idx){
+    bool singleTrack = (idx != INVALID_TRACK_ID);
+    size_t wantedID = singleTrack?M.getID(idx):0;
     // Make sure we empty our buffer first
     if (bufferedPacks && packBuf.size()){
       for (std::map<uint64_t, trackPredictor>::iterator it = packBuf.begin(); it != packBuf.end(); ++it){
@@ -483,6 +496,7 @@ namespace Mist{
           fillPacket(C);
           TP.remove();
           --bufferedPacks;
+          if (singleTrack && it->first != wantedID){getNext(idx);}
           return;
         }
       }
@@ -500,6 +514,7 @@ namespace Mist{
                 fillPacket(C);
                 TP.remove();
                 --bufferedPacks;
+                if (singleTrack && it->first != wantedID){getNext(idx);}
                 return;
               }
             }
@@ -510,7 +525,7 @@ namespace Mist{
         }
         B = EBML::Block(ptr);
       }while (!B || B.getType() != EBML::ELEM_BLOCK ||
-               (idx != INVALID_TRACK_ID && M.getID(idx) != B.getTrackNum()));
+               (singleTrack && wantedID != B.getTrackNum()));
     }else{
       B = EBML::Block(ptr);
     }
@@ -531,6 +546,7 @@ namespace Mist{
         fillPacket(C);
         TP.remove();
         --bufferedPacks;
+        if (singleTrack && trackIdx != idx){getNext(idx);}
         return;
       }
     }
@@ -563,7 +579,7 @@ namespace Mist{
           memcpy(ptr, assStr.data(), frameSize);
         }
         if (frameSize){
-          TP.add(newTime * timeScale, 0, tNum, frameSize, lastClusterBPos,
+          TP.add(newTime * timeScale, tNum, frameSize, lastClusterBPos,
                  B.isKeyframe() && !isAudio, isVideo, (void *)ptr);
           ++bufferedPacks;
         }
@@ -574,6 +590,7 @@ namespace Mist{
       fillPacket(C);
       TP.remove();
       --bufferedPacks;
+      if (singleTrack && trackIdx != idx){getNext(idx);}
     }else{
       // We didn't set thisPacket yet. Read another.
       // Recursing is fine, this can only happen a few times in a row.
diff --git a/src/input/input_ebml.h b/src/input/input_ebml.h
index f3fc28c0..a740b821 100644
--- a/src/input/input_ebml.h
+++ b/src/input/input_ebml.h
@@ -4,7 +4,7 @@
 
 namespace Mist{
 
-#define PKT_COUNT 64
+#define PKT_COUNT 24
 
   class packetData{
   public:
@@ -29,119 +29,96 @@ namespace Mist{
   };
   class trackPredictor{
   public:
-    packetData pkts[PKT_COUNT];
-    uint64_t frameOffset;   /// The static average offset between transmit time and display time
-    bool frameOffsetKnown;  /// Whether the average frame offset is known
-    uint16_t smallestFrame; /// low-ball estimate of time per frame
-    uint64_t lastTime;      /// last send transmit timestamp
-    uint64_t ctr;           /// ingested frame count
-    uint64_t rem;           /// removed frame count
-    uint64_t maxOffset;     /// maximum offset for this track
-    uint64_t lowestTime;    /// First timestamp to enter the buffer
+    packetData pkts[PKT_COUNT]; /// Buffer for packet data
+    uint64_t times[PKT_COUNT];  /// Sorted timestamps of buffered packets
+    size_t maxDelay;            /// Maximum amount of bframes we expect
+    uint32_t timeOffset;        /// Milliseconds we need to subtract from times so that offsets are always > 0
+    uint64_t ctr;               /// ingested frame count
+    uint64_t rem;               /// removed frame count
+    bool initialized;
     trackPredictor(){
-      smallestFrame = 0xFFFF;
-      frameOffsetKnown = false;
-      frameOffset = 0;
-      maxOffset = 0;
+      initialized = false;
+      maxDelay = 0;
+      timeOffset = 0;
       flush();
     }
     bool hasPackets(bool finished = false){
-      if (finished || frameOffsetKnown){
+      if (finished){
         return (ctr - rem > 0);
       }else{
-        return (ctr - rem > 12);
+        return ((initialized || ctr > 16) && ctr - rem > maxDelay);
       }
     }
     /// Clears all internal values, for reuse as-new.
     void flush(){
-      lastTime = 0;
       ctr = 0;
       rem = 0;
-      lowestTime = 0;
     }
     packetData &getPacketData(bool mustCalcOffsets){
       // grab the next packet to output
       packetData &p = pkts[rem % PKT_COUNT];
-      if (!mustCalcOffsets){
-        frameOffsetKnown = true;
+      if (!mustCalcOffsets || !maxDelay){
         return p;
       }
-      if (rem && !p.key){
-        uint64_t dispTime = p.time;
-        if (p.time + frameOffset < lastTime + smallestFrame){
-          uint32_t shift =
-              (uint32_t)((((lastTime + smallestFrame) - (p.time + frameOffset)) + (smallestFrame - 1)) / smallestFrame) *
-              smallestFrame;
-          if (shift < smallestFrame){shift = smallestFrame;}
-          VERYHIGH_MSG("Offset negative, shifting original time forward by %" PRIu32, shift);
-          p.time += shift;
-        }
-        p.offset = p.time - (lastTime + smallestFrame) + frameOffset;
-        if (p.offset > maxOffset){
-          uint64_t diff = p.offset - maxOffset;
-          VERYHIGH_MSG("Shifting forward %" PRIu64 "ms (maxOffset reached: %" PRIu64 " > %" PRIu64 ")",
-                       diff, p.offset, maxOffset);
-          p.offset -= diff;
-          lastTime += diff;
-        }
-        p.time = (lastTime + smallestFrame);
-        // If we calculate an offset less than a frame away,
-        // we assume it's just time stamp drift due to lack of precision.
-        p.offset = ((uint32_t)((p.offset + (smallestFrame / 2)) / smallestFrame)) * smallestFrame;
-        // Shift the time forward if needed, but never backward
-        if (p.offset + p.time < dispTime){
-          VERYHIGH_MSG("Shifting forward %" PRIu64 "ms (time drift)", dispTime - (p.offset + p.time));
-          p.time += dispTime - (p.offset + p.time);
-        }
-      }else{
-        if (!frameOffsetKnown){
-          // Check the first few timestamps against each other, find the smallest distance.
-          for (uint64_t i = 1; i < ctr; ++i){
-            uint64_t t1 = pkts[i % PKT_COUNT].time;
-            for (uint64_t j = 0; j < ctr; ++j){
-              if (i == j){continue;}
-              uint64_t t2 = pkts[j % PKT_COUNT].time;
-              uint64_t tDiff = (t1 < t2) ? (t2 - t1) : (t1 - t2);
-              if (tDiff < smallestFrame){smallestFrame = tDiff;}
+      //Calculate the timeOffset when extracting the first frame
+      if (!initialized){
+        size_t buffLen = (ctr-rem-1) % PKT_COUNT;
+        for (size_t i = 0; i <= buffLen; ++i){
+          if (pkts[i].time < times[i]){
+            if (times[i] - pkts[i].time > timeOffset){
+              timeOffset = times[i] - pkts[i].time;
             }
           }
-          // Cool, now we're pretty sure we know the frame rate. Let's calculate some offsets.
-          for (uint64_t i = 1; i < ctr; ++i){
-            uint64_t timeDiff = pkts[i % PKT_COUNT].time - lowestTime;
-            uint64_t timeExpt = smallestFrame * i;
-            if (timeDiff > timeExpt && maxOffset < timeDiff - timeExpt){
-              maxOffset = timeDiff - timeExpt;
-            }
-            if (timeDiff < timeExpt && frameOffset < timeExpt - timeDiff){
-              frameOffset = timeExpt - timeDiff;
-            }
-          }
-          maxOffset += frameOffset;
-          // Print for debugging purposes, and consider them gospel from here on forward. Yay!
-          HIGH_MSG("smallestFrame=%" PRIu16 ", frameOffset=%" PRIu64 ", maxOffset=%" PRIu64,
-                   smallestFrame, frameOffset, maxOffset);
-          frameOffsetKnown = true;
+          DONTEVEN_MSG("Checking time offset against entry %zu/%zu: %" PRIu64 "-%" PRIu64 " = %" PRIu32, i, buffLen, times[i], pkts[i].time, timeOffset);
         }
-        p.offset = ((uint32_t)((frameOffset + (smallestFrame / 2)) / smallestFrame)) * smallestFrame;
+        MEDIUM_MSG("timeOffset calculated to be %" PRIu32 ", max frame delay %zu", timeOffset, maxDelay);
+        initialized = true;
       }
-      lastTime = p.time;
-      INSANE_MSG("Outputting%s %" PRIu64 "+%" PRIu64 " (#%" PRIu64 ", Max=%" PRIu64
-                 "), display at %" PRIu64,
-                 (p.key ? "KEY" : ""), p.time, p.offset, rem, maxOffset, p.time + p.offset);
+
+      uint64_t origTime = p.time;
+      //Set new timestamp to first time in sorted array
+      p.time = times[0];
+      //Subtract timeOffset if possible
+      if (p.time >= timeOffset){p.time -= timeOffset;}
+      //If possible, calculate offset based on original timestamp difference with new timestamp
+      if (origTime > p.time){p.offset = origTime-p.time;}
+      //Less than 3 milliseconds off? Assume we needed 0 and it's a rounding error in timestamps.
+      if (p.offset < 3){p.offset = 0;}
+      DONTEVEN_MSG("Outputting%s %" PRIu64 "+%" PRIu64 " (#%" PRIu64 "), display at %" PRIu64,
+                 (p.key ? " KEY" : ""), p.time, p.offset, rem, p.time + p.offset);
       return p;
     }
 
-    void add(uint64_t packTime, uint64_t packOffset, uint64_t packTrack, uint64_t packDataSize,
+    void add(uint64_t packTime, uint64_t packTrack, uint64_t packDataSize,
              uint64_t packBytePos, bool isKeyframe, bool isVideo, void *dataPtr = 0){
-      if (!ctr){lowestTime = packTime;}
-      if (packTime > lowestTime && packTime - lowestTime < smallestFrame){
-        smallestFrame = packTime - lowestTime;
-      }
-      pkts[ctr % PKT_COUNT].set(packTime, packOffset, packTrack, packDataSize, packBytePos, isKeyframe, dataPtr);
+      pkts[ctr % PKT_COUNT].set(packTime, 0, packTrack, packDataSize, packBytePos, isKeyframe, dataPtr);
       ++ctr;
-      if (ctr == PKT_COUNT - 1){frameOffsetKnown = true;}
+      if (!isVideo){return;}
+      size_t buffLen = ctr-rem-1;
+      //Just in case somebody messed up, ensure we don't go out of our PKT_COUNT sized array
+      if (buffLen >= PKT_COUNT){buffLen = PKT_COUNT - 1;}
+      times[buffLen] = packTime;
+      if (buffLen){
+        //Swap the times while the previous is higher than the current
+        size_t i = buffLen;
+        while (i && times[i] < times[i-1]){
+          uint64_t tmp = times[i-1];
+          times[i-1] = times[i];
+          times[i] = tmp;
+          --i;
+          //Keep track of maximum delay
+          if (!initialized && buffLen - i + 1 > maxDelay){
+            maxDelay = buffLen - i + 1;
+          }
+        }
+      }
+    }
+    void remove(){
+      ++rem;
+      size_t buffLen = ctr-rem;
+      if (buffLen >= PKT_COUNT){buffLen = PKT_COUNT-1;}
+      for (size_t i = 0; i < buffLen; ++i){times[i] = times[i+1];}
     }
-    void remove(){++rem;}
   };
 
   class InputEBML : public Input{