From 6e2e7d81b2ea9ea35eae2c3fccdc04d6f5914a70 Mon Sep 17 00:00:00 2001
From: Thulinma <jaron@vietors.com>
Date: Thu, 1 Nov 2018 17:04:05 +0100
Subject: [PATCH] TS optimize, specifically MPEG2 speedup

---
 lib/nal.cpp       | 106 +++++++++++++++++++++-------------------------
 lib/ts_stream.cpp |   9 ++--
 2 files changed, 54 insertions(+), 61 deletions(-)
diff --git a/lib/nal.cpp b/lib/nal.cpp
index d8864b84..b1debdff 100644
--- a/lib/nal.cpp
+++ b/lib/nal.cpp
@@ -3,17 +3,17 @@
 #endif
 #include <cstdlib>
 #include <cstring>
-#include <math.h>//for log
+#include <math.h> //for log
 
-#include "nal.h"
-#include "bitstream.h"
 #include "bitfields.h"
+#include "bitstream.h"
 #include "defines.h"
+#include "nal.h"
 
-namespace nalu {
-  std::deque<int> parseNalSizes(DTSC::Packet & pack){
+namespace nalu{
+  std::deque<int> parseNalSizes(DTSC::Packet &pack){
     std::deque<int> result;
-    char * data;
+    char *data;
     unsigned int dataLen;
     pack.getString("data", data, dataLen);
     int offset = 0;
@@ -25,7 +25,7 @@ namespace nalu {
     return result;
   }
 
-  std::string removeEmulationPrevention(const std::string & data) {
+  std::string removeEmulationPrevention(const std::string &data){
     std::string result;
     result.resize(data.size());
     result[0] = data[0];
@@ -33,76 +33,71 @@ namespace nalu {
     unsigned int dataPtr = 2;
     unsigned int dataLen = data.size();
     unsigned int resPtr = 2;
-    while (dataPtr + 2 < dataLen) {
-      if (!data[dataPtr] && !data[dataPtr + 1] && data[dataPtr + 2] == 3){ //We have found an emulation prevention
+    while (dataPtr + 2 < dataLen){
+      if (!data[dataPtr] && !data[dataPtr + 1] &&
+          data[dataPtr + 2] == 3){// We have found an emulation prevention
         result[resPtr++] = data[dataPtr++];
         result[resPtr++] = data[dataPtr++];
-        dataPtr++; //Skip the emulation prevention byte
-      } else {
+        dataPtr++; // Skip the emulation prevention byte
+      }else{
         result[resPtr++] = data[dataPtr++];
       }
     }
 
-    while (dataPtr < dataLen){
-      result[resPtr++] = data[dataPtr++];
-    }
+    while (dataPtr < dataLen){result[resPtr++] = data[dataPtr++];}
     return result.substr(0, resPtr);
   }
 
-  unsigned long toAnnexB(const char * data, unsigned long dataSize, char *& result){
-    //toAnnexB keeps the same size.
-    if (!result){
-      result = (char *)malloc(dataSize);
-    }
+  unsigned long toAnnexB(const char *data, unsigned long dataSize, char *&result){
+    // toAnnexB keeps the same size.
+    if (!result){result = (char *)malloc(dataSize);}
     int offset = 0;
     while (offset < dataSize){
-      //Read unit size
+      // Read unit size
       unsigned long unitSize = Bit::btohl(data + offset);
-      //Write annex b header
+      // Write annex b header
       memset(result + offset, 0x00, 3);
       result[offset + 3] = 0x01;
-      //Copy the nal unit
+      // Copy the nal unit
       memcpy(result + offset + 4, data + offset + 4, unitSize);
-      //Update the offset
+      // Update the offset
       offset += 4 + unitSize;
     }
     return dataSize;
   }
 
-  ///Scans data for the last non-zero byte, returning a pointer to it.
-  const char* nalEndPosition(const char * data, uint32_t dataSize){
-    while(dataSize > 0 && memcmp(data+dataSize-1, "\000",1) == 0 ){
-      dataSize--;
-    }
-    return data+dataSize;
+  /// Skips trailing zero bytes, returning a pointer just past the last non-zero byte (data itself if all bytes are zero).
+  const char *nalEndPosition(const char *data, uint32_t dataSize){
+    while (dataSize && !data[dataSize - 1]){--dataSize;}
+    return data + dataSize;
   }
 
-  ///Scan data for Annex B start code. Returns pointer to it when found, null otherwise.
-  const char * scanAnnexB(const char * data, uint32_t dataSize){
-    char * offset = (char*)data;
-    const char * maxData = data + dataSize - 2;
-    while(offset < maxData){
+  /// Scan data for Annex B start code. Returns pointer to it when found, null otherwise.
+  const char *scanAnnexB(const char *data, uint32_t dataSize){
+    char *offset = (char *)data;
+    const char *maxData = data + dataSize - 2;
+    while (offset < maxData){
       if (offset[2] > 1){
-        //We have no zero in the third byte, so we need to skip at least 3 bytes forward
+        // We have no zero in the third byte, so we need to skip at least 3 bytes forward
         offset += 3;
         continue;
       }
       if (!offset[2]){
-        //We skip forward 1 or 2 bytes depending on contents of the second byte
-        offset += (offset[1]?2:1);
+        // We COULD skip forward 1 or 2 bytes depending on contents of the second byte
+        // offset += (offset[1]?2:1);
+        //... but skipping a single byte (removing the 'if') is actually faster (benchmarked).
+        ++offset;
         continue;
       }
-      if (!offset[0] && !offset[1]){
-        return offset;
-      }
-      //We have no zero in the third byte, so we need to skip at least 3 bytes forward
+      if (!offset[0] && !offset[1]){return offset;}
+      // Third byte is non-zero and not preceded by two zero bytes, so no start code can begin in these 3 bytes: skip them
       offset += 3;
     }
     return 0;
   }
 
-  unsigned long fromAnnexB(const char * data, unsigned long dataSize, char *& result){
-    const char * lastCheck = data + dataSize - 3;
+  unsigned long fromAnnexB(const char *data, unsigned long dataSize, char *&result){
+    const char *lastCheck = data + dataSize - 3;
     if (!result){
       FAIL_MSG("No output buffer given to FromAnnexB");
       return 0;
@@ -110,26 +105,20 @@ namespace nalu {
     int offset = 0;
     int newOffset = 0;
     while (offset < dataSize){
-      const char * begin = data + offset;
-      while ( begin < lastCheck && !(!begin[0] && !begin[1] && begin[2] == 0x01)){
+      const char *begin = data + offset;
+      while (begin < lastCheck && !(!begin[0] && !begin[1] && begin[2] == 0x01)){
         begin++;
-        if (begin < lastCheck && begin[0]){
-          begin++;
-        }
+        if (begin < lastCheck && begin[0]){begin++;}
       }
-      begin += 3;//Initialize begin after the first 0x000001 pattern.
+      begin += 3; // Initialize begin after the first 0x000001 pattern.
       if (begin > data + dataSize){
         offset = dataSize;
         continue;
       }
-      const char * end = (const char*)memmem(begin, dataSize - (begin - data), "\000\000\001", 3);
-      if (!end) {
-        end = data + dataSize;
-      }
-      //Check for 4-byte lead in's. Yes, we access -1 here
-      if (end > begin && (end - data) != dataSize && end[-1] == 0x00){
-        end--;
-      }
+      const char *end = (const char *)memmem(begin, dataSize - (begin - data), "\000\000\001", 3);
+      if (!end){end = data + dataSize;}
+      // Check for 4-byte lead-ins (an extra zero byte before the start code). Yes, we access end[-1] here
+      if (end > begin && (end - data) != dataSize && end[-1] == 0x00){end--;}
       unsigned int nalSize = end - begin;
       Bit::htobl(result + newOffset, nalSize);
       memcpy(result + newOffset + 4, begin, nalSize);
@@ -139,4 +128,5 @@ namespace nalu {
     }
     return newOffset;
   }
-}
+}// namespace nalu
+
diff --git a/lib/ts_stream.cpp b/lib/ts_stream.cpp
index 0647fd86..cc8297fb 100644
--- a/lib/ts_stream.cpp
+++ b/lib/ts_stream.cpp
@@ -132,8 +132,8 @@ namespace TS{
     int tid = newPack.getPID();
     bool unitStart = newPack.getUnitStart();
     std::deque<Packet> & PS = pesStreams[tid];
-    if ((pidToCodec.count(tid) || tid == 0 || newPack.isPMT()) &&
-        (unitStart || PS.size())){
+    if ((unitStart || PS.size()) &&
+        (tid == 0 || newPack.isPMT() || pidToCodec.count(tid))){
       PS.push_back(newPack);
       if (unitStart){
         pesPositions[tid].push_back(bytePos);
@@ -667,13 +667,16 @@ namespace TS{
         return;
       }
 
-      while (nextPtr < pesEnd){
+      uint32_t nalno = 0;
+      //We only check the first 8 NAL units, because keys should always be near the front of a PES.
+      while (nextPtr < pesEnd && nalno < 8){
         if (!nextPtr){nextPtr = pesEnd;}
         //Calculate size of NAL unit, removing null bytes from the end
         nalSize = nalu::nalEndPosition(pesPayload, nextPtr - pesPayload) - pesPayload;
 
         // Check if this is a keyframe
         parseNal(tid, pesPayload, nextPtr, isKeyFrame);
+        ++nalno;
 
         if (((nextPtr - pesPayload) + 3) >= realPayloadSize){break;}//end of the loop
         realPayloadSize -= ((nextPtr - pesPayload) + 3); // decrease the total size