From 6e2e7d81b2ea9ea35eae2c3fccdc04d6f5914a70 Mon Sep 17 00:00:00 2001 From: Thulinma Date: Thu, 1 Nov 2018 17:04:05 +0100 Subject: [PATCH] TS optimize, specifically MPEG2 speedup --- lib/nal.cpp | 106 +++++++++++++++++++++------------------------- lib/ts_stream.cpp | 9 ++-- 2 files changed, 54 insertions(+), 61 deletions(-) diff --git a/lib/nal.cpp b/lib/nal.cpp index d8864b84..b1debdff 100644 --- a/lib/nal.cpp +++ b/lib/nal.cpp @@ -3,17 +3,17 @@ #endif #include #include -#include //for log +#include //for log -#include "nal.h" -#include "bitstream.h" #include "bitfields.h" +#include "bitstream.h" #include "defines.h" +#include "nal.h" -namespace nalu { - std::deque parseNalSizes(DTSC::Packet & pack){ +namespace nalu{ + std::deque parseNalSizes(DTSC::Packet &pack){ std::deque result; - char * data; + char *data; unsigned int dataLen; pack.getString("data", data, dataLen); int offset = 0; @@ -25,7 +25,7 @@ namespace nalu { return result; } - std::string removeEmulationPrevention(const std::string & data) { + std::string removeEmulationPrevention(const std::string &data){ std::string result; result.resize(data.size()); result[0] = data[0]; @@ -33,76 +33,71 @@ namespace nalu { unsigned int dataPtr = 2; unsigned int dataLen = data.size(); unsigned int resPtr = 2; - while (dataPtr + 2 < dataLen) { - if (!data[dataPtr] && !data[dataPtr + 1] && data[dataPtr + 2] == 3){ //We have found an emulation prevention + while (dataPtr + 2 < dataLen){ + if (!data[dataPtr] && !data[dataPtr + 1] && + data[dataPtr + 2] == 3){// We have found an emulation prevention result[resPtr++] = data[dataPtr++]; result[resPtr++] = data[dataPtr++]; - dataPtr++; //Skip the emulation prevention byte - } else { + dataPtr++; // Skip the emulation prevention byte + }else{ result[resPtr++] = data[dataPtr++]; } } - while (dataPtr < dataLen){ - result[resPtr++] = data[dataPtr++]; - } + while (dataPtr < dataLen){result[resPtr++] = data[dataPtr++];} return result.substr(0, resPtr); } - unsigned long toAnnexB(const char * data, unsigned long dataSize, char *& result){ - //toAnnexB keeps the same size. - if (!result){ - result = (char *)malloc(dataSize); - } + unsigned long toAnnexB(const char *data, unsigned long dataSize, char *&result){ + // toAnnexB keeps the same size. + if (!result){result = (char *)malloc(dataSize);} int offset = 0; while (offset < dataSize){ - //Read unit size + // Read unit size unsigned long unitSize = Bit::btohl(data + offset); - //Write annex b header + // Write annex b header memset(result + offset, 0x00, 3); result[offset + 3] = 0x01; - //Copy the nal unit + // Copy the nal unit memcpy(result + offset + 4, data + offset + 4, unitSize); - //Update the offset + // Update the offset offset += 4 + unitSize; } return dataSize; } - ///Scans data for the last non-zero byte, returning a pointer to it. - const char* nalEndPosition(const char * data, uint32_t dataSize){ - while(dataSize > 0 && memcmp(data+dataSize-1, "\000",1) == 0 ){ - dataSize--; - } - return data+dataSize; + /// Scans data for the last non-zero byte, returning a pointer to it. + const char *nalEndPosition(const char *data, uint32_t dataSize){ + while (dataSize && !data[dataSize - 1]){--dataSize;} + return data + dataSize; } - ///Scan data for Annex B start code. Returns pointer to it when found, null otherwise. - const char * scanAnnexB(const char * data, uint32_t dataSize){ - char * offset = (char*)data; - const char * maxData = data + dataSize - 2; - while(offset < maxData){ + /// Scan data for Annex B start code. Returns pointer to it when found, null otherwise. + const char *scanAnnexB(const char *data, uint32_t dataSize){ + char *offset = (char *)data; + const char *maxData = data + dataSize - 2; + while (offset < maxData){ if (offset[2] > 1){ - //We have no zero in the third byte, so we need to skip at least 3 bytes forward + // We have no zero in the third byte, so we need to skip at least 3 bytes forward offset += 3; continue; } if (!offset[2]){ - //We skip forward 1 or 2 bytes depending on contents of the second byte - offset += (offset[1]?2:1); + // We COULD skip forward 1 or 2 bytes depending on contents of the second byte + // offset += (offset[1]?2:1); + //... but skipping a single byte (removing the 'if') is actually faster (benchmarked). + ++offset; continue; } - if (!offset[0] && !offset[1]){ - return offset; - } - //We have no zero in the third byte, so we need to skip at least 3 bytes forward + if (!offset[0] && !offset[1]){return offset;} + // We have no zero in the third byte, so we need to skip at least 3 bytes forward offset += 3; } return 0; } - unsigned long fromAnnexB(const char * data, unsigned long dataSize, char *& result){ - const char * lastCheck = data + dataSize - 3; + unsigned long fromAnnexB(const char *data, unsigned long dataSize, char *&result){ + const char *lastCheck = data + dataSize - 3; if (!result){ FAIL_MSG("No output buffer given to FromAnnexB"); return 0; @@ -110,26 +105,20 @@ namespace nalu { int offset = 0; int newOffset = 0; while (offset < dataSize){ - const char * begin = data + offset; - while ( begin < lastCheck && !(!begin[0] && !begin[1] && begin[2] == 0x01)){ + const char *begin = data + offset; + while (begin < lastCheck && !(!begin[0] && !begin[1] && begin[2] == 0x01)){ begin++; - if (begin < lastCheck && begin[0]){ - begin++; - } + if (begin < lastCheck && begin[0]){begin++;} } - begin += 3;//Initialize begin after the first 0x000001 pattern. + begin += 3; // Initialize begin after the first 0x000001 pattern. if (begin > data + dataSize){ offset = dataSize; continue; } - const char * end = (const char*)memmem(begin, dataSize - (begin - data), "\000\000\001", 3); - if (!end) { - end = data + dataSize; - } - //Check for 4-byte lead in's. Yes, we access -1 here - if (end > begin && (end - data) != dataSize && end[-1] == 0x00){ - end--; - } + const char *end = (const char *)memmem(begin, dataSize - (begin - data), "\000\000\001", 3); + if (!end){end = data + dataSize;} + // Check for 4-byte lead in's. Yes, we access -1 here + if (end > begin && (end - data) != dataSize && end[-1] == 0x00){end--;} unsigned int nalSize = end - begin; Bit::htobl(result + newOffset, nalSize); memcpy(result + newOffset + 4, begin, nalSize); @@ -139,4 +128,5 @@ namespace nalu { } return newOffset; } -} +}// namespace nalu + diff --git a/lib/ts_stream.cpp b/lib/ts_stream.cpp index 0647fd86..cc8297fb 100644 --- a/lib/ts_stream.cpp +++ b/lib/ts_stream.cpp @@ -132,8 +132,8 @@ namespace TS{ int tid = newPack.getPID(); bool unitStart = newPack.getUnitStart(); std::deque & PS = pesStreams[tid]; - if ((pidToCodec.count(tid) || tid == 0 || newPack.isPMT()) && - (unitStart || PS.size())){ + if ((unitStart || PS.size()) && + (tid == 0 || newPack.isPMT() || pidToCodec.count(tid))){ PS.push_back(newPack); if (unitStart){ pesPositions[tid].push_back(bytePos); @@ -667,13 +667,16 @@ namespace TS{ return; } - while (nextPtr < pesEnd){ + uint32_t nalno = 0; + //We only check the first 8 packets, because keys should always be near the front of a PES. + while (nextPtr < pesEnd && nalno < 8){ if (!nextPtr){nextPtr = pesEnd;} //Calculate size of NAL unit, removing null bytes from the end nalSize = nalu::nalEndPosition(pesPayload, nextPtr - pesPayload) - pesPayload; // Check if this is a keyframe parseNal(tid, pesPayload, nextPtr, isKeyFrame); + ++nalno; if (((nextPtr - pesPayload) + 3) >= realPayloadSize){break;}//end of the loop realPayloadSize -= ((nextPtr - pesPayload) + 3); // decrease the total size