From 1dd27f215ac5e8cde5920a0a0c67c9c6e4672561 Mon Sep 17 00:00:00 2001 From: Thulinma Date: Thu, 30 Nov 2023 18:28:57 +0100 Subject: [PATCH] Spun out MP4 parser into new mp4_stream.cpp; updated MP4 input to use it; added support for fMP4 to this new MP4 Stream library --- lib/meson.build | 2 + lib/mp4.cpp | 26 +- lib/mp4_generic.cpp | 41 +- lib/mp4_generic.h | 78 ++-- lib/mp4_stream.cpp | 501 ++++++++++++++++++++++ lib/mp4_stream.h | 119 ++++++ src/analysers/analyser_mp4.cpp | 271 +++++------- src/analysers/analyser_mp4.h | 3 + src/input/input_mp4.cpp | 757 +++++++++++++-------------------- src/input/input_mp4.h | 77 +--- 10 files changed, 1105 insertions(+), 770 deletions(-) create mode 100644 lib/mp4_stream.cpp create mode 100644 lib/mp4_stream.h diff --git a/lib/meson.build b/lib/meson.build index cdcc8dac..81f99e51 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -27,6 +27,7 @@ headers = [ 'mp4_generic.h', 'mp4.h', 'mp4_ms.h', + 'mp4_stream.h', 'mpeg.h', 'nal.h', 'ogg.h', @@ -96,6 +97,7 @@ libmist = library('mist', 'mp4_encryption.cpp', 'mp4_generic.cpp', 'mp4_ms.cpp', + 'mp4_stream.cpp', 'mpeg.cpp', 'nal.cpp', 'ogg.cpp', diff --git a/lib/mp4.cpp b/lib/mp4.cpp index 37501b58..67975d7f 100644 --- a/lib/mp4.cpp +++ b/lib/mp4.cpp @@ -619,7 +619,7 @@ namespace MP4{ void containerBox::setContent(Box &newContent, uint32_t no){ int tempLoc = 0; - unsigned int contentCount = getContentCount(); + uint32_t contentCount = getContentCount(); for (unsigned int i = 0; i < no; i++){ if (i < contentCount){ tempLoc += getBoxLen(tempLoc); @@ -646,20 +646,24 @@ namespace MP4{ } Box containerBox::getChild(const char *boxName){ - uint32_t count = getContentCount(); - for (uint32_t i = 0; i < count; i++){ - Box &thisChild = getContent(i); - if (thisChild.isType(boxName)){return Box(thisChild.asBox(), false);} + size_t maxLoc = boxedSize() - 8; + size_t tempLoc = payloadOffset; + while (tempLoc < maxLoc){ + Box thisChild(data+tempLoc, false); + if 
(thisChild.isType(boxName)){return thisChild;} + tempLoc += calcBoxSize(data+tempLoc); } return Box((char *)"\000\000\000\010erro", false); } std::deque containerBox::getChildren(const char *boxName){ std::deque res; - uint32_t count = getContentCount(); - for (uint32_t i = 0; i < count; i++){ - Box &thisChild = getContent(i); - if (thisChild.isType(boxName)){res.push_back(Box(thisChild.asBox(), false));} + size_t maxLoc = boxedSize() - 8; + size_t tempLoc = payloadOffset; + while (tempLoc < maxLoc){ + Box thisChild(data+tempLoc, false); + if (thisChild.isType(boxName)){res.push_back(thisChild);} + tempLoc += calcBoxSize(data+tempLoc); } return res; } @@ -707,8 +711,8 @@ namespace MP4{ Box &containerFullBox::getContent(uint32_t no){ static Box ret = Box((char *)"\000\000\000\010erro", false); if (no > getContentCount()){return ret;} - unsigned int i = 0; - int tempLoc = 4; + uint32_t i = 0; + size_t tempLoc = 4; while (i < no){ tempLoc += getBoxLen(tempLoc); i++; diff --git a/lib/mp4_generic.cpp b/lib/mp4_generic.cpp index ed0601a7..954e85e6 100644 --- a/lib/mp4_generic.cpp +++ b/lib/mp4_generic.cpp @@ -61,13 +61,13 @@ namespace MP4{ void TRUN::setFlags(uint32_t newFlags){setInt24(newFlags, 1);} - uint32_t TRUN::getFlags(){return getInt24(1);} + uint32_t TRUN::getFlags() const {return getInt24(1);} void TRUN::setDataOffset(uint32_t newOffset){ if (getFlags() & trundataOffset){setInt32(newOffset, 8);} } - uint32_t TRUN::getDataOffset(){ + uint32_t TRUN::getDataOffset() const { if (getFlags() & trundataOffset){ return getInt32(8); }else{ @@ -84,7 +84,7 @@ namespace MP4{ } } - uint32_t TRUN::getFirstSampleFlags(){ + uint32_t TRUN::getFirstSampleFlags() const { if (!(getFlags() & trunfirstSampleFlags)){return 0;} if (getFlags() & trundataOffset){ return getInt32(12); @@ -93,7 +93,7 @@ namespace MP4{ } } - uint32_t TRUN::getSampleInformationCount(){return getInt32(4);} + uint32_t TRUN::getSampleInformationCount() const {return getInt32(4);} void 
TRUN::setSampleInformation(trunSampleInformation newSample, uint32_t no){ uint32_t flags = getFlags(); @@ -125,7 +125,7 @@ namespace MP4{ if (getSampleInformationCount() < no + 1){setInt32(no + 1, 4);} } - trunSampleInformation TRUN::getSampleInformation(uint32_t no){ + trunSampleInformation TRUN::getSampleInformation(uint32_t no, TFHD * tfhd) const{ trunSampleInformation ret; ret.sampleDuration = 0; ret.sampleSize = 0; @@ -140,19 +140,30 @@ namespace MP4{ if (flags & trunsampleOffsets){sampInfoSize += 4;} uint32_t offset = 8; if (flags & trundataOffset){offset += 4;} - if (flags & trunfirstSampleFlags){offset += 4;} + if (flags & trunfirstSampleFlags){ + if (!no){ret.sampleFlags = getFirstSampleFlags();} + offset += 4; + } uint32_t innerOffset = 0; if (flags & trunsampleDuration){ ret.sampleDuration = getInt32(offset + no * sampInfoSize + innerOffset); innerOffset += 4; + }else if (tfhd){ + ret.sampleDuration = tfhd->getDefaultSampleDuration(); } if (flags & trunsampleSize){ ret.sampleSize = getInt32(offset + no * sampInfoSize + innerOffset); innerOffset += 4; + }else if (tfhd){ + ret.sampleSize = tfhd->getDefaultSampleSize(); } if (flags & trunsampleFlags){ ret.sampleFlags = getInt32(offset + no * sampInfoSize + innerOffset); innerOffset += 4; + }else if ((flags & trunfirstSampleFlags) && !no){ + ret.sampleFlags = getFirstSampleFlags(); + }else if (tfhd){ + ret.sampleFlags = tfhd->getDefaultSampleFlags(); } if (flags & trunsampleOffsets){ ret.sampleOffset = getInt32(offset + no * sampInfoSize + innerOffset); @@ -161,7 +172,7 @@ namespace MP4{ return ret; } - std::string TRUN::toPrettyString(uint32_t indent){ + std::string TRUN::toPrettyString(uint32_t indent) const { std::stringstream r; r << std::string(indent, ' ') << "[trun] Track Fragment Run (" << boxedSize() << ")" << std::endl; r << std::string(indent + 1, ' ') << "Version " << (int)getInt8(0) << std::endl; @@ -201,17 +212,17 @@ namespace MP4{ std::string prettySampleFlags(uint32_t flag){ std::stringstream 
r; + if (flag & noKeySample){ + r << "noKeySample"; + }else{ + r << "isKeySample"; + } if (flag & noIPicture){r << " noIPicture";} if (flag & isIPicture){r << " isIPicture";} if (flag & noDisposable){r << " noDisposable";} if (flag & isDisposable){r << " isDisposable";} if (flag & isRedundant){r << " isRedundant";} if (flag & noRedundant){r << " noRedundant";} - if (flag & noKeySample){ - r << " noKeySample"; - }else{ - r << " isKeySample"; - } return r.str(); } @@ -2610,11 +2621,11 @@ namespace MP4{ void STSZ::setSampleSize(uint32_t newSampleSize){setInt32(newSampleSize, 4);} - uint32_t STSZ::getSampleSize(){return getInt32(4);} + uint32_t STSZ::getSampleSize() const {return getInt32(4);} void STSZ::setSampleCount(uint32_t newSampleCount){setInt32(newSampleCount, 8);} - uint32_t STSZ::getSampleCount(){return getInt32(8);} + uint32_t STSZ::getSampleCount() const {return getInt32(8);} void STSZ::setEntrySize(uint32_t newEntrySize, uint32_t no){ if (no + 1 > getSampleCount()){ @@ -2626,7 +2637,7 @@ namespace MP4{ setInt32(newEntrySize, 12 + no * 4); } - uint32_t STSZ::getEntrySize(uint32_t no){ + uint32_t STSZ::getEntrySize(uint32_t no) const { if (no >= getSampleCount()){return 0;} long unsigned int retVal = getInt32(12 + no * 4); if (retVal == 0){ diff --git a/lib/mp4_generic.h b/lib/mp4_generic.h index eaee65fc..449cf340 100644 --- a/lib/mp4_generic.h +++ b/lib/mp4_generic.h @@ -39,6 +39,36 @@ namespace MP4{ std::string toPrettyString(uint32_t indent = 0); }; + enum tfhdflags{ + tfhdBaseOffset = 0x000001, + tfhdSampleDesc = 0x000002, + tfhdSampleDura = 0x000008, + tfhdSampleSize = 0x000010, + tfhdSampleFlag = 0x000020, + tfhdNoDuration = 0x010000, + tfhdBaseIsMoof = 0x020000, + }; + class TFHD : public Box{ + public: + TFHD(); + void setFlags(uint32_t newFlags); + uint32_t getFlags(); + void setTrackID(uint32_t newID); + uint32_t getTrackID(); + void setBaseDataOffset(uint64_t newOffset); + uint64_t getBaseDataOffset(); + void setSampleDescriptionIndex(uint32_t 
newIndex); + uint32_t getSampleDescriptionIndex(); + void setDefaultSampleDuration(uint32_t newDuration); + uint32_t getDefaultSampleDuration(); + void setDefaultSampleSize(uint32_t newSize); + uint32_t getDefaultSampleSize(); + void setDefaultSampleFlags(uint32_t newFlags); + uint32_t getDefaultSampleFlags(); + bool getDefaultBaseIsMoof(); + std::string toPrettyString(uint32_t indent = 0); + }; + struct trunSampleInformation { uint32_t sampleDuration; uint32_t sampleSize; @@ -69,45 +99,15 @@ namespace MP4{ public: TRUN(); void setFlags(uint32_t newFlags); - uint32_t getFlags(); + uint32_t getFlags() const; void setDataOffset(uint32_t newOffset); - uint32_t getDataOffset(); + uint32_t getDataOffset() const; void setFirstSampleFlags(uint32_t newSampleFlags); - uint32_t getFirstSampleFlags(); - uint32_t getSampleInformationCount(); + uint32_t getFirstSampleFlags() const; + uint32_t getSampleInformationCount() const; void setSampleInformation(trunSampleInformation newSample, uint32_t no); - trunSampleInformation getSampleInformation(uint32_t no); - std::string toPrettyString(uint32_t indent = 0); - }; - - enum tfhdflags{ - tfhdBaseOffset = 0x000001, - tfhdSampleDesc = 0x000002, - tfhdSampleDura = 0x000008, - tfhdSampleSize = 0x000010, - tfhdSampleFlag = 0x000020, - tfhdNoDuration = 0x010000, - tfhdBaseIsMoof = 0x020000, - }; - class TFHD : public Box{ - public: - TFHD(); - void setFlags(uint32_t newFlags); - uint32_t getFlags(); - void setTrackID(uint32_t newID); - uint32_t getTrackID(); - void setBaseDataOffset(uint64_t newOffset); - uint64_t getBaseDataOffset(); - void setSampleDescriptionIndex(uint32_t newIndex); - uint32_t getSampleDescriptionIndex(); - void setDefaultSampleDuration(uint32_t newDuration); - uint32_t getDefaultSampleDuration(); - void setDefaultSampleSize(uint32_t newSize); - uint32_t getDefaultSampleSize(); - void setDefaultSampleFlags(uint32_t newFlags); - uint32_t getDefaultSampleFlags(); - bool getDefaultBaseIsMoof(); - std::string 
toPrettyString(uint32_t indent = 0); + trunSampleInformation getSampleInformation(uint32_t no, TFHD * tfhd = 0) const; + std::string toPrettyString(uint32_t indent = 0) const; }; class AVCC : public Box{ @@ -641,11 +641,11 @@ namespace MP4{ public: STSZ(char v = 1, uint32_t f = 0); void setSampleSize(uint32_t newSampleSize); - uint32_t getSampleSize(); + uint32_t getSampleSize() const; void setSampleCount(uint32_t newSampleCount); - uint32_t getSampleCount(); + uint32_t getSampleCount() const; void setEntrySize(uint32_t newEntrySize, uint32_t no); - uint32_t getEntrySize(uint32_t no); + uint32_t getEntrySize(uint32_t no) const; std::string toPrettyString(uint32_t indent = 0); }; diff --git a/lib/mp4_stream.cpp b/lib/mp4_stream.cpp new file mode 100644 index 00000000..3a200dd1 --- /dev/null +++ b/lib/mp4_stream.cpp @@ -0,0 +1,501 @@ +#include "mp4_stream.h" +#include "h264.h" +#include "mp4_dash.h" + + +namespace MP4{ + + Stream::Stream(){ + } + + Stream::~Stream(){ + } + + void Stream::open(Util::ResizeablePointer & ptr){ + + } + + bool Stream::hasPacket(size_t tid) const{ + return false; + } + + bool Stream::hasPacket() const{ + return !curPositions.empty(); + } + + void Stream::getPacket(size_t tid, DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){ + } + + uint32_t Stream::getEarliestPID(){ + return INVALID_TRACK_ID; + } + + void Stream::getEarliestPacket(DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){ + if (curPositions.empty()){ + pack.null(); + return; + } + // Pop the earliest part off the set + MP4::PartTime curPart = *curPositions.begin(); + curPositions.erase(curPositions.begin()); + + thisTime = curPart.time; + thisIdx = curPart.trackID; + pack.genericFill(curPart.time, curPart.offset, curPart.trackID, 0/*readBuffer + (curPart.bpos-readPos)*/, curPart.size, 0, curPart.keyframe); + + // get the next part for this track + curPart.index++; + if (curPart.index < trkHdrs[curPart.trackID].size()){ + trkHdrs[curPart.trackID].getPart(curPart.index, &curPart.bpos, 
&curPart.size, &curPart.time, &curPart.offset, &curPart.keyframe); + curPositions.insert(curPart); + } + } + + void Stream::initializeMetadata(DTSC::Meta &meta, size_t tid, size_t mappingId){ + } + + TrackHeader::TrackHeader(){ + timeIndex = timeSample = timeFirstSample = timeTotal = timeExtra = 0; + bposIndex = bposSample = 0; + offsetIndex = offsetSample = 0; + keyIndex = keySample = 0; + hasOffsets = false; + hasKeys = false; + isVideo = false; + sttsBox.clear(); + cttsBox.clear(); + stszBox.clear(); + stcoBox.clear(); + co64Box.clear(); + stscBox.clear(); + stssBox.clear(); + stco64 = false; + trafMode = false; + trackId = 0; + } + + void TrackHeader::nextMoof(){ + timeIndex = timeSample = timeFirstSample = timeTotal = timeExtra = 0; + bposIndex = bposSample = 0; + offsetIndex = offsetSample = 0; + + trafMode = true; + trafs.clear(); + } + + /// Switch back to non-moof reading mode, disabling TRAF mode and wiping all TRAF boxes + void TrackHeader::revertToMoov(){ + timeIndex = timeSample = timeFirstSample = timeTotal = timeExtra = 0; + bposIndex = bposSample = 0; + offsetIndex = offsetSample = 0; + keyIndex = keySample = 0; + + trafMode = false; + trafs.clear(); + } + + void TrackHeader::read(TRAK &trakBox){ + vidWidth = vidHeight = audChannels = audRate = audSize = 0; + codec.clear(); + + MDIA mdiaBox = trakBox.getChild(); + timeScale = mdiaBox.getChild().getTimeScale(); + lang = mdiaBox.getChild().getLanguage(); + + TKHD tkhd = trakBox.getChild(); + trackId = tkhd.getTrackID(); + if (tkhd.getWidth()){ + vidWidth = tkhd.getWidth(); + vidHeight = tkhd.getHeight(); + } + + STBL stblBox = mdiaBox.getChild().getChild(); + + sttsBox.copyFrom(stblBox.getChild()); + + cttsBox.copyFrom(stblBox.getChild()); + hasOffsets = cttsBox.isType("ctts"); + + stszBox.copyFrom(stblBox.getChild()); + + stcoBox.copyFrom(stblBox.getChild()); + co64Box.copyFrom(stblBox.getChild()); + stco64 = co64Box.isType("co64"); + + stscBox.copyFrom(stblBox.getChild()); + + 
stssBox.copyFrom(stblBox.getChild()); + hasKeys = stssBox.isType("stss"); + + Box sEntryBox = stblBox.getChild().getEntry(0); + sType = sEntryBox.getType(); + + std::string handler = mdiaBox.getChild().getHandlerType(); + isVideo = false; + if (handler == "vide"){ + isVideo = true; + trackType = "video"; + }else if (handler == "soun"){ + trackType = "audio"; + }else if (handler == "sbtl"){ + trackType = "meta"; + }else{ + INFO_MSG("Unsupported handler: %s", handler.c_str()); + } + + isCompatible = false; + + if (sType == "avc1" || sType == "h264" || sType == "mp4v"){ + codec = "H264"; + isCompatible = true; + VisualSampleEntry &vEntryBox = (VisualSampleEntry &)sEntryBox; + if (!vidWidth){ + vidWidth = vEntryBox.getWidth(); + vidHeight = vEntryBox.getHeight(); + } + MP4::Box initBox = vEntryBox.getCLAP(); + if (initBox.isType("avcC")){initData.assign(initBox.payload(), initBox.payloadSize());} + initBox = vEntryBox.getPASP(); + if (initBox.isType("avcC")){initData.assign(initBox.payload(), initBox.payloadSize());} + // Read metadata from init data if not set + if (!vidWidth){ + h264::sequenceParameterSet sps; + sps.fromDTSCInit(initData); + h264::SPSMeta spsChar = sps.getCharacteristics(); + vidWidth = spsChar.width; + vidHeight = spsChar.height; + } + } + if (sType == "hev1" || sType == "hvc1"){ + codec = "HEVC"; + isCompatible = true; + MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox; + if (!vidWidth){ + vidWidth = vEntryBox.getWidth(); + vidHeight = vEntryBox.getHeight(); + } + MP4::Box initBox = vEntryBox.getCLAP(); + if (initBox.isType("hvcC")){initData.assign(initBox.payload(), initBox.payloadSize());} + initBox = vEntryBox.getPASP(); + if (initBox.isType("hvcC")){initData.assign(initBox.payload(), initBox.payloadSize());} + } + if (sType == "av01"){ + codec = "AV1"; + isCompatible = true; + MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox; + if (!vidWidth){ + vidWidth = vEntryBox.getWidth(); + vidHeight = 
vEntryBox.getHeight(); + } + MP4::Box initBox = vEntryBox.getCLAP(); + if (initBox.isType("av1C")){initData.assign(initBox.payload(), initBox.payloadSize());} + initBox = vEntryBox.getPASP(); + if (initBox.isType("av1C")){initData.assign(initBox.payload(), initBox.payloadSize());} + } + if (sType == "mp4a" || sType == "aac " || sType == "ac-3"){ + MP4::AudioSampleEntry &aEntryBox = (MP4::AudioSampleEntry &)sEntryBox; + audRate = aEntryBox.getSampleRate(); + audChannels = aEntryBox.getChannelCount(); + audSize = 16; /// \TODO Actually get this from somewhere, probably..? + + if (sType == "ac-3"){ + codec = "AC3"; + isCompatible = true; + }else{ + MP4::Box codingBox = aEntryBox.getCodecBox(); + if (codingBox.getType() == "esds"){ + MP4::ESDS & esdsBox = (MP4::ESDS &)codingBox; + codec = esdsBox.getCodec(); + isCompatible = true; + initData = esdsBox.getInitData(); + } + if (codingBox.getType() == "wave"){ + MP4::WAVE & waveBox = (MP4::WAVE &)codingBox; + for (size_t c = 0; c < waveBox.getContentCount(); ++c){ + MP4::Box content = waveBox.getContent(c); + if (content.getType() == "esds"){ + MP4::ESDS & esdsBox = (MP4::ESDS &)content; + codec = esdsBox.getCodec(); + isCompatible = true; + initData = esdsBox.getInitData(); + } + } + } + } + } + if (sType == "tx3g"){// plain text subtitles + codec = "subtitle"; + isCompatible = true; + } + } + + void TrackHeader::read(TRAF &trafBox){ + if (!trafMode){ + // Warn anyone that forgot to call nextMoof(), hopefully preventing future issues + WARN_MSG("Reading TRAF box header without signalling start of next MOOF box first!"); + } + TRAF tBox; + trafs.push_back(tBox); + trafs.rbegin()->copyFrom(trafBox); + } + + void TrackHeader::increaseTime(uint32_t delta){ + // Calculate millisecond-time for current timestamp + uint64_t timePrev = (timeTotal * 1000) / timeScale; + timeTotal += delta; + + //Undo time shifts as much as possible + if (timeExtra){ + timeTotal -= timeExtra; + timeExtra = 0; + } + + //Make sure our timestamps go 
up by at least 1ms for every packet + if (timePrev >= (uint64_t)((timeTotal * 1000) / timeScale)){ + uint32_t wantSamples = ((timePrev+1) * timeScale) / 1000; + timeExtra += wantSamples - timeTotal; + timeTotal = wantSamples; + } + ++timeSample; + } + + + uint64_t TrackHeader::size() const { + if (!trafMode){ + return (stszBox ? stszBox.getSampleCount() : 0); + } + if (!trafs.size()){return 0;} + uint64_t parts = 0; + for (std::deque::const_iterator t = trafs.begin(); t != trafs.end(); ++t){ + std::deque runs = ((TRAF)(*t)).getChildren(); + for (std::deque::const_iterator r = runs.begin(); r != runs.end(); ++r){ + parts += r->getSampleInformationCount(); + } + } + return parts; + } + + /// Retrieves the information associated with a specific part (=frame). + /// The index is the zero-based part number, all other arguments are optional and if non-zero will be filled. + void TrackHeader::getPart(uint64_t index, uint64_t * byteOffset, uint32_t * byteLen, uint64_t * time, int32_t * timeOffset, bool * keyFrame, uint64_t moofPos){ + // Switch between reading TRAF boxes or global headers + if (!trafMode){ + // Reading global headers + + // Calculate time, if requested + if (time){ + // If we went backwards, reset our current position + if (index < timeSample){ + timeIndex = timeFirstSample = timeSample = timeExtra = timeTotal = 0; + } + // Find the packet count per chunk entry for this sample + uint64_t eCnt = sttsBox.getEntryCount(); + STTSEntry entry; + while (timeIndex < eCnt){ + entry = sttsBox.getSTTSEntry(timeIndex); + // check where the next index starts + uint64_t nextSampleIndex = timeFirstSample + entry.sampleCount; + // If the next chunk starts with a higher sample than we want, we can stop here + if (nextSampleIndex > index){break;} + timeFirstSample = nextSampleIndex; + // Increase timestamp by delta for each sample with the same delta + while (timeSample < nextSampleIndex){increaseTime(entry.sampleDelta);} + ++timeIndex; + } + + // Inside the samples with 
the same delta, we may still need to increase the timestamp. + while (timeSample < index){increaseTime(entry.sampleDelta);} + *time = (timeTotal * 1000) / timeScale; + } + + // Look up time offset, if requested and available + if (timeOffset){ + if (hasOffsets){ + // If we went backwards, reset our current position + if (index < offsetSample){ + offsetIndex = offsetSample = 0; + } + // Find the packet count per chunk entry for this sample + uint64_t eCnt = cttsBox.getEntryCount(); + CTTSEntry entry; + while (offsetIndex < eCnt){ + entry = cttsBox.getCTTSEntry(offsetIndex); + // check where the next index starts + uint64_t nextSampleIndex = offsetSample + entry.sampleCount; + // If the next chunk starts with a higher sample than we want, we can stop here + if (nextSampleIndex > index){break;} + offsetSample = nextSampleIndex; + ++offsetIndex; + } + *timeOffset = (entry.sampleOffset * 1000) / timeScale; + }else{ + // Default to zero if there are no offsets for this track + *timeOffset = 0; + } + } + + // Look up keyframe-ness, if requested and available + if (keyFrame){ + if (!isVideo){ + // Non-video tracks are never keyframes + *keyFrame = false; + }else{ + // Video tracks with keys follow them + if (hasKeys){ + // If we went backwards, reset our current position + if (index < keySample){ + keyIndex = keySample = 0; + } + // Find the packet count per chunk entry for this sample + uint64_t eCnt = stssBox.getEntryCount(); + while (keyIndex < eCnt){ + // check where the next index starts + uint64_t nextSampleIndex; + if (keyIndex + 1 < eCnt){ + nextSampleIndex = stssBox.getSampleNumber(keyIndex + 1) - 1; + }else{ + nextSampleIndex = stszBox.getSampleCount(); + } + // If the next key has a higher sample than we want, we can stop here + if (nextSampleIndex > index){break;} + keySample = nextSampleIndex; + ++keyIndex; + } + *keyFrame = (keySample == index); + }else{ + // Everything is a keyframe if there are no keys listed for a video track + *keyFrame = true; + } + } + 
} + + // Calculate byte position of packet, if requested + if (byteOffset){ + // If we went backwards, reset our current position + if (index < bposSample){ + bposIndex = bposSample = 0; + } + // Find the packet count per chunk entry for this sample + uint64_t eCnt = stscBox.getEntryCount(); + STSCEntry entry; + while (bposIndex < eCnt){ + entry = stscBox.getSTSCEntry(bposIndex); + // check where the next index starts + uint64_t nextSampleIndex; + if (bposIndex + 1 < eCnt){ + nextSampleIndex = bposSample + (stscBox.getSTSCEntry(bposIndex + 1).firstChunk - entry.firstChunk) * + entry.samplesPerChunk; + }else{ + nextSampleIndex = stszBox.getSampleCount(); + } + // If the next chunk starts with a higher sample than we want, we can stop here + if (nextSampleIndex > index){break;} + bposSample = nextSampleIndex; + ++bposIndex; + } + + // Find the chunk index the sample is in + uint64_t chunkIndex = (entry.firstChunk - 1) + ((index - bposSample) / entry.samplesPerChunk); + // Set offset to position of start of this chunk + *byteOffset = (stco64 ? 
co64Box.getChunkOffset(chunkIndex) : stcoBox.getChunkOffset(chunkIndex)); + // Increase the offset by all samples in the chunk we already passed to arrive at our current sample (64-bit unsigned counter, matching the type of index) + uint64_t sampleStart = bposSample + (chunkIndex - (entry.firstChunk - 1)) * entry.samplesPerChunk; + for (uint64_t j = sampleStart; j < index; ++j){*byteOffset += stszBox.getEntrySize(j);} + } + + // Look up byte length of packet, if requested + if (byteLen){ + *byteLen = stszBox.getEntrySize(index); + } + + // Specifically for text tracks, remove the 2-byte header if possible + if (byteOffset && byteLen && *byteLen >= 2 && sType == "tx3g"){ + *byteLen -= 2; + *byteOffset += 2; + } + }else{ + // Reading from TRAF boxes + size_t skipped = 0; + for (std::deque::const_iterator t = trafs.begin(); t != trafs.end(); ++t){ + size_t firstTRAFIndex = skipped; + std::deque runs = ((TRAF)(*t)).getChildren(); + for (std::deque::const_iterator r = runs.begin(); r != runs.end(); ++r){ + uint32_t count = r->getSampleInformationCount(); + if (index >= skipped + count){ + skipped += count; + continue; + } + // Okay, our index is inside this TRUN! + // Let's pull the TFHD box into this as well... 
+ TFHD tfhd = ((TRAF)(*t)).getChild(); + trunSampleInformation si = r->getSampleInformation(index - skipped, &tfhd); + if (byteOffset){ + size_t offset = 0; + if (tfhd.getDefaultBaseIsMoof()){ + offset += moofPos; + } + if (r->getFlags() & MP4::trundataOffset){ + offset += r->getDataOffset(); + size_t target = index - skipped; + for (size_t i = 0; i < target; ++i){ + offset += r->getSampleInformation(i, &tfhd).sampleSize; + } + }else{ + FAIL_MSG("Unimplemented: trun box does not contain a data offset!"); + } + *byteOffset = offset; + } + if (time){ + // If we went backwards, reset our current position + if (!index || index < timeSample){ + timeIndex = timeFirstSample = timeSample = timeExtra = 0; + TFDT tfdt = ((TRAF)(*t)).getChild(); + timeTotal = tfdt.getBaseMediaDecodeTime(); + } + std::deque::const_iterator runIt = runs.begin(); + uint32_t locCount = runIt->getSampleInformationCount(); + size_t locSkipped = firstTRAFIndex; + while (timeSample < index){ + // Most common case: timeSample is in the current TRUN box + if (timeSample >= skipped && timeSample < skipped + count){ + trunSampleInformation i = r->getSampleInformation(timeSample - skipped, &tfhd); + increaseTime(i.sampleDuration); + continue; + } + // Less common case: everything else + // Ensure "runIt" points towards the TRUN box that index "timeSample" is in; the end iterator is checked before every dereference + while (runIt != runs.end() && timeSample >= locSkipped + locCount){ + locSkipped += locCount; + ++runIt; + locCount = (runIt != runs.end()) ? runIt->getSampleInformationCount() : 0; + } + // Abort increase if we can't find the box. This _should_ never happen... + if (runIt == runs.end()){ + WARN_MSG("Attempted to read time information from a TRAF box that did not contain the sample we're reading!"); + break; + } + // Cool, now we know it's valid, increase the time accordingly. 
+ trunSampleInformation i = runIt->getSampleInformation(timeSample - locSkipped, &tfhd); + increaseTime(i.sampleDuration); + } + *time = (timeTotal * 1000) / timeScale; + } + if (byteLen){ + *byteLen = si.sampleSize; + } + if (timeOffset){ + *timeOffset = (si.sampleOffset * 1000) / timeScale; + } + if (keyFrame){ + *keyFrame = !(si.sampleFlags & MP4::noKeySample); + } + return; + } + } + } + + } + + +} // namespace MP4 + diff --git a/lib/mp4_stream.h b/lib/mp4_stream.h new file mode 100644 index 00000000..7625760e --- /dev/null +++ b/lib/mp4_stream.h @@ -0,0 +1,119 @@ +#include "dtsc.h" +#include "util.h" +#include "mp4_generic.h" + +namespace MP4{ + + class PartTime{ + public: + PartTime() : time(0), duration(0), offset(0), trackID(0), bpos(0), size(0), index(0), keyframe(false){} + bool operator<(const PartTime &rhs) const{ + if (time < rhs.time){return true;} + if (time > rhs.time){return false;} + if (trackID < rhs.trackID){return true;} + return (trackID == rhs.trackID && bpos < rhs.bpos); + } + uint64_t time; + uint64_t duration; + int32_t offset; + size_t trackID; + uint64_t bpos; + uint32_t size; + uint64_t index; + bool keyframe; + }; + + + class TrackHeader{ + public: + TrackHeader(); + + /// Reads (new) track header information for processing + void read(TRAK &trakBox); + /// Reads (new) track header information for processing + void read(TRAF &trafBox); + + /// Signal that we're going to be reading the next moof box now. + /// Wipes internal TRAF boxes, ensures TRAF mode is enabled so no reads happen from MOOV headers anymore. + void nextMoof(); + + /// Switch back to non-moof reading mode, disabling TRAF mode and wiping all TRAF boxes + void revertToMoov(); + + /// Returns true if we know how to parse this track, false otherwise + bool compatible() const {return isCompatible;} + + /// Retrieves the information associated with a specific part (=frame). 
+ void getPart(uint64_t index, uint64_t * byteOffset = 0, uint32_t * byteLen = 0, uint64_t * time = 0, int32_t * timeOffset = 0, bool * keyFrame = 0, uint64_t moofPos = 0); + + /// Returns the number of parts this track header contains + uint64_t size() const; + + // Information about the track. Public for convenience, but setting them has no effect. + // The exception is sType, which affects processing of the data in some cases and should not be written to. + // All of these are filled by the read() function when reading an MP4::TRAK box. + size_t trackId; ///< MP4-internal ID for this track + uint64_t timeScale; ///< Timescale in units per second + std::string sType; ///< MP4-internal codec name for this track - do not write to externally! + std::string codec; ///< Mist codec name for this track + std::string trackType; ///< Which Mist-compatible track type this is + std::string initData; ///< Initialization data for the track, in Mist-compatible format + std::string lang; ///< Language of the track + uint32_t vidWidth, vidHeight; + uint32_t audChannels, audRate, audSize; + + private: + /// Internal function that increases the time of the current part to the next part + void increaseTime(uint32_t delta); + + // next variables are needed for the stsc/stco loop + uint64_t bposIndex; ///< Current read index in stsc box + uint64_t bposSample; ///< First sample number in current chunk entry + // next variables are needed for the stts loop + uint64_t timeIndex; ///< Index in STTS box + uint64_t timeSample; ///< Sample counter for STTS box + uint64_t timeFirstSample; ///< First sample in STTS box entry + uint64_t timeTotal; ///< Total timestamp for STTS box + uint64_t timeExtra; ///< Extra timestamp for STTS box + uint64_t offsetIndex; ///< Index in CTTS box + uint64_t offsetSample; ///< First sample number in CTTS entry + uint64_t keyIndex; ///< Index in stss box + uint64_t keySample; ///< First sample number in stss entry + + STSS stssBox; ///< keyframe list + STCO 
stcoBox; ///< positions of chunks (32-bit) + CO64 co64Box; ///< positions of chunks (64-bit) + STSZ stszBox; ///< packet sizes + STTS sttsBox; ///< packet durations + CTTS cttsBox; ///< packet time offsets (optional) + STSC stscBox; ///< packet count per chunk + std::deque trafs; ///< Current traf boxes, if any + bool stco64; // 64 bit chunk offsets? + bool hasOffsets; ///< Are time offsets present? + bool hasKeys; ///< Are keyframes listed? + bool isVideo; ///< Is this a video track? + bool isCompatible; ///< True if Mist supports this track type + bool trafMode; ///< True if we are ignoring the moov headers and only looking at traf headers + }; + + class Stream{ + public: + Stream(); + ~Stream(); + void open(Util::ResizeablePointer & ptr); + bool hasPacket(size_t tid) const; + bool hasPacket() const; + void getPacket(size_t tid, DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx); + uint32_t getEarliestPID(); + void getEarliestPacket(DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx); + void initializeMetadata(DTSC::Meta &meta, size_t tid = INVALID_TRACK_ID, size_t mappingId = INVALID_TRACK_ID); + private: + std::map trkHdrs; + std::map codecs; + std::set curPositions; + MOOV moovBox; + Box mdatBox; + }; + +} // namespace MP4 + diff --git a/src/analysers/analyser_mp4.cpp b/src/analysers/analyser_mp4.cpp index 5eab2e01..30d4fa2b 100644 --- a/src/analysers/analyser_mp4.cpp +++ b/src/analysers/analyser_mp4.cpp @@ -3,113 +3,6 @@ #include #include - -class mp4TrackHeader{ -public: - mp4TrackHeader(){ - initialised = false; - stscStart = 0; - sampleIndex = 0; - deltaIndex = 0; - deltaPos = 0; - deltaTotal = 0; - offsetIndex = 0; - offsetPos = 0; - sttsBox.clear(); - hasCTTS = false; - cttsBox.clear(); - stszBox.clear(); - stcoBox.clear(); - co64Box.clear(); - stco64 = false; - trackId = 0; - } - void read(MP4::TRAK &trakBox){ - initialised = false; - std::string tmp; // temporary string for copying box data - MP4::Box trakLoopPeek; - timeScale = 1; - - 
MP4::MDIA mdiaBox = trakBox.getChild(); - - timeScale = mdiaBox.getChild().getTimeScale(); - trackId = trakBox.getChild().getTrackID(); - - MP4::STBL stblBox = mdiaBox.getChild().getChild(); - - sttsBox.copyFrom(stblBox.getChild()); - cttsBox.copyFrom(stblBox.getChild()); - stszBox.copyFrom(stblBox.getChild()); - stcoBox.copyFrom(stblBox.getChild()); - co64Box.copyFrom(stblBox.getChild()); - stscBox.copyFrom(stblBox.getChild()); - stco64 = co64Box.isType("co64"); - hasCTTS = cttsBox.isType("ctts"); - } - size_t trackId; - MP4::STCO stcoBox; - MP4::CO64 co64Box; - MP4::STSZ stszBox; - MP4::STTS sttsBox; - bool hasCTTS; - MP4::CTTS cttsBox; - MP4::STSC stscBox; - uint64_t timeScale; - void getPart(uint64_t index, uint64_t &offset, uint64_t &size){ - if (index < sampleIndex){ - sampleIndex = 0; - stscStart = 0; - } - - uint64_t stscCount = stscBox.getEntryCount(); - MP4::STSCEntry stscEntry; - while (stscStart < stscCount){ - stscEntry = stscBox.getSTSCEntry(stscStart); - // check where the next index starts - uint64_t nextSampleIndex; - if (stscStart + 1 < stscCount){ - nextSampleIndex = sampleIndex + (stscBox.getSTSCEntry(stscStart + 1).firstChunk - stscEntry.firstChunk) * - stscEntry.samplesPerChunk; - }else{ - nextSampleIndex = stszBox.getSampleCount(); - } - if (nextSampleIndex > index){break;} - sampleIndex = nextSampleIndex; - ++stscStart; - } - - if (sampleIndex > index){ - FAIL_MSG("Could not complete seek - not in file (%" PRIu64 " > %" PRIu64 ")", sampleIndex, index); - } - - uint64_t stcoPlace = (stscEntry.firstChunk - 1) + ((index - sampleIndex) / stscEntry.samplesPerChunk); - uint64_t stszStart = sampleIndex + (stcoPlace - (stscEntry.firstChunk - 1)) * stscEntry.samplesPerChunk; - - offset = (stco64 ? 
co64Box.getChunkOffset(stcoPlace) : stcoBox.getChunkOffset(stcoPlace)); - for (int j = stszStart; j < index; j++){offset += stszBox.getEntrySize(j);} - size = stszBox.getEntrySize(index); - - initialised = true; - } - uint64_t size(){return (stszBox.asBox() ? stszBox.getSampleCount() : 0);} - -private: - bool initialised; - // next variables are needed for the stsc/stco loop - uint64_t stscStart; - uint64_t sampleIndex; - // next variables are needed for the stts loop - uint64_t deltaIndex; ///< Index in STTS box - uint64_t deltaPos; ///< Sample counter for STTS box - uint64_t deltaTotal; ///< Total timestamp for STTS box - // for CTTS box loop - uint64_t offsetIndex; ///< Index in CTTS box - uint64_t offsetPos; ///< Sample counter for CTTS box - - bool stco64; -}; - - void AnalyserMP4::init(Util::Config &conf){ Analyser::init(conf); } @@ -147,22 +40,72 @@ bool AnalyserMP4::parsePacket(){ if (mp4Data.read(mp4Buffer)){ INFO_MSG("Read a %" PRIu64 "b %s box at position %" PRIu64, mp4Data.boxedSize(), mp4Data.getType().c_str(), prePos); + + // If we get an mdat, analyse it if we have known tracks, otherwise store it for later if (mp4Data.getType() == "mdat"){ + // Remember where we saw the mdat box mdatPos = prePos; - analyseData(mp4Data); - return true; + if (hdrs.size()){ + // We have tracks, analyse it directly + analyseData(mp4Data); + }else{ + // No tracks yet, mdat is probably before the moov, we'll store a copy for later. 
+ mdat.assign(mp4Data.asBox(), mp4Data.boxedSize()); + } } + + // moof is parsed into the tracks we already have if (mp4Data.getType() == "moof"){ - moof.assign(mp4Data.asBox(), mp4Data.boxedSize()); moofPos = prePos; + // Indicate that we're reading the next moof box to all track headers + for (std::map::iterator t = hdrs.begin(); t != hdrs.end(); ++t){ + t->second.nextMoof(); + } + // Loop over traf boxes inside the moof box + std::deque trafs = ((MP4::MOOF*)&mp4Data)->getChildren(); + for (std::deque::iterator t = trafs.begin(); t != trafs.end(); ++t){ + if (!(t->getChild())){ + WARN_MSG("Could not find thfd box inside traf box!"); + continue; + } + uint32_t trackId = t->getChild().getTrackID(); + if (!hdrs.count(trackId)){ + WARN_MSG("Could not find matching trak box for traf box %" PRIu32 "!", trackId); + continue; + } + hdrs[trackId].read(*t); + } } + + // moov contains tracks; we parse it (wiping existing tracks, if any) and if we saw an mdat earlier, now analyse it. if (mp4Data.getType() == "moov"){ - moov.assign(mp4Data.asBox(), mp4Data.boxedSize()); + // Remember where we saw this box moovPos = prePos; + // Wipe existing headers, we got new ones. + hdrs.clear(); + // Loop over trak boxes inside the moov box + std::deque traks = ((MP4::MOOV*)&mp4Data)->getChildren(); + for (std::deque::iterator trakIt = traks.begin(); trakIt != traks.end(); trakIt++){ + // Create a temporary header, since we don't know the trackId yet... + MP4::TrackHeader tHdr; + tHdr.read(*trakIt); + if (!tHdr.compatible()){ + INFO_MSG("Unsupported: %s", tHdr.sType.c_str()); + }else{ + INFO_MSG("Detected %s", tHdr.codec.c_str()); + } + // Regardless of support, we now put it in our track header array (after all, even unsupported tracks can be analysed!) 
+ hdrs[tHdr.trackId].read(*trakIt); + } + // If we stored an mdat earlier, we can now analyse and then wipe it + if (mdat.size()){ + MP4::Box mdatBox(mdat, false); + analyseData(mdatBox); + mdat.truncate(0); + } } - if (detail >= 2){ - std::cout << mp4Data.toPrettyString(0) << std::endl; - } + // No matter what box we saw, (try to) pretty-print it + if (detail >= 2){std::cout << mp4Data.toPrettyString(0) << std::endl;} return true; } FAIL_MSG("Could not read box at position %" PRIu64, prePos); @@ -202,66 +145,58 @@ h264::nalUnit * getNalUnit(const char * data, size_t pktLen){ } void AnalyserMP4::analyseData(MP4::Box & mdatBox){ - if (moov.size()){ - MP4::Box globHdr(moov, false); - std::deque traks = ((MP4::MOOV*)&globHdr)->getChildren(); - - size_t trkCounter = 0; - for (std::deque::iterator trakIt = traks.begin(); trakIt != traks.end(); trakIt++){ - trkCounter++; - MP4::MDIA mdiaBox = trakIt->getChild(); - - std::string hdlrType = mdiaBox.getChild().getHandlerType(); - if (hdlrType != "vide" && hdlrType != "soun" && hdlrType != "sbtl"){ - INFO_MSG("Unsupported handler: %s", hdlrType.c_str()); + // Abort if we have no headers + if (!hdrs.size()){return;} + // Loop over known headers + for (std::map::iterator t = hdrs.begin(); t != hdrs.end(); ++t){ + size_t noPkts = t->second.size(); + for (size_t i = 0; i < noPkts; ++i){ + uint64_t offset = 0, time = 0; + int32_t timeOffset = 0; + uint32_t size = 0; + bool keyFrame = false; + t->second.getPart(i, &offset, &size, &time, &timeOffset, &keyFrame, moofPos); + // Update mediaTime with last parsed packet, if time increased + if (time > mediaTime){mediaTime = time;} + std::cout << "Packet " << i << " for track " << t->first << " (" << t->second.codec << ")"; + if (keyFrame){std::cout << " (KEY)";} + std::cout << ": " << size << "b @" << offset << ", T " << time; + if (timeOffset){ + if (timeOffset > 0){ + std::cout << "+" << timeOffset; + }else{ + std::cout << timeOffset; + } + } + std::cout << std::endl; + if (offset < 
mdatPos){ + std::cout << "Data is before mdat!" << std::endl; continue; } + if (offset - mdatPos + size > mdatBox.boxedSize()){ + std::cout << "Data is after mdat!" << std::endl; + continue; + } + if (detail < 4){continue;} + const char * ptr = mdatBox.asBox() - mdatPos + offset; + if (t->second.codec == "H264"){ + size_t j = 0; + while (j+4 <= size){ + uint32_t len = Bit::btohl(ptr+j); + std::cout << len << " bytes: "; + printByteRange(ptr, j, 4); + if (j+4+len > size){len = size-j-4;} - std::string sType = mdiaBox.getChild().getChild().getChild().getEntry(0).getType(); - if (sType == "avc1" || sType == "h264" || sType == "mp4v"){sType = "H264";} - if (sType == "hev1" || sType == "hvc1"){sType = "HEVC";} - if (sType == "ac-3"){sType = "AC3";} - if (sType == "tx3g"){sType = "subtitle";} - INFO_MSG("Detected %s", sType.c_str()); - mp4TrackHeader tHdr; - tHdr.read(*trakIt); - size_t noPkts = tHdr.size(); - for (size_t i = 0; i < noPkts; ++i){ - uint64_t offset = 0, size = 0; - tHdr.getPart(i, offset, size); - std::cout << "Packet " << i << " for track " << trkCounter << " (" << sType << "): " << size << " bytes" << std::endl; - if (offset < mdatPos){ - std::cout << "Data is before mdat!" << std::endl; - continue; + h264::nalUnit * nalu = getNalUnit(ptr+j+4, len); + nalu->toPrettyString(std::cout); + delete nalu; + if (detail > 5){printByteRange(ptr, j+4, j+4+len);} + j += 4 + len; } - if (offset - mdatPos + size > mdatBox.boxedSize()){ - std::cout << "Data is after mdat!" 
<< std::endl; - continue; - } - const char * ptr = mdatBox.asBox() - mdatPos + offset; - if (sType == "H264"){ - size_t j = 0; - while (j+4 <= size){ - uint32_t len = Bit::btohl(ptr+j); - std::cout << len << " bytes: "; - printByteRange(ptr, j, 4); - if (j+4+len > size){len = size-j-4;} - - h264::nalUnit * nalu = getNalUnit(ptr+j+4, len); - nalu->toPrettyString(std::cout); - delete nalu; - printByteRange(ptr, j+4, j+4+len); - j += 4 + len; - } - }else{ - printByteRange(ptr, 0, size); - } - + }else{ + if (detail > 5){printByteRange(ptr, 0, size);} } } } - if (moof.size()){ - } - ///\TODO update mediaTime with the current timestamp } diff --git a/src/analysers/analyser_mp4.h b/src/analysers/analyser_mp4.h index 4bfc62a6..bdc79416 100644 --- a/src/analysers/analyser_mp4.h +++ b/src/analysers/analyser_mp4.h @@ -1,5 +1,6 @@ #include "analyser.h" #include +#include class AnalyserMP4 : public Analyser{ public: @@ -10,6 +11,7 @@ public: private: Util::ResizeablePointer moof; Util::ResizeablePointer moov; + Util::ResizeablePointer mdat; uint64_t moovPos; uint64_t moofPos; uint64_t mdatPos; @@ -19,4 +21,5 @@ private: MP4::Box mp4Data; uint64_t curPos; uint64_t prePos; + std::map hdrs; }; diff --git a/src/input/input_mp4.cpp b/src/input/input_mp4.cpp index c664e8ee..9ad361f9 100644 --- a/src/input/input_mp4.cpp +++ b/src/input/input_mp4.cpp @@ -1,10 +1,7 @@ -#include #include #include #include -#include #include -#include #include #include #include @@ -16,84 +13,9 @@ namespace Mist{ - mp4TrackHeader::mp4TrackHeader(){ - initialised = false; - stscStart = 0; - sampleIndex = 0; - deltaIndex = 0; - deltaPos = 0; - deltaTotal = 0; - offsetIndex = 0; - offsetPos = 0; - stscBox.clear(); - stszBox.clear(); - stcoBox.clear(); - co64Box.clear(); - stco64 = false; - trackId = 0; - } - - uint64_t mp4TrackHeader::size(){return (stszBox.asBox() ? 
stszBox.getSampleCount() : 0);} - - void mp4TrackHeader::read(MP4::TRAK &trakBox){ - initialised = false; - std::string tmp; // temporary string for copying box data - MP4::Box trakLoopPeek; - timeScale = 1; - - MP4::MDIA mdiaBox = trakBox.getChild(); - - timeScale = mdiaBox.getChild().getTimeScale(); - trackId = trakBox.getChild().getTrackID(); - - MP4::STBL stblBox = mdiaBox.getChild().getChild(); - - stszBox.copyFrom(stblBox.getChild()); - stcoBox.copyFrom(stblBox.getChild()); - co64Box.copyFrom(stblBox.getChild()); - stscBox.copyFrom(stblBox.getChild()); - stco64 = co64Box.isType("co64"); - } - - void mp4TrackHeader::getPart(uint64_t index, uint64_t &offset){ - if (index < sampleIndex){ - sampleIndex = 0; - stscStart = 0; - } - - uint64_t stscCount = stscBox.getEntryCount(); - MP4::STSCEntry stscEntry; - while (stscStart < stscCount){ - stscEntry = stscBox.getSTSCEntry(stscStart); - // check where the next index starts - uint64_t nextSampleIndex; - if (stscStart + 1 < stscCount){ - nextSampleIndex = sampleIndex + (stscBox.getSTSCEntry(stscStart + 1).firstChunk - stscEntry.firstChunk) * - stscEntry.samplesPerChunk; - }else{ - nextSampleIndex = stszBox.getSampleCount(); - } - if (nextSampleIndex > index){break;} - sampleIndex = nextSampleIndex; - ++stscStart; - } - - if (sampleIndex > index){ - FAIL_MSG("Could not complete seek - not in file (%" PRIu64 " > %" PRIu64 ")", sampleIndex, index); - } - - uint64_t stcoPlace = (stscEntry.firstChunk - 1) + ((index - sampleIndex) / stscEntry.samplesPerChunk); - uint64_t stszStart = sampleIndex + (stcoPlace - (stscEntry.firstChunk - 1)) * stscEntry.samplesPerChunk; - - offset = (stco64 ? 
co64Box.getChunkOffset(stcoPlace) : stcoBox.getChunkOffset(stcoPlace)); - for (int j = stszStart; j < index; j++){offset += stszBox.getEntrySize(j);} - - initialised = true; - } - - mp4TrackHeader &InputMP4::headerData(size_t trackID){ - static mp4TrackHeader none; - for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); it++){ + MP4::TrackHeader &InputMP4::headerData(size_t trackID){ + static MP4::TrackHeader none; + for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); it++){ if (it->trackId == trackID){return *it;} } return none; @@ -123,6 +45,7 @@ namespace Mist{ capa["codecs"]["audio"].append("AC3"); capa["codecs"]["audio"].append("MP3"); readPos = 0; + nextBox = 0; } bool InputMP4::checkArguments(){ @@ -193,6 +116,7 @@ namespace Mist{ std::string boxType = std::string(readBuffer+4, 4); uint64_t boxSize = MP4::calcBoxSize(readBuffer); if (boxType == "moov"){ + moovPos = readPos; while (readBuffer.size() < boxSize && inFile && keepRunning()){inFile.readSome(boxSize-readBuffer.size(), *this);} if (readBuffer.size() < boxSize){ Util::logExitReason(ER_FORMAT_SPECIFIC, "Could not read entire MOOV box into memory"); @@ -203,11 +127,10 @@ namespace Mist{ // for all box in moov std::deque trak = ((MP4::MOOV*)&moovBox)->getChildren(); for (std::deque::iterator trakIt = trak.begin(); trakIt != trak.end(); trakIt++){ - trackHeaders.push_back(mp4TrackHeader()); + trackHeaders.push_back(MP4::TrackHeader()); trackHeaders.rbegin()->read(*trakIt); } hasMoov = true; - break; } activityCounter = Util::bootSecs(); //Skip to next box @@ -221,6 +144,8 @@ namespace Mist{ } readPos = inFile.getPos(); } + // Stop if we've found a MOOV box - we'll do the rest afterwards + if (hasMoov){break;} } if (!hasMoov){ @@ -243,369 +168,199 @@ namespace Mist{ meta.reInit(isSingular() ? 
streamName : ""); tNumber = 0; - // Create header file from MP4 data - MP4::Box moovBox(readBuffer, false); - - std::deque trak = ((MP4::MOOV*)&moovBox)->getChildren(); - HIGH_MSG("Obtained %zu trak Boxes", trak.size()); - - uint64_t globalTimeOffset = 0; - for (std::deque::iterator trakIt = trak.begin(); trakIt != trak.end(); trakIt++){ - MP4::MDIA mdiaBox = trakIt->getChild(); - std::string hdlrType = mdiaBox.getChild().getHandlerType(); - if (hdlrType != "vide" && hdlrType != "soun" && hdlrType != "sbtl"){ - INFO_MSG("Unsupported handler: %s", hdlrType.c_str()); - continue; - } - - MP4::STBL stblBox = mdiaBox.getChild().getChild(); - - MP4::STSD stsdBox = stblBox.getChild(); - MP4::Box sEntryBox = stsdBox.getEntry(0); - std::string sType = sEntryBox.getType(); - - if (!(sType == "avc1" || sType == "h264" || sType == "mp4v" || sType == "hev1" || sType == "hvc1" || sType == "mp4a" || sType == "aac " || sType == "ac-3" || sType == "tx3g")){ - INFO_MSG("Unsupported track type: %s", sType.c_str()); - continue; - } - - MP4::CTTS cttsBox = stblBox.getChild(); // optional ctts box - MP4::MDHD mdhdBox = mdiaBox.getChild(); - uint64_t timescale = mdhdBox.getTimeScale(); - - int64_t DTS_CTS_offset = 0; ///< Difference between composition time and decode time. Always positive. 
- if (cttsBox.isType("ctts")){ - uint32_t cttsCount = cttsBox.getEntryCount(); - for (uint32_t i = 0; i < cttsCount; ++i){ - MP4::CTTSEntry e = cttsBox.getCTTSEntry(i); - int64_t o = (-e.sampleOffset * 1000) / (int64_t)timescale; - if (o > DTS_CTS_offset){DTS_CTS_offset = o;} - } - if (DTS_CTS_offset > globalTimeOffset){globalTimeOffset = DTS_CTS_offset;} - } - } - - for (std::deque::iterator trakIt = trak.begin(); trakIt != trak.end(); trakIt++){ - MP4::MDIA mdiaBox = trakIt->getChild(); - - std::string hdlrType = mdiaBox.getChild().getHandlerType(); - if (hdlrType != "vide" && hdlrType != "soun" && hdlrType != "sbtl"){ - INFO_MSG("Unsupported handler: %s", hdlrType.c_str()); - continue; - } - - MP4::STBL stblBox = mdiaBox.getChild().getChild(); - - MP4::STSD stsdBox = stblBox.getChild(); - MP4::Box sEntryBox = stsdBox.getEntry(0); - std::string sType = sEntryBox.getType(); - - if (!(sType == "avc1" || sType == "h264" || sType == "mp4v" || sType == "hev1" || sType == "hvc1" || sType == "mp4a" || sType == "aac " || sType == "ac-3" || sType == "tx3g" || sType == "av01")){ - INFO_MSG("Unsupported track type: %s", sType.c_str()); - continue; - } - - tNumber = meta.addTrack(); - - MP4::TKHD tkhdBox = trakIt->getChild(); - if (tkhdBox.getWidth() > 0){ - meta.setWidth(tNumber, tkhdBox.getWidth()); - meta.setHeight(tNumber, tkhdBox.getHeight()); - } - meta.setID(tNumber, tkhdBox.getTrackID()); - - MP4::MDHD mdhdBox = mdiaBox.getChild(); - uint64_t timescale = mdhdBox.getTimeScale(); - meta.setLang(tNumber, mdhdBox.getLanguage()); - - HIGH_MSG("Found track %zu of type %s", tNumber, sType.c_str()); - - if (sType == "avc1" || sType == "h264" || sType == "mp4v"){ - MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox; - meta.setType(tNumber, "video"); - meta.setCodec(tNumber, "H264"); - if (!meta.getWidth(tNumber)){ - meta.setWidth(tNumber, vEntryBox.getWidth()); - meta.setHeight(tNumber, vEntryBox.getHeight()); - } - MP4::Box initBox = vEntryBox.getCLAP(); 
- if (initBox.isType("avcC")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - initBox = vEntryBox.getPASP(); - if (initBox.isType("avcC")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - /// this is a hacky way around invalid FLV data (since it gets ignored nearly - /// everywhere, but we do need correct data... - if (!meta.getWidth(tNumber)){ - h264::sequenceParameterSet sps; - sps.fromDTSCInit(meta.getInit(tNumber)); - h264::SPSMeta spsChar = sps.getCharacteristics(); - meta.setWidth(tNumber, spsChar.width); - meta.setHeight(tNumber, spsChar.height); - } - } - if (sType == "hev1" || sType == "hvc1"){ - MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox; - meta.setType(tNumber, "video"); - meta.setCodec(tNumber, "HEVC"); - if (!meta.getWidth(tNumber)){ - meta.setWidth(tNumber, vEntryBox.getWidth()); - meta.setHeight(tNumber, vEntryBox.getHeight()); - } - MP4::Box initBox = vEntryBox.getCLAP(); - if (initBox.isType("hvcC")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - initBox = vEntryBox.getPASP(); - if (initBox.isType("hvcC")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - } - if (sType == "av01"){ - MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox; - meta.setType(tNumber, "video"); - meta.setCodec(tNumber, "AV1"); - if (!meta.getWidth(tNumber)){ - meta.setWidth(tNumber, vEntryBox.getWidth()); - meta.setHeight(tNumber, vEntryBox.getHeight()); - } - MP4::Box initBox = vEntryBox.getCLAP(); - if (initBox.isType("av1C")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - initBox = vEntryBox.getPASP(); - if (initBox.isType("av1C")){ - meta.setInit(tNumber, initBox.payload(), initBox.payloadSize()); - } - } - if (sType == "mp4a" || sType == "aac " || sType == "ac-3"){ - MP4::AudioSampleEntry &aEntryBox = (MP4::AudioSampleEntry &)sEntryBox; - meta.setType(tNumber, "audio"); - 
meta.setChannels(tNumber, aEntryBox.getChannelCount()); - meta.setRate(tNumber, aEntryBox.getSampleRate()); - - if (sType == "ac-3"){ - meta.setCodec(tNumber, "AC3"); - }else{ - MP4::Box codingBox = aEntryBox.getCodecBox(); - if (codingBox.getType() == "esds"){ - MP4::ESDS & esdsBox = (MP4::ESDS &)codingBox; - meta.setCodec(tNumber, esdsBox.getCodec()); - meta.setInit(tNumber, esdsBox.getInitData()); - } - if (codingBox.getType() == "wave"){ - MP4::WAVE & waveBox = (MP4::WAVE &)codingBox; - for (size_t c = 0; c < waveBox.getContentCount(); ++c){ - MP4::Box content = waveBox.getContent(c); - if (content.getType() == "esds"){ - MP4::ESDS & esdsBox = (MP4::ESDS &)content; - meta.setCodec(tNumber, esdsBox.getCodec()); - meta.setInit(tNumber, esdsBox.getInitData()); - } - } - } - } - meta.setSize(tNumber, 16); ///\todo this might be nice to calculate from mp4 file; - } - if (sType == "tx3g"){// plain text subtitles - meta.setType(tNumber, "meta"); - meta.setCodec(tNumber, "subtitle"); - } - - MP4::STSS stssBox = stblBox.getChild(); - MP4::STTS sttsBox = stblBox.getChild(); - MP4::STSZ stszBox = stblBox.getChild(); - MP4::STCO stcoBox = stblBox.getChild(); - MP4::CO64 co64Box = stblBox.getChild(); - MP4::STSC stscBox = stblBox.getChild(); - MP4::CTTS cttsBox = stblBox.getChild(); // optional ctts box - - bool stco64 = co64Box.isType("co64"); - bool hasCTTS = cttsBox.isType("ctts"); - int64_t DTS_CTS_offset = 0; ///< Difference between composition time and decode time. Always positive. 
- if (hasCTTS){ - uint32_t cttsCount = cttsBox.getEntryCount(); - for (uint32_t i = 0; i < cttsCount; ++i){ - MP4::CTTSEntry e = cttsBox.getCTTSEntry(i); - int64_t o = (-e.sampleOffset * 1000) / (int64_t)timescale; - if (o > DTS_CTS_offset){DTS_CTS_offset = o;} - } - } - INFO_MSG("Calculated DTS/CTS offset for track %zu: %" PRId64, tNumber, DTS_CTS_offset); - - uint64_t totaldur = 0; ///\todo note: set this to begin time - uint64_t totalExtraDur = 0; - mp4PartBpos BsetPart; - - uint64_t entryNo = 0; - uint64_t sampleNo = 0; - - uint64_t stssIndex = 0; - uint64_t stcoIndex = 0; - uint64_t stscIndex = 0; - uint64_t cttsIndex = 0; // current ctts Index we are reading - uint64_t cttsEntryRead = 0; // current part of ctts we are reading - - uint64_t stssCount = stssBox.getEntryCount(); - uint64_t stscCount = stscBox.getEntryCount(); - uint64_t stszCount = stszBox.getSampleCount(); - uint64_t stcoCount = (stco64 ? co64Box.getEntryCount() : stcoBox.getEntryCount()); - - MP4::STTSEntry sttsEntry = sttsBox.getSTTSEntry(0); - - uint32_t fromSTCOinSTSC = 0; - uint64_t tmpOffset = (stco64 ? co64Box.getChunkOffset(0) : stcoBox.getChunkOffset(0)); - - uint64_t nextFirstChunk = (stscCount > 1 ? stscBox.getSTSCEntry(1).firstChunk - 1 : stcoCount); - - for (uint64_t stszIndex = 0; stszIndex < stszCount; ++stszIndex){ - if (stcoIndex >= nextFirstChunk){ - ++stscIndex; - nextFirstChunk = - (stscIndex + 1 < stscCount ? 
stscBox.getSTSCEntry(stscIndex + 1).firstChunk - 1 : stcoCount); - } - BsetPart.keyframe = (meta.getType(tNumber) == "video" && stssIndex < stssCount && - stszIndex + 1 == stssBox.getSampleNumber(stssIndex)); - if (BsetPart.keyframe){++stssIndex;} - // in bpos set - BsetPart.stcoNr = stcoIndex; - // bpos = chunkoffset[samplenr] in stco - BsetPart.bpos = tmpOffset; - ++fromSTCOinSTSC; - if (fromSTCOinSTSC < stscBox.getSTSCEntry(stscIndex).samplesPerChunk){// as long as we are still in this chunk - tmpOffset += stszBox.getEntrySize(stszIndex); - }else{ - ++stcoIndex; - fromSTCOinSTSC = 0; - tmpOffset = (stco64 ? co64Box.getChunkOffset(stcoIndex) : stcoBox.getChunkOffset(stcoIndex)); - } - BsetPart.time = (totaldur * 1000) / timescale; - totaldur += sttsEntry.sampleDelta; - - //Undo time shifts as much as possible - if (totalExtraDur){ - totaldur -= totalExtraDur; - totalExtraDur = 0; - } - - //Make sure our timestamps go up by at least 1 for every packet - if (BsetPart.time >= (uint64_t)((totaldur * 1000) / timescale)){ - uint32_t wantSamples = ((BsetPart.time+1) * timescale) / 1000; - totalExtraDur += wantSamples - totaldur; - totaldur = wantSamples; - } - - sampleNo++; - if (sampleNo >= sttsEntry.sampleCount){ - ++entryNo; - sampleNo = 0; - if (entryNo < sttsBox.getEntryCount()){sttsEntry = sttsBox.getSTTSEntry(entryNo);} - } - - if (hasCTTS){ - MP4::CTTSEntry cttsEntry = cttsBox.getCTTSEntry(cttsIndex); - cttsEntryRead++; - if (cttsEntryRead >= cttsEntry.sampleCount){ - ++cttsIndex; - cttsEntryRead = 0; - } - BsetPart.timeOffset = (cttsEntry.sampleOffset * 1000) / (int64_t)timescale; - }else{ - BsetPart.timeOffset = 0; - } - - if (sType == "tx3g"){ - if (stszBox.getEntrySize(stszIndex) <= 2 && false){ - FAIL_MSG("size <=2"); - }else{ - long long packSendSize = 0; - packSendSize = 24 + (BsetPart.timeOffset ? 17 : 0) + (BsetPart.bpos ? 
15 : 0) + 19 + - stszBox.getEntrySize(stszIndex) + 11 - 2 + 19; - meta.update(BsetPart.time - DTS_CTS_offset + globalTimeOffset, BsetPart.timeOffset + DTS_CTS_offset, tNumber, - stszBox.getEntrySize(stszIndex) - 2, BsetPart.bpos+2, true, packSendSize); - } - }else{ - meta.update(BsetPart.time - DTS_CTS_offset + globalTimeOffset, BsetPart.timeOffset + DTS_CTS_offset, tNumber, - stszBox.getEntrySize(stszIndex), BsetPart.bpos, BsetPart.keyframe); - } - } - } - bps = 0; - std::set tracks = M.getValidTracks(); - for (std::set::iterator it = tracks.begin(); it != tracks.end(); it++){bps += M.getBps(*it);} + + bool sawParts = false; + bool parsedInitial = false; + for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); it++){ + if (!it->compatible()){ + INFO_MSG("Unsupported track: %s", it->trackType.c_str()); + continue; + } + tNumber = meta.addTrack(); + HIGH_MSG("Found track %zu of type %s -> %s", tNumber, it->sType.c_str(), it->codec.c_str()); + meta.setID(tNumber, it->trackId); + meta.setCodec(tNumber, it->codec); + meta.setInit(tNumber, it->initData); + meta.setLang(tNumber, it->lang); + if (it->trackType == "video"){ + meta.setType(tNumber, "video"); + meta.setWidth(tNumber, it->vidWidth); + meta.setHeight(tNumber, it->vidHeight); + } + if (it->trackType == "audio"){ + meta.setType(tNumber, "audio"); + meta.setChannels(tNumber, it->audChannels); + meta.setRate(tNumber, it->audRate); + meta.setSize(tNumber, it->audSize); + } + if (it->size()){sawParts = true;} + } + + // Might be an fMP4 file! Let's try finding some moof boxes... + bool sawMoof = false; + size_t moofPos = 0; + while ((readBuffer.size() >= 16 || inFile) && keepRunning()){ + //Read box header if needed + while (readBuffer.size() < 16 && inFile && keepRunning()){inFile.readSome(16, *this);} + //Failed? Abort - this is not fatal, unlike in the loop above (could just be EOF). 
+ if (readBuffer.size() < 16){break;} + //Box type is always on bytes 5-8 from the start of a box + std::string boxType = std::string(readBuffer+4, 4); + uint64_t boxSize = MP4::calcBoxSize(readBuffer); + if (boxType == "moof"){ + while (readBuffer.size() < boxSize && inFile && keepRunning()){inFile.readSome(boxSize-readBuffer.size(), *this);} + if (readBuffer.size() < boxSize){ + WARN_MSG("Could not read entire MOOF box into memory at %" PRIu64 " bytes, aborting further parsing!", readPos); + break; + } + if (sawParts && !parsedInitial){ + for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); ++it){ + tNumber = M.trackIDToIndex(it->trackId); + for (uint64_t partNo = 0; partNo < it->size(); ++partNo){ + uint64_t prtBpos = 0, prtTime = 0; + uint32_t prtBlen = 0; + int32_t prtTimeOff = 0; + bool prtKey = false; + it->getPart(partNo, &prtBpos, &prtBlen, &prtTime, &prtTimeOff, &prtKey); + meta.update(prtTime, prtTimeOff, tNumber, prtBlen, moovPos, prtKey && it->trackType != "audio"); + sawParts = true; + } + bps += M.getBps(tNumber); + } + parsedInitial = true; + } + MP4::Box moofBox(readBuffer, false); + moofPos = readPos; + // Indicate that we're reading the next moof box to all track headers + for (std::deque::iterator t = trackHeaders.begin(); t != trackHeaders.end(); ++t){ + t->nextMoof(); + } + // Loop over traf boxes inside the moof box, but them in our header parser + std::deque trafs = ((MP4::MOOF*)&moofBox)->getChildren(); + for (std::deque::iterator t = trafs.begin(); t != trafs.end(); ++t){ + if (!(t->getChild())){ + WARN_MSG("Could not find thfd box inside traf box!"); + continue; + } + uint32_t trackId = t->getChild().getTrackID(); + headerData(trackId).read(*t); + } + + // Parse data from moof header into our header + for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); it++){ + if (!it->compatible()){continue;} + tNumber = M.trackIDToIndex(it->trackId); + for (uint64_t partNo = 0; partNo < it->size(); 
++partNo){ + uint64_t prtBpos = 0, prtTime = 0; + uint32_t prtBlen = 0; + int32_t prtTimeOff = 0; + bool prtKey = false; + it->getPart(partNo, &prtBpos, &prtBlen, &prtTime, &prtTimeOff, &prtKey, moofPos); + // Skip any parts that are outside the file limits + if (inFile.getSize() != std::string::npos && prtBpos + prtBlen > inFile.getSize()){continue;} + // Note: we set the byte position to the position of the moof, so we can re-read it later with ease + meta.update(prtTime, prtTimeOff, tNumber, prtBlen, moofPos, prtKey && it->trackType != "audio"); + sawParts = true; + } + } + } + activityCounter = Util::bootSecs(); + //Skip to next box + if (readBuffer.size() > boxSize){ + readBuffer.shift(boxSize); + readPos += boxSize; + }else{ + readBuffer.truncate(0); + if (!inFile.seek(readPos + boxSize)){ + FAIL_MSG("Seek to %" PRIu64 " failed! Aborting load", readPos+boxSize); + } + readPos = inFile.getPos(); + } + } + if (!sawParts){ + WARN_MSG("Could not find any MOOF boxes with data, either! Considering load failed and aborting."); + return false; + } + // Mark file as fmp4 so we know to treat it as one + if (sawMoof){meta.inputLocalVars["fmp4"] = true;} + if (!parsedInitial){ + for (std::deque::iterator it = trackHeaders.begin(); it != trackHeaders.end(); ++it){ + tNumber = M.trackIDToIndex(it->trackId); + for (uint64_t partNo = 0; partNo < it->size(); ++partNo){ + uint64_t prtBpos = 0, prtTime = 0; + uint32_t prtBlen = 0; + int32_t prtTimeOff = 0; + bool prtKey = false; + it->getPart(partNo, &prtBpos, &prtBlen, &prtTime, &prtTimeOff, &prtKey); + meta.update(prtTime, prtTimeOff, tNumber, prtBlen, prtBpos, prtKey && it->trackType != "audio"); + sawParts = true; + } + bps += M.getBps(tNumber); + } + } + return true; } void InputMP4::getNext(size_t idx){// get next part from track in stream + thisPacket.null(); + if (curPositions.empty()){ - thisPacket.null(); - return; + // fMP4 file? 
Seek to the right header and read it in + if (nextBox){ + uint32_t trackId = M.getID(idx); + MP4::TrackHeader & thisHeader = headerData(trackId); + + std::string boxType; + while (boxType != "moof"){ + if (!shiftTo(nextBox, 12)){return;} + boxType = std::string(readBuffer+(nextBox - readPos)+4, 4); + if (boxType == "moof"){break;} + if (boxType != "mdat"){INFO_MSG("Skipping box: %s", boxType.c_str());} + uint64_t boxSize = MP4::calcBoxSize(readBuffer + (nextBox - readPos)); + nextBox += boxSize; + } + + uint64_t boxSize = MP4::calcBoxSize(readBuffer + (nextBox - readPos)); + if (!shiftTo(nextBox, boxSize)){return;} + uint64_t moofPos = nextBox; + + { + MP4::Box moofBox(readBuffer + (nextBox-readPos), false); + + thisHeader.nextMoof(); + // Loop over traf boxes inside the moof box, but them in our header parser + std::deque trafs = ((MP4::MOOF*)&moofBox)->getChildren(); + for (std::deque::iterator t = trafs.begin(); t != trafs.end(); ++t){ + if (!(t->getChild())){ + WARN_MSG("Could not find thfd box inside traf box!"); + continue; + } + if (t->getChild().getTrackID() == trackId){thisHeader.read(*t);} + } + } + + size_t headerDataSize = thisHeader.size(); + MP4::PartTime addPart; + addPart.trackID = idx; + for (size_t i = 0; i < headerDataSize; i++){ + thisHeader.getPart(i, &addPart.bpos, &addPart.size, &addPart.time, &addPart.offset, &addPart.keyframe, moofPos); + addPart.index = i; + curPositions.insert(addPart); + } + nextBox += boxSize; + } } + // pop uit set - mp4PartTime curPart = *curPositions.begin(); + MP4::PartTime curPart = *curPositions.begin(); curPositions.erase(curPositions.begin()); - bool isKeyframe = false; - DTSC::Keys keys(M.keys(curPart.trackID)); - DTSC::Parts parts(M.parts(curPart.trackID)); - uint32_t nextKeyNum = nextKeyframe[curPart.trackID]; - if (nextKeyNum < keys.getEndValid()){ - // checking if this is a keyframe - if (meta.getType(curPart.trackID) == "video" && curPart.time == keys.getTime(nextKeyNum)){ - isKeyframe = true; - } - // if 
a keyframe has passed, we find the next keyframe - if (keys.getTime(nextKeyNum) <= curPart.time){ - ++nextKeyframe[curPart.trackID]; - ++nextKeyNum; - } - } - if (curPart.bpos < readPos || curPart.bpos > readPos + readBuffer.size() + 512*1024 + bps){ - INFO_MSG("Buffer contains %" PRIu64 "-%" PRIu64 ", but we need %" PRIu64 "; seeking!", readPos, readPos + readBuffer.size(), curPart.bpos); - readBuffer.truncate(0); - if (!inFile.seek(curPart.bpos)){ - Util::logExitReason(ER_FORMAT_SPECIFIC, "seek unsuccessful @bpos %" PRIu64 ": %s", curPart.bpos, strerror(errno)); - thisPacket.null(); - return; - } - readPos = inFile.getPos(); - }else{ - //If we have more than 5MiB buffered and are more than 5MiB into the buffer, shift the first 4MiB off the buffer. - //This prevents infinite growth of the read buffer for large files - if (readBuffer.size() >= 5*1024*1024 && curPart.bpos > readPos + 5*1024*1024 + bps){ - readBuffer.shift(4*1024*1024); - readPos += 4*1024*1024; - } - } - - while (readPos+readBuffer.size() < curPart.bpos+curPart.size && inFile && keepRunning()){ - inFile.readSome((curPart.bpos+curPart.size) - (readPos+readBuffer.size()), *this); - } - if (readPos+readBuffer.size() < curPart.bpos+curPart.size){ - FAIL_MSG("Read unsuccessful at %" PRIu64 ", seeking to retry...", readPos+readBuffer.size()); - readBuffer.truncate(0); - if (!inFile.seek(curPart.bpos)){ - Util::logExitReason(ER_FORMAT_SPECIFIC, "seek unsuccessful @bpos %" PRIu64 ": %s", curPart.bpos, strerror(errno)); - thisPacket.null(); - return; - } - readPos = inFile.getPos(); - while (readPos+readBuffer.size() < curPart.bpos+curPart.size && inFile && keepRunning()){ - inFile.readSome((curPart.bpos+curPart.size) - (readPos+readBuffer.size()), *this); - } - if (readPos+readBuffer.size() < curPart.bpos+curPart.size){ - Util::logExitReason(ER_FORMAT_SPECIFIC, "Read retry unsuccessful at %" PRIu64 ", aborting", readPos+readBuffer.size()); - thisPacket.null(); - return; - } - } + if (!shiftTo(curPart.bpos, 
curPart.size)){return;} if (M.getCodec(curPart.trackID) == "subtitle"){ static JSON::Value thisPack; - thisPack.null(); thisPack["trackid"] = (uint64_t)curPart.trackID; thisPack["bpos"] = curPart.bpos; //(long long)fileSource.tellg(); thisPack["data"] = std::string(readBuffer + (curPart.bpos-readPos), curPart.size); @@ -615,61 +370,129 @@ namespace Mist{ std::string tmpStr = thisPack.toNetPacked(); thisPacket.reInit(tmpStr.data(), tmpStr.size()); }else{ + bool isKeyframe = (curPart.keyframe && meta.getType(curPart.trackID) == "video"); thisPacket.genericFill(curPart.time, curPart.offset, curPart.trackID, readBuffer + (curPart.bpos-readPos), curPart.size, 0, isKeyframe); } thisTime = curPart.time; thisIdx = curPart.trackID; - // get the next part for this track - curPart.index++; - if (curPart.index < headerData(M.getID(curPart.trackID)).size()){ - headerData(M.getID(curPart.trackID)).getPart(curPart.index, curPart.bpos); - if (M.getCodec(curPart.trackID) == "subtitle"){curPart.bpos += 2;} - curPart.size = parts.getSize(curPart.index); - curPart.offset = parts.getOffset(curPart.index); - curPart.time = M.getPartTime(curPart.index, thisIdx); - curPart.duration = parts.getDuration(curPart.index); - curPositions.insert(curPart); + if (!nextBox){ + // get the next part for this track + curPart.index++; + if (curPart.index < headerData(M.getID(curPart.trackID)).size()){ + headerData(M.getID(curPart.trackID)).getPart(curPart.index, &curPart.bpos, &curPart.size, &curPart.time, &curPart.offset, &curPart.keyframe); + curPositions.insert(curPart); + } } } void InputMP4::seek(uint64_t seekTime, size_t idx){// seek to a point - nextKeyframe.clear(); curPositions.clear(); - if (idx != INVALID_TRACK_ID){ - handleSeek(seekTime, idx); - }else{ - std::set tracks = M.getValidTracks(); - for (std::set::iterator it = tracks.begin(); it != tracks.end(); it++){ - handleSeek(seekTime, *it); + if (idx == INVALID_TRACK_ID){ + FAIL_MSG("Seeking more than 1 track at a time in MP4 input is 
unsupported"); + return; + } + + MP4::PartTime addPart; + addPart.trackID = idx; + size_t trackId = M.getID(idx); + MP4::TrackHeader &thisHeader = headerData(M.getID(idx)); + uint64_t moofPos = 0; + + // fMP4 file? Seek to the right header and read it in + if (M.inputLocalVars.isMember("fmp4")){ + uint32_t keyIdx = M.getKeyIndexForTime(idx, seekTime); + size_t bPos = M.getKeys(idx).getBpos(keyIdx); + if (bPos == moovPos){ + thisHeader.revertToMoov(); + if (!shiftTo(bPos, 12)){return;} + uint64_t boxSize = MP4::calcBoxSize(readBuffer + (bPos - readPos)); + nextBox = bPos + boxSize; + }else{ + if (!shiftTo(bPos, 12)){return;} + std::string boxType = std::string(readBuffer+(bPos - readPos)+4, 4); + if (boxType != "moof"){ + FAIL_MSG("Read %s box instead of moof box at %zub!", boxType.c_str(), bPos); + Util::logExitReason(ER_FORMAT_SPECIFIC, "Did not find moof box at expected position"); + return; + } + uint64_t boxSize = MP4::calcBoxSize(readBuffer + (bPos - readPos)); + if (!shiftTo(bPos, boxSize)){return;} + + MP4::Box moofBox(readBuffer + (bPos-readPos), false); + moofPos = bPos; + + thisHeader.nextMoof(); + // Loop over traf boxes inside the moof box, put them in our header parser + std::deque trafs = ((MP4::MOOF*)&moofBox)->getChildren(); + for (std::deque::iterator t = trafs.begin(); t != trafs.end(); ++t){ + if (!(t->getChild())){ + WARN_MSG("Could not find thfd box inside traf box!"); + continue; + } + if (t->getChild().getTrackID() == trackId){thisHeader.read(*t);} + } + nextBox = bPos + boxSize; } } - } - void InputMP4::handleSeek(uint64_t seekTime, size_t idx){ - nextKeyframe[idx] = 0; - mp4PartTime addPart; - addPart.trackID = idx; - // for all stsz samples in those tracks - mp4TrackHeader &thisHeader = headerData(M.getID(idx)); size_t headerDataSize = thisHeader.size(); - DTSC::Keys keys(M.keys(idx)); - DTSC::Parts parts(M.parts(idx)); for (size_t i = 0; i < headerDataSize; i++){ + thisHeader.getPart(i, &addPart.bpos, &addPart.size, &addPart.time, 
&addPart.offset, &addPart.keyframe, moofPos); - thisHeader.getPart(i, addPart.bpos); - if (M.getCodec(idx) == "subtitle"){addPart.bpos += 2;} - addPart.size = parts.getSize(i); - addPart.offset = parts.getOffset(i); - addPart.time = M.getPartTime(i, idx); - addPart.duration = parts.getDuration(i); + // Skip any parts that are outside the file limits + if (inFile.getSize() != std::string::npos && addPart.bpos + addPart.size > inFile.getSize()){continue;} - if (keys.getTime(nextKeyframe[idx]) < addPart.time){nextKeyframe[idx]++;} if (addPart.time >= seekTime){ addPart.index = i; curPositions.insert(addPart); - break; + if (!nextBox){break;} } } } + + /// Shifts the read buffer (if needed) so that bytes pos through pos+len are currently buffered. + /// Returns true on success, false on failure. + bool InputMP4::shiftTo(size_t pos, size_t len){ + if (pos < readPos || pos > readPos + readBuffer.size() + 512*1024 + bps){ + INFO_MSG("Buffer contains %" PRIu64 "-%" PRIu64 ", but we need %" PRIu64 "; seeking!", readPos, readPos + readBuffer.size(), pos); + readBuffer.truncate(0); + if (!inFile.seek(pos)){ + return false; + } + readPos = inFile.getPos(); + }else{ + //If we have more than 5MiB buffered and are more than 5MiB into the buffer, shift the first 4MiB off the buffer. 
+ //This prevents infinite growth of the read buffer for large files + if (readBuffer.size() >= 5*1024*1024 && pos > readPos + 5*1024*1024 + bps){ + readBuffer.shift(4*1024*1024); + readPos += 4*1024*1024; + } + } + + while (readPos+readBuffer.size() < pos+len && inFile && keepRunning()){ + inFile.readSome((pos+len) - (readPos+readBuffer.size()), *this); + } + if (readPos+readBuffer.size() < pos+len){ + if (inFile.getSize() != std::string::npos || inFile.getSize() > readPos+readBuffer.size()){ + FAIL_MSG("Read unsuccessful at %" PRIu64 ", seeking to retry...", readPos+readBuffer.size()); + readBuffer.truncate(0); + if (!inFile.seek(pos)){ + return false; + } + readPos = inFile.getPos(); + while (readPos+readBuffer.size() < pos+len && inFile && keepRunning()){ + inFile.readSome((pos+len) - (readPos+readBuffer.size()), *this); + } + if (readPos+readBuffer.size() < pos+len){ + return false; + } + }else{ + WARN_MSG("Attempt to read past end of file!"); + return false; + } + } + return true; + } + }// namespace Mist diff --git a/src/input/input_mp4.h b/src/input/input_mp4.h index a46d3cc4..be5ada1c 100644 --- a/src/input/input_mp4.h +++ b/src/input/input_mp4.h @@ -2,71 +2,9 @@ #include #include #include +#include #include namespace Mist{ - class mp4PartTime{ - public: - mp4PartTime() : time(0), duration(0), offset(0), trackID(0), bpos(0), size(0), index(0){} - bool operator<(const mp4PartTime &rhs) const{ - if (time < rhs.time){return true;} - if (time > rhs.time){return false;} - if (trackID < rhs.trackID){return true;} - return (trackID == rhs.trackID && bpos < rhs.bpos); - } - uint64_t time; - uint64_t duration; - int32_t offset; - size_t trackID; - uint64_t bpos; - uint32_t size; - uint64_t index; - }; - - struct mp4PartBpos{ - bool operator<(const mp4PartBpos &rhs) const{ - if (time < rhs.time){return true;} - if (time > rhs.time){return false;} - if (trackID < rhs.trackID){return true;} - return (trackID == rhs.trackID && bpos < rhs.bpos); - } - uint64_t time; - 
size_t trackID; - uint64_t bpos; - uint64_t size; - uint64_t stcoNr; - int32_t timeOffset; - bool keyframe; - }; - - class mp4TrackHeader{ - public: - mp4TrackHeader(); - size_t trackId; - void read(MP4::TRAK &trakBox); - MP4::STCO stcoBox; - MP4::CO64 co64Box; - MP4::STSZ stszBox; - MP4::STSC stscBox; - uint64_t timeScale; - void getPart(uint64_t index, uint64_t &offset); - uint64_t size(); - - private: - bool initialised; - // next variables are needed for the stsc/stco loop - uint64_t stscStart; - uint64_t sampleIndex; - // next variables are needed for the stts loop - uint64_t deltaIndex; ///< Index in STTS box - uint64_t deltaPos; ///< Sample counter for STTS box - uint64_t deltaTotal; ///< Total timestamp for STTS box - // for CTTS box loop - uint64_t offsetIndex; ///< Index in CTTS box - uint64_t offsetPos; ///< Sample counter for CTTS box - - bool stco64; - }; - class InputMP4 : public Input, public Util::DataCallback { public: InputMP4(Util::Config *cfg); @@ -81,20 +19,19 @@ namespace Mist{ bool needHeader(); void getNext(size_t idx = INVALID_TRACK_ID); void seek(uint64_t seekTime, size_t idx = INVALID_TRACK_ID); - void handleSeek(uint64_t seekTime, size_t idx); + bool shiftTo(size_t pos, size_t len); HTTP::URIReader inFile; Util::ResizeablePointer readBuffer; uint64_t readPos; + uint64_t moovPos; uint64_t bps; + uint64_t nextBox; - mp4TrackHeader &headerData(size_t trackID); + MP4::TrackHeader &headerData(size_t trackID); - std::deque trackHeaders; - std::set curPositions; - - // remember last seeked keyframe; - std::map nextKeyframe; + std::deque trackHeaders; + std::set curPositions; }; }// namespace Mist