Spun out MP4 parser into new mp4_stream.cpp; updated MP4 input to use it; added support for fMP4 to this new MP4 Stream library

2023-11-30 18:28:57 +01:00 · 2023-11-30 18:28:57 +01:00 · 1dd27f215a
commit 1dd27f215a
parent 6a3ae16b2d
10 changed files with 1105 additions and 770 deletions
--- a/lib/meson.build
+++ b/lib/meson.build
@ -27,6 +27,7 @@ headers = [
  'mp4_generic.h',
  'mp4.h',
  'mp4_ms.h',
+  'mp4_stream.h',
  'mpeg.h',
  'nal.h',
  'ogg.h',
@ -96,6 +97,7 @@ libmist = library('mist',
  'mp4_encryption.cpp',
  'mp4_generic.cpp',
  'mp4_ms.cpp',
+  'mp4_stream.cpp',
  'mpeg.cpp',
  'nal.cpp',
  'ogg.cpp',
--- a/lib/mp4.cpp
+++ b/lib/mp4.cpp
@ -619,7 +619,7 @@ namespace MP4{

  void containerBox::setContent(Box &newContent, uint32_t no){
    int tempLoc = 0;
-    unsigned int contentCount = getContentCount();
+    uint32_t contentCount = getContentCount();
    for (unsigned int i = 0; i < no; i++){
      if (i < contentCount){
        tempLoc += getBoxLen(tempLoc);
@ -646,20 +646,24 @@ namespace MP4{
  }

  Box containerBox::getChild(const char *boxName){
-    uint32_t count = getContentCount();
-    for (uint32_t i = 0; i < count; i++){
-      Box &thisChild = getContent(i);
-      if (thisChild.isType(boxName)){return Box(thisChild.asBox(), false);}
+    size_t maxLoc = boxedSize() - 8;
+    size_t tempLoc = payloadOffset;
+    while (tempLoc < maxLoc){
+      Box thisChild(data+tempLoc, false);
+      if (thisChild.isType(boxName)){return thisChild;}
+      tempLoc += calcBoxSize(data+tempLoc);
    }
    return Box((char *)"\000\000\000\010erro", false);
  }

  std::deque<Box> containerBox::getChildren(const char *boxName){
    std::deque<Box> res;
-    uint32_t count = getContentCount();
-    for (uint32_t i = 0; i < count; i++){
-      Box &thisChild = getContent(i);
-      if (thisChild.isType(boxName)){res.push_back(Box(thisChild.asBox(), false));}
+    size_t maxLoc = boxedSize() - 8;
+    size_t tempLoc = payloadOffset;
+    while (tempLoc < maxLoc){
+      Box thisChild(data+tempLoc, false);
+      if (thisChild.isType(boxName)){res.push_back(thisChild);}
+      tempLoc += calcBoxSize(data+tempLoc);
    }
    return res;
  }
@ -707,8 +711,8 @@ namespace MP4{
  Box &containerFullBox::getContent(uint32_t no){
    static Box ret = Box((char *)"\000\000\000\010erro", false);
    if (no > getContentCount()){return ret;}
-    unsigned int i = 0;
-    int tempLoc = 4;
+    uint32_t i = 0;
+    size_t tempLoc = 4;
    while (i < no){
      tempLoc += getBoxLen(tempLoc);
      i++;
--- a/lib/mp4_generic.cpp
+++ b/lib/mp4_generic.cpp
@ -61,13 +61,13 @@ namespace MP4{

  void TRUN::setFlags(uint32_t newFlags){setInt24(newFlags, 1);}

-  uint32_t TRUN::getFlags(){return getInt24(1);}
+  uint32_t TRUN::getFlags() const {return getInt24(1);}

  void TRUN::setDataOffset(uint32_t newOffset){
    if (getFlags() & trundataOffset){setInt32(newOffset, 8);}
  }

-  uint32_t TRUN::getDataOffset(){
+  uint32_t TRUN::getDataOffset() const {
    if (getFlags() & trundataOffset){
      return getInt32(8);
    }else{
@ -84,7 +84,7 @@ namespace MP4{
    }
  }

-  uint32_t TRUN::getFirstSampleFlags(){
+  uint32_t TRUN::getFirstSampleFlags() const {
    if (!(getFlags() & trunfirstSampleFlags)){return 0;}
    if (getFlags() & trundataOffset){
      return getInt32(12);
@ -93,7 +93,7 @@ namespace MP4{
    }
  }

-  uint32_t TRUN::getSampleInformationCount(){return getInt32(4);}
+  uint32_t TRUN::getSampleInformationCount() const {return getInt32(4);}

  void TRUN::setSampleInformation(trunSampleInformation newSample, uint32_t no){
    uint32_t flags = getFlags();
@ -125,7 +125,7 @@ namespace MP4{
    if (getSampleInformationCount() < no + 1){setInt32(no + 1, 4);}
  }

-  trunSampleInformation TRUN::getSampleInformation(uint32_t no){
+  trunSampleInformation TRUN::getSampleInformation(uint32_t no, TFHD * tfhd) const{
    trunSampleInformation ret;
    ret.sampleDuration = 0;
    ret.sampleSize = 0;
@ -140,19 +140,30 @@ namespace MP4{
    if (flags & trunsampleOffsets){sampInfoSize += 4;}
    uint32_t offset = 8;
    if (flags & trundataOffset){offset += 4;}
-    if (flags & trunfirstSampleFlags){offset += 4;}
+    if (flags & trunfirstSampleFlags){
+      if (!no){ret.sampleFlags = getFirstSampleFlags();}
+      offset += 4;
+    }
    uint32_t innerOffset = 0;
    if (flags & trunsampleDuration){
      ret.sampleDuration = getInt32(offset + no * sampInfoSize + innerOffset);
      innerOffset += 4;
+    }else if (tfhd){
+      ret.sampleDuration = tfhd->getDefaultSampleDuration();
    }
    if (flags & trunsampleSize){
      ret.sampleSize = getInt32(offset + no * sampInfoSize + innerOffset);
      innerOffset += 4;
+    }else if (tfhd){
+      ret.sampleSize = tfhd->getDefaultSampleSize();
    }
    if (flags & trunsampleFlags){
      ret.sampleFlags = getInt32(offset + no * sampInfoSize + innerOffset);
      innerOffset += 4;
+    }else if ((flags & trunfirstSampleFlags) && !no){
+      ret.sampleFlags = getFirstSampleFlags();
+    }else if (tfhd){
+      ret.sampleFlags = tfhd->getDefaultSampleFlags();
    }
    if (flags & trunsampleOffsets){
      ret.sampleOffset = getInt32(offset + no * sampInfoSize + innerOffset);
@ -161,7 +172,7 @@ namespace MP4{
    return ret;
  }

-  std::string TRUN::toPrettyString(uint32_t indent){
+  std::string TRUN::toPrettyString(uint32_t indent) const {
    std::stringstream r;
    r << std::string(indent, ' ') << "[trun] Track Fragment Run (" << boxedSize() << ")" << std::endl;
    r << std::string(indent + 1, ' ') << "Version " << (int)getInt8(0) << std::endl;
@ -201,17 +212,17 @@ namespace MP4{

  std::string prettySampleFlags(uint32_t flag){
    std::stringstream r;
+    if (flag & noKeySample){
+      r << "noKeySample";
+    }else{
+      r << "isKeySample";
+    }
    if (flag & noIPicture){r << " noIPicture";}
    if (flag & isIPicture){r << " isIPicture";}
    if (flag & noDisposable){r << " noDisposable";}
    if (flag & isDisposable){r << " isDisposable";}
    if (flag & isRedundant){r << " isRedundant";}
    if (flag & noRedundant){r << " noRedundant";}
-    if (flag & noKeySample){
-      r << " noKeySample";
-    }else{
-      r << " isKeySample";
-    }
    return r.str();
  }

@ -2610,11 +2621,11 @@ namespace MP4{

  void STSZ::setSampleSize(uint32_t newSampleSize){setInt32(newSampleSize, 4);}

-  uint32_t STSZ::getSampleSize(){return getInt32(4);}
+  uint32_t STSZ::getSampleSize() const {return getInt32(4);}

  void STSZ::setSampleCount(uint32_t newSampleCount){setInt32(newSampleCount, 8);}

-  uint32_t STSZ::getSampleCount(){return getInt32(8);}
+  uint32_t STSZ::getSampleCount() const {return getInt32(8);}

  void STSZ::setEntrySize(uint32_t newEntrySize, uint32_t no){
    if (no + 1 > getSampleCount()){
@ -2626,7 +2637,7 @@ namespace MP4{
    setInt32(newEntrySize, 12 + no * 4);
  }

-  uint32_t STSZ::getEntrySize(uint32_t no){
+  uint32_t STSZ::getEntrySize(uint32_t no) const {
    if (no >= getSampleCount()){return 0;}
    long unsigned int retVal = getInt32(12 + no * 4);
    if (retVal == 0){
--- a/lib/mp4_generic.h
+++ b/lib/mp4_generic.h
@ -39,6 +39,36 @@ namespace MP4{
      std::string toPrettyString(uint32_t indent = 0);
  };

+  enum tfhdflags{
+    tfhdBaseOffset = 0x000001,
+    tfhdSampleDesc = 0x000002,
+    tfhdSampleDura = 0x000008,
+    tfhdSampleSize = 0x000010,
+    tfhdSampleFlag = 0x000020,
+    tfhdNoDuration = 0x010000,
+    tfhdBaseIsMoof = 0x020000,
+  };
+  class TFHD : public Box{
+  public:
+    TFHD();
+    void setFlags(uint32_t newFlags);
+    uint32_t getFlags();
+    void setTrackID(uint32_t newID);
+    uint32_t getTrackID();
+    void setBaseDataOffset(uint64_t newOffset);
+    uint64_t getBaseDataOffset();
+    void setSampleDescriptionIndex(uint32_t newIndex);
+    uint32_t getSampleDescriptionIndex();
+    void setDefaultSampleDuration(uint32_t newDuration);
+    uint32_t getDefaultSampleDuration();
+    void setDefaultSampleSize(uint32_t newSize);
+    uint32_t getDefaultSampleSize();
+    void setDefaultSampleFlags(uint32_t newFlags);
+    uint32_t getDefaultSampleFlags();
+    bool getDefaultBaseIsMoof();
+    std::string toPrettyString(uint32_t indent = 0);
+  };
+
  struct trunSampleInformation {
    uint32_t sampleDuration;
    uint32_t sampleSize;
@ -69,45 +99,15 @@ namespace MP4{
  public:
    TRUN();
    void setFlags(uint32_t newFlags);
-    uint32_t getFlags();
+    uint32_t getFlags() const;
    void setDataOffset(uint32_t newOffset);
-    uint32_t getDataOffset();
+    uint32_t getDataOffset() const;
    void setFirstSampleFlags(uint32_t newSampleFlags);
-    uint32_t getFirstSampleFlags();
-    uint32_t getSampleInformationCount();
+    uint32_t getFirstSampleFlags() const;
+    uint32_t getSampleInformationCount() const;
    void setSampleInformation(trunSampleInformation newSample, uint32_t no);
-    trunSampleInformation getSampleInformation(uint32_t no);
-    std::string toPrettyString(uint32_t indent = 0);
-  };
-
-  enum tfhdflags{
-    tfhdBaseOffset = 0x000001,
-    tfhdSampleDesc = 0x000002,
-    tfhdSampleDura = 0x000008,
-    tfhdSampleSize = 0x000010,
-    tfhdSampleFlag = 0x000020,
-    tfhdNoDuration = 0x010000,
-    tfhdBaseIsMoof = 0x020000,
-  };
-  class TFHD : public Box{
-  public:
-    TFHD();
-    void setFlags(uint32_t newFlags);
-    uint32_t getFlags();
-    void setTrackID(uint32_t newID);
-    uint32_t getTrackID();
-    void setBaseDataOffset(uint64_t newOffset);
-    uint64_t getBaseDataOffset();
-    void setSampleDescriptionIndex(uint32_t newIndex);
-    uint32_t getSampleDescriptionIndex();
-    void setDefaultSampleDuration(uint32_t newDuration);
-    uint32_t getDefaultSampleDuration();
-    void setDefaultSampleSize(uint32_t newSize);
-    uint32_t getDefaultSampleSize();
-    void setDefaultSampleFlags(uint32_t newFlags);
-    uint32_t getDefaultSampleFlags();
-    bool getDefaultBaseIsMoof();
-    std::string toPrettyString(uint32_t indent = 0);
+    trunSampleInformation getSampleInformation(uint32_t no, TFHD * tfhd = 0) const;
+    std::string toPrettyString(uint32_t indent = 0) const;
  };

  class AVCC : public Box{
@ -641,11 +641,11 @@ namespace MP4{
  public:
    STSZ(char v = 1, uint32_t f = 0);
    void setSampleSize(uint32_t newSampleSize);
-    uint32_t getSampleSize();
+    uint32_t getSampleSize() const;
    void setSampleCount(uint32_t newSampleCount);
-    uint32_t getSampleCount();
+    uint32_t getSampleCount() const;
    void setEntrySize(uint32_t newEntrySize, uint32_t no);
-    uint32_t getEntrySize(uint32_t no);
+    uint32_t getEntrySize(uint32_t no) const;
    std::string toPrettyString(uint32_t indent = 0);
  };

--- a/lib/mp4_stream.cpp
+++ b/lib/mp4_stream.cpp
@ -0,0 +1,501 @@
+#include "mp4_stream.h"
+#include "h264.h"
+#include "mp4_dash.h"
+
+
+namespace MP4{
+
+  /// Default constructor. The body is empty; members are default-constructed.
+  Stream::Stream(){
+  }
+
+  /// Destructor. The body is empty; members clean up through their own destructors.
+  Stream::~Stream(){
+  }
+
+  /// Currently a no-op: the passed pointer is not read or stored.
+  /// NOTE(review): looks like a placeholder for parsing MP4 data from ptr - confirm intended behavior.
+  void Stream::open(Util::ResizeablePointer & ptr){
+    
+  }
+
+  /// Currently always returns false; the tid argument is ignored.
+  /// NOTE(review): presumably meant to report whether a packet is queued for track tid - not implemented here.
+  bool Stream::hasPacket(size_t tid) const{
+    return false;
+  }
+
+  /// Returns true while at least one part is queued in curPositions.
+  bool Stream::hasPacket() const{
+    const bool queueIsEmpty = curPositions.empty();
+    return !queueIsEmpty;
+  }
+
+  /// Currently a no-op: none of the arguments are read or written.
+  /// NOTE(review): presumably meant to fetch the next packet for track tid - not implemented here.
+  void Stream::getPacket(size_t tid, DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){
+  }
+
+  /// Currently always returns INVALID_TRACK_ID.
+  /// NOTE(review): presumably meant to return the track of the earliest queued part - not implemented here.
+  uint32_t Stream::getEarliestPID(){
+    return INVALID_TRACK_ID;
+  }
+
+  /// Removes the first (lowest-sorted) entry from curPositions and fills pack from it.
+  /// thisTime receives the part's time and thisIdx its trackID.
+  /// When nothing is queued, pack is nulled and thisTime/thisIdx are left untouched.
+  /// Afterwards the next part of the same track, if any, is queued in its place.
+  /// Note that a null (0) data pointer is handed to genericFill; the buffer expression is commented out.
+  void Stream::getEarliestPacket(DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){
+    std::set<MP4::PartTime>::iterator first = curPositions.begin();
+    if (first == curPositions.end()){
+      pack.null();
+      return;
+    }
+    // Pop from the set: keep a copy of the entry, then erase it
+    MP4::PartTime part = *first;
+    curPositions.erase(first);
+
+    thisTime = part.time;
+    thisIdx = part.trackID;
+    pack.genericFill(part.time, part.offset, part.trackID, 0/*readBuffer + (curPart.bpos-readPos)*/, part.size, 0, part.keyframe);
+
+    // Move on to the following part of this track and re-queue it when it exists
+    TrackHeader &hdr = trkHdrs[part.trackID];
+    ++part.index;
+    if (part.index >= hdr.size()){return;}
+    hdr.getPart(part.index, &part.bpos, &part.size, &part.time, &part.offset, &part.keyframe);
+    curPositions.insert(part);
+  }
+
+  /// Currently a no-op: meta is not modified and tid/mappingId are ignored.
+  /// NOTE(review): presumably meant to fill meta with track information - not implemented here.
+  void Stream::initializeMetadata(DTSC::Meta &meta, size_t tid, size_t mappingId){
+  }
+
+  /// Constructs an empty track header: all read cursors at zero, all boxes cleared,
+  /// TRAF mode disabled and every informational field set to a defined value.
+  TrackHeader::TrackHeader(){
+    // Read cursors for the stts / stsc / ctts / stss walks
+    timeIndex = timeSample = timeFirstSample = timeTotal = timeExtra = 0;
+    bposIndex = bposSample = 0;
+    offsetIndex = offsetSample = 0;
+    keyIndex = keySample = 0;
+    hasOffsets = false;
+    hasKeys = false;
+    isVideo = false;
+    // Until read(TRAK) has run nothing is known about the codec, so report the track as unsupported.
+    // Without this, compatible() would return an indeterminate value on a fresh object.
+    isCompatible = false;
+    // Give the media properties defined values too; read(TRAK) overwrites them.
+    vidWidth = vidHeight = 0;
+    audChannels = audRate = audSize = 0;
+    // timeScale is used as a divisor in the time calculations; 1 keeps those well-defined
+    // should they run before read(TRAK) has stored the real timescale.
+    timeScale = 1;
+    sttsBox.clear();
+    cttsBox.clear();
+    stszBox.clear();
+    stcoBox.clear();
+    co64Box.clear();
+    stscBox.clear();
+    stssBox.clear();
+    stco64 = false;
+    trafMode = false;
+    trackId = 0;
+  }
+
+  /// Signals that a new MOOF box is about to be read: enables TRAF mode, discards the
+  /// stored TRAF boxes and rewinds the time, byte-position and time-offset cursors.
+  /// The keyframe cursor (keyIndex/keySample) is left as it is.
+  void TrackHeader::nextMoof(){
+    trafMode = true;
+    trafs.clear();
+
+    // Timing cursor
+    timeIndex = 0;
+    timeSample = 0;
+    timeFirstSample = 0;
+    timeTotal = 0;
+    timeExtra = 0;
+    // Byte-position cursor
+    bposIndex = 0;
+    bposSample = 0;
+    // Time-offset cursor
+    offsetIndex = 0;
+    offsetSample = 0;
+  }
+
+  /// Leaves TRAF mode again: discards every stored TRAF box and rewinds all read
+  /// cursors (time, byte position, time offset and keyframe) back to zero.
+  void TrackHeader::revertToMoov(){
+    trafMode = false;
+    trafs.clear();
+
+    // Timing cursor
+    timeIndex = 0;
+    timeSample = 0;
+    timeFirstSample = 0;
+    timeTotal = 0;
+    timeExtra = 0;
+    // Byte-position cursor
+    bposIndex = 0;
+    bposSample = 0;
+    // Time-offset cursor
+    offsetIndex = 0;
+    offsetSample = 0;
+    // Keyframe cursor
+    keyIndex = 0;
+    keySample = 0;
+  }
+
+  /// Fills this header from a TRAK box.
+  /// Copies the sample table boxes (stts, ctts, stsz, stco/co64, stsc, stss) for later use by
+  /// getPart(), and derives timescale, language, track ID, dimensions, audio properties,
+  /// track type, codec name and init data from the box contents.
+  /// isCompatible ends up true only when the sample entry type matched one of the handled codecs.
+  /// Note: initData and trackType are only assigned when a matching box/handler is found;
+  /// they are not cleared here first.
+  void TrackHeader::read(TRAK &trakBox){
+    vidWidth = vidHeight = audChannels = audRate = audSize = 0;
+    codec.clear();
+
+    // Media header: timescale and language
+    MDIA mdiaBox = trakBox.getChild<MDIA>();
+    timeScale = mdiaBox.getChild<MDHD>().getTimeScale();
+    lang = mdiaBox.getChild<MP4::MDHD>().getLanguage();
+
+    // Track header: track ID, plus dimensions when the TKHD reports a non-zero width
+    TKHD tkhd = trakBox.getChild<TKHD>();
+    trackId = tkhd.getTrackID();
+    if (tkhd.getWidth()){
+      vidWidth = tkhd.getWidth();
+      vidHeight = tkhd.getHeight();
+    }
+
+    // Sample table: keep private copies of every box getPart() consults
+    STBL stblBox = mdiaBox.getChild<MINF>().getChild<STBL>();
+
+    sttsBox.copyFrom(stblBox.getChild<STTS>());
+
+    // ctts is optional; its presence decides whether time offsets are looked up
+    cttsBox.copyFrom(stblBox.getChild<CTTS>());
+    hasOffsets = cttsBox.isType("ctts");
+
+    stszBox.copyFrom(stblBox.getChild<STSZ>());
+
+    // Both chunk offset variants are copied; co64 is used whenever it is present
+    stcoBox.copyFrom(stblBox.getChild<STCO>());
+    co64Box.copyFrom(stblBox.getChild<CO64>());
+    stco64 = co64Box.isType("co64");
+
+    stscBox.copyFrom(stblBox.getChild<STSC>());
+
+    // stss is optional; its presence decides whether keyframes are looked up
+    stssBox.copyFrom(stblBox.getChild<STSS>());
+    hasKeys = stssBox.isType("stss");
+
+    // Only the first sample description entry is considered
+    Box sEntryBox = stblBox.getChild<MP4::STSD>().getEntry(0);
+    sType = sEntryBox.getType();
+
+    // Map the handler type onto a track type; unknown handlers leave trackType unchanged
+    std::string handler = mdiaBox.getChild<MP4::HDLR>().getHandlerType();
+    isVideo = false;
+    if (handler == "vide"){
+      isVideo = true;
+      trackType = "video";
+    }else if (handler == "soun"){
+      trackType = "audio";
+    }else if (handler == "sbtl"){
+      trackType = "meta";
+    }else{
+      INFO_MSG("Unsupported handler: %s", handler.c_str());
+    }
+
+    isCompatible = false;
+
+    // NOTE(review): "mp4v" is mapped to H264 here as well - confirm this is intended.
+    if (sType == "avc1" || sType == "h264" || sType == "mp4v"){
+      codec = "H264";
+      isCompatible = true;
+      VisualSampleEntry &vEntryBox = (VisualSampleEntry &)sEntryBox;
+      // Fall back to the sample entry dimensions when the TKHD had none
+      if (!vidWidth){
+        vidWidth = vEntryBox.getWidth();
+        vidHeight = vEntryBox.getHeight();
+      }
+      // NOTE(review): the avcC box is looked for in the results of getCLAP() and getPASP();
+      // presumably these return whichever boxes trail the sample entry - confirm.
+      MP4::Box initBox = vEntryBox.getCLAP();
+      if (initBox.isType("avcC")){initData.assign(initBox.payload(), initBox.payloadSize());}
+      initBox = vEntryBox.getPASP();
+      if (initBox.isType("avcC")){initData.assign(initBox.payload(), initBox.payloadSize());}
+      // Read metadata from init data if not set
+      if (!vidWidth){
+        h264::sequenceParameterSet sps;
+        sps.fromDTSCInit(initData);
+        h264::SPSMeta spsChar = sps.getCharacteristics();
+        vidWidth = spsChar.width;
+        vidHeight = spsChar.height;
+      }
+    }
+    if (sType == "hev1" || sType == "hvc1"){
+      codec = "HEVC";
+      isCompatible = true;
+      MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox;
+      // Fall back to the sample entry dimensions when the TKHD had none
+      if (!vidWidth){
+        vidWidth = vEntryBox.getWidth();
+        vidHeight = vEntryBox.getHeight();
+      }
+      // Same lookup as for H264, but for the hvcC configuration box
+      MP4::Box initBox = vEntryBox.getCLAP();
+      if (initBox.isType("hvcC")){initData.assign(initBox.payload(), initBox.payloadSize());}
+      initBox = vEntryBox.getPASP();
+      if (initBox.isType("hvcC")){initData.assign(initBox.payload(), initBox.payloadSize());}
+    }
+    if (sType == "av01"){
+      codec = "AV1";
+      isCompatible = true;
+      MP4::VisualSampleEntry &vEntryBox = (MP4::VisualSampleEntry &)sEntryBox;
+      // Fall back to the sample entry dimensions when the TKHD had none
+      if (!vidWidth){
+        vidWidth = vEntryBox.getWidth();
+        vidHeight = vEntryBox.getHeight();
+      }
+      // Same lookup as for H264, but for the av1C configuration box
+      MP4::Box initBox = vEntryBox.getCLAP();
+      if (initBox.isType("av1C")){initData.assign(initBox.payload(), initBox.payloadSize());}
+      initBox = vEntryBox.getPASP();
+      if (initBox.isType("av1C")){initData.assign(initBox.payload(), initBox.payloadSize());}
+    }
+    if (sType == "mp4a" || sType == "aac " || sType == "ac-3"){
+      MP4::AudioSampleEntry &aEntryBox = (MP4::AudioSampleEntry &)sEntryBox;
+      audRate = aEntryBox.getSampleRate();
+      audChannels = aEntryBox.getChannelCount();
+      audSize = 16; /// \TODO Actually get this from somewhere, probably..?
+
+      if (sType == "ac-3"){
+        codec = "AC3";
+        isCompatible = true;
+      }else{
+        // For mp4a / "aac " the codec and init data come from an esds box,
+        // found either directly as the codec box or nested inside a wave box.
+        // Without an esds box the track stays incompatible.
+        MP4::Box codingBox = aEntryBox.getCodecBox();
+        if (codingBox.getType() == "esds"){
+          MP4::ESDS & esdsBox = (MP4::ESDS &)codingBox;
+          codec = esdsBox.getCodec();
+          isCompatible = true;
+          initData = esdsBox.getInitData();
+        }
+        if (codingBox.getType() == "wave"){
+          MP4::WAVE & waveBox = (MP4::WAVE &)codingBox;
+          for (size_t c = 0; c < waveBox.getContentCount(); ++c){
+            MP4::Box content = waveBox.getContent(c);
+            if (content.getType() == "esds"){
+              MP4::ESDS & esdsBox = (MP4::ESDS &)content;
+              codec = esdsBox.getCodec();
+              isCompatible = true;
+              initData = esdsBox.getInitData();
+            }
+          }
+        }
+      }
+    }
+    if (sType == "tx3g"){// plain text subtitles
+      codec = "subtitle";
+      isCompatible = true;
+    }
+  }
+
+  /// Appends a copy of the given TRAF box to the internal list of TRAF boxes.
+  /// A warning is logged when TRAF mode is not active, but the box is stored regardless.
+  void TrackHeader::read(TRAF &trafBox){
+    if (!trafMode){
+      // nextMoof() was apparently not called first; warn so the caller can be fixed
+      WARN_MSG("Reading TRAF box header without signalling start of next MOOF box first!");
+    }
+    // Append an empty TRAF first, then copy the contents into the stored element
+    TRAF emptyTraf;
+    trafs.push_back(emptyTraf);
+    trafs.back().copyFrom(trafBox);
+  }
+
+  /// Advances the running timestamp by one sample lasting `delta` timescale units.
+  /// timeTotal is kept in timescale units. Any artificial shift applied for the previous
+  /// sample (timeExtra) is undone first, and a new shift is applied when the resulting
+  /// millisecond timestamp would not be strictly higher than the previous one.
+  /// Also increments timeSample. Divides by timeScale, which must be non-zero.
+  void TrackHeader::increaseTime(uint32_t delta){
+    // Calculate millisecond-time for current timestamp
+    uint64_t timePrev = (timeTotal * 1000) / timeScale;
+    timeTotal += delta;
+    
+    //Undo time shifts as much as possible
+    if (timeExtra){
+      timeTotal -= timeExtra;
+      timeExtra = 0;
+    }
+
+    //Make sure our timestamps go up by at least 1ms for every packet
+    if (timePrev >= (uint64_t)((timeTotal * 1000) / timeScale)){
+      // Smallest unit count whose millisecond value is at least timePrev+1.
+      // Rounded up: rounding down can map back onto timePrev when timeScale is not a multiple of 1000.
+      // Kept 64-bit like timeTotal, so long-running tracks do not get truncated.
+      uint64_t wantSamples = ((timePrev + 1) * timeScale + 999) / 1000;
+      timeExtra += wantSamples - timeTotal;
+      timeTotal = wantSamples;
+    }
+    ++timeSample;
+  }
+
+
+  /// Returns the number of parts this header currently describes.
+  /// Outside TRAF mode that is the STSZ sample count (or 0 when the STSZ box evaluates to false).
+  /// In TRAF mode it is the summed sample count of every TRUN box in every stored TRAF box.
+  uint64_t TrackHeader::size() const {
+    if (trafMode){
+      uint64_t total = 0;
+      std::deque<TRAF>::const_iterator trafIt = trafs.begin();
+      while (trafIt != trafs.end()){
+        // The cast makes a non-const copy, as getChildren is called on a non-const object
+        std::deque<TRUN> truns = ((TRAF)(*trafIt)).getChildren<TRUN>();
+        std::deque<TRUN>::const_iterator trunIt = truns.begin();
+        while (trunIt != truns.end()){
+          total += trunIt->getSampleInformationCount();
+          ++trunIt;
+        }
+        ++trafIt;
+      }
+      return total;
+    }
+    if (stszBox){return stszBox.getSampleCount();}
+    return 0;
+  }
+
+  /// Retrieves the information associated with a specific part (=frame).
+  /// The index is the zero-based part number, all other arguments are optional and if non-zero will be filled.
+  /// \param index Zero-based part number to look up.
+  /// \param byteOffset If set, receives the byte position of the part.
+  /// \param byteLen If set, receives the byte length of the part.
+  /// \param time If set, receives the timestamp of the part in milliseconds.
+  /// \param timeOffset If set, receives the time offset of the part in milliseconds (0 when the track has none).
+  /// \param keyFrame If set, receives whether the part is a keyframe.
+  /// \param moofPos Added to the byte offset in TRAF mode when the TFHD reports getDefaultBaseIsMoof().
+  /// Outside TRAF mode the MOOV sample tables are used; the internal cursors make sequential
+  /// (increasing index) lookups cheap and are reset whenever index goes backwards.
+  /// In TRAF mode, nothing is written when index lies beyond all stored TRUN boxes.
+  void TrackHeader::getPart(uint64_t index, uint64_t * byteOffset, uint32_t * byteLen, uint64_t * time, int32_t * timeOffset, bool * keyFrame, uint64_t moofPos){
+    // Switch between reading TRAF boxes or global headers
+    if (!trafMode){
+      // Reading global headers
+
+      // Calculate time, if requested
+      if (time){
+        // If we went backwards, reset our current position
+        if (index < timeSample){
+          timeIndex = timeFirstSample = timeSample = timeExtra = timeTotal = 0;
+        }
+        // Find the packet count per chunk entry for this sample
+        uint64_t eCnt = sttsBox.getEntryCount();
+        STTSEntry entry;
+        while (timeIndex < eCnt){
+          entry = sttsBox.getSTTSEntry(timeIndex);
+          // check where the next index starts
+          uint64_t nextSampleIndex = timeFirstSample + entry.sampleCount;
+          // If the next chunk starts with a higher sample than we want, we can stop here
+          if (nextSampleIndex > index){break;}
+          timeFirstSample = nextSampleIndex;
+          // Increase timestamp by delta for each sample with the same delta
+          while (timeSample < nextSampleIndex){increaseTime(entry.sampleDelta);}
+          ++timeIndex;
+        }
+
+        // Inside the samples with the same delta, we may still need to increase the timestamp.
+        while (timeSample < index){increaseTime(entry.sampleDelta);}
+        *time = (timeTotal * 1000) / timeScale;
+      }
+
+      // Look up time offset, if requested and available
+      if (timeOffset){
+        if (hasOffsets){
+          // If we went backwards, reset our current position
+          if (index < offsetSample){
+            offsetIndex = offsetSample = 0;
+          }
+          // Find the packet count per chunk entry for this sample
+          uint64_t eCnt = cttsBox.getEntryCount();
+          CTTSEntry entry;
+          while (offsetIndex < eCnt){
+            entry = cttsBox.getCTTSEntry(offsetIndex);
+            // check where the next index starts
+            uint64_t nextSampleIndex = offsetSample + entry.sampleCount;
+            // If the next chunk starts with a higher sample than we want, we can stop here
+            if (nextSampleIndex > index){break;}
+            offsetSample = nextSampleIndex;
+            ++offsetIndex;
+          }
+          *timeOffset = (entry.sampleOffset * 1000) / timeScale;
+        }else{
+          // Default to zero if there are no offsets for this track
+          *timeOffset = 0;
+        }
+      }
+
+      // Look up keyframe-ness, if requested and available
+      if (keyFrame){
+        if (!isVideo){
+          // Non-video tracks are never keyframes
+          *keyFrame = false;
+        }else{
+          // Video tracks with keys follow them
+          if (hasKeys){
+            // If we went backwards, reset our current position
+            if (index < keySample){
+              keyIndex = keySample = 0;
+            }
+            // Find the packet count per chunk entry for this sample
+            uint64_t eCnt = stssBox.getEntryCount();
+            while (keyIndex < eCnt){
+              // check where the next index starts
+              uint64_t nextSampleIndex;
+              if (keyIndex + 1 < eCnt){
+                nextSampleIndex = stssBox.getSampleNumber(keyIndex + 1) - 1;
+              }else{
+                nextSampleIndex = stszBox.getSampleCount();
+              }
+              // If the next key has a higher sample than we want, we can stop here
+              if (nextSampleIndex > index){break;}
+              keySample = nextSampleIndex;
+              ++keyIndex;
+            }
+            *keyFrame = (keySample == index);
+          }else{
+            // Everything is a keyframe if there are no keys listed for a video track
+            *keyFrame = true;
+          }
+        }
+      }
+
+      // Calculate byte position of packet, if requested
+      if (byteOffset){
+        // If we went backwards, reset our current position
+        if (index < bposSample){
+          bposIndex = bposSample = 0;
+        }
+        // Find the packet count per chunk entry for this sample
+        uint64_t eCnt = stscBox.getEntryCount();
+        STSCEntry entry;
+        while (bposIndex < eCnt){
+          entry = stscBox.getSTSCEntry(bposIndex);
+          // check where the next index starts
+          uint64_t nextSampleIndex;
+          if (bposIndex + 1 < eCnt){
+            nextSampleIndex = bposSample + (stscBox.getSTSCEntry(bposIndex + 1).firstChunk - entry.firstChunk) *
+                                                entry.samplesPerChunk;
+          }else{
+            nextSampleIndex = stszBox.getSampleCount();
+          }
+          // If the next chunk starts with a higher sample than we want, we can stop here
+          if (nextSampleIndex > index){break;}
+          bposSample = nextSampleIndex;
+          ++bposIndex;
+        }
+
+        // Find the chunk index the sample is in
+        uint64_t chunkIndex = (entry.firstChunk - 1) + ((index - bposSample) / entry.samplesPerChunk);
+        // Set offset to position of start of this chunk
+        *byteOffset = (stco64 ? co64Box.getChunkOffset(chunkIndex) : stcoBox.getChunkOffset(chunkIndex));
+        // Increase the offset by all samples in the chunk we already passed to arrive at our current sample
+        uint64_t sampleStart = bposSample + (chunkIndex - (entry.firstChunk - 1)) * entry.samplesPerChunk;
+        // The counter is 64-bit like the sample indices it is compared against
+        for (uint64_t j = sampleStart; j < index; j++){*byteOffset += stszBox.getEntrySize(j);}
+      }
+
+      // Look up byte length of packet, if requested
+      if (byteLen){
+        *byteLen = stszBox.getEntrySize(index);
+      }
+
+      // Specifically for text tracks, remove the 2-byte header if possible
+      if (byteOffset && byteLen && *byteLen >= 2 && sType == "tx3g"){
+        *byteLen -= 2;
+        *byteOffset += 2;
+      }
+    }else{
+      // Reading from TRAF boxes
+      size_t skipped = 0;
+      for (std::deque<TRAF>::const_iterator t = trafs.begin(); t != trafs.end(); ++t){
+        size_t firstTRAFIndex = skipped;
+        std::deque<TRUN> runs = ((TRAF)(*t)).getChildren<TRUN>();
+        for (std::deque<TRUN>::const_iterator r = runs.begin(); r != runs.end(); ++r){
+          uint32_t count = r->getSampleInformationCount();
+          if (index >= skipped + count){
+            skipped += count;
+            continue;
+          }
+          // Okay, our index is inside this TRUN!
+          // Let's pull the TFHD box into this as well...
+          TFHD tfhd = ((TRAF)(*t)).getChild<TFHD>();
+          trunSampleInformation si = r->getSampleInformation(index - skipped, &tfhd);
+          if (byteOffset){
+            size_t offset = 0;
+            if (tfhd.getDefaultBaseIsMoof()){
+              offset += moofPos;
+            }
+            if (r->getFlags() & MP4::trundataOffset){
+              offset += r->getDataOffset();
+              // Add the sizes of all samples in this TRUN that precede ours
+              size_t target = index - skipped;
+              for (size_t i = 0; i < target; ++i){
+                offset += r->getSampleInformation(i, &tfhd).sampleSize;
+              }
+            }else{
+              FAIL_MSG("Unimplemented: trun box does not contain a data offset!");
+            }
+            *byteOffset = offset;
+          }
+          if (time){
+            // If we went backwards, reset our current position
+            if (!index || index < timeSample){
+              timeIndex = timeFirstSample = timeSample = timeExtra = 0;
+              TFDT tfdt = ((TRAF)(*t)).getChild<TFDT>();
+              timeTotal = tfdt.getBaseMediaDecodeTime();
+            }
+            std::deque<TRUN>::const_iterator runIt = runs.begin();
+            uint32_t locCount = runIt->getSampleInformationCount();
+            size_t locSkipped = firstTRAFIndex;
+            while (timeSample < index){
+              // Most common case: timeSample is in the current TRUN box
+              if (timeSample >= skipped && timeSample < skipped + count){
+                trunSampleInformation i = r->getSampleInformation(timeSample - skipped, &tfhd);
+                increaseTime(i.sampleDuration);
+                continue;
+              }
+              // Less common case: everything else
+              // Ensure "runIt" points towards the TRUN box that index "timeSample" is in
+              while (runIt != runs.end() && timeSample >= locSkipped + locCount){
+                locSkipped += locCount;
+                ++runIt;
+                // Only read the count of a real TRUN; the end iterator must never be dereferenced
+                locCount = (runIt != runs.end()) ? runIt->getSampleInformationCount() : 0;
+              }
+              // Abort increase if we can't find the box. This _should_ never happen...
+              if (runIt == runs.end()){
+                WARN_MSG("Attempted to read time information from a TRAF box that did not contain the sample we're reading!");
+                break;
+              }
+              // Cool, now we know it's valid, increase the time accordingly.
+              trunSampleInformation i = runIt->getSampleInformation(timeSample - locSkipped, &tfhd);
+              increaseTime(i.sampleDuration);
+            }
+            *time = (timeTotal * 1000) / timeScale;
+          }
+          if (byteLen){
+            *byteLen = si.sampleSize;
+          }
+          if (timeOffset){
+            *timeOffset = (si.sampleOffset * 1000) / timeScale;
+          }
+          if (keyFrame){
+            *keyFrame = !(si.sampleFlags & MP4::noKeySample);
+          }
+          return;
+        }
+      }
+    }
+
+  }
+
+
+} // namespace MP4
+
--- a/lib/mp4_stream.h
+++ b/lib/mp4_stream.h
@ -0,0 +1,119 @@
+#include "dtsc.h"
+#include "util.h"
+#include "mp4_generic.h"
+
+namespace MP4{
+
+  /// Describes a single part (sample) of a track: when it plays, where its bytes are and
+  /// which part of which track it is. Instances sort by time, then trackID, then bpos,
+  /// so an ordered container of PartTime yields the earliest part first.
+  class PartTime{
+  public:
+    /// Zero-initializes every field; keyframe starts out false.
+    PartTime() : time(0), duration(0), offset(0), trackID(0), bpos(0), size(0), index(0), keyframe(false){}
+    /// Strict weak ordering on (time, trackID, bpos); the other fields do not take part.
+    bool operator<(const PartTime &rhs) const{
+      if (time < rhs.time){return true;}
+      if (time > rhs.time){return false;}
+      if (trackID < rhs.trackID){return true;}
+      return (trackID == rhs.trackID && bpos < rhs.bpos);
+    }
+    uint64_t time;     ///< Timestamp of the part
+    uint64_t duration; ///< Duration of the part
+    int32_t offset;    ///< Time offset of the part
+    size_t trackID;    ///< Track this part belongs to
+    uint64_t bpos;     ///< Byte position of the part
+    uint32_t size;     ///< Byte length of the part
+    uint64_t index;    ///< Part number within its track
+    bool keyframe;     ///< Whether the part is a keyframe
+  };
+
+
+  /// Holds the header information of a single MP4 track and answers per-part queries
+  /// (byte position, size, time, time offset, keyframe-ness) from it.
+  /// Works either from the MOOV sample tables read via read(TRAK) or, once nextMoof() has
+  /// enabled TRAF mode, from the TRAF boxes supplied via read(TRAF).
+  class TrackHeader{
+  public:
+    TrackHeader();
+
+    /// Reads (new) track header information for processing
+    void read(TRAK &trakBox);
+    /// Reads (new) track header information for processing
+    void read(TRAF &trafBox);
+
+    /// Signal that we're going to be reading the next moof box now.
+    /// Wipes internal TRAF boxes, ensures TRAF mode is enabled so no reads happen from MOOV headers anymore.
+    void nextMoof();
+
+    /// Switch back to non-moof reading mode, disabling TRAF mode and wiping all TRAF boxes
+    void revertToMoov();
+
+    /// Returns true if we know how to parse this track, false otherwise
+    bool compatible() const {return isCompatible;}
+
+    /// Retrieves the information associated with a specific part (=frame).
+    /// All pointer arguments are optional outputs; a null pointer skips that lookup.
+    /// moofPos is only used in TRAF mode, as the base for byte offsets relative to the moof box.
+    void getPart(uint64_t index, uint64_t * byteOffset = 0, uint32_t * byteLen = 0, uint64_t * time = 0, int32_t * timeOffset = 0, bool * keyFrame = 0, uint64_t moofPos = 0);
+
+    /// Returns the number of parts this track header contains
+    uint64_t size() const;
+
+    // Information about the track. Public for convenience, but setting them has no effect.
+    // The exception is sType, which affects processing of the data in some cases and should not be written to.
+    // All of these are filled by the read() function when reading an MP4::TRAK box.
+    size_t trackId; ///< MP4-internal ID for this track
+    uint64_t timeScale; ///< Timescale in units per second
+    std::string sType; ///< MP4-internal codec name for this track - do not write to externally!
+    std::string codec; ///< Mist codec name for this track
+    std::string trackType; ///< Which Mist-compatible track type this is
+    std::string initData; ///< Initialization data for the track, in Mist-compatible format
+    std::string lang; ///< Language of the track
+    uint32_t vidWidth, vidHeight; ///< Video dimensions; zero when none were found
+    uint32_t audChannels, audRate, audSize; ///< Audio channel count, sample rate and sample size; zero for non-audio entries
+
+  private:
+    /// Internal function that increases the time of the current part to the next part
+    void increaseTime(uint32_t delta);
+
+    // next variables are needed for the stsc/stco loop
+    uint64_t bposIndex; ///< Current read index in stsc box
+    uint64_t bposSample; ///< First sample number in current chunk entry
+    // next variables are needed for the stts loop
+    uint64_t timeIndex; ///< Index in STTS box
+    uint64_t timeSample;   ///< Sample counter for STTS box
+    uint64_t timeFirstSample;   ///< First sample in STTS box entry
+    uint64_t timeTotal; ///< Total timestamp for STTS box
+    uint64_t timeExtra; ///< Extra timestamp for STTS box
+    // next variables are needed for the ctts loop
+    uint64_t offsetIndex; ///< Index in CTTS box
+    uint64_t offsetSample; ///< First sample number in CTTS entry
+    // next variables are needed for the stss loop
+    uint64_t keyIndex; ///< Index in stss box
+    uint64_t keySample; ///< First sample number in stss entry
+
+    STSS stssBox; ///< keyframe list
+    STCO stcoBox; ///< positions of chunks (32-bit)
+    CO64 co64Box; ///< positions of chunks (64-bit)
+    STSZ stszBox; ///< packet sizes
+    STTS sttsBox; ///< packet durations
+    CTTS cttsBox; ///< packet time offsets (optional)
+    STSC stscBox; ///< packet count per chunk
+    std::deque<TRAF> trafs; ///< Current traf boxes, if any
+    bool stco64; // 64 bit chunk offsets?
+    bool hasOffsets; ///< Are time offsets present?
+    bool hasKeys; ///< Are keyframes listed?
+    bool isVideo; ///< Is this a video track?
+    bool isCompatible; ///< True if Mist supports this track type
+    bool trafMode; ///< True if we are ignoring the moov headers and only looking at traf headers
+  };
+
+  /// Reader that hands out the parts of several tracks in sorted (earliest first) order.
+  /// NOTE(review): apart from hasPacket() and getEarliestPacket(), the member functions
+  /// are empty or return a constant in mp4_stream.cpp - this looks like work in progress.
+  class Stream{
+  public:
+    Stream();
+    ~Stream();
+    /// Currently does nothing with the given pointer.
+    void open(Util::ResizeablePointer & ptr);
+    /// Currently always returns false.
+    bool hasPacket(size_t tid) const;
+    /// Returns true while at least one part is queued.
+    bool hasPacket() const;
+    /// Currently does nothing.
+    void getPacket(size_t tid, DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx);
+    /// Currently always returns INVALID_TRACK_ID.
+    uint32_t getEarliestPID();
+    /// Pops the earliest queued part into pack, setting thisTime and thisIdx to its time and track ID.
+    /// Nulls pack when nothing is queued.
+    void getEarliestPacket(DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx);
+    /// Currently does nothing.
+    void initializeMetadata(DTSC::Meta &meta, size_t tid = INVALID_TRACK_ID, size_t mappingId = INVALID_TRACK_ID);
+  private:
+    std::map<size_t, TrackHeader> trkHdrs; ///< Track headers, looked up by the trackID of a queued part
+    std::map<size_t, std::string> codecs; ///< Not referenced by the visible implementation; presumably codec name per track
+    std::set<MP4::PartTime> curPositions; ///< Queue of upcoming parts, ordered by PartTime::operator<
+    MOOV moovBox; ///< Not referenced by the visible implementation
+    Box mdatBox; ///< Not referenced by the visible implementation
+  };
+
+} // namespace MP4
+