501 lines
18 KiB
C++
501 lines
18 KiB
C++
#include "mp4_stream.h"
|
|
#include "h264.h"
|
|
#include "mp4_dash.h"
|
|
|
|
|
|
namespace MP4{
|
|
|
|
/// Default constructor. Performs no explicit initialisation.
Stream::Stream(){}
|
|
|
|
/// Destructor. Performs no explicit clean-up.
Stream::~Stream(){}
|
|
|
|
/// Placeholder: the body is empty, so the given buffer is currently ignored.
/// \param ptr Buffer handed to the stream (unused for now).
void Stream::open(Util::ResizeablePointer & ptr){
  (void)ptr; // Not used yet
}
|
|
|
|
/// Per-track availability check. Not implemented: always reports false,
/// regardless of the track ID passed in.
/// \param tid Track ID to check (currently ignored).
bool Stream::hasPacket(size_t tid) const{
  (void)tid; // Not used yet
  return false;
}
|
|
|
|
bool Stream::hasPacket() const{
|
|
return !curPositions.empty();
|
|
}
|
|
|
|
void Stream::getPacket(size_t tid, DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){
|
|
}
|
|
|
|
uint32_t Stream::getEarliestPID(){
|
|
return INVALID_TRACK_ID;
|
|
}
|
|
|
|
/// Pops the first entry of curPositions, fills `pack` from it and re-queues the
/// following part of the same track, if that track has one.
/// NOTE(review): "earliest" relies on the ordering of curPositions, which is defined elsewhere.
/// \param pack Receives the packet; nulled when nothing is queued.
/// \param thisTime Set to the time of the popped part (untouched when nothing is queued).
/// \param thisIdx Set to the track ID of the popped part (untouched when nothing is queued).
void Stream::getEarliestPacket(DTSC::Packet &pack, uint64_t &thisTime, size_t &thisIdx){
  // Nothing queued: hand back an empty packet
  if (curPositions.empty()){
    pack.null();
    return;
  }

  // Pop from the set; we keep working on a copy, so erasing the original is safe
  MP4::PartTime part = *curPositions.begin();
  curPositions.erase(curPositions.begin());

  thisTime = part.time;
  thisIdx = part.trackID;
  // The data pointer argument is a literal 0 here (buffer-based lookup is not wired up)
  pack.genericFill(part.time, part.offset, part.trackID, 0, part.size, 0, part.keyframe);

  // Move on to the next part of this track; done if the track has no further parts
  ++part.index;
  if (part.index >= trkHdrs[part.trackID].size()){return;}

  // Look up the details of the next part and queue it
  trkHdrs[part.trackID].getPart(part.index, &part.bpos, &part.size, &part.time, &part.offset, &part.keyframe);
  curPositions.insert(part);
}
|
|
|
|
/// Placeholder: the body is empty, so `meta` is not modified.
/// \param meta Metadata object to initialise (left untouched).
/// \param tid Track ID (currently ignored).
/// \param mappingId Mapping ID (currently ignored).
void Stream::initializeMetadata(DTSC::Meta &meta, size_t tid, size_t mappingId){
  (void)meta;
  (void)tid;
  (void)mappingId;
}
|
|
|
|
/// Creates an empty track header: all lookup cursors at zero, all flags false,
/// all sample table boxes cleared, TRAF mode off and track ID zero.
TrackHeader::TrackHeader(){
  // Timestamp cursor
  timeExtra = 0;
  timeTotal = 0;
  timeFirstSample = 0;
  timeSample = 0;
  timeIndex = 0;

  // Byte position cursor
  bposSample = 0;
  bposIndex = 0;

  // Time offset cursor
  offsetSample = 0;
  offsetIndex = 0;

  // Keyframe cursor
  keySample = 0;
  keyIndex = 0;

  // Track properties
  hasOffsets = false;
  hasKeys = false;
  isVideo = false;

  // Sample table boxes
  sttsBox.clear();
  cttsBox.clear();
  stszBox.clear();
  stcoBox.clear();
  co64Box.clear();
  stscBox.clear();
  stssBox.clear();

  // Reading mode and identity
  stco64 = false;
  trafMode = false;
  trackId = 0;
}
|
|
|
|
/// Signals the start of a new MOOF box: resets the time, byte position and time offset
/// cursors, enables TRAF mode and drops all stored TRAF boxes.
/// The keyframe cursor is not touched here.
void TrackHeader::nextMoof(){
  // Timestamp cursor
  timeExtra = 0;
  timeTotal = 0;
  timeFirstSample = 0;
  timeSample = 0;
  timeIndex = 0;

  // Byte position cursor
  bposSample = 0;
  bposIndex = 0;

  // Time offset cursor
  offsetSample = 0;
  offsetIndex = 0;

  trafs.clear();
  trafMode = true;
}
|
|
|
|
/// Switch back to non-moof reading mode, disabling TRAF mode and wiping all TRAF boxes
|
|
void TrackHeader::revertToMoov(){
|
|
timeIndex = timeSample = timeFirstSample = timeTotal = timeExtra = 0;
|
|
bposIndex = bposSample = 0;
|
|
offsetIndex = offsetSample = 0;
|
|
keyIndex = keySample = 0;
|
|
|
|
trafMode = false;
|
|
trafs.clear();
|
|
}
|
|
|
|
/// Loads this track header from a TRAK box.
/// Copies the sample table boxes (stts, ctts, stsz, stco/co64, stsc, stss), reads time scale,
/// language, track ID and dimensions, and derives track type, codec, compatibility flag and
/// init data from the handler type and the first sample description entry.
/// Note: trackType and initData are only assigned for recognised handlers/codecs; they are
/// not cleared at the start of this call.
/// \param trakBox The TRAK box to read from.
void TrackHeader::read(TRAK &trakBox){
  // Start from a clean slate for the properties that may stay unset below
  codec.clear();
  vidWidth = 0;
  vidHeight = 0;
  audChannels = 0;
  audRate = 0;
  audSize = 0;

  // Media header: time scale and language
  MDIA mdiaBox = trakBox.getChild<MDIA>();
  MDHD mdhdBox = mdiaBox.getChild<MDHD>();
  timeScale = mdhdBox.getTimeScale();
  lang = mdhdBox.getLanguage();

  // Track header: ID, plus dimensions when a width is present
  TKHD tkhd = trakBox.getChild<TKHD>();
  trackId = tkhd.getTrackID();
  if (tkhd.getWidth()){
    vidWidth = tkhd.getWidth();
    vidHeight = tkhd.getHeight();
  }

  // Sample table: keep private copies of every box the lookups need
  STBL stblBox = mdiaBox.getChild<MINF>().getChild<STBL>();
  sttsBox.copyFrom(stblBox.getChild<STTS>());
  cttsBox.copyFrom(stblBox.getChild<CTTS>());
  stszBox.copyFrom(stblBox.getChild<STSZ>());
  stcoBox.copyFrom(stblBox.getChild<STCO>());
  co64Box.copyFrom(stblBox.getChild<CO64>());
  stscBox.copyFrom(stblBox.getChild<STSC>());
  stssBox.copyFrom(stblBox.getChild<STSS>());

  // The optional boxes are detected by checking the type of what was copied
  hasOffsets = cttsBox.isType("ctts");
  stco64 = co64Box.isType("co64");
  hasKeys = stssBox.isType("stss");

  // Only the first sample description entry is considered
  Box sEntryBox = stblBox.getChild<MP4::STSD>().getEntry(0);
  sType = sEntryBox.getType();

  // Map the handler type onto a track type
  std::string handler = mdiaBox.getChild<MP4::HDLR>().getHandlerType();
  isVideo = (handler == "vide");
  if (isVideo){
    trackType = "video";
  }else if (handler == "soun"){
    trackType = "audio";
  }else if (handler == "sbtl"){
    trackType = "meta";
  }else{
    INFO_MSG("Unsupported handler: %s", handler.c_str());
  }

  // Codec detection; stays incompatible unless one of the branches below matches.
  // The sample entry types are mutually exclusive, so at most one branch runs.
  isCompatible = false;

  if (sType == "avc1" || sType == "h264" || sType == "mp4v"){
    codec = "H264";
    isCompatible = true;
    VisualSampleEntry &visual = (VisualSampleEntry &)sEntryBox;
    // Fall back to the sample entry dimensions if the track header had none
    if (!vidWidth){
      vidWidth = visual.getWidth();
      vidHeight = visual.getHeight();
    }
    // Check the boxes returned by getCLAP() and getPASP(); any avcC among them provides
    // the init data (the second one wins if both match)
    MP4::Box candidate = visual.getCLAP();
    if (candidate.isType("avcC")){initData.assign(candidate.payload(), candidate.payloadSize());}
    candidate = visual.getPASP();
    if (candidate.isType("avcC")){initData.assign(candidate.payload(), candidate.payloadSize());}
    // Still no dimensions: read them from the SPS inside the init data
    if (!vidWidth){
      h264::sequenceParameterSet sps;
      sps.fromDTSCInit(initData);
      h264::SPSMeta spsChar = sps.getCharacteristics();
      vidWidth = spsChar.width;
      vidHeight = spsChar.height;
    }
  }else if (sType == "hev1" || sType == "hvc1"){
    codec = "HEVC";
    isCompatible = true;
    MP4::VisualSampleEntry &visual = (MP4::VisualSampleEntry &)sEntryBox;
    if (!vidWidth){
      vidWidth = visual.getWidth();
      vidHeight = visual.getHeight();
    }
    // Same lookup as for H264, but for an hvcC box
    MP4::Box candidate = visual.getCLAP();
    if (candidate.isType("hvcC")){initData.assign(candidate.payload(), candidate.payloadSize());}
    candidate = visual.getPASP();
    if (candidate.isType("hvcC")){initData.assign(candidate.payload(), candidate.payloadSize());}
  }else if (sType == "av01"){
    codec = "AV1";
    isCompatible = true;
    MP4::VisualSampleEntry &visual = (MP4::VisualSampleEntry &)sEntryBox;
    if (!vidWidth){
      vidWidth = visual.getWidth();
      vidHeight = visual.getHeight();
    }
    // Same lookup as for H264, but for an av1C box
    MP4::Box candidate = visual.getCLAP();
    if (candidate.isType("av1C")){initData.assign(candidate.payload(), candidate.payloadSize());}
    candidate = visual.getPASP();
    if (candidate.isType("av1C")){initData.assign(candidate.payload(), candidate.payloadSize());}
  }else if (sType == "mp4a" || sType == "aac " || sType == "ac-3"){
    MP4::AudioSampleEntry &audio = (MP4::AudioSampleEntry &)sEntryBox;
    audRate = audio.getSampleRate();
    audChannels = audio.getChannelCount();
    audSize = 16; /// \TODO Actually get this from somewhere, probably..?

    if (sType == "ac-3"){
      codec = "AC3";
      isCompatible = true;
    }else{
      // Codec and init data come from an esds box, either directly or nested in a wave box
      MP4::Box codingBox = audio.getCodecBox();
      if (codingBox.getType() == "esds"){
        MP4::ESDS & esdsBox = (MP4::ESDS &)codingBox;
        codec = esdsBox.getCodec();
        isCompatible = true;
        initData = esdsBox.getInitData();
      }
      if (codingBox.getType() == "wave"){
        MP4::WAVE & waveBox = (MP4::WAVE &)codingBox;
        for (size_t c = 0; c < waveBox.getContentCount(); ++c){
          MP4::Box content = waveBox.getContent(c);
          if (content.getType() != "esds"){continue;}
          MP4::ESDS & esdsBox = (MP4::ESDS &)content;
          codec = esdsBox.getCodec();
          isCompatible = true;
          initData = esdsBox.getInitData();
        }
      }
    }
  }else if (sType == "tx3g"){
    // Plain text subtitles
    codec = "subtitle";
    isCompatible = true;
  }
}
|
|
|
|
/// Stores a copy of the given TRAF box at the end of the list of TRAF boxes.
/// Logs a warning when TRAF mode is not active (i.e. nextMoof() was not called first),
/// but stores the box regardless.
/// \param trafBox The TRAF box to copy.
void TrackHeader::read(TRAF &trafBox){
  // Warn anyone that forgot to call nextMoof(), hopefully preventing future issues
  if (!trafMode){
    WARN_MSG("Reading TRAF box header without signalling start of next MOOF box first!");
  }
  // Append an empty box first, then copy the contents into the stored instance
  trafs.push_back(TRAF());
  trafs.back().copyFrom(trafBox);
}
|
|
|
|
void TrackHeader::increaseTime(uint32_t delta){
|
|
// Calculate millisecond-time for current timestamp
|
|
uint64_t timePrev = (timeTotal * 1000) / timeScale;
|
|
timeTotal += delta;
|
|
|
|
//Undo time shifts as much as possible
|
|
if (timeExtra){
|
|
timeTotal -= timeExtra;
|
|
timeExtra = 0;
|
|
}
|
|
|
|
//Make sure our timestamps go up by at least 1ms for every packet
|
|
if (timePrev >= (uint64_t)((timeTotal * 1000) / timeScale)){
|
|
uint32_t wantSamples = ((timePrev+1) * timeScale) / 1000;
|
|
timeExtra += wantSamples - timeTotal;
|
|
timeTotal = wantSamples;
|
|
}
|
|
++timeSample;
|
|
}
|
|
|
|
|
|
uint64_t TrackHeader::size() const {
|
|
if (!trafMode){
|
|
return (stszBox ? stszBox.getSampleCount() : 0);
|
|
}
|
|
if (!trafs.size()){return 0;}
|
|
uint64_t parts = 0;
|
|
for (std::deque<TRAF>::const_iterator t = trafs.begin(); t != trafs.end(); ++t){
|
|
std::deque<TRUN> runs = ((TRAF)(*t)).getChildren<TRUN>();
|
|
for (std::deque<TRUN>::const_iterator r = runs.begin(); r != runs.end(); ++r){
|
|
parts += r->getSampleInformationCount();
|
|
}
|
|
}
|
|
return parts;
|
|
}
|
|
|
|
/// Retrieves the information associated with a specific part (=frame).
|
|
/// The index is the zero-based part number, all other arguments are optional and if non-zero will be filled.
|
|
void TrackHeader::getPart(uint64_t index, uint64_t * byteOffset, uint32_t * byteLen, uint64_t * time, int32_t * timeOffset, bool * keyFrame, uint64_t moofPos){
|
|
// Switch between reading TRAF boxes or global headers
|
|
if (!trafMode){
|
|
// Reading global headers
|
|
|
|
// Calculate time, if requested
|
|
if (time){
|
|
// If we went backwards, reset our current position
|
|
if (index < timeSample){
|
|
timeIndex = timeFirstSample = timeSample = timeExtra = timeTotal = 0;
|
|
}
|
|
// Find the packet count per chunk entry for this sample
|
|
uint64_t eCnt = sttsBox.getEntryCount();
|
|
STTSEntry entry;
|
|
while (timeIndex < eCnt){
|
|
entry = sttsBox.getSTTSEntry(timeIndex);
|
|
// check where the next index starts
|
|
uint64_t nextSampleIndex = timeFirstSample + entry.sampleCount;
|
|
// If the next chunk starts with a higher sample than we want, we can stop here
|
|
if (nextSampleIndex > index){break;}
|
|
timeFirstSample = nextSampleIndex;
|
|
// Increase timestamp by delta for each sample with the same delta
|
|
while (timeSample < nextSampleIndex){increaseTime(entry.sampleDelta);}
|
|
++timeIndex;
|
|
}
|
|
|
|
// Inside the samples with the same delta, we may still need to increase the timestamp.
|
|
while (timeSample < index){increaseTime(entry.sampleDelta);}
|
|
*time = (timeTotal * 1000) / timeScale;
|
|
}
|
|
|
|
// Look up time offset, if requested and available
|
|
if (timeOffset){
|
|
if (hasOffsets){
|
|
// If we went backwards, reset our current position
|
|
if (index < offsetSample){
|
|
offsetIndex = offsetSample = 0;
|
|
}
|
|
// Find the packet count per chunk entry for this sample
|
|
uint64_t eCnt = cttsBox.getEntryCount();
|
|
CTTSEntry entry;
|
|
while (offsetIndex < eCnt){
|
|
entry = cttsBox.getCTTSEntry(offsetIndex);
|
|
// check where the next index starts
|
|
uint64_t nextSampleIndex = offsetSample + entry.sampleCount;
|
|
// If the next chunk starts with a higher sample than we want, we can stop here
|
|
if (nextSampleIndex > index){break;}
|
|
offsetSample = nextSampleIndex;
|
|
++offsetIndex;
|
|
}
|
|
*timeOffset = (entry.sampleOffset * 1000) / timeScale;
|
|
}else{
|
|
// Default to zero if there are no offsets for this track
|
|
*timeOffset = 0;
|
|
}
|
|
}
|
|
|
|
// Look up keyframe-ness, if requested and available
|
|
if (keyFrame){
|
|
if (!isVideo){
|
|
// Non-video tracks are never keyframes
|
|
*keyFrame = false;
|
|
}else{
|
|
// Video tracks with keys follow them
|
|
if (hasKeys){
|
|
// If we went backwards, reset our current position
|
|
if (index < keySample){
|
|
keyIndex = keySample = 0;
|
|
}
|
|
// Find the packet count per chunk entry for this sample
|
|
uint64_t eCnt = stssBox.getEntryCount();
|
|
while (keyIndex < eCnt){
|
|
// check where the next index starts
|
|
uint64_t nextSampleIndex;
|
|
if (keyIndex + 1 < eCnt){
|
|
nextSampleIndex = stssBox.getSampleNumber(keyIndex + 1) - 1;
|
|
}else{
|
|
nextSampleIndex = stszBox.getSampleCount();
|
|
}
|
|
// If the next key has a higher sample than we want, we can stop here
|
|
if (nextSampleIndex > index){break;}
|
|
keySample = nextSampleIndex;
|
|
++keyIndex;
|
|
}
|
|
*keyFrame = (keySample == index);
|
|
}else{
|
|
// Everything is a keyframe if there are no keys listed for a video track
|
|
*keyFrame = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate byte position of packet, if requested
|
|
if (byteOffset){
|
|
// If we went backwards, reset our current position
|
|
if (index < bposSample){
|
|
bposIndex = bposSample = 0;
|
|
}
|
|
// Find the packet count per chunk entry for this sample
|
|
uint64_t eCnt = stscBox.getEntryCount();
|
|
STSCEntry entry;
|
|
while (bposIndex < eCnt){
|
|
entry = stscBox.getSTSCEntry(bposIndex);
|
|
// check where the next index starts
|
|
uint64_t nextSampleIndex;
|
|
if (bposIndex + 1 < eCnt){
|
|
nextSampleIndex = bposSample + (stscBox.getSTSCEntry(bposIndex + 1).firstChunk - entry.firstChunk) *
|
|
entry.samplesPerChunk;
|
|
}else{
|
|
nextSampleIndex = stszBox.getSampleCount();
|
|
}
|
|
// If the next chunk starts with a higher sample than we want, we can stop here
|
|
if (nextSampleIndex > index){break;}
|
|
bposSample = nextSampleIndex;
|
|
++bposIndex;
|
|
}
|
|
|
|
// Find the chunk index the sample is in
|
|
uint64_t chunkIndex = (entry.firstChunk - 1) + ((index - bposSample) / entry.samplesPerChunk);
|
|
// Set offset to position of start of this chunk
|
|
*byteOffset = (stco64 ? co64Box.getChunkOffset(chunkIndex) : stcoBox.getChunkOffset(chunkIndex));
|
|
// Increase the offset by all samples in the chunk we already passed to arrive at our current sample
|
|
uint64_t sampleStart = bposSample + (chunkIndex - (entry.firstChunk - 1)) * entry.samplesPerChunk;
|
|
for (int j = sampleStart; j < index; j++){*byteOffset += stszBox.getEntrySize(j);}
|
|
}
|
|
|
|
// Look up byte length of packet, if requested
|
|
if (byteLen){
|
|
*byteLen = stszBox.getEntrySize(index);
|
|
}
|
|
|
|
// Specifically for text tracks, remove the 2-byte header if possible
|
|
if (byteOffset && byteLen && *byteLen >= 2 && sType == "tx3g"){
|
|
*byteLen -= 2;
|
|
*byteOffset += 2;
|
|
}
|
|
}else{
|
|
// Reading from TRAF boxes
|
|
size_t skipped = 0;
|
|
for (std::deque<TRAF>::const_iterator t = trafs.begin(); t != trafs.end(); ++t){
|
|
size_t firstTRAFIndex = skipped;
|
|
std::deque<TRUN> runs = ((TRAF)(*t)).getChildren<TRUN>();
|
|
for (std::deque<TRUN>::const_iterator r = runs.begin(); r != runs.end(); ++r){
|
|
uint32_t count = r->getSampleInformationCount();
|
|
if (index >= skipped + count){
|
|
skipped += count;
|
|
continue;
|
|
}
|
|
// Okay, our index is inside this TRUN!
|
|
// Let's pull the TFHD box into this as well...
|
|
TFHD tfhd = ((TRAF)(*t)).getChild<TFHD>();
|
|
trunSampleInformation si = r->getSampleInformation(index - skipped, &tfhd);
|
|
if (byteOffset){
|
|
size_t offset = 0;
|
|
if (tfhd.getDefaultBaseIsMoof()){
|
|
offset += moofPos;
|
|
}
|
|
if (r->getFlags() & MP4::trundataOffset){
|
|
offset += r->getDataOffset();
|
|
size_t target = index - skipped;
|
|
for (size_t i = 0; i < target; ++i){
|
|
offset += r->getSampleInformation(i, &tfhd).sampleSize;
|
|
}
|
|
}else{
|
|
FAIL_MSG("Unimplemented: trun box does not contain a data offset!");
|
|
}
|
|
*byteOffset = offset;
|
|
}
|
|
if (time){
|
|
// If we went backwards, reset our current position
|
|
if (!index || index < timeSample){
|
|
timeIndex = timeFirstSample = timeSample = timeExtra = 0;
|
|
TFDT tfdt = ((TRAF)(*t)).getChild<TFDT>();
|
|
timeTotal = tfdt.getBaseMediaDecodeTime();
|
|
}
|
|
std::deque<TRUN>::const_iterator runIt = runs.begin();
|
|
uint32_t locCount = runIt->getSampleInformationCount();
|
|
size_t locSkipped = firstTRAFIndex;
|
|
while (timeSample < index){
|
|
// Most common case: timeSample is in the current TRUN box
|
|
if (timeSample >= skipped && timeSample < skipped + count){
|
|
trunSampleInformation i = r->getSampleInformation(timeSample - skipped, &tfhd);
|
|
increaseTime(i.sampleDuration);
|
|
continue;
|
|
}
|
|
// Less common case: everything else
|
|
// Ensure "runIt" points towards the TRUN box that index "timeSample" is in
|
|
while (timeSample >= locSkipped + locCount && runIt != runs.end()){
|
|
locSkipped += locCount;
|
|
runIt++;
|
|
locCount = runIt->getSampleInformationCount();
|
|
}
|
|
// Abort increase if we can't find the box. This _should_ never happen...
|
|
if (runIt == runs.end()){
|
|
WARN_MSG("Attempted to read time information from a TRAF box that did not contain the sample we're reading!");
|
|
break;
|
|
}
|
|
// Cool, now we know it's valid, increase the time accordingly.
|
|
trunSampleInformation i = runIt->getSampleInformation(timeSample - locSkipped, &tfhd);
|
|
increaseTime(i.sampleDuration);
|
|
}
|
|
*time = (timeTotal * 1000) / timeScale;
|
|
}
|
|
if (byteLen){
|
|
*byteLen = si.sampleSize;
|
|
}
|
|
if (timeOffset){
|
|
*timeOffset = (si.sampleOffset * 1000) / timeScale;
|
|
}
|
|
if (keyFrame){
|
|
*keyFrame = !(si.sampleFlags & MP4::noKeySample);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
} // namespace MP4
|
|
|