From ccee512b3d731d4fbf9e58b28c1573be951bb856 Mon Sep 17 00:00:00 2001 From: Siddarth Tegginamani Date: Wed, 5 Jan 2022 15:03:38 +0100 Subject: [PATCH] Bug Fix: CMAF DASH playback works - syntax error fixed - removed unnecessary track id simplification --- lib/cmaf.cpp | 54 +++++++++++++++----------------------- src/output/output_cmaf.cpp | 38 ++++++++++++++++----------- 2 files changed, 43 insertions(+), 49 deletions(-) diff --git a/lib/cmaf.cpp b/lib/cmaf.cpp index bb59f9f2..266b0353 100644 --- a/lib/cmaf.cpp +++ b/lib/cmaf.cpp @@ -13,14 +13,6 @@ namespace CMAF{ return payloadSize; } - size_t simplifiedTrackId(const DTSC::Meta & M, size_t idx) { - std::string type = M.getType(idx); - if (type == "video") {return 1;} - if (type == "audio") {return 2;} - if (type == "meta") {return 3;} - return idx; - } - std::string trackHeader(const DTSC::Meta &M, size_t track, bool simplifyTrackIds){ std::string tType = M.getType(track); @@ -43,9 +35,6 @@ namespace CMAF{ MP4::TRAK trakBox; MP4::TKHD tkhdBox(M, track); - if (simplifyTrackIds){ - tkhdBox.setTrackID(simplifiedTrackId(M, track)); - } tkhdBox.setDuration(0); trakBox.setContent(tkhdBox, 0); @@ -87,7 +76,7 @@ namespace CMAF{ btrtBox.setAverageBitrate(M.getBps(track)); btrtBox.setMaxBitrate(M.getMaxBps(track)); - sampleEntry.setBoxEntry(sampleEntry.getBoxEntryCount(),btrtBox); + sampleEntry.setBoxEntry(sampleEntry.getBoxEntryCount(), btrtBox); stsdBox.setEntry(sampleEntry, 0); }else if (tType == "audio"){ MP4::AudioSampleEntry sampleEntry(M, track); @@ -96,7 +85,7 @@ namespace CMAF{ btrtBox.setAverageBitrate(M.getBps(track)); btrtBox.setMaxBitrate(M.getMaxBps(track)); - sampleEntry.setBoxEntry(sampleEntry.getBoxEntryCount(),btrtBox); + sampleEntry.setBoxEntry(sampleEntry.getBoxEntryCount(), btrtBox); stsdBox.setEntry(sampleEntry, 0); }else if (tType == "meta"){ MP4::TextSampleEntry sampleEntry(M, track); @@ -131,9 +120,6 @@ namespace CMAF{ } MP4::TREX trexBox(track + 1); - if (simplifyTrackIds){ - 
trexBox.setTrackID(simplifiedTrackId(M, track)); - } trexBox.setDefaultSampleDuration(1000); mvexBox.setContent(trexBox, M.getVod() ? 1 : 0); @@ -148,7 +134,8 @@ namespace CMAF{ MP4::SIDX sidxBox; sidxBox.setReferenceID(track + 1); sidxBox.setTimescale(1000); - sidxBox.setEarliestPresentationTime(keys.getTime(0) + parts.getOffset(0) - M.getFirstms(track)); + sidxBox.setEarliestPresentationTime(keys.getTime(0) + parts.getOffset(0) - + M.getFirstms(track)); for (size_t i = 0; i < fragments.getEndValid(); i++){ size_t firstKey = fragments.getFirstKey(i); @@ -156,9 +143,12 @@ namespace CMAF{ ((i + 1 < fragments.getEndValid()) ? fragments.getFirstKey(i + 1) : keys.getEndValid()); MP4::sidxReference refItem; - refItem.referencedSize = payloadSize(M, track, keys.getTime(firstKey), keys.getTime(endKey)) + keyHeaderSize(M, track, i) + 8; + refItem.referencedSize = + payloadSize(M, track, keys.getTime(firstKey), keys.getTime(endKey)) + + keyHeaderSize(M, track, i) + 8; refItem.subSegmentDuration = - (endKey == keys.getEndValid() ? M.getLastms(track) : keys.getTime(endKey)) - keys.getTime(firstKey); + (endKey == keys.getEndValid() ? M.getLastms(track) : keys.getTime(endKey)) - + keys.getTime(firstKey); refItem.sapStart = true; refItem.sapType = 16; refItem.sapDeltaTime = 0; @@ -209,7 +199,8 @@ namespace CMAF{ } /// Generates the 'moof' box for a DTSC::Key based CMAF fragment. 
- std::string keyHeader(const DTSC::Meta &M, size_t track, uint64_t startTime, uint64_t endTime, uint64_t segmentNum, bool simplifyTrackIds, bool UTCTime){ + std::string keyHeader(const DTSC::Meta &M, size_t track, uint64_t startTime, uint64_t endTime, + uint64_t segmentNum, bool simplifyTrackIds, bool UTCTime){ size_t firstPart = M.getPartIndex(startTime, track); size_t endPart = M.getPartIndex(endTime, track); @@ -218,10 +209,9 @@ namespace CMAF{ MP4::MFHD mfhdBox(segmentNum); moofBox.setContent(mfhdBox, 0); - std::set trunOrder; - //We use keyHeaderSize here to determine the relative offsets of the data in the 'mdat' box. + // We use keyHeaderSize here to determine the relative offsets of the data in the 'mdat' box. uint64_t relativeOffset = keyHeaderSize(M, track, startTime, endTime) + 8; sortPart temp; @@ -242,13 +232,11 @@ namespace CMAF{ tfhdBox.setFlags(MP4::tfhdSampleFlag | MP4::tfhdBaseIsMoof | MP4::tfhdSampleDesc); tfhdBox.setTrackID(track + 1); - if (simplifyTrackIds){ - tfhdBox.setTrackID(simplifiedTrackId(M, track)); - } tfhdBox.setDefaultSampleDuration(444); tfhdBox.setDefaultSampleSize(444); - tfhdBox.setDefaultSampleFlags((M.getType(track) == "video") ? (MP4::noIPicture | MP4::noKeySample) - : (MP4::isIPicture | MP4::isKeySample)); + tfhdBox.setDefaultSampleFlags((M.getType(track) == "video") + ? (MP4::noIPicture | MP4::noKeySample) + : (MP4::isIPicture | MP4::isKeySample)); tfhdBox.setSampleDescriptionIndex(1); trafBox.setContent(tfhdBox, 0); @@ -256,7 +244,8 @@ namespace CMAF{ if (M.getVod()){ tfdtBox.setBaseMediaDecodeTime(startTime - M.getFirstms(track)); }else{ - tfdtBox.setBaseMediaDecodeTime((UTCTime ? startTime + M.getBootMsOffset() + unixBootDiff : startTime)); + tfdtBox.setBaseMediaDecodeTime( + (UTCTime ? 
startTime + M.getBootMsOffset() + unixBootDiff : startTime)); } trafBox.setContent(tfdtBox, 1); @@ -277,15 +266,14 @@ namespace CMAF{ MP4::trunSampleInformation sampleInfo; sampleInfo.sampleSize = parts.getSize(it->partIndex); sampleInfo.sampleDuration = parts.getDuration(it->partIndex); - if (it == lastOne){ - sampleInfo.sampleDuration = endTime - it->time; - } + if (it == lastOne){sampleInfo.sampleDuration = endTime - it->time;} sampleInfo.sampleOffset = parts.getOffset(it->partIndex); trunBox.setSampleInformation(sampleInfo, trunOffset++); } }else{ - WARN_MSG("Empty CMAF header for track %zu: %" PRIu64 "-%" PRIu64 " contains no packets (first: %" PRIu64 - ", last: %" PRIu64 "), firstPart=%zu, lastPart=%zu", + WARN_MSG("Empty CMAF header for track %zu: %" PRIu64 "-%" PRIu64 + " contains no packets (first: %" PRIu64 ", last: %" PRIu64 + "), firstPart=%zu, lastPart=%zu", track, startTime, endTime, M.getFirstms(track), M.getLastms(track), firstPart, endPart); } diff --git a/src/output/output_cmaf.cpp b/src/output/output_cmaf.cpp index fa063488..756fceb8 100644 --- a/src/output/output_cmaf.cpp +++ b/src/output/output_cmaf.cpp @@ -489,32 +489,32 @@ namespace Mist{ } void OutCMAF::generateSegmentlist(size_t idx, std::stringstream &s, - void callBack(uint64_t, uint64_t, std::stringstream &, bool)){ - DTSC::Fragments fragments(M.fragments(idx)); + void dashSegmentCallBack(uint64_t, uint64_t, + std::stringstream &, bool)){ + // NOTE: Using the first valid track as the reference track for every segment list fixed playback. + // The reason is not yet understood; it looks like a nomenclature issue. + // TODO: Investigate against the spec and refactor accordingly. 
+ + size_t mainTrack = *M.getValidTracks().begin(); // M.mainTrack(); + + if (mainTrack == INVALID_TRACK_ID){return;} + DTSC::Fragments fragments(M.fragments(mainTrack)); uint32_t firstFragment = fragments.getFirstValid(); uint32_t lastFragment = fragments.getEndValid(); bool first = true; // skip the first two fragments if live if (M.getLive() && (lastFragment - firstFragment) > 6){firstFragment += 2;} - if (M.getType(idx) == "audio"){ - uint32_t mainTrack = M.mainTrack(); - if (mainTrack == INVALID_TRACK_ID){return;} - DTSC::Fragments f(M.fragments(mainTrack)); - uint64_t firstVidTime = M.getTimeForFragmentIndex(mainTrack, f.getFirstValid()); - firstFragment = M.getFragmentIndexForTime(idx, firstVidTime); - } - - DTSC::Keys keys(M.keys(idx)); + DTSC::Keys keys(M.keys(mainTrack)); for (; firstFragment < lastFragment; ++firstFragment){ uint32_t duration = fragments.getDuration(firstFragment); uint64_t starttime = keys.getTime(fragments.getFirstKey(firstFragment)); if (!duration){ if (M.getLive()){continue;}// skip last fragment when live - duration = M.getLastms(idx) - starttime; + duration = M.getLastms(mainTrack) - starttime; } - if (M.getVod()){starttime -= M.getFirstms(idx);} - callBack(starttime, duration, s, first); + if (M.getVod()){starttime -= M.getFirstms(mainTrack);} + dashSegmentCallBack(starttime, duration, s, first); first = false; } @@ -662,7 +662,7 @@ namespace Mist{ it++){ if (M.getType(it->first) == "video"){vTracks.insert(it->first);} if (M.getType(it->first) == "audio"){aTracks.insert(it->first);} - if (M.getType(it->first) == "subtitle"){sTracks.insert(it->first);} + if (M.getCodec(it->first) == "subtitle"){sTracks.insert(it->first);} } if (!vTracks.size() && !aTracks.size()){return "";} @@ -685,12 +685,18 @@ namespace Mist{ << "\" suggestedPresentationDelay=\"PT5.0S\" minBufferTime=\"PT2.0S\" publishTime=\"" << Util::getUTCString(Util::epoch()) << "\" "; } + + r << "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "; + r << 
"xmlns:xlink=\"http://www.w3.org/1999/xlink\" "; + r << "xsi:schemaLocation=\"urn:mpeg:DASH:schema:MPD:2011 " + "http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/" + "DASH-MPD.xsd\" "; r << "profiles=\"urn:mpeg:dash:profile:isoff-live:2011\" " "xmlns=\"urn:mpeg:dash:schema:mpd:2011\" >" << std::endl; r << "" << streamName << "" << std::endl; - r << "" << std::endl; + r << "" << std::endl; dashAdaptation(1, vTracks, videoAligned, r); dashAdaptation(2, aTracks, audioAligned, r);