EBML updates:

- AV1 support
- Support for outputting fragments longer than 30 seconds in duration
- Fixed Firefox support for Opus audio tracks
- Added support for stdin live input of EBML
- Fixed broken timestamps when seeking in VoD EBML files
- Analyser now calculates offsets for (manual) double-checking
- Added JSON track support to EBML input and output
- Added basic input support for SRT/ASS/SSA subtitles
- Opus CODECDELAY is now actually calculated.
- Fixed Opus in Firefox
- Improved MP3 support, more robust handling of corruption, support for non-standard timescale sources
This commit is contained in:
Thulinma 2018-04-03 14:36:00 +02:00
parent 7f770b27b7
commit 68a1bff34f
8 changed files with 351 additions and 91 deletions

View file

@ -4,9 +4,14 @@
#include <mist/bitfields.h>
namespace Mist{
uint16_t maxEBMLFrameOffset = 0;
bool frameOffsetKnown = false;
InputEBML::InputEBML(Util::Config *cfg) : Input(cfg){
timeScale = 1.0;
capa["name"] = "EBML";
capa["desc"] = "Allows loading MKV, MKA, MK3D, MKS and WebM files for Video on Demand.";
capa["desc"] = "Allows loading MKV, MKA, MK3D, MKS and WebM files for Video on Demand, or accepts live streams in those formats over standard input.";
capa["source_match"].append("/*.mkv");
capa["source_match"].append("/*.mka");
capa["source_match"].append("/*.mk3d");
@ -18,6 +23,7 @@ namespace Mist{
capa["codecs"].append("HEVC");
capa["codecs"].append("VP8");
capa["codecs"].append("VP9");
capa["codecs"].append("AV1");
capa["codecs"].append("opus");
capa["codecs"].append("vorbis");
capa["codecs"].append("theora");
@ -30,16 +36,40 @@ namespace Mist{
capa["codecs"].append("MP3");
capa["codecs"].append("AC3");
capa["codecs"].append("FLOAT");
capa["codecs"].append("JSON");
capa["codecs"].append("subtitle");
lastClusterBPos = 0;
lastClusterTime = 0;
bufferedPacks = 0;
}
bool InputEBML::checkArguments(){
if (config->getString("input") == "-"){
std::cerr << "Input from stdin not yet supported" << std::endl;
return false;
/// Extracts the plain dialogue text from an ASS/SSA event line.
/// Everything up to and including the 8th comma is discarded, text inside
/// {...} groups is dropped, and the escapes \N and \n become a newline.
/// \param ptr Pointer to the raw event bytes (does not need to be null-terminated).
/// \param len Number of bytes readable at ptr.
/// \return The remaining text; empty if ptr is null or nothing follows the 8th comma.
std::string ASStoSRT(const char * ptr, uint32_t len){
  std::string tmpStr;
  if (!ptr || !len){return tmpStr;}
  tmpStr.reserve(len);
  uint16_t commas = 0;
  //32 bits wide so that even a payload consisting of only '{' cannot wrap the counter
  uint32_t brackets = 0;
  for (uint32_t i = 0; i < len; ++i){
    //Skip everything until the 8th comma
    if (commas < 8){
      if (ptr[i] == ','){commas++;}
      continue;
    }
    if (ptr[i] == '{'){brackets++; continue;}
    if (ptr[i] == '}'){
      //An unmatched '}' must not wrap the counter around,
      //otherwise all text after it would be treated as inside a group and dropped.
      if (brackets){brackets--;}
      continue;
    }
    if (brackets){continue;}
    //i + 1 < len keeps the look-ahead inside the buffer
    if (ptr[i] == '\\' && i + 1 < len && (ptr[i+1] == 'N' || ptr[i+1] == 'n')){
      tmpStr += '\n';
      ++i;
      continue;
    }
    tmpStr += ptr[i];
  }
  return tmpStr;
}
bool InputEBML::checkArguments(){
if (!config->getString("streamname").size()){
if (config->getString("output") == "-"){
std::cerr << "Output to stdout not yet supported" << std::endl;
@ -54,10 +84,23 @@ namespace Mist{
return true;
}
bool InputEBML::needsLock() {
  //Only standard input ("-") runs without a lock; every other input takes one.
  const bool readsFromStdin = (config->getString("input") == "-");
  return !readsFromStdin;
}
bool InputEBML::preRun(){
// open File
inFile = fopen(config->getString("input").c_str(), "r");
if (!inFile){return false;}
if (config->getString("input") == "-"){
inFile = stdin;
}else{
// open File
inFile = fopen(config->getString("input").c_str(), "r");
if (!inFile){return false;}
}
return true;
}
@ -68,7 +111,10 @@ namespace Mist{
while (ptr.size() < needed){
if (!ptr.allocate(needed)){return false;}
if (!fread(ptr + ptr.size(), needed - ptr.size(), 1, inFile)){
FAIL_MSG("Could not read more data!");
//We assume if there is no current data buffered, that we are at EOF and don't print a warning
if (ptr.size()){
FAIL_MSG("Could not read more data! (have %lu, need %lu)", ptr.size(), needed);
}
return false;
}
ptr.size() = needed;
@ -82,8 +128,18 @@ namespace Mist{
}
}
EBML::Element E(ptr);
if (E.getID() == EBML::EID_CLUSTER){lastClusterBPos = Util::ftell(inFile);}
if (E.getID() == EBML::EID_TIMECODE){lastClusterTime = E.getValUInt();}
if (E.getID() == EBML::EID_CLUSTER){
if (inFile == stdin){
lastClusterBPos = 0;
}else{
lastClusterBPos = Util::ftell(inFile);
}
DONTEVEN_MSG("Found a cluster at position %llu", lastClusterBPos);
}
if (E.getID() == EBML::EID_TIMECODE){
lastClusterTime = E.getValUInt();
DONTEVEN_MSG("Cluster time %llu ms", lastClusterTime);
}
return true;
}
@ -96,6 +152,13 @@ namespace Mist{
swapEndianness.insert(it->first);
}
}
if (myMeta.inputLocalVars.isMember("timescale")){
timeScale = ((double)myMeta.inputLocalVars["timescale"].asInt()) / 1000000.0;
}
if (myMeta.inputLocalVars.isMember("maxframeoffset")){
maxEBMLFrameOffset = myMeta.inputLocalVars["maxframeoffset"].asInt();
frameOffsetKnown = true;
}
return true;
}
@ -131,6 +194,10 @@ namespace Mist{
tmpElem = E.findChild(EBML::EID_CODECPRIVATE);
if (tmpElem){init = tmpElem.getValString();}
}
if (codec == "V_AV1"){
trueCodec = "AV1";
trueType = "video";
}
if (codec == "V_VP9"){
trueCodec = "VP9";
trueType = "video";
@ -191,6 +258,20 @@ namespace Mist{
trueCodec = "FLOAT";
trueType = "audio";
}
if (codec == "M_JSON"){
trueCodec = "JSON";
trueType = "meta";
}
if (codec == "S_TEXT/UTF8"){
trueCodec = "subtitle";
trueType = "meta";
}
if (codec == "S_TEXT/ASS" || codec == "S_TEXT/SSA"){
trueCodec = "subtitle";
trueType = "meta";
tmpElem = E.findChild(EBML::EID_CODECPRIVATE);
if (tmpElem){init = tmpElem.getValString();}
}
if (codec == "A_MS/ACM"){
tmpElem = E.findChild(EBML::EID_CODECPRIVATE);
if (tmpElem){
@ -248,6 +329,13 @@ namespace Mist{
}
INFO_MSG("Detected track: %s", Trk.getIdentifier().c_str());
}
if (E.getID() == EBML::EID_TIMECODESCALE){
uint64_t timeScaleVal = E.getValUInt();
myMeta.inputLocalVars["timescale"] = (long long)timeScaleVal;
timeScale = ((double)timeScaleVal) / 1000000.0;
}
//Live streams stop parsing the header as soon as the first Cluster is encountered
if (E.getID() == EBML::EID_CLUSTER && !needsLock()){return true;}
if (E.getType() == EBML::ELEM_BLOCK){
EBML::Block B(ptr);
uint64_t tNum = B.getTrackNum();
@ -255,21 +343,32 @@ namespace Mist{
trackPredictor &TP = packBuf[tNum];
DTSC::Track &Trk = myMeta.tracks[tNum];
bool isVideo = (Trk.type == "video");
bool isAudio = (Trk.type == "audio");
bool isASS = (Trk.codec == "subtitle" && Trk.init.size());
for (uint64_t frameNo = 0; frameNo < B.getFrameCount(); ++frameNo){
if (frameNo){
if (Trk.codec == "AAC"){
newTime += 1000000 / Trk.rate;//assume ~1000 samples per frame
newTime += (1000000 / Trk.rate)/timeScale;//assume ~1000 samples per frame
} else if (Trk.codec == "MP3"){
newTime += (1152000 / Trk.rate)/timeScale;//1152 samples per frame
}else{
newTime += 1/timeScale;
ERROR_MSG("Unknown frame duration for codec %s - timestamps WILL be wrong!", Trk.codec.c_str());
}
}
uint32_t frameSize = B.getFrameSize(frameNo);
if (isASS){
char * ptr = (char *)B.getFrameData(frameNo);
std::string assStr = ASStoSRT(ptr, frameSize);
frameSize = assStr.size();
}
if (frameSize){
TP.add(newTime, 0, tNum, frameSize, lastClusterBPos,
B.isKeyframe() && isVideo);
TP.add(newTime*timeScale, 0, tNum, frameSize, lastClusterBPos,
B.isKeyframe() && !isAudio, isVideo);
}
}
while (TP.hasPackets()){
while (TP.hasPackets() && (isVideo || frameOffsetKnown)){
frameOffsetKnown = true;
packetData &C = TP.getPacketData(isVideo);
myMeta.update(C.time, C.offset, C.track, C.dsize, C.bpos, C.key);
TP.remove();
@ -289,6 +388,8 @@ namespace Mist{
}
}
myMeta.inputLocalVars["maxframeoffset"] = (long long)maxEBMLFrameOffset;
bench = Util::getMicros(bench);
INFO_MSG("Header generated in %llu ms", bench / 1000);
packBuf.clear();
@ -386,19 +487,31 @@ namespace Mist{
trackPredictor &TP = packBuf[tNum];
DTSC::Track & Trk = myMeta.tracks[tNum];
bool isVideo = (Trk.type == "video");
bool isAudio = (Trk.type == "audio");
bool isASS = (Trk.codec == "subtitle" && Trk.init.size());
for (uint64_t frameNo = 0; frameNo < B.getFrameCount(); ++frameNo){
if (frameNo){
if (Trk.codec == "AAC"){
newTime += 1000000 / Trk.rate;//assume ~1000 samples per frame
newTime += (1000000 / Trk.rate)/timeScale;//assume ~1000 samples per frame
} else if (Trk.codec == "MP3"){
newTime += (1152000 / Trk.rate)/timeScale;//1152 samples per frame
}else{
ERROR_MSG("Unknown frame duration for codec %s - timestamps WILL be wrong!", Trk.codec.c_str());
}
}
uint32_t frameSize = B.getFrameSize(frameNo);
if (frameSize){
TP.add(newTime, 0, tNum, frameSize, lastClusterBPos,
B.isKeyframe() && isVideo, (void *)B.getFrameData(frameNo));
++bufferedPacks;
char * ptr = (char *)B.getFrameData(frameNo);
if (isASS){
std::string assStr = ASStoSRT(ptr, frameSize);
frameSize = assStr.size();
memcpy(ptr, assStr.data(), frameSize);
}
if (frameSize){
TP.add(newTime*timeScale, 0, tNum, frameSize, lastClusterBPos,
B.isKeyframe() && !isAudio, isVideo, (void *)ptr);
++bufferedPacks;
}
}
}
if (TP.hasPackets()){
@ -416,10 +529,29 @@ namespace Mist{
void InputEBML::seek(int seekTime){
packBuf.clear();
bufferedPacks = 0;
DTSC::Track Trk = myMeta.tracks[getMainSelectedTrack()];
uint64_t mainTrack = getMainSelectedTrack();
DTSC::Track Trk = myMeta.tracks[mainTrack];
bool isVideo = (Trk.type == "video");
uint64_t seekPos = Trk.keys[0].getBpos();
// Replay the parts of the previous keyframe, so the timestamps match up
uint64_t partCount = 0;
for (unsigned int i = 0; i < Trk.keys.size(); i++){
if (Trk.keys[i].getTime() > seekTime){break;}
if (Trk.keys[i].getTime() > seekTime){
if (i > 1){
partCount -= Trk.keys[i-1].getParts() + Trk.keys[i-2].getParts();
uint64_t partEnd = partCount + Trk.keys[i-2].getParts();
uint64_t partTime = Trk.keys[i-2].getTime();
for (uint64_t prt = partCount; prt < partEnd; ++prt){
INSANE_MSG("Replay part %llu, timestamp: %llu+%llu", prt, partTime, Trk.parts[prt].getOffset());
packBuf[mainTrack].add(partTime, Trk.parts[prt].getOffset(), mainTrack, 0, 0, false, isVideo, (void *)0);
packBuf[mainTrack].remove();
partTime += Trk.parts[prt].getDuration();
}
}
break;
}
partCount += Trk.keys[i].getParts();
DONTEVEN_MSG("Seeking to %lu, found %llu...", seekTime, Trk.keys[i].getTime());
seekPos = Trk.keys[i].getBpos();
}
Util::fseek(inFile, seekPos, SEEK_SET);

View file

@ -3,6 +3,11 @@
namespace Mist{
extern uint16_t maxEBMLFrameOffset;
extern bool frameOffsetKnown;
#define PKT_COUNT 64
class packetData{
public:
uint64_t time, offset, track, dsize, bpos;
@ -33,7 +38,7 @@ namespace Mist{
};
class trackPredictor{
public:
packetData pkts[16];
packetData pkts[PKT_COUNT];
uint16_t smallestFrame;
uint64_t lastTime;
uint64_t ctr;
@ -48,31 +53,49 @@ namespace Mist{
if (finished){
return (ctr - rem > 0);
}else{
return (ctr - rem > 8);
return (ctr - rem > 12);
}
}
packetData & getPacketData(bool mustCalcOffsets){
packetData & p = pkts[rem % 16];
if (rem && mustCalcOffsets){
if (p.time > lastTime + smallestFrame){
while (p.time - (lastTime + smallestFrame) > smallestFrame * 8){
lastTime += smallestFrame;
}
p.offset = p.time - (lastTime + smallestFrame);
p.time = lastTime + smallestFrame;
frameOffsetKnown = true;
//grab the next packet to output
packetData & p = pkts[rem % PKT_COUNT];
//Subtract the max frame offset, so we know all offsets are positive, no matter what.
//if it's not the first and we're calculating offsets, see if we need an offset
if (!mustCalcOffsets){
p.time += maxEBMLFrameOffset;
DONTEVEN_MSG("Outputting %llu + %llu (%llu -> %llu)", p.time, maxEBMLFrameOffset, rem, rem % PKT_COUNT);
return p;
}else{
if (rem && !p.key){
p.offset = p.time + maxEBMLFrameOffset - (lastTime + smallestFrame);
//If we calculate an offset less than a frame away,
//we assume it's just time stamp drift due to lack of precision.
p.time = (lastTime + smallestFrame);
}else{
p.offset = maxEBMLFrameOffset;
}
}
lastTime = p.time;
return p;
}
void add(uint64_t packTime, uint64_t packOffset, uint64_t packTrack, uint64_t packDataSize, uint64_t packBytePos, bool isKeyframe, void * dataPtr = 0){
if (ctr && ctr > rem){
if ((pkts[(ctr-1)%16].time < packTime - 2) && (!smallestFrame || packTime - pkts[(ctr-1)%16].time < smallestFrame)){
smallestFrame = packTime - pkts[(ctr-1)%16].time;
void add(uint64_t packTime, uint64_t packOffset, uint64_t packTrack, uint64_t packDataSize, uint64_t packBytePos, bool isKeyframe, bool isVideo, void * dataPtr = 0){
if (isVideo && ctr && ctr >= rem){
int32_t currOffset = packTime - pkts[(ctr-1)%PKT_COUNT].time;
if (currOffset < 0){currOffset *= -1;}
if (!smallestFrame || currOffset < smallestFrame){
smallestFrame = currOffset;
HIGH_MSG("Smallest frame is now %u", smallestFrame);
}
if (!frameOffsetKnown && currOffset < 8*smallestFrame && currOffset*2 > maxEBMLFrameOffset && ctr < PKT_COUNT/2){
maxEBMLFrameOffset = currOffset*2;
INFO_MSG("Max frame offset is now %u", maxEBMLFrameOffset);
}
}
pkts[ctr % 16].set(packTime, packOffset, packTrack, packDataSize, packBytePos, isKeyframe, dataPtr);
DONTEVEN_MSG("Ingesting %llu (%llu -> %llu)", packTime, ctr, ctr % PKT_COUNT);
pkts[ctr % PKT_COUNT].set(packTime, packOffset, packTrack, packDataSize, packBytePos, isKeyframe, dataPtr);
++ctr;
if (ctr == PKT_COUNT-1){frameOffsetKnown = true;}
}
void remove(){
++rem;
@ -83,7 +106,7 @@ namespace Mist{
class InputEBML : public Input{
public:
InputEBML(Util::Config *cfg);
bool needsLock();
protected:
void fillPacket(packetData & C);
bool checkArguments();
@ -101,6 +124,12 @@ namespace Mist{
std::map<uint64_t, trackPredictor> packBuf;
std::set<uint64_t> swapEndianness;
bool readExistingHeader();
//Parses the stream header by calling readHeader().
//This method is void, so nothing readHeader() reports is passed on to the caller.
void parseStreamHeader(){
readHeader();
}
//Always returns true; this method does no work of its own.
bool openStreamSource(){return true;}
//A header is needed exactly when needsLock() returns true.
bool needHeader(){return needsLock();}
double timeScale;
};
}