diff --git a/lib/rtp.cpp b/lib/rtp.cpp index 429a06f1..c0157723 100644 --- a/lib/rtp.cpp +++ b/lib/rtp.cpp @@ -107,24 +107,27 @@ namespace RTP { } } - void Packet::sendAAC(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel) { + void Packet::sendData(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel, std::string codec) { /// \todo This function probably belongs in DMS somewhere. data[1] |= 0x80;//setting the RTP marker bit to 1 - /// \todo This 0x100000 value - What is it? Why is it hardcoded? - /// \todo The least significant 3 bits are used to signal some stuff from RFC 3640. Why do we send them always as 000? - *((int *)(data + getHsize())) = htonl(((payloadlen << 3) & 0x0010fff8) | 0x00100000); - memcpy(data + getHsize() + 4, payload, payloadlen); - callBack(socket, data, getHsize() + 4 + payloadlen, channel); - sentPackets++; - sentBytes += payloadlen; - increaseSequence(); - } - - void Packet::sendRaw(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel) { - /// \todo This function probably belongs in DMS somewhere. 
- data[1] |= 0x80;//setting the RTP marker bit to 1 - memcpy(data + getHsize(), payload, payloadlen); - callBack(socket, data, getHsize() + payloadlen, channel); + long offsetLen = 0; + if (codec == "AAC"){ + INFO_MSG("send AAC codec"); + *((int *)(data + getHsize())) = htonl(((payloadlen << 3) & 0x0010fff8) | 0x00100000);//32-bit store on purpose: the header written here is exactly the 4 bytes counted in offsetLen, a (long *) store would write 8 bytes on LP64 + offsetLen = 4; + }else if (codec == "MP3"){ + INFO_MSG("send MP3 codec"); + *((int *)(data + getHsize())) = 0;//this is MBZ and Frag_Offset, which is always 0 (32-bit store, matches offsetLen = 4) + offsetLen = 4; + }else if (codec == "AC3"){ + INFO_MSG("send AC3 codec"); + *((short *)(data + getHsize())) = htons(0x0001) ;//this is 6 bits MBZ, 2 bits FT = 0 = full frames and 8 bits saying we send 1 frame + offsetLen = 2; + }else{ + INFO_MSG("send Raw"); + } + memcpy(data + getHsize() + offsetLen, payload, payloadlen); + callBack(socket, data, getHsize() + offsetLen + payloadlen, channel); sentPackets++; sentBytes += payloadlen; increaseSequence(); @@ -162,12 +165,12 @@ namespace RTP { ((int *)rtcpData)[2] = htonl(2208988800UL + Util::epoch()); //epoch is in seconds ((int *)rtcpData)[3] = htonl((Util::getMS() % 1000) * 4294967.295); - if (metadata.tracks[tid].codec == "H264") { + if (metadata.tracks[tid].codec == "H264" || metadata.tracks[tid].codec == "MP3") { ((int *)rtcpData)[4] = htonl((ntpTime - 0) * 90000); //rtpts - } else if (metadata.tracks[tid].codec == "AAC") { + } else if (metadata.tracks[tid].codec == "AAC" || metadata.tracks[tid].codec == "AC3") { ((int *)rtcpData)[4] = htonl((ntpTime - 0) * metadata.tracks[tid].rate); //rtpts } else { - DEBUG_MSG(DLVL_FAIL, "Unsupported codec"); + DEBUG_MSG(DLVL_FAIL, "Unsupported codec: %s", metadata.tracks[tid].codec.c_str()); return; } //it should be the time packet was sent maybe, after all? 
diff --git a/lib/rtp.h b/lib/rtp.h index 835ea170..e1b78914 100644 --- a/lib/rtp.h +++ b/lib/rtp.h @@ -45,8 +45,7 @@ namespace RTP { void setTimestamp(unsigned int t); void increaseSequence(); void sendH264(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel); - void sendAAC(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel); - void sendRaw(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel); + void sendData(void * socket, void callBack(void *, char *, unsigned int, unsigned int), const char * payload, unsigned int payloadlen, unsigned int channel, std::string codec); void sendRTCP(long long & connectedAt, void * socket, unsigned int tid, DTSC::Meta & metadata, void callBack(void *, char *, unsigned int, unsigned int)); diff --git a/src/input/input_mp4.cpp b/src/input/input_mp4.cpp index 278ed740..a9c3bf4b 100644 --- a/src/input/input_mp4.cpp +++ b/src/input/input_mp4.cpp @@ -71,6 +71,7 @@ namespace Mist { sttsBox.clear(); sttsBox.read(tmp); }else if (stblBoxType == "ctts"){ + ///\todo this box should not have to be read, since its information is taken from the DTSH tmp = std::string(stblLoopPeek.asBox() ,stblLoopPeek.boxedSize()); cttsBox.clear(); cttsBox.read(tmp); @@ -341,6 +342,8 @@ namespace Mist { MP4::STSZ stszBox; MP4::STCO stcoBox; MP4::STSC stscBox; + MP4::CTTS cttsBox;//optional ctts box + bool hasCTTS = false; for (uint32_t m = 0; m < ((MP4::STBL&)minfLoopPeek).getContentCount(); m++){ tmp = std::string(((MP4::STBL&)minfLoopPeek).getContent(m).asBox(),((MP4::STBL&)minfLoopPeek).getContent(m).boxedSize()); std::string stboxRead = tmp; @@ -356,6 +359,9 @@ namespace Mist { stcoBox.read(stboxRead); }else if (stblBoxType == "stsc"){ stscBox.read(stboxRead); + }else if (stblBoxType == 
"ctts"){ + cttsBox.read(stboxRead); + hasCTTS = true; }else if (stblBoxType == "stsd"){ //check for codec in here MP4::Box & tmpBox = ((MP4::STSD&)stblLoopPeek).getEntry(0); @@ -390,18 +396,15 @@ namespace Mist { myMeta.tracks[trackNo].codec = "AC3"; }else{ MP4::Box esds = ((MP4::AudioSampleEntry&)tmpBox).getCodecBox(); - if (((MP4::ESDS&)esds).isAAC()){ - myMeta.tracks[trackNo].codec = "AAC"; - myMeta.tracks[trackNo].init = ((MP4::ESDS&)esds).getInitData(); - }else{ - myMeta.tracks[trackNo].codec = "MP3"; - } + myMeta.tracks[trackNo].codec = ((MP4::ESDS&)esds).getCodec(); + myMeta.tracks[trackNo].init = ((MP4::ESDS&)esds).getInitData(); } myMeta.tracks[trackNo].size = 16;///\todo this might be nice to calculate from mp4 file; //get Visual sample entry -> esds -> startcodes }else{ myMeta.tracks.erase(trackNo); } + } }//rof stbl uint64_t totaldur = 0;///\todo note: set this to begin time @@ -415,6 +418,10 @@ namespace Mist { //change to for over all samples unsigned int stcoIndex = 0; unsigned int stscIndex = 0; + unsigned int cttsIndex = 0;//current ctts Index we are reading + unsigned int cttsEntryRead = 0;//current part of ctts we are reading + MP4::CTTSEntry cttsEntry; + unsigned int fromSTCOinSTSC = 0; long long unsigned int tempOffset; bool stcoIs64 = (stcoBox.getType() == "co64"); @@ -479,6 +486,19 @@ namespace Mist { tempSTTS = sttsBox.getSTTSEntry(entryNo); } } + //set time offset + if (hasCTTS){ + + cttsEntry = cttsBox.getCTTSEntry(cttsIndex); + cttsEntryRead++; + if (cttsEntryRead >= cttsEntry.sampleCount){ + cttsIndex++; + cttsEntryRead = 0; + } + BsetPart.timeOffset = (cttsEntry.sampleOffset * 1000)/timeScale; + }else{ + BsetPart.timeOffset = 0; + } //set size, that's easy BsetPart.size = stszBox.getEntrySize(sampleIndex); //trackid @@ -523,7 +543,7 @@ namespace Mist { int tmp = fread(data, it->size, 1, inFile); if (tmp == 1){ //add data - myMeta.update(it->time, 1, it->trackID, it->size, it->bpos, it->keyframe); + myMeta.update(it->time, 
it->timeOffset, it->trackID, it->size, it->bpos, it->keyframe); }else{ INFO_MSG("fread did not return 1, bpos: %llu size: %llu keyframe: %d error: %s", it->bpos, it->size, it->keyframe, strerror(errno)); return false; diff --git a/src/input/input_mp4.h b/src/input/input_mp4.h index 24a530a0..15e34180 100644 --- a/src/input/input_mp4.h +++ b/src/input/input_mp4.h @@ -44,6 +44,7 @@ namespace Mist { long long unsigned int bpos; long long unsigned int size; long long unsigned int stcoNr; + long unsigned int timeOffset; bool keyframe; }; diff --git a/src/output/output_dash_mp4.cpp b/src/output/output_dash_mp4.cpp index fa40e49e..bf719d56 100644 --- a/src/output/output_dash_mp4.cpp +++ b/src/output/output_dash_mp4.cpp @@ -113,7 +113,7 @@ namespace Mist { hev1Box.setCLAP(hvccBox); stsdBox.setEntry(hev1Box, 0); } - if (myMeta.tracks[tid].codec == "AAC"){ + if (myMeta.tracks[tid].codec == "AAC" || myMeta.tracks[tid].codec == "MP3"){ MP4::AudioSampleEntry ase; ase.setCodec("mp4a"); ase.setDataReferenceIndex(1); @@ -284,7 +284,7 @@ namespace Mist { if (myMeta.tracks[tid].codec == "H264" || myMeta.tracks[tid].codec == "HEVC"){ tfhdBox.setTrackID(1); } - if (myMeta.tracks[tid].codec == "AAC"){ + if (myMeta.tracks[tid].codec == "AAC" || myMeta.tracks[tid].codec == "AC3" || myMeta.tracks[tid].codec == "MP3"){ tfhdBox.setFlags(MP4::tfhdSampleFlag); tfhdBox.setTrackID(1); tfhdBox.setDefaultSampleFlags(MP4::isKeySample); @@ -347,7 +347,7 @@ namespace Mist { i++; } } - if (myMeta.tracks[tid].codec == "AAC"){ + if (myMeta.tracks[tid].codec == "AAC" || myMeta.tracks[tid].codec == "AC3" || myMeta.tracks[tid].codec == "MP3"){ trunBox.setFlags(MP4::trundataOffset | MP4::trunsampleSize | MP4::trunsampleDuration); trunBox.setDataOffset(88 + (8 * myMeta.tracks[tid].keys[keyNum].getParts()) + 8); for (int j = 0; j < myMeta.tracks[tid].keys[keyNum].getParts(); j++){ @@ -434,6 +434,7 @@ namespace Mist { int lastAudTime = 0; int audKeys = 0; int audInitTrack = 0; + ///\todo Dash automatically 
selects the last audio and video track for manifest, maybe make this expandable/selectable? for (std::map::iterator it = myMeta.tracks.begin(); it != myMeta.tracks.end(); it ++){ if (it->second.lastms > lastTime){ lastTime = it->second.lastms; @@ -448,7 +449,7 @@ namespace Mist { vidKeys = it->second.keys.size(); vidInitTrack = it->first; } - if (it->second.codec == "AAC" && it->second.lastms > lastAudTime){ + if ((it->second.codec == "AAC" || it->second.codec == "MP3" || it->second.codec == "AC3")&& it->second.lastms > lastAudTime){ lastAudTime = it->second.lastms; audKeys = it->second.keys.size(); audInitTrack = it->first; @@ -518,10 +519,17 @@ namespace Mist { r << " " << std::endl; for (std::map::iterator it = myMeta.tracks.begin(); it != myMeta.tracks.end(); it++){ - if (it->second.codec == "AAC"){ + if (it->second.codec == "AAC" || it->second.codec == "MP3" || it->second.codec == "AC3"){ r << " first << "\" "; - r << "codecs=\"mp4a.40.2\" "; + // (see RFC6381): sample description entry , ObjectTypeIndication [MP4RA, RFC], ObjectTypeIndication [MP4A ISO/IEC 14496-3:2009] + if (it->second.codec == "AAC" ){ + r << "codecs=\"mp4a.40.2\" "; + }else if (it->second.codec == "MP3" ){ + r << "codecs=\"mp4a.40.34\" "; + }else if (it->second.codec == "AC3" ){ + r << "codecs=\"ac-3\" ";//plain AC-3 is signalled as 'ac-3'; 'ec-3' is the Enhanced AC-3 (E-AC-3) identifier + } r << "audioSamplingRate=\"" << it->second.rate << "\" "; r << "bandwidth=\"" << it->second.bps << "\">" << std::endl; r << " second.channels << "\" />" << std::endl; @@ -547,6 +555,7 @@ namespace Mist { capa["codecs"][0u][0u].append("HEVC"); capa["codecs"][0u][1u].append("AAC"); capa["codecs"][0u][1u].append("AC3"); + capa["codecs"][0u][1u].append("MP3"); capa["methods"][0u]["handler"] = "http"; capa["methods"][0u]["type"] = "dash/video/mp4"; capa["methods"][0u]["priority"] = 8ll; diff --git a/src/output/output_rtsp.cpp b/src/output/output_rtsp.cpp index dfb2b7cc..5fa9bfba 100644 --- a/src/output/output_rtsp.cpp +++ b/src/output/output_rtsp.cpp @@ -87,15 +87,15 @@ namespace Mist { 
callBack = sendTCP; } - if(myMeta.tracks[tid].codec == "AAC"){ - tracks[tid].rtpPacket.setTimestamp(timestamp * ((double) myMeta.tracks[tid].rate / 1000.0)); - tracks[tid].rtpPacket.sendAAC(socket, callBack, dataPointer, dataLen, tracks[tid].channel); + if(myMeta.tracks[tid].codec == "MP3"){ + tracks[tid].rtpPacket.setTimestamp(timestamp * 90); + tracks[tid].rtpPacket.sendData(socket, callBack, dataPointer, dataLen, tracks[tid].channel, "MP3"); return; } - if(myMeta.tracks[tid].codec == "MP3" || myMeta.tracks[tid].codec == "AC3"){ + if( myMeta.tracks[tid].codec == "AC3" || myMeta.tracks[tid].codec == "AAC"){ tracks[tid].rtpPacket.setTimestamp(timestamp * ((double) myMeta.tracks[tid].rate / 1000.0)); - tracks[tid].rtpPacket.sendRaw(socket, callBack, dataPointer, dataLen, tracks[tid].channel); + tracks[tid].rtpPacket.sendData(socket, callBack, dataPointer, dataLen, tracks[tid].channel,myMeta.tracks[tid].codec); return; } @@ -220,6 +220,7 @@ namespace Mist { //loop over all tracks, add them to the SDP. /// \todo Make sure this works correctly for multibitrate streams. 
for (std::map::iterator objIt = myMeta.tracks.begin(); objIt != myMeta.tracks.end(); objIt ++) { + INFO_MSG("Codec: %s", objIt->second.codec.c_str()); if (objIt->second.codec == "H264") { MP4::AVCC avccbox; avccbox.setPayload(objIt->second.init); @@ -248,11 +249,12 @@ namespace Mist { transportString << "; mode=AAC-hbr; SizeLength=13; IndexLength=3; IndexDeltaLength=3;\r\n" "a=control:track" << objIt->second.trackID << "\r\n"; }else if (objIt->second.codec == "MP3") { - transportString << "m=" << objIt->second.type << " 0 RTP/AVP 96" << "\r\n" - "a=rtpmap:14 MPA/" << objIt->second.rate << "/" << objIt->second.channels << "\r\n" - //"a=fmtp:96 streamtype=5; profile-level-id=15;"; - //these values are described in RFC 3640 - //transportString << " mode=AAC-hbr; SizeLength=13; IndexLength=3; IndexDeltaLength=3;\r\n" + transportString << "m=" << objIt->second.type << " 0 RTP/AVP 14" << "\r\n" + "a=rtpmap:14 MPA/90000/" << objIt->second.channels << "\r\n" + "a=control:track" << objIt->second.trackID << "\r\n"; + }else if ( objIt->second.codec == "AC3") { + transportString << "m=" << objIt->second.type << " 0 RTP/AVP 100" << "\r\n" + "a=rtpmap:100 AC3/" << objIt->second.rate << "/" << objIt->second.channels << "\r\n" "a=control:track" << objIt->second.trackID << "\r\n"; } }//for tracks iterator @@ -268,8 +270,12 @@ namespace Mist { unsigned int SSrc = rand(); if (myMeta.tracks[trId].codec == "H264") { tracks[trId].rtpPacket = RTP::Packet(97, 1, 0, SSrc); - }else if(myMeta.tracks[trId].codec == "AAC" || myMeta.tracks[trId].codec == "MP3"){ + }else if(myMeta.tracks[trId].codec == "AAC"){ tracks[trId].rtpPacket = RTP::Packet(96, 1, 0, SSrc); + }else if(myMeta.tracks[trId].codec == "AC3"){ + tracks[trId].rtpPacket = RTP::Packet(100, 1, 0, SSrc); + }else if(myMeta.tracks[trId].codec == "MP3"){ + tracks[trId].rtpPacket = RTP::Packet(14, 1, 0, SSrc); }else{ DEBUG_MSG(DLVL_FAIL,"Unsupported codec for RTSP: %s",myMeta.tracks[trId].codec.c_str()); }