1#![cfg_attr(feature = "docs", doc = "\n\nSee the [changelog][changelog] for a full release history.")]
3#![cfg_attr(feature = "docs", doc = "## Feature flags")]
4#![cfg_attr(feature = "docs", doc = document_features::document_features!())]
5#![allow(clippy::single_match)]
12#![deny(unsafe_code)]
14#![deny(unreachable_pub)]
15
16use std::collections::VecDeque;
17use std::fmt::Debug;
18use std::io;
19
20use byteorder::{BigEndian, ReadBytesExt};
21use bytes::{Buf, Bytes};
22use isobmff::boxes::{
23 Brand, ChunkOffsetBox, FileTypeBox, HandlerBox, HandlerType, MediaBox, MediaDataBox, MediaHeaderBox,
24 MediaInformationBox, MovieBox, MovieExtendsBox, MovieFragmentBox, MovieFragmentHeaderBox, MovieHeaderBox,
25 SampleDescriptionBox, SampleSizeBox, SampleTableBox, SampleToChunkBox, SoundMediaHeaderBox, TimeToSampleBox, TrackBox,
26 TrackExtendsBox, TrackFragmentBaseMediaDecodeTimeBox, TrackFragmentBox, TrackFragmentHeaderBox, TrackHeaderBox,
27 TrackRunBox, VideoMediaHeaderBox,
28};
29use isobmff::{IsoSized, UnknownBox};
30use scuffle_bytes_util::zero_copy::Serialize;
31use scuffle_flv::audio::AudioData;
32use scuffle_flv::audio::body::AudioTagBody;
33use scuffle_flv::audio::body::legacy::LegacyAudioTagBody;
34use scuffle_flv::audio::body::legacy::aac::AacAudioData;
35use scuffle_flv::audio::header::AudioTagHeader;
36use scuffle_flv::audio::header::legacy::{LegacyAudioTagHeader, SoundType};
37use scuffle_flv::script::{OnMetaData, ScriptData};
38use scuffle_flv::tag::{FlvTag, FlvTagData};
39use scuffle_flv::video::VideoData;
40use scuffle_flv::video::body::VideoTagBody;
41use scuffle_flv::video::body::enhanced::{ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketSequenceStart};
42use scuffle_flv::video::body::legacy::LegacyVideoTagBody;
43use scuffle_flv::video::header::enhanced::VideoFourCc;
44use scuffle_flv::video::header::legacy::{LegacyVideoTagHeader, LegacyVideoTagHeaderAvcPacket};
45use scuffle_flv::video::header::{VideoFrameType, VideoTagHeader, VideoTagHeaderData};
46use scuffle_h264::Sps;
47
48mod codecs;
49mod define;
50mod errors;
51
52pub use define::*;
53pub use errors::TransmuxError;
54
/// The header tags that `find_tags` collects from the queued FLV tags and
/// that `init_sequence` needs to build the initialization segment.
struct Tags<'a> {
    /// Most recently seen video sequence header (AVC, AV1 or HEVC), if any.
    video_sequence_header: Option<VideoSequenceHeader<'a>>,
    /// Most recently seen AAC sequence header together with the sound
    /// size/type taken from its legacy audio tag header, if any.
    audio_sequence_header: Option<AudioSequenceHeader>,
    /// Most recently seen `onMetaData` script tag, if any. It is optional for
    /// building the initialization segment.
    scriptdata_tag: Option<OnMetaData<'a>>,
}
60
/// Converts queued FLV tags into an initialization segment followed by
/// `moof` + `mdat` media segments.
///
/// Tags are queued with [`Transmuxer::demux`] or [`Transmuxer::add_tag`] and
/// converted by calling [`Transmuxer::mux`] repeatedly.
#[derive(Debug, Clone)]
pub struct Transmuxer<'a> {
    /// Sum of the durations of all audio samples emitted so far. Its value
    /// before a sample is emitted is used as that fragment's base media
    /// decode time and as the segment timestamp.
    audio_duration: u64,
    /// Sum of the durations of all video samples emitted so far; used the
    /// same way as `audio_duration`, but for the video track.
    video_duration: u64,
    /// Sequence number written into the next movie fragment header. Starts at
    /// 1 and is incremented after every emitted fragment.
    sequence_number: u32,
    /// `timestamp_ms` of the last emitted video tag. `0` doubles as the
    /// "no previous video frame" marker when computing sample durations.
    last_video_timestamp: u32,
    /// Stream settings, populated once the initialization segment has been
    /// produced. `None` means the initialization segment is still pending.
    settings: Option<(VideoSettings, AudioSettings)>,
    /// FLV tags waiting to be converted, in arrival order.
    tags: VecDeque<FlvTag<'a>>,
}
73
74impl Default for Transmuxer<'_> {
75 fn default() -> Self {
76 Self::new()
77 }
78}
79
80impl<'a> Transmuxer<'a> {
81 pub fn new() -> Self {
82 Self {
83 sequence_number: 1,
84 tags: VecDeque::new(),
85 audio_duration: 0,
86 video_duration: 0,
87 last_video_timestamp: 0,
88 settings: None,
89 }
90 }
91
92 pub fn demux(&mut self, data: Bytes) -> Result<(), TransmuxError> {
94 let mut cursor = io::Cursor::new(data);
95 while cursor.has_remaining() {
96 cursor.read_u32::<BigEndian>()?; if !cursor.has_remaining() {
98 break;
99 }
100
101 let tag = FlvTag::demux(&mut cursor)?;
102 self.tags.push_back(tag);
103 }
104
105 Ok(())
106 }
107
108 pub fn add_tag(&mut self, tag: FlvTag<'a>) {
110 self.tags.push_back(tag);
111 }
112
113 pub fn mux(&mut self) -> Result<Option<TransmuxResult>, TransmuxError> {
116 let mut writer = Vec::new();
117
118 let Some((video_settings, _)) = &self.settings else {
119 let Some((video_settings, audio_settings)) = self.init_sequence(&mut writer)? else {
120 if self.tags.len() > 30 {
121 return Err(TransmuxError::NoSequenceHeaders);
123 }
124
125 return Ok(None);
127 };
128
129 self.settings = Some((video_settings.clone(), audio_settings.clone()));
130
131 return Ok(Some(TransmuxResult::InitSegment {
132 data: Bytes::from(writer),
133 audio_settings,
134 video_settings,
135 }));
136 };
137
138 loop {
139 let Some(tag) = self.tags.pop_front() else {
140 return Ok(None);
141 };
142
143 let mdat_data;
144 let total_duration;
145 let trun_sample;
146 let mut is_audio = false;
147 let mut is_keyframe = false;
148
149 let duration =
150 if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
151 1000 } else {
154 let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
164 let expected_delta = 1000.0 / video_settings.framerate;
165 if (delta - expected_delta).abs() <= 1.0 {
166 1000
167 } else {
168 (delta * video_settings.framerate) as u32
169 }
170 };
171
172 match tag.data {
173 FlvTagData::Audio(AudioData {
174 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::Raw(data))),
175 ..
176 }) => {
177 let (sample, duration) = codecs::aac::trun_sample(&data)?;
178
179 trun_sample = sample;
180 mdat_data = data;
181 total_duration = duration;
182 is_audio = true;
183 }
184 FlvTagData::Video(VideoData {
185 header:
186 VideoTagHeader {
187 frame_type,
188 data:
189 VideoTagHeaderData::Legacy(LegacyVideoTagHeader::AvcPacket(
190 LegacyVideoTagHeaderAvcPacket::Nalu { composition_time_offset },
191 )),
192 },
193 body: VideoTagBody::Legacy(LegacyVideoTagBody::Other { data }),
194 ..
195 }) => {
196 let composition_time =
197 ((composition_time_offset as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
198
199 let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
200
201 trun_sample = sample;
202 total_duration = duration;
203 mdat_data = data;
204
205 is_keyframe = frame_type == VideoFrameType::KeyFrame;
206 }
207 FlvTagData::Video(VideoData {
208 header: VideoTagHeader { frame_type, .. },
209 body:
210 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
211 video_four_cc: VideoFourCc::Av1,
212 packet: VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(data)),
213 }),
214 ..
215 }) => {
216 let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
217
218 trun_sample = sample;
219 total_duration = duration;
220 mdat_data = data;
221
222 is_keyframe = frame_type == VideoFrameType::KeyFrame;
223 }
224 FlvTagData::Video(VideoData {
225 header: VideoTagHeader { frame_type, .. },
226 body:
227 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
228 video_four_cc: VideoFourCc::Hevc,
229 packet,
230 }),
231 ..
232 }) => {
233 let (composition_time, data) = match packet {
234 VideoPacket::CodedFrames(VideoPacketCodedFrames::Hevc {
235 composition_time_offset,
236 data,
237 }) => (Some(composition_time_offset), data),
238 VideoPacket::CodedFramesX { data } => (None, data),
239 _ => continue,
240 };
241
242 let composition_time =
243 ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
244
245 let sample = codecs::hevc::trun_sample(frame_type, composition_time as i32, duration, &data)?;
246
247 trun_sample = sample;
248 total_duration = duration;
249 mdat_data = data;
250
251 is_keyframe = frame_type == VideoFrameType::KeyFrame;
252 }
253 _ => {
254 continue;
256 }
257 }
258
259 let trafs = {
260 let (main_duration, main_id) = if is_audio {
261 (self.audio_duration, 2)
262 } else {
263 (self.video_duration, 1)
264 };
265
266 let traf = TrackFragmentBox {
267 tfhd: TrackFragmentHeaderBox::new(main_id, None, None, None, None, None),
268 trun: vec![TrackRunBox::new(vec![trun_sample], None)],
269 sbgp: vec![],
270 sgpd: vec![],
271 subs: vec![],
272 saiz: vec![],
273 saio: vec![],
274 tfdt: Some(TrackFragmentBaseMediaDecodeTimeBox::new(main_duration)),
275 meta: None,
276 udta: None,
277 };
278 vec![traf]
281 };
282
283 let mut moof = MovieFragmentBox {
284 mfhd: MovieFragmentHeaderBox::new(self.sequence_number),
285 meta: None,
286 traf: trafs,
287 udta: None,
288 };
289
290 let moof_size = moof.size();
292
293 let traf = moof.traf.first_mut().expect("we just created the moof with a traf");
297
298 let trun = traf.trun.first_mut().expect("we just created the video traf with a trun");
300
301 trun.data_offset = Some(moof_size as i32 + 8);
305
306 moof.serialize(&mut writer)?;
308
309 MediaDataBox::new(mdat_data.into()).serialize(&mut writer)?;
311
312 self.sequence_number += 1;
314
315 if is_audio {
316 self.audio_duration += total_duration as u64;
317 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
318 data: Bytes::from(writer),
319 ty: MediaType::Audio,
320 keyframe: false,
321 timestamp: self.audio_duration - total_duration as u64,
322 })));
323 } else {
324 self.video_duration += total_duration as u64;
325 self.last_video_timestamp = tag.timestamp_ms;
326 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
327 data: Bytes::from(writer),
328 ty: MediaType::Video,
329 keyframe: is_keyframe,
330 timestamp: self.video_duration - total_duration as u64,
331 })));
332 }
333 }
334 }
335
336 fn find_tags(&self) -> Tags<'a> {
338 let tags = self.tags.iter();
339 let mut video_sequence_header = None;
340 let mut audio_sequence_header = None;
341 let mut scriptdata_tag = None;
342
343 for tag in tags {
344 if video_sequence_header.is_some() && audio_sequence_header.is_some() && scriptdata_tag.is_some() {
345 break;
346 }
347
348 match &tag.data {
349 FlvTagData::Video(VideoData {
350 body: VideoTagBody::Legacy(LegacyVideoTagBody::AvcVideoPacketSeqHdr(data)),
351 ..
352 }) => {
353 video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
354 }
355 FlvTagData::Video(VideoData {
356 body:
357 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
358 video_four_cc: VideoFourCc::Av1,
359 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Av1(config)),
360 }),
361 ..
362 }) => {
363 video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
364 }
365 FlvTagData::Video(VideoData {
366 body:
367 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
368 video_four_cc: VideoFourCc::Hevc,
369 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Hevc(config)),
370 }),
371 ..
372 }) => {
373 video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
374 }
375 FlvTagData::Audio(AudioData {
376 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::SequenceHeader(data))),
377 header:
378 AudioTagHeader::Legacy(LegacyAudioTagHeader {
379 sound_size, sound_type, ..
380 }),
381 ..
382 }) => {
383 audio_sequence_header = Some(AudioSequenceHeader {
384 data: AudioSequenceHeaderData::Aac(data.clone()),
385 sound_size: *sound_size,
386 sound_type: *sound_type,
387 });
388 }
389 FlvTagData::ScriptData(ScriptData::OnMetaData(metadata)) => {
390 scriptdata_tag = Some(*metadata.clone());
391 }
392 _ => {}
393 }
394 }
395
396 Tags {
397 video_sequence_header,
398 audio_sequence_header,
399 scriptdata_tag,
400 }
401 }
402
403 fn init_sequence(
405 &mut self,
406 writer: &mut impl io::Write,
407 ) -> Result<Option<(VideoSettings, AudioSettings)>, TransmuxError> {
408 let Tags {
411 video_sequence_header,
412 audio_sequence_header,
413 scriptdata_tag,
414 } = self.find_tags();
415
416 let Some(video_sequence_header) = video_sequence_header else {
417 return Ok(None);
418 };
419 let Some(audio_sequence_header) = audio_sequence_header else {
420 return Ok(None);
421 };
422
423 let video_codec;
424 let audio_codec;
425 let video_width;
426 let video_height;
427 let audio_channels;
428 let audio_sample_rate;
429 let mut video_fps = 0.0;
430
431 let mut estimated_video_bitrate = 0;
432 let mut estimated_audio_bitrate = 0;
433
434 if let Some(scriptdata_tag) = scriptdata_tag {
435 video_fps = scriptdata_tag.framerate.unwrap_or(0.0);
436 estimated_video_bitrate = scriptdata_tag.videodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
437 estimated_audio_bitrate = scriptdata_tag.audiodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
438 }
439
440 let mut compatible_brands = vec![Brand::Iso5, Brand::Iso6];
441
442 let video_stsd_entry = match video_sequence_header {
443 VideoSequenceHeader::Avc(config) => {
444 compatible_brands.push(Brand::Avc1);
445 video_codec = VideoCodec::Avc {
446 constraint_set: config.profile_compatibility,
447 level: config.level_indication,
448 profile: config.profile_indication,
449 };
450
451 let sps = Sps::parse_with_emulation_prevention(io::Cursor::new(&config.sps[0]))
452 .map_err(|_| TransmuxError::InvalidAVCDecoderConfigurationRecord)?;
453 video_width = sps.width() as u32;
454 video_height = sps.height() as u32;
455
456 let frame_rate = sps.frame_rate();
457 if let Some(frame_rate) = frame_rate {
458 video_fps = frame_rate;
459 }
460
461 UnknownBox::try_from_box(codecs::avc::stsd_entry(config, &sps)?)?
462 }
463 VideoSequenceHeader::Av1(config) => {
464 compatible_brands.push(Brand(*b"av01"));
465 let (entry, seq_obu) = codecs::av1::stsd_entry(config)?;
466
467 video_height = seq_obu.max_frame_height as u32;
468 video_width = seq_obu.max_frame_width as u32;
469
470 let op_point = &seq_obu.operating_points[0];
471
472 video_codec = VideoCodec::Av1 {
473 profile: seq_obu.seq_profile,
474 level: op_point.seq_level_idx,
475 tier: op_point.seq_tier,
476 depth: seq_obu.color_config.bit_depth as u8,
477 monochrome: seq_obu.color_config.mono_chrome,
478 sub_sampling_x: seq_obu.color_config.subsampling_x,
479 sub_sampling_y: seq_obu.color_config.subsampling_y,
480 color_primaries: seq_obu.color_config.color_primaries,
481 transfer_characteristics: seq_obu.color_config.transfer_characteristics,
482 matrix_coefficients: seq_obu.color_config.matrix_coefficients,
483 full_range_flag: seq_obu.color_config.full_color_range,
484 };
485
486 UnknownBox::try_from_box(entry)?
487 }
488 VideoSequenceHeader::Hevc(config) => {
489 compatible_brands.push(Brand(*b"hev1"));
490 video_codec = VideoCodec::Hevc {
491 constraint_indicator: config.general_constraint_indicator_flags,
492 level: config.general_level_idc,
493 profile: config.general_profile_idc,
494 profile_compatibility: config.general_profile_compatibility_flags,
495 tier: config.general_tier_flag,
496 general_profile_space: config.general_profile_space,
497 };
498
499 let (entry, sps) = codecs::hevc::stsd_entry(config)?;
500 if let Some(info) = sps.vui_parameters.as_ref().and_then(|p| p.vui_timing_info.as_ref()) {
501 video_fps = info.time_scale.get() as f64 / info.num_units_in_tick.get() as f64;
502 }
503
504 video_width = sps.cropped_width() as u32;
505 video_height = sps.cropped_height() as u32;
506
507 UnknownBox::try_from_box(entry)?
508 }
509 };
510
511 let audio_stsd_entry = match audio_sequence_header.data {
512 AudioSequenceHeaderData::Aac(data) => {
513 compatible_brands.push(Brand::Mp41);
514 let (entry, config) =
515 codecs::aac::stsd_entry(audio_sequence_header.sound_size, audio_sequence_header.sound_type, data)?;
516
517 audio_sample_rate = config.sampling_frequency;
518
519 audio_codec = AudioCodec::Aac {
520 object_type: config.audio_object_type,
521 };
522 audio_channels = match audio_sequence_header.sound_type {
523 SoundType::Mono => 1,
524 SoundType::Stereo => 2,
525 _ => return Err(TransmuxError::InvalidAudioChannels),
526 };
527
528 entry
529 }
530 };
531
532 if video_fps == 0.0 {
533 return Err(TransmuxError::InvalidVideoFrameRate);
534 }
535
536 if video_width == 0 || video_height == 0 {
537 return Err(TransmuxError::InvalidVideoDimensions);
538 }
539
540 if audio_sample_rate == 0 {
541 return Err(TransmuxError::InvalidAudioSampleRate);
542 }
543
544 let video_timescale = (1000.0 * video_fps) as u32;
550
551 FileTypeBox {
553 major_brand: Brand::Iso5,
554 minor_version: 512,
555 compatible_brands,
556 }
557 .serialize(&mut *writer)?;
558
559 MovieBox {
561 mvhd: MovieHeaderBox::new(0, 0, 1000, 0, 1),
562 meta: None,
563 trak: vec![
564 TrackBox::new(
565 TrackHeaderBox::new(0, 0, 1, 0, Some((video_width, video_height))), None, MediaBox::new(
569 MediaHeaderBox::new(0, 0, video_timescale, 0), HandlerBox::new(HandlerType::Video, "VideoHandler".to_string().into()), MediaInformationBox::new(
573 SampleTableBox::new(
575 SampleDescriptionBox::new(vec![video_stsd_entry]), TimeToSampleBox::default(), SampleToChunkBox::default(), Some(SampleSizeBox::default()), ChunkOffsetBox::default(), ),
581 Some(VideoMediaHeaderBox::default()), None, ),
584 ),
585 ),
586 TrackBox::new(
587 TrackHeaderBox::new(0, 0, 2, 0, None), None, MediaBox::new(
591 MediaHeaderBox::new(0, 0, audio_sample_rate, 0), HandlerBox::new(HandlerType::Audio, "SoundHandler".to_string().into()), MediaInformationBox::new(
595 SampleTableBox::new(
597 SampleDescriptionBox::new(vec![UnknownBox::try_from_box(audio_stsd_entry)?]), TimeToSampleBox::default(), SampleToChunkBox::default(), Some(SampleSizeBox::default()), ChunkOffsetBox::default(), ),
603 None, Some(SoundMediaHeaderBox::default()), ),
606 ),
607 ),
608 ],
609 mvex: Some(MovieExtendsBox {
610 mehd: None,
611 trex: vec![TrackExtendsBox::new(1), TrackExtendsBox::new(2)],
612 leva: None,
613 }),
614 unknown_boxes: vec![],
615 udta: None,
616 }
617 .serialize(writer)?;
618
619 Ok(Some((
620 VideoSettings {
621 width: video_width,
622 height: video_height,
623 framerate: video_fps,
624 codec: video_codec,
625 bitrate: estimated_video_bitrate,
626 timescale: video_timescale,
627 },
628 AudioSettings {
629 codec: audio_codec,
630 sample_rate: audio_sample_rate,
631 channels: audio_channels,
632 bitrate: estimated_audio_bitrate,
633 timescale: audio_sample_rate,
634 },
635 )))
636 }
637}
638
/// Release history of this crate.
///
/// The module body is intentionally empty here; it is only compiled with the
/// `docs` feature and is processed by the `scuffle_changelog::changelog`
/// attribute macro.
#[cfg(feature = "docs")]
#[scuffle_changelog::changelog]
pub mod changelog {}
643
644#[cfg(test)]
645mod tests;