-
Notifications
You must be signed in to change notification settings - Fork 80
Fix audio decoding bug with FFmpeg4 by setting channel_layout #865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -61,7 +61,15 @@ int getNumChannels(const UniqueAVFrame& avFrame) { | |||||
| (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44) | ||||||
| return avFrame->ch_layout.nb_channels; | ||||||
| #else | ||||||
| return av_get_channel_layout_nb_channels(avFrame->channel_layout); | ||||||
| int numChannels = av_get_channel_layout_nb_channels(avFrame->channel_layout); | ||||||
| // Handle FFmpeg 4 bug where channel_layout is 0 or unset | ||||||
| // Set it to the default layout for the number of channels | ||||||
| // to allow successful initialization of SwrContext | ||||||
| if (numChannels == 0 && avFrame->channels > 0) { | ||||||
| avFrame->channel_layout = av_get_default_channel_layout(avFrame->channels); | ||||||
| return avFrame->channels; | ||||||
|
||||||
| return avFrame->channels; | |
| numChannels = avFrame->channels; |
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1276,16 +1276,20 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU( | |
| int outSampleRate = | ||
| streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate); | ||
|
|
||
| int srcNumChannels = getNumChannels(streamInfo.codecContext); | ||
| TORCH_CHECK( | ||
| srcNumChannels == getNumChannels(srcAVFrame), | ||
| "The frame has ", | ||
| getNumChannels(srcAVFrame), | ||
| " channels, expected ", | ||
| srcNumChannels, | ||
| ". If you are hitting this, it may be because you are using " | ||
| "a buggy FFmpeg version. FFmpeg4 is known to fail here in some " | ||
| "valid scenarios. Try to upgrade FFmpeg?"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should still have this check, but perhaps relax it if
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added back the check, and excluded the case where |
||
| int srcNumChannels = getNumChannels(srcAVFrame); | ||
| int srcNumChannelsFromCodec = getNumChannels(streamInfo.codecContext); | ||
| // Use number of channels from codec if 0 returned from frame | ||
| if (srcNumChannels == 0 && srcNumChannelsFromCodec > 0) { | ||
| srcNumChannels = srcNumChannelsFromCodec; | ||
|
||
| } else { | ||
| // Check if the number of channels from the frame matches the codec | ||
| TORCH_CHECK( | ||
| srcNumChannels == srcNumChannelsFromCodec, | ||
| "The frame has ", | ||
| srcNumChannels, | ||
| " channels, expected ", | ||
| srcNumChannelsFromCodec); | ||
| }; | ||
| int outNumChannels = | ||
| streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1682,26 +1682,25 @@ def test_downsample_empty_frame(self): | |
| frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0 | ||
| ) | ||
|
|
||
| def test_s16_ffmpeg4_bug(self): | ||
| # s16 fails on FFmpeg4 but can be decoded on other versions. | ||
| # Debugging logs show that we're hitting: | ||
| # [SWR @ 0x560a7abdaf80] Input channel count and layout are unset | ||
| # which seems to point to: | ||
| # https://github.com/FFmpeg/FFmpeg/blob/40a6963fbd0c47be358a3760480180b7b532e1e9/libswresample/swresample.c#L293-L305 | ||
| # ¯\_(ツ)_/¯ | ||
| def test_decode_s16_ffmpeg4(self): | ||
| # Non-regression test for https://github.com/pytorch/torchcodec/issues/843 | ||
| # Ensures that decoding s16 on FFmpeg4 handles | ||
| # unset input channel count and layout | ||
|
|
||
| asset = SINE_MONO_S16 | ||
| decoder = AudioDecoder(asset.path) | ||
| assert decoder.metadata.sample_rate == asset.sample_rate | ||
| assert decoder.metadata.sample_format == asset.sample_format | ||
|
|
||
| cm = ( | ||
| pytest.raises(RuntimeError, match="The frame has 0 channels, expected 1.") | ||
| if get_ffmpeg_major_version() == 4 | ||
| else contextlib.nullcontext() | ||
| test_frames = decoder.get_samples_played_in_range() | ||
| assert test_frames.data.shape[0] == decoder.metadata.num_channels | ||
| assert test_frames.sample_rate == decoder.metadata.sample_rate | ||
|
||
| reference_frames = asset.get_frame_data_by_range( | ||
| start=0, stop=1, stream_index=0 | ||
| ) | ||
| torch.testing.assert_close( | ||
| test_frames.data[0], reference_frames, atol=0, rtol=0 | ||
|
||
| ) | ||
| with cm: | ||
| decoder.get_samples_played_in_range() | ||
|
|
||
| @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3)) | ||
| @pytest.mark.parametrize("sample_rate", (None, 8000, 16_000, 44_1000)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, I think the code has slightly changed since the comment was written, so this edit may be closer to what's happening (feel free to edit!)