-
Notifications
You must be signed in to change notification settings - Fork 80
Fix audio decoding bug with FFmpeg4 by setting channel_layout #865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1277,15 +1277,6 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU( | |
| streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate); | ||
|
|
||
| int srcNumChannels = getNumChannels(streamInfo.codecContext); | ||
| TORCH_CHECK( | ||
| srcNumChannels == getNumChannels(srcAVFrame), | ||
| "The frame has ", | ||
| getNumChannels(srcAVFrame), | ||
| " channels, expected ", | ||
| srcNumChannels, | ||
| ". If you are hitting this, it may be because you are using " | ||
| "a buggy FFmpeg version. FFmpeg4 is known to fail here in some " | ||
| "valid scenarios. Try to upgrade FFmpeg?"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should still have this check, but perhaps relax it if
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added back the check, and excluded the case where |
||
| int outNumChannels = | ||
| streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1682,26 +1682,25 @@ def test_downsample_empty_frame(self): | |
| frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0 | ||
| ) | ||
|
|
||
| def test_s16_ffmpeg4_bug(self): | ||
| # s16 fails on FFmpeg4 but can be decoded on other versions. | ||
| # Debugging logs show that we're hitting: | ||
| # [SWR @ 0x560a7abdaf80] Input channel count and layout are unset | ||
| # which seems to point to: | ||
| # https://github.com/FFmpeg/FFmpeg/blob/40a6963fbd0c47be358a3760480180b7b532e1e9/libswresample/swresample.c#L293-L305 | ||
| # ¯\_(ツ)_/¯ | ||
| def test_decode_s16_ffmpeg4(self): | ||
| # Non-regression test for https://github.com/pytorch/torchcodec/issues/843 | ||
| # Ensures that decoding s16 on FFmpeg4 handles | ||
| # unset input channel count and layout | ||
|
|
||
| asset = SINE_MONO_S16 | ||
| decoder = AudioDecoder(asset.path) | ||
| assert decoder.metadata.sample_rate == asset.sample_rate | ||
| assert decoder.metadata.sample_format == asset.sample_format | ||
|
|
||
| cm = ( | ||
| pytest.raises(RuntimeError, match="The frame has 0 channels, expected 1.") | ||
| if get_ffmpeg_major_version() == 4 | ||
| else contextlib.nullcontext() | ||
| test_frames = decoder.get_samples_played_in_range() | ||
| assert test_frames.data.shape[0] == decoder.metadata.num_channels | ||
| assert test_frames.sample_rate == decoder.metadata.sample_rate | ||
|
||
| reference_frames = asset.get_frame_data_by_range( | ||
| start=0, stop=1, stream_index=0 | ||
| ) | ||
| torch.testing.assert_close( | ||
| test_frames.data[0], reference_frames, atol=0, rtol=0 | ||
|
||
| ) | ||
| with cm: | ||
| decoder.get_samples_played_in_range() | ||
|
|
||
| @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3)) | ||
| @pytest.mark.parametrize("sample_rate", (None, 8000, 16_000, 44_1000)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, I think the code has slightly changed since the comment was written, so this edit may be closer to what's happening (feel free to edit!)