Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/torchcodec/_core/FFMPEGCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,15 @@ int getNumChannels(const UniqueAVFrame& avFrame) {
(LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
return avFrame->ch_layout.nb_channels;
#else
return av_get_channel_layout_nb_channels(avFrame->channel_layout);
int numChannels = av_get_channel_layout_nb_channels(avFrame->channel_layout);
// Handle FFmpeg 4 bug where channel_layout and numChannels are 0 or unset
// Set values based on avFrame->channels which appears to be correct
// to allow successful initialization of SwrContext
if (numChannels == 0 && avFrame->channels > 0) {
avFrame->channel_layout = av_get_default_channel_layout(avFrame->channels);
numChannels = avFrame->channels;
}
return numChannels;
#endif
}

Expand Down
Binary file not shown.
25 changes: 12 additions & 13 deletions test/test_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -1682,26 +1682,25 @@ def test_downsample_empty_frame(self):
frames_44100_to_8000.data, frames_8000.data, atol=0.03, rtol=0
)

def test_s16_ffmpeg4_bug(self):
# s16 fails on FFmpeg4 but can be decoded on other versions.
# Debugging logs show that we're hitting:
# [SWR @ 0x560a7abdaf80] Input channel count and layout are unset
# which seems to point to:
# https://github.com/FFmpeg/FFmpeg/blob/40a6963fbd0c47be358a3760480180b7b532e1e9/libswresample/swresample.c#L293-L305
# ¯\_(ツ)_/¯
def test_decode_s16_ffmpeg4(self):
# Non-regression test for https://github.com/pytorch/torchcodec/issues/843
# Ensures that decoding s16 on FFmpeg4 handles
# unset input channel count and layout

asset = SINE_MONO_S16
decoder = AudioDecoder(asset.path)
assert decoder.metadata.sample_rate == asset.sample_rate
assert decoder.metadata.sample_format == asset.sample_format

cm = (
pytest.raises(RuntimeError, match="The frame has 0 channels, expected 1.")
if get_ffmpeg_major_version() == 4
else contextlib.nullcontext()
test_samples = decoder.get_samples_played_in_range()
assert test_samples.data.shape[0] == decoder.metadata.num_channels
assert test_samples.sample_rate == decoder.metadata.sample_rate
reference_frames = asset.get_frame_data_by_range(
start=0, stop=1, stream_index=0
)
torch.testing.assert_close(
test_samples.data[0], reference_frames, atol=0, rtol=0
)
with cm:
decoder.get_samples_played_in_range()

@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
@pytest.mark.parametrize("sample_rate", (None, 8000, 16_000, 44_1000))
Expand Down
Loading