diff --git a/Telegram/SourceFiles/media/media_audio.cpp b/Telegram/SourceFiles/media/media_audio.cpp
index 8b5a0aca5..38bb5d0fe 100644
--- a/Telegram/SourceFiles/media/media_audio.cpp
+++ b/Telegram/SourceFiles/media/media_audio.cpp
@@ -1405,8 +1405,8 @@ void DetachFromDevice() {
 
 class FFMpegAttributesReader : public AbstractFFMpegLoader {
 public:
-
-	FFMpegAttributesReader(const FileLocation &file, const QByteArray &data) : AbstractFFMpegLoader(file, data, base::byte_vector()) {
+	FFMpegAttributesReader(const FileLocation &file, const QByteArray &data)
+	: AbstractFFMpegLoader(file, data, base::byte_vector()) {
 	}
 
 	bool open(TimeMs positionMs) override {
@@ -1424,15 +1424,22 @@ public:
 		}
 
 		for (int32 i = 0, l = fmtContext->nb_streams; i < l; ++i) {
-			AVStream *stream = fmtContext->streams[i];
+			const auto stream = fmtContext->streams[i];
 			if (stream->disposition & AV_DISPOSITION_ATTACHED_PIC) {
-				const AVPacket &packet(stream->attached_pic);
+				const auto &packet = stream->attached_pic;
 				if (packet.size) {
-					bool animated = false;
-					QByteArray cover((const char*)packet.data, packet.size), format;
-					_cover = App::readImage(cover, &format, true, &animated);
+					const auto coverBytes = QByteArray(
+						(const char*)packet.data,
+						packet.size);
+					auto format = QByteArray();
+					auto animated = false;
+					_cover = App::readImage(
+						coverBytes,
+						&format,
+						true,
+						&animated);
 					if (!_cover.isNull()) {
-						_coverBytes = cover;
+						_coverBytes = coverBytes;
 						_coverFormat = format;
 						break;
 					}
@@ -1464,7 +1471,7 @@ public:
 		//}
 	}
 
-	int32 format() override {
+	int format() override {
 		return 0;
 	}
 
diff --git a/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.cpp b/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.cpp
index 50f08ec20..79b998d17 100644
--- a/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.cpp
+++ b/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.cpp
@@ -20,24 +20,28 @@ Copyright (c) 2014-2017 John Preston, https://desktop.telegram.org
 */
 #include "media/media_audio_ffmpeg_loader.h"
 
-#include "core/crash_reports.h"
-
 namespace {
 
 constexpr AVSampleFormat AudioToFormat = AV_SAMPLE_FMT_S16;
 constexpr int64_t AudioToChannelLayout = AV_CH_LAYOUT_STEREO;
 constexpr int32 AudioToChannels = 2;
 
-bool IsPlanarFormat(int format) {
-	return (format == AV_SAMPLE_FMT_U8P)
-		|| (format == AV_SAMPLE_FMT_S16P)
-		|| (format == AV_SAMPLE_FMT_S32P)
-		|| (format == AV_SAMPLE_FMT_FLTP)
-		|| (format == AV_SAMPLE_FMT_DBLP)
-		|| (format == AV_SAMPLE_FMT_S64P);
+} // namespace
+
+uint64_t AbstractFFMpegLoader::ComputeChannelLayout(
+		uint64_t channel_layout,
+		int channels) {
+	if (channel_layout) {
+		if (av_get_channel_layout_nb_channels(channel_layout) == channels) {
+			return channel_layout;
+		}
+	}
+	return av_get_default_channel_layout(channels);
 }
 
-} // namespace
+int64 AbstractFFMpegLoader::Mul(int64 value, AVRational rational) {
+	return value * rational.num / rational.den;
+}
 
 bool AbstractFFMpegLoader::open(TimeMs positionMs) {
 	if (!AudioPlayerLoader::openFile()) {
@@ -81,11 +85,18 @@ bool AbstractFFMpegLoader::open(TimeMs positionMs) {
 		return false;
 	}
 
-	_samplesFrequency = fmtContext->streams[streamId]->codecpar->sample_rate;
-	if (fmtContext->streams[streamId]->duration == AV_NOPTS_VALUE) {
-		_samplesCount = (fmtContext->duration * _samplesFrequency) / AV_TIME_BASE;
-	} else {
-		_samplesCount = (fmtContext->streams[streamId]->duration * _samplesFrequency * fmtContext->streams[streamId]->time_base.num) / fmtContext->streams[streamId]->time_base.den;
-	}
+	const auto stream = fmtContext->streams[streamId];
+	const auto params = stream->codecpar;
+	_samplesFrequency = params->sample_rate;
+	if (stream->duration != AV_NOPTS_VALUE) {
+		_samplesCount = Mul(
+			stream->duration * _samplesFrequency,
+			stream->time_base);
+	} else {
+		_samplesCount = Mul(
+			fmtContext->duration * _samplesFrequency,
+			{ 1, AV_TIME_BASE });
+	}
 
 	return true;
@@ -190,8 +201,12 @@ int64_t AbstractFFMpegLoader::_seek_file(void *opaque, int64_t offset, int whenc
 	return -1;
 }
 
-FFMpegLoader::FFMpegLoader(const FileLocation &file, const QByteArray &data, base::byte_vector &&bytes) : AbstractFFMpegLoader(file, data, std::move(bytes)) {
-	frame = av_frame_alloc();
+FFMpegLoader::FFMpegLoader(
+	const FileLocation &file,
+	const QByteArray &data,
+	base::byte_vector &&bytes)
+: AbstractFFMpegLoader(file, data, std::move(bytes)) {
+	_frame = av_frame_alloc();
 }
 
 bool FFMpegLoader::open(TimeMs positionMs) {
@@ -202,115 +217,116 @@ bool FFMpegLoader::open(TimeMs positionMs) {
 
 	int res = 0;
 	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
 
-	auto codecParams = fmtContext->streams[streamId]->codecpar;
-
-	codecContext = avcodec_alloc_context3(nullptr);
-	if (!codecContext) {
-		LOG(("Audio Error: Unable to avcodec_alloc_context3 for file '%1', data size '%2'").arg(_file.name()).arg(_data.size()));
-		return false;
-	}
-	if ((res = avcodec_parameters_to_context(codecContext, codecParams)) < 0) {
-		LOG(("Audio Error: Unable to avcodec_parameters_to_context for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-		return false;
-	}
-	av_codec_set_pkt_timebase(codecContext, fmtContext->streams[streamId]->time_base);
-	av_opt_set_int(codecContext, "refcounted_frames", 1, 0);
-
-	if ((res = avcodec_open2(codecContext, codec, 0)) < 0) {
-		LOG(("Audio Error: Unable to avcodec_open2 for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-		return false;
-	}
-
-	auto layout = codecParams->channel_layout;
-	if (!layout) {
-		switch (codecParams->channels) {
-		case 1: layout = AV_CH_LAYOUT_MONO; break;
-		case 2: layout = AV_CH_LAYOUT_STEREO; break;
-		default:
-			LOG(("Audio Error: Unknown channel layout for %1 channels.").arg(codecParams->channels));
-			return false;
-			break;
-		}
-	}
-	inputFormat = codecContext->sample_fmt;
-	switch (layout) {
-	case AV_CH_LAYOUT_MONO:
-		switch (inputFormat) {
-		case AV_SAMPLE_FMT_U8:
-		case AV_SAMPLE_FMT_U8P: fmt = AL_FORMAT_MONO8; sampleSize = 1; break;
-		case AV_SAMPLE_FMT_S16:
-		case AV_SAMPLE_FMT_S16P: fmt = AL_FORMAT_MONO16; sampleSize = sizeof(uint16); break;
-		default:
-			sampleSize = -1; // convert needed
-			break;
-		}
-		break;
-	case AV_CH_LAYOUT_STEREO:
-		switch (inputFormat) {
-		case AV_SAMPLE_FMT_U8: fmt = AL_FORMAT_STEREO8; sampleSize = 2; break;
-		case AV_SAMPLE_FMT_S16: fmt = AL_FORMAT_STEREO16; sampleSize = 2 * sizeof(uint16); break;
-		default:
-			sampleSize = -1; // convert needed
-			break;
-		}
-		break;
-	default:
-		sampleSize = -1; // convert needed
-		break;
-	}
-
-	if (av_popcount64(layout) != codecParams->channels) {
-		LOG(("Audio Error: Bad channel layout %1 for %2 channels."
-			).arg(codecParams->channel_layout
-			).arg(codecParams->channels
+	_codecContext = avcodec_alloc_context3(nullptr);
+	if (!_codecContext) {
+		LOG(("Audio Error: "
+			"Unable to avcodec_alloc_context3 for file '%1', data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()
 			));
 		return false;
 	}
-	if (_samplesFrequency != Media::Player::kDefaultFrequency) {
-		sampleSize = -1; // convert needed
+	const auto stream = fmtContext->streams[streamId];
+	if ((res = avcodec_parameters_to_context(
+			_codecContext,
+			stream->codecpar)) < 0) {
+		LOG(("Audio Error: "
+			"Unable to avcodec_parameters_to_context for file '%1', "
+			"data size '%2', error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(res
+			).arg(av_make_error_string(err, sizeof(err), res)
+			));
+		return false;
+	}
+	av_codec_set_pkt_timebase(_codecContext, stream->time_base);
+	av_opt_set_int(_codecContext, "refcounted_frames", 1, 0);
+
+	if ((res = avcodec_open2(_codecContext, codec, 0)) < 0) {
+		LOG(("Audio Error: "
+			"Unable to avcodec_open2 for file '%1', data size '%2', "
+			"error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(res
+			).arg(av_make_error_string(err, sizeof(err), res)
+			));
+		return false;
 	}
 
-	if (sampleSize < 0) {
-		swrContext = swr_alloc();
-		if (!swrContext) {
-			LOG(("Audio Error: Unable to swr_alloc for file '%1', data size '%2'").arg(_file.name()).arg(_data.size()));
-			return false;
-		}
-		int64_t src_ch_layout = layout, dst_ch_layout = AudioToChannelLayout;
-		srcRate = _samplesFrequency;
-		AVSampleFormat src_sample_fmt = inputFormat, dst_sample_fmt = AudioToFormat;
-		dstRate = Media::Player::kDefaultFrequency;
-
-		av_opt_set_int(swrContext, "in_channel_layout", src_ch_layout, 0);
-		av_opt_set_int(swrContext, "in_sample_rate", srcRate, 0);
-		av_opt_set_sample_fmt(swrContext, "in_sample_fmt", src_sample_fmt, 0);
-		av_opt_set_int(swrContext, "out_channel_layout", dst_ch_layout, 0);
-		av_opt_set_int(swrContext, "out_sample_rate", dstRate, 0);
-		av_opt_set_sample_fmt(swrContext, "out_sample_fmt", dst_sample_fmt, 0);
-
-		if ((res = swr_init(swrContext)) < 0) {
-			LOG(("Audio Error: Unable to swr_init for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-			return false;
-		}
-
-		sampleSize = AudioToChannels * sizeof(short);
-		_samplesFrequency = dstRate;
-		_samplesCount = av_rescale_rnd(_samplesCount, dstRate, srcRate, AV_ROUND_UP);
-		fmt = AL_FORMAT_STEREO16;
-
-		maxResampleSamples = av_rescale_rnd(AVBlockSize / sampleSize, dstRate, srcRate, AV_ROUND_UP);
-		if ((res = av_samples_alloc_array_and_samples(&dstSamplesData, 0, AudioToChannels, maxResampleSamples, AudioToFormat, 0)) < 0) {
-			LOG(("Audio Error: Unable to av_samples_alloc for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-			return false;
-		}
+	const auto layout = ComputeChannelLayout(
+		_codecContext->channel_layout,
+		_codecContext->channels);
+	if (!layout) {
+		LOG(("Audio Error: Unknown channel layout %1 for %2 channels."
+			).arg(_codecContext->channel_layout
+			).arg(_codecContext->channels
+			));
+		return false;
 	}
+
+	_swrSrcFormat = _codecContext->sample_fmt;
+	switch (layout) {
+	case AV_CH_LAYOUT_MONO:
+		switch (_swrSrcFormat) {
+		case AV_SAMPLE_FMT_U8:
+		case AV_SAMPLE_FMT_U8P:
+			_swrDstFormat = _swrSrcFormat;
+			_swrDstChannelLayout = layout;
+			_swrDstChannels = 1;
+			_format = AL_FORMAT_MONO8;
+			sampleSize = 1;
+			break;
+		case AV_SAMPLE_FMT_S16:
+		case AV_SAMPLE_FMT_S16P:
+			_swrDstFormat = _swrSrcFormat;
+			_swrDstChannelLayout = layout;
+			_swrDstChannels = 1;
+			_format = AL_FORMAT_MONO16;
+			sampleSize = sizeof(uint16);
+			break;
+		}
+		break;
+	case AV_CH_LAYOUT_STEREO:
+		switch (_swrSrcFormat) {
+		case AV_SAMPLE_FMT_U8:
+			_swrDstFormat = _swrSrcFormat;
+			_swrDstChannelLayout = layout;
+			_swrDstChannels = 2;
+			_format = AL_FORMAT_STEREO8;
+			sampleSize = 2;
+			break;
+		case AV_SAMPLE_FMT_S16:
+			_swrDstFormat = _swrSrcFormat;
+			_swrDstChannelLayout = layout;
+			_swrDstChannels = 2;
+			_format = AL_FORMAT_STEREO16;
+			sampleSize = 2 * sizeof(uint16);
+			break;
+		}
+		break;
+	}
+
+	if (_swrDstRate == _samplesFrequency) {
+		_swrDstSamplesCount = _samplesCount;
+	} else {
+		_swrDstSamplesCount = av_rescale_rnd(
+			_samplesCount,
+			_swrDstRate,
+			_samplesFrequency,
+			AV_ROUND_UP);
+	}
+
 	if (positionMs) {
-		const auto timeBase = fmtContext->streams[streamId]->time_base;
+		const auto timeBase = stream->time_base;
 		const auto timeStamp = (positionMs * timeBase.den)
 			/ (1000LL * timeBase.num);
-		if (av_seek_frame(fmtContext, streamId, timeStamp, AVSEEK_FLAG_ANY) < 0) {
-			if (av_seek_frame(fmtContext, streamId, timeStamp, 0) < 0) {
+		const auto flags1 = AVSEEK_FLAG_ANY;
+		if (av_seek_frame(fmtContext, streamId, timeStamp, flags1) < 0) {
+			const auto flags2 = 0;
+			if (av_seek_frame(fmtContext, streamId, timeStamp, flags2) < 0) {
 			}
 		}
 	}
@@ -318,11 +334,13 @@ bool FFMpegLoader::open(TimeMs positionMs) {
 	return true;
 }
 
-AudioPlayerLoader::ReadResult FFMpegLoader::readMore(QByteArray &result, int64 &samplesAdded) {
+AudioPlayerLoader::ReadResult FFMpegLoader::readMore(
+		QByteArray &result,
+		int64 &samplesAdded) {
 	int res;
 
-	av_frame_unref(frame);
-	res = avcodec_receive_frame(codecContext, frame);
+	av_frame_unref(_frame);
+	res = avcodec_receive_frame(_codecContext, _frame);
 	if (res >= 0) {
 		return readFromReadyFrame(result, samplesAdded);
 	}
@@ -331,27 +349,48 @@ AudioPlayerLoader::ReadResult FFMpegLoader::readMore(QByteArray &result, int64 &
 		return ReadResult::EndOfFile;
 	} else if (res != AVERROR(EAGAIN)) {
 		char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
-		LOG(("Audio Error: Unable to avcodec_receive_frame() file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		LOG(("Audio Error: "
+			"Unable to avcodec_receive_frame() file '%1', data size '%2', "
+			"error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(res
+			).arg(av_make_error_string(err, sizeof(err), res)
+			));
 		return ReadResult::Error;
 	}
 
-	if ((res = av_read_frame(fmtContext, &avpkt)) < 0) {
+	if ((res = av_read_frame(fmtContext, &_packet)) < 0) {
 		if (res != AVERROR_EOF) {
 			char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
-			LOG(("Audio Error: Unable to av_read_frame() file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+			LOG(("Audio Error: "
+				"Unable to av_read_frame() file '%1', data size '%2', "
+				"error %3, %4"
+				).arg(_file.name()
+				).arg(_data.size()
+				).arg(res
+				).arg(av_make_error_string(err, sizeof(err), res)
+				));
 			return ReadResult::Error;
 		}
-		avcodec_send_packet(codecContext, nullptr); // drain
+		avcodec_send_packet(_codecContext, nullptr); // drain
 		return ReadResult::Ok;
 	}
 
-	if (avpkt.stream_index == streamId) {
-		res = avcodec_send_packet(codecContext, &avpkt);
+	if (_packet.stream_index == streamId) {
+		res = avcodec_send_packet(_codecContext, &_packet);
 		if (res < 0) {
-			av_packet_unref(&avpkt);
+			av_packet_unref(&_packet);
 			char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
-			LOG(("Audio Error: Unable to avcodec_send_packet() file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+			LOG(("Audio Error: "
+				"Unable to avcodec_send_packet() file '%1', data size '%2', "
+				"error %3, %4"
+				).arg(_file.name()
+				).arg(_data.size()
+				).arg(res
+				).arg(av_make_error_string(err, sizeof(err), res)
+				));
 
 			// There is a sample voice message where skipping such packet
 			// results in a crash (read_access to nullptr) in swr_convert().
 			//if (res == AVERROR_INVALIDDATA) {
@@ -360,104 +399,203 @@ AudioPlayerLoader::ReadResult FFMpegLoader::readMore(QByteArray &result, int64 &
 			return ReadResult::Error;
 		}
 	}
-	av_packet_unref(&avpkt);
+	av_packet_unref(&_packet);
 	return ReadResult::Ok;
 }
 
-AudioPlayerLoader::ReadResult FFMpegLoader::readFromReadyFrame(QByteArray &result, int64 &samplesAdded) {
+bool FFMpegLoader::frameHasDesiredFormat() const {
+	const auto frameChannelLayout = ComputeChannelLayout(
+		_frame->channel_layout,
+		_frame->channels);
+	return true
+		&& (_frame->format == _swrDstFormat)
+		&& (frameChannelLayout == _swrDstChannelLayout)
+		&& (_frame->sample_rate == _swrDstRate);
+}
+
+bool FFMpegLoader::initResampleForFrame() {
+	const auto frameChannelLayout = ComputeChannelLayout(
+		_frame->channel_layout,
+		_frame->channels);
+	if (!frameChannelLayout) {
+		LOG(("Audio Error: "
+			"Unable to compute channel layout for frame in file '%1', "
+			"data size '%2', channel_layout %3, channels %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(_frame->channel_layout
+			).arg(_frame->channels
+			));
+		return false;
+	} else if (_frame->format == -1) {
+		LOG(("Audio Error: "
+			"Unknown frame format in file '%1', data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()
+			));
+		return false;
+	} else if (_swrContext) {
+		if (true
+			&& (_frame->format == _swrSrcFormat)
+			&& (frameChannelLayout == _swrSrcChannelLayout)
+			&& (_frame->sample_rate == _swrSrcRate)) {
+			return true;
+		}
+		swr_close(_swrContext);
+	}
+
+	_swrSrcFormat = static_cast<AVSampleFormat>(_frame->format);
+	_swrSrcChannelLayout = frameChannelLayout;
+	_swrSrcRate = _frame->sample_rate;
+	return initResampleUsingFormat();
+}
+
+bool FFMpegLoader::initResampleUsingFormat() {
 	int res = 0;
-	if (dstSamplesData) { // convert needed
-		int64_t dstSamples = av_rescale_rnd(swr_get_delay(swrContext, srcRate) + frame->nb_samples, dstRate, srcRate, AV_ROUND_UP);
-		if (dstSamples > maxResampleSamples) {
-			maxResampleSamples = dstSamples;
-			av_freep(&dstSamplesData[0]);
-			if ((res = av_samples_alloc(dstSamplesData, 0, AudioToChannels, maxResampleSamples, AudioToFormat, 1)) < 0) {
-				char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
-				LOG(("Audio Error: Unable to av_samples_alloc for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-				return ReadResult::Error;
-			}
-		}
-
-		// There are crash reports of some files with swrContext->in.ch_count
-		// equal to 2 and frame with only one channel data provided.
-		// I'm not sure what to do with those files, could not get one for testing.
-		// Currently just abort the reading because it crashes in swr_convert.
-		//
-		// Samples included:
-		//
-		// codecpar->channel_layout = 3
-		// codecpar->channels = 1 (but it is 2 by the channel_layout!)
-		// frame->channel_layout = 4
-		// frame->channels = 1
-		//
-		// So it looks like codecpar->channel_layout was wrong and frame
-		// really had only one channel, but swresample expected data for two channels.
-		//
-		// codecpar->channel_layout = 3
-		// codecpar->channels = 2
-		// frame->channel_layout = 4
-		// frame->channels = 1
-		//
-		// So the frame just wasn't consistent with the codec params.
-		if (frame->extended_data[1] == nullptr) {
-			const auto params = fmtContext->streams[streamId]->codecpar;
-			if (IsPlanarFormat(params->format) && params->channels > 1) {
-				LOG(("Audio Error: Inconsistent frame layout/channels in file, codec: (%1;%2;%3), frame: (%4;%5;%6)."
-					).arg(params->channel_layout
-					).arg(params->channels
-					).arg(params->format
-					).arg(frame->channel_layout
-					).arg(frame->channels
-					).arg(frame->format
-					));
-				return ReadResult::Error;
-			} else {
-				const auto key = "ffmpeg_" + std::to_string(ptrdiff_t(this));
-				const auto value = QString("codec: (%1;%2;%3), frame: (%4;%5;%6), ptrs: (%7;%8;%9)"
-					).arg(params->channel_layout
-					).arg(params->channels
-					).arg(params->format
-					).arg(frame->channel_layout
-					).arg(frame->channels
-					).arg(frame->format
-					).arg(ptrdiff_t(frame->data[0])
-					).arg(ptrdiff_t(frame->extended_data[0])
-					).arg(ptrdiff_t(frame->data[1])
-					);
-				CrashReports::SetAnnotation(key, value);
-			}
-		}
-
-		if ((res = swr_convert(swrContext, dstSamplesData, dstSamples, (const uint8_t**)frame->extended_data, frame->nb_samples)) < 0) {
-			char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
-			LOG(("Audio Error: Unable to swr_convert for file '%1', data size '%2', error %3, %4").arg(_file.name()).arg(_data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
-			return ReadResult::Error;
-		}
-
-		if (frame->extended_data[1] == nullptr) {
-			const auto key = "ffmpeg_" + std::to_string(ptrdiff_t(this));
-			CrashReports::ClearAnnotation(key);
-		}
-
-		int32 resultLen = av_samples_get_buffer_size(0, AudioToChannels, res, AudioToFormat, 1);
-		result.append((const char*)dstSamplesData[0], resultLen);
-		samplesAdded += resultLen / sampleSize;
-	} else {
-		result.append((const char*)frame->extended_data[0], frame->nb_samples * sampleSize);
-		samplesAdded += frame->nb_samples;
+	_swrContext = swr_alloc_set_opts(
+		_swrContext,
+		_swrDstChannelLayout,
+		_swrDstFormat,
+		_swrDstRate,
+		_swrSrcChannelLayout,
+		_swrSrcFormat,
+		_swrSrcRate,
+		0,
+		nullptr);
+	if (!_swrContext) {
+		LOG(("Audio Error: "
+			"Unable to swr_alloc for file '%1', data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()));
+		return false;
+	} else if ((res = swr_init(_swrContext)) < 0) {
+		char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+		LOG(("Audio Error: "
+			"Unable to swr_init for file '%1', data size '%2', "
+			"error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(res
+			).arg(av_make_error_string(err, sizeof(err), res)
+			));
+		return false;
 	}
+	if (_swrDstData) {
+		av_freep(&_swrDstData[0]);
+		_swrDstDataCapacity = -1;
+	}
+	return true;
+}
+
+bool FFMpegLoader::ensureResampleSpaceAvailable(int samples) {
+	if (_swrDstData != nullptr && _swrDstDataCapacity >= samples) {
+		return true;
+	}
+	const auto allocate = std::max(samples, int(av_rescale_rnd(
+		AVBlockSize / sampleSize,
+		_swrDstRate,
+		_swrSrcRate,
+		AV_ROUND_UP)));
+
+	if (_swrDstData) {
+		av_freep(&_swrDstData[0]);
+	}
+	const auto res = _swrDstData
+		? av_samples_alloc(
+			_swrDstData,
+			nullptr,
+			_swrDstChannels,
+			allocate,
+			_swrDstFormat,
+			0)
+		: av_samples_alloc_array_and_samples(
+			&_swrDstData,
+			nullptr,
+			_swrDstChannels,
+			allocate,
+			_swrDstFormat,
+			0);
+	if (res < 0) {
+		char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+		LOG(("Audio Error: "
+			"Unable to av_samples_alloc for file '%1', data size '%2', "
+			"error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(res
+			).arg(av_make_error_string(err, sizeof(err), res)
+			));
+		return false;
+	}
+	_swrDstDataCapacity = allocate;
+	return true;
+}
+
+AudioPlayerLoader::ReadResult FFMpegLoader::readFromReadyFrame(
+		QByteArray &result,
+		int64 &samplesAdded) {
+	if (frameHasDesiredFormat()) {
+		result.append(
+			reinterpret_cast<const char*>(_frame->extended_data[0]),
+			_frame->nb_samples * sampleSize);
+		samplesAdded += _frame->nb_samples;
+		return ReadResult::Ok;
+	} else if (!initResampleForFrame()) {
+		return ReadResult::Error;
+	}
+
+	const auto maxSamples = av_rescale_rnd(
+		swr_get_delay(_swrContext, _swrSrcRate) + _frame->nb_samples,
+		_swrDstRate,
+		_swrSrcRate,
+		AV_ROUND_UP);
+	if (!ensureResampleSpaceAvailable(maxSamples)) {
+		return ReadResult::Error;
+	}
+	const auto samples = swr_convert(
+		_swrContext,
+		_swrDstData,
+		maxSamples,
+		(const uint8_t**)_frame->extended_data,
+		_frame->nb_samples);
+	if (samples < 0) {
+		char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+		LOG(("Audio Error: "
+			"Unable to swr_convert for file '%1', data size '%2', "
+			"error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(samples
+			).arg(av_make_error_string(err, sizeof(err), samples)
+			));
+		return ReadResult::Error;
+	}
+
+	const auto bytesCount = av_samples_get_buffer_size(
+		nullptr,
+		_swrDstChannels,
+		samples,
+		_swrDstFormat,
+		1);
+	result.append(
+		reinterpret_cast<const char*>(_swrDstData[0]),
+		bytesCount);
+	samplesAdded += bytesCount / sampleSize;
 	return ReadResult::Ok;
 }
 
 FFMpegLoader::~FFMpegLoader() {
-	if (codecContext) avcodec_free_context(&codecContext);
-	if (swrContext) swr_free(&swrContext);
-	if (dstSamplesData) {
-		if (dstSamplesData[0]) {
-			av_freep(&dstSamplesData[0]);
-		}
-		av_freep(&dstSamplesData);
+	if (_codecContext) {
+		avcodec_free_context(&_codecContext);
 	}
-	av_frame_free(&frame);
+	if (_swrContext) {
+		swr_free(&_swrContext);
+	}
+	if (_swrDstData) {
+		if (_swrDstData[0]) {
+			av_freep(&_swrDstData[0]);
+		}
+		av_freep(&_swrDstData);
+	}
+	av_frame_free(&_frame);
 }
diff --git a/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.h b/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.h
index 17d302fb7..cef1a6175 100644
--- a/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.h
+++ b/Telegram/SourceFiles/media/media_audio_ffmpeg_loader.h
@@ -34,7 +34,11 @@ extern "C" {
 
 class AbstractFFMpegLoader : public AudioPlayerLoader {
 public:
-	AbstractFFMpegLoader(const FileLocation &file, const QByteArray &data, base::byte_vector &&bytes) : AudioPlayerLoader(file, data, std::move(bytes)) {
+	AbstractFFMpegLoader(
+		const FileLocation &file,
+		const QByteArray &data,
+		base::byte_vector &&bytes)
+	: AudioPlayerLoader(file, data, std::move(bytes)) {
 	}
 
 	bool open(TimeMs positionMs) override;
@@ -43,14 +47,20 @@ public:
 		return _samplesCount;
 	}
 
-	int32 samplesFrequency() override {
+	int samplesFrequency() override {
 		return _samplesFrequency;
 	}
 
+	static uint64_t ComputeChannelLayout(
+		uint64_t channel_layout,
+		int channels);
+
 	~AbstractFFMpegLoader();
 
 protected:
-	int32 _samplesFrequency = Media::Player::kDefaultFrequency;
+	static int64 Mul(int64 value, AVRational rational);
+
+	int _samplesFrequency = Media::Player::kDefaultFrequency;
 	int64 _samplesCount = 0;
 
 	uchar *ioBuffer = nullptr;
@@ -73,12 +83,23 @@ private:
 
 class FFMpegLoader : public AbstractFFMpegLoader {
 public:
-	FFMpegLoader(const FileLocation &file, const QByteArray &data, base::byte_vector &&bytes);
+	FFMpegLoader(
+		const FileLocation &file,
+		const QByteArray &data,
+		base::byte_vector &&bytes);
 
 	bool open(TimeMs positionMs) override;
 
-	int32 format() override {
-		return fmt;
+	int64 samplesCount() override {
+		return _swrDstSamplesCount;
+	}
+
+	int samplesFrequency() override {
+		return _swrDstRate;
+	}
+
+	int format() override {
+		return _format;
 	}
 
 	ReadResult readMore(QByteArray &result, int64 &samplesAdded) override;
@@ -86,22 +107,33 @@ public:
 	~FFMpegLoader();
 
 protected:
-	int32 sampleSize = 2 * sizeof(uint16);
+	int sampleSize = 2 * sizeof(uint16);
 
 private:
 	ReadResult readFromReadyFrame(QByteArray &result, int64 &samplesAdded);
+	bool frameHasDesiredFormat() const;
+	bool initResampleForFrame();
+	bool initResampleUsingFormat();
+	bool ensureResampleSpaceAvailable(int samples);
 
-	int32 fmt = AL_FORMAT_STEREO16;
-	int32 srcRate = Media::Player::kDefaultFrequency;
-	int32 dstRate = Media::Player::kDefaultFrequency;
-	int32 maxResampleSamples = 1024;
-	uint8_t **dstSamplesData = nullptr;
+	AVCodecContext *_codecContext = nullptr;
+	AVPacket _packet;
+	int _format = AL_FORMAT_STEREO16;
+	AVFrame *_frame = nullptr;
 
-	AVCodecContext *codecContext = nullptr;
-	AVPacket avpkt;
-	AVSampleFormat inputFormat;
-	AVFrame *frame = nullptr;
+	SwrContext *_swrContext = nullptr;
 
-	SwrContext *swrContext = nullptr;
+	int _swrSrcRate = 0;
+	AVSampleFormat _swrSrcFormat = AV_SAMPLE_FMT_NONE;
+	uint64_t _swrSrcChannelLayout = 0;
+
+	const int _swrDstRate = Media::Player::kDefaultFrequency;
+	AVSampleFormat _swrDstFormat = AV_SAMPLE_FMT_S16;
+	uint64_t _swrDstChannelLayout = AV_CH_LAYOUT_STEREO;
+	int _swrDstChannels = 2;
+
+	int64 _swrDstSamplesCount = 0;
+	uint8_t **_swrDstData = nullptr;
+	int _swrDstDataCapacity = 0;
 };
 
diff --git a/Telegram/SourceFiles/media/media_audio_loader.h b/Telegram/SourceFiles/media/media_audio_loader.h
index d118bebd5..0dbf833e1 100644
--- a/Telegram/SourceFiles/media/media_audio_loader.h
+++ b/Telegram/SourceFiles/media/media_audio_loader.h
@@ -33,8 +33,8 @@ public:
 
 	virtual bool open(TimeMs positionMs) = 0;
 	virtual int64 samplesCount() = 0;
-	virtual int32 samplesFrequency() = 0;
-	virtual int32 format() = 0;
+	virtual int samplesFrequency() = 0;
+	virtual int format() = 0;
 
 	enum class ReadResult {
 		Error,
diff --git a/Telegram/SourceFiles/media/media_child_ffmpeg_loader.h b/Telegram/SourceFiles/media/media_child_ffmpeg_loader.h
index 144e78260..a3ca7fb8d 100644
--- a/Telegram/SourceFiles/media/media_child_ffmpeg_loader.h
+++ b/Telegram/SourceFiles/media/media_child_ffmpeg_loader.h
@@ -91,7 +91,7 @@ public:
 		return true;
 	}
 
-	int32 format() override {
+	int format() override {
 		return _format;
 	}
 
@@ -118,7 +118,7 @@ private:
 	bool _eofReached = false;
 
 	int32 _sampleSize = 2 * sizeof(uint16);
-	int32 _format = AL_FORMAT_STEREO16;
+	int _format = AL_FORMAT_STEREO16;
 	int32 _srcRate = Media::Player::kDefaultFrequency;
 	int32 _dstRate = Media::Player::kDefaultFrequency;
 	int32 _maxResampleSamples = 1024;