Sync video stream to audio stream.

John Preston 2019-02-21 15:15:44 +04:00
parent ec9512899e
commit 99d05ba967
9 changed files with 113 additions and 39 deletions
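For context, here is a minimal standalone sketch of the synchronization idea this commit introduces (it mirrors the new Mixer::TimeCorrection / VideoTrackObject::trackTime() logic in the hunks below, but all names such as AudioClockSample and VideoClock are illustrative assumptions, not tdesktop's actual API): the video track no longer extrapolates its position from a free-running system clock; it extrapolates from the last (audio position, wall-clock) pair reported by the audio mixer, and schedules frame display times from that corrected track time.

// Standalone sketch (assumed names, not tdesktop's API) of the clock logic
// added in Mixer::getVideoTimeCorrection() and VideoTrackObject::trackTime().
#include <cmath>
#include <cstdint>
#include <iostream>
#include <optional>

using Milliseconds = std::int64_t;

// Last progress report from the audio mixer (cf. Mixer::TimeCorrection):
// the audio position it reached and the wall-clock time of that report.
struct AudioClockSample {
	Milliseconds audioPosition = 0; // ms of audio played since start
	Milliseconds reportedAt = 0;    // wall-clock ms of the report
};

class VideoClock {
public:
	VideoClock(Milliseconds startedPosition, Milliseconds startedTime, double speed)
	: _startedPosition(startedPosition)
	, _startedTime(startedTime)
	, _speed(speed) {
	}

	// Current track position: extrapolate from the audio sample when one is
	// available, otherwise fall back to the video track's own start time.
	Milliseconds trackNow(
			Milliseconds worldNow,
			const std::optional<AudioClockSample> &audio) const {
		const auto knownValue = audio ? audio->audioPosition : Milliseconds(0);
		const auto knownTime = audio ? audio->reportedAt : _startedTime;
		const auto sinceKnown = worldNow - knownTime;
		return _startedPosition
			+ knownValue
			+ Milliseconds(std::llround(sinceKnown * _speed));
	}

	// Wall-clock time at which a frame with the given track position should
	// be displayed (cf. the new presentFrameIfNeeded() computation).
	Milliseconds displayTime(
			Milliseconds worldNow,
			Milliseconds trackNow,
			Milliseconds framePosition) const {
		const auto trackLeft = framePosition - trackNow;
		return worldNow + Milliseconds(std::llround(trackLeft / _speed));
	}

private:
	Milliseconds _startedPosition = 0;
	Milliseconds _startedTime = 0;
	double _speed = 1.;
};

int main() {
	// Playback started at position 0 when the wall clock read 1000 ms, 1x speed.
	const auto clock = VideoClock(0, 1000, 1.);

	// The mixer last reported 500 ms of audio at wall clock 1520 ms, i.e. the
	// audio runs 20 ms behind a naive system-clock estimate.
	const auto audio = AudioClockSample{500, 1520};

	const auto worldNow = Milliseconds(1600);
	const auto now = clock.trackNow(worldNow, audio);
	std::cout << "track time now: " << now << " ms\n";          // 580 ms
	std::cout << "display the 600 ms frame at: "
		<< clock.displayTime(worldNow, now, 600) << " ms\n";    // 1620 ms
}

At speeds other than 1x, both the extrapolation and the display delay are scaled by the speed factor, matching the std::round(... * _options.speed) and std::round(... / _options.speed) calls in the hunks below.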

View file

@@ -441,7 +441,7 @@ void Mixer::Track::clear() {
setVideoData(nullptr);
lastUpdateWhen = 0;
lastUpdateCorrectedMs = 0;
lastUpdatePosition = 0;
}
void Mixer::Track::started() {
@@ -782,7 +782,7 @@ void Mixer::play(
current->state.id = audio;
current->lastUpdateWhen = 0;
current->lastUpdateCorrectedMs = 0;
current->lastUpdatePosition = 0;
if (videoData) {
current->setVideoData(std::move(videoData));
} else {
@@ -823,6 +823,23 @@ void Mixer::feedFromVideo(const VideoSoundPart &part) {
_loader->feedFromVideo(part);
}
Mixer::TimeCorrection Mixer::getVideoTimeCorrection(
const AudioMsgId &audio) const {
Expects(audio.type() == AudioMsgId::Type::Video);
Expects(audio.playId() != 0);
auto result = TimeCorrection();
const auto playId = audio.playId();
QMutexLocker lock(&AudioMutex);
const auto track = trackForType(AudioMsgId::Type::Video);
if (track->state.id.playId() == playId && track->lastUpdateWhen > 0) {
result.audioPositionValue = track->lastUpdatePosition;
result.audioPositionTime = track->lastUpdateWhen;
}
return result;
}
crl::time Mixer::getVideoCorrectedTime(const AudioMsgId &audio, crl::time frameMs, crl::time systemMs) {
auto result = frameMs;
@@ -830,7 +847,7 @@ crl::time Mixer::getVideoCorrectedTime(const AudioMsgId &audio, crl::time frameM
auto type = audio.type();
auto track = trackForType(type);
if (track && track->state.id == audio && track->lastUpdateWhen > 0) {
result = static_cast<crl::time>(track->lastUpdateCorrectedMs);
result = static_cast<crl::time>(track->lastUpdatePosition);
if (systemMs > track->lastUpdateWhen) {
result += (systemMs - track->lastUpdateWhen);
}
@@ -848,7 +865,7 @@ void Mixer::videoSoundProgress(const AudioMsgId &audio) {
if (current && current->state.length && current->state.frequency) {
if (current->state.id == audio && current->state.state == State::Playing) {
current->lastUpdateWhen = crl::now();
current->lastUpdateCorrectedMs = (current->state.position * 1000ULL) / current->state.frequency;
current->lastUpdatePosition = (current->state.position * 1000ULL) / current->state.frequency;
}
}
}
@@ -906,7 +923,7 @@ void Mixer::pause(const AudioMsgId &audio, bool fast) {
emit faderOnTimer();
track->lastUpdateWhen = 0;
track->lastUpdateCorrectedMs = 0;
track->lastUpdatePosition = 0;
}
if (current) emit updated(current);
}

View file

@@ -7,6 +7,7 @@ https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
*/
#pragma once
#include "media/streaming/media_streaming_common.h"
#include "storage/localimageloader.h"
#include "base/bytes.h"
@@ -121,6 +122,15 @@ public:
// Video player audio stream interface.
void feedFromVideo(const VideoSoundPart &part);
struct TimeCorrection {
crl::time audioPositionValue = kTimeUnknown;
crl::time audioPositionTime = kTimeUnknown;
explicit operator bool() const {
return (audioPositionValue != kTimeUnknown);
}
};
TimeCorrection getVideoTimeCorrection(const AudioMsgId &audio) const;
crl::time getVideoCorrectedTime(
const AudioMsgId &id,
crl::time frameMs,
@@ -228,7 +238,7 @@ private:
};
std::unique_ptr<SpeedEffect> speedEffect;
crl::time lastUpdateWhen = 0;
crl::time lastUpdateCorrectedMs = 0;
crl::time lastUpdatePosition = 0;
private:
void createStream(AudioMsgId::Type type);

View file

@@ -18,14 +18,17 @@ namespace Streaming {
AudioTrack::AudioTrack(
const PlaybackOptions &options,
Stream &&stream,
AudioMsgId audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error)
: _options(options)
, _stream(std::move(stream))
, _audioId(audioId)
, _ready(std::move(ready))
, _error(std::move(error)) {
Expects(_ready != nullptr);
Expects(_error != nullptr);
Expects(_audioId.playId() != 0);
}
int AudioTrack::streamIndex() const {
@@ -39,13 +42,17 @@ AVRational AudioTrack::streamTimeBase() const {
void AudioTrack::process(Packet &&packet) {
_noMoreData = packet.empty();
if (_audioMsgId.playId()) {
if (initialized()) {
mixerEnqueue(std::move(packet));
} else if (!tryReadFirstFrame(std::move(packet))) {
_error();
}
}
bool AudioTrack::initialized() const {
return !_ready;
}
bool AudioTrack::tryReadFirstFrame(Packet &&packet) {
// #TODO streaming fix seek to the end.
if (ProcessPacket(_stream, std::move(packet)).failed()) {
@@ -73,17 +80,16 @@ bool AudioTrack::fillStateFromFrame() {
}
void AudioTrack::mixerInit() {
Expects(!_audioMsgId.playId());
_audioMsgId = AudioMsgId::ForVideo();
Expects(!initialized());
auto data = std::make_unique<VideoSoundData>();
data->frame = _stream.frame.release();
data->context = _stream.codec.release();
data->frequency = _stream.frequency;
data->length = (_stream.duration * data->frequency) / 1000LL;
data->speed = _options.speed;
Media::Player::mixer()->play(
_audioMsgId,
_audioId,
std::move(data),
_startedPosition);
}
@@ -103,17 +109,16 @@ void AudioTrack::callReady() {
void AudioTrack::mixerEnqueue(Packet &&packet) {
Media::Player::mixer()->feedFromVideo({
&packet.fields(),
_audioMsgId
_audioId
});
packet.release();
}
void AudioTrack::start(crl::time startTime) {
Expects(_ready == nullptr);
Expects(_audioMsgId.playId() != 0);
Expects(initialized());
// #TODO streaming support start() when paused.
Media::Player::mixer()->resume(_audioMsgId, true);
Media::Player::mixer()->resume(_audioId, true);
}
rpl::producer<crl::time> AudioTrack::playPosition() {
@@ -123,12 +128,12 @@ rpl::producer<crl::time> AudioTrack::playPosition() {
_subscription = Media::Player::Updated(
).add_subscription([=](const AudioMsgId &id) {
using State = Media::Player::State;
if (id != _audioMsgId) {
if (id != _audioId) {
return;
}
const auto type = AudioMsgId::Type::Video;
const auto state = Media::Player::mixer()->currentState(type);
if (state.id != _audioMsgId) {
if (state.id != _audioId) {
// #TODO streaming muted by other
return;
} else switch (state.state) {
@@ -157,8 +162,8 @@ rpl::producer<crl::time> AudioTrack::playPosition() {
}
AudioTrack::~AudioTrack() {
if (_audioMsgId.playId()) {
Media::Player::mixer()->stop(_audioMsgId);
if (_audioId.playId()) {
Media::Player::mixer()->stop(_audioId);
}
}

View file

@@ -20,6 +20,7 @@ public:
AudioTrack(
const PlaybackOptions &options,
Stream &&stream,
AudioMsgId audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error);
@@ -44,6 +45,7 @@ public:
private:
// Called from the same unspecified thread.
[[nodiscard]] bool initialized() const;
[[nodiscard]] bool tryReadFirstFrame(Packet &&packet);
[[nodiscard]] bool fillStateFromFrame();
void mixerInit();
@@ -54,6 +56,7 @@ private:
// Accessed from the same unspecified thread.
Stream _stream;
const AudioMsgId _audioId;
bool _noMoreData = false;
// Assumed to be thread-safe.
@@ -62,7 +65,6 @@ private:
// First set from the same unspecified thread before _ready is called.
// After that is immutable.
AudioMsgId _audioMsgId;
crl::time _startedPosition = kTimeUnknown;
// Accessed from the main thread.

View file

@@ -8,10 +8,11 @@ https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
#pragma once
namespace Media {
namespace Streaming {
constexpr auto kTimeUnknown = std::numeric_limits<crl::time>::min();
namespace Streaming {
class VideoTrack;
class AudioTrack;

View file

@@ -158,6 +158,7 @@ void Player::trackPlayedTill(
void Player::audioReceivedTill(crl::time position) {
Expects(_audio != nullptr);
//LOG(("AUDIO TILL: %1").arg(position));
trackReceivedTill(*_audio, _information.audio.state, position);
}
@@ -170,6 +171,7 @@ void Player::audioPlayedTill(crl::time position) {
void Player::videoReceivedTill(crl::time position) {
Expects(_video != nullptr);
//LOG(("VIDEO TILL: %1").arg(position));
trackReceivedTill(*_video, _information.video.state, position);
}
@@ -196,16 +198,21 @@ void Player::fileReady(Stream &&video, Stream &&audio) {
};
const auto mode = _options.mode;
if (audio.codec && (mode == Mode::Audio || mode == Mode::Both)) {
_audioId = AudioMsgId::ForVideo();
_audio = std::make_unique<AudioTrack>(
_options,
std::move(audio),
_audioId,
ready,
error(_audio));
} else {
_audioId = AudioMsgId();
}
if (video.codec && (mode == Mode::Video || mode == Mode::Both)) {
_video = std::make_unique<VideoTrack>(
_options,
std::move(video),
_audioId,
ready,
error(_video));
}

View file

@@ -98,9 +98,12 @@ private:
static constexpr auto kReceivedTillEnd
= std::numeric_limits<crl::time>::max();
// Immutable while File is active.
// Immutable while File is active after it is ready.
AudioMsgId _audioId;
std::unique_ptr<AudioTrack> _audio;
std::unique_ptr<VideoTrack> _video;
// Immutable while File is active.
base::has_weak_ptr _sessionGuard;
PlaybackOptions _options;

View file

@@ -7,6 +7,7 @@ https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
*/
#include "media/streaming/media_streaming_video_track.h"
#include "media/audio/media_audio.h"
#include "base/concurrent_timer.h"
namespace Media {
@@ -28,6 +29,7 @@ public:
const PlaybackOptions &options,
not_null<Shared*> shared,
Stream &&stream,
const AudioMsgId &audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error);
@@ -52,7 +54,11 @@ private:
// Force frame position to be clamped to [0, duration] and monotonic.
[[nodiscard]] crl::time currentFramePosition() const;
[[nodiscard]] crl::time trackTime() const;
struct TrackTime {
crl::time worldNow = kTimeUnknown;
crl::time trackNow = kTimeUnknown;
};
[[nodiscard]] TrackTime trackTime() const;
const crl::weak_on_queue<VideoTrackObject> _weak;
const PlaybackOptions _options;
@@ -62,13 +68,14 @@ private:
Shared *_shared = nullptr;
Stream _stream;
AudioMsgId _audioId;
bool _noMoreData = false;
FnMut<void(const Information &)> _ready;
Fn<void()> _error;
crl::time _startedTime = kTimeUnknown;
crl::time _startedPosition = kTimeUnknown;
mutable crl::time _previousFramePosition = kTimeUnknown;
rpl::variable<crl::time> _nextFrameDisplayPosition = kTimeUnknown;
rpl::variable<crl::time> _nextFrameDisplayTime = kTimeUnknown;
bool _queued = false;
base::ConcurrentTimer _readFramesTimer;
@@ -80,12 +87,14 @@ VideoTrackObject::VideoTrackObject(
const PlaybackOptions &options,
not_null<Shared*> shared,
Stream &&stream,
const AudioMsgId &audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error)
: _weak(std::move(weak))
, _options(options)
, _shared(shared)
, _stream(std::move(stream))
, _audioId(audioId)
, _ready(std::move(ready))
, _error(std::move(error))
, _readFramesTimer(_weak, [=] { readFrames(); }) {
@@ -94,12 +103,7 @@ VideoTrackObject::VideoTrackObject(
}
rpl::producer<crl::time> VideoTrackObject::displayFrameAt() const {
return _nextFrameDisplayPosition.value(
) | rpl::map([=](crl::time displayPosition) {
return _startedTime
+ crl::time(std::round((displayPosition - _startedPosition)
/ _options.speed));
});
return _nextFrameDisplayTime.value();
}
void VideoTrackObject::process(Packet &&packet) {
@@ -130,7 +134,7 @@ void VideoTrackObject::readFrames() {
if (interrupted()) {
return;
}
const auto state = _shared->prepareState(trackTime());
const auto state = _shared->prepareState(trackTime().trackNow);
state.match([&](Shared::PrepareFrame frame) {
if (readFrame(frame)) {
presentFrameIfNeeded();
@@ -175,9 +179,12 @@ bool VideoTrackObject::readFrame(not_null<Frame*> frame) {
}
void VideoTrackObject::presentFrameIfNeeded() {
const auto presented = _shared->presentFrame(trackTime());
const auto time = trackTime();
const auto presented = _shared->presentFrame(time.trackNow);
if (presented.displayPosition != kTimeUnknown) {
_nextFrameDisplayPosition = presented.displayPosition;
const auto trackLeft = presented.displayPosition - time.trackNow;
_nextFrameDisplayTime = time.worldNow
+ crl::time(std::round(trackLeft / _options.speed));
}
queueReadFrames(presented.nextCheckDelay);
}
@@ -236,7 +243,7 @@ crl::time VideoTrackObject::currentFramePosition() const {
bool VideoTrackObject::fillStateFromFrame() {
_startedPosition = currentFramePosition();
_nextFrameDisplayPosition = _startedPosition;
_nextFrameDisplayTime = _startedTime;
return (_startedPosition != kTimeUnknown);
}
@@ -261,11 +268,30 @@ void VideoTrackObject::callReady() {
base::take(_ready)({ data });
}
crl::time VideoTrackObject::trackTime() const {
return _startedPosition
+ crl::time((_startedTime != kTimeUnknown
? std::round((crl::now() - _startedTime) * _options.speed)
: 0.));
VideoTrackObject::TrackTime VideoTrackObject::trackTime() const {
auto result = TrackTime();
const auto started = (_startedTime != kTimeUnknown);
if (!started) {
result.worldNow = crl::now();
result.trackNow = _startedPosition;
return result;
}
const auto correction = _audioId.playId()
? Media::Player::mixer()->getVideoTimeCorrection(_audioId)
: Media::Player::Mixer::TimeCorrection();
const auto knownValue = correction ? correction.audioPositionValue : 0;
const auto knownTime = correction
? correction.audioPositionTime
: _startedTime;
const auto worldNow = crl::now();
const auto sinceKnown = (worldNow - knownTime);
result.worldNow = worldNow;
result.trackNow = _startedPosition
+ knownValue
+ crl::time(std::round(sinceKnown * _options.speed));
return result;
}
void VideoTrackObject::interrupt() {
@@ -419,6 +445,7 @@ not_null<VideoTrack::Frame*> VideoTrack::Shared::frameForPaint() {
VideoTrack::VideoTrack(
const PlaybackOptions &options,
Stream &&stream,
const AudioMsgId &audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error)
: _streamIndex(stream.index)
@@ -429,6 +456,7 @@ VideoTrack::VideoTrack(
options,
_shared.get(),
std::move(stream),
audioId,
std::move(ready),
std::move(error)) {
}

View file

@@ -23,6 +23,7 @@ public:
VideoTrack(
const PlaybackOptions &options,
Stream &&stream,
const AudioMsgId &audioId,
FnMut<void(const Information &)> ready,
Fn<void()> error);