2022-06-29 14:45:17 +03:00

506 lines
19 KiB
C#

// To enable SoundTouch library tempo change for audio frame shrinking when catching up:
// Define PHOTON_VOICE_SOUND_TOUCH_ENABLE
// Set PlayDelayConfig.TempoChangeHQ to true
// Add SoundTouch library https://gitlab.com/soundtouch/soundtouch
// Android: edit /Android-lib/jni/Android.mk:
// add ../../SoundTouchDLL/SoundTouchDLL.cpp to sources list
// add LOCAL_CFLAGS += -DDLL_EXPORTS
// Windows: http://soundtouch.surina.net/download.html
// Add SoundTouch library C# wrapper https://gitlab.com/soundtouch/soundtouch/-/blob/master/source/csharp-example/SoundTouch.cs
// Replace "SoundTouch.dll" with "soundtouch" in SoundTouch.cs
using System.Collections.Generic;
using System;
using System.Runtime.InteropServices;
namespace Photon.Voice
{
public interface IAudioOut<T>
{
bool IsPlaying { get; }
void Start(int frequency, int channels, int frameSamplesPerChannel);
void Flush();
void Stop();
void Push(T[] frame);
void Service();
int Lag { get; } // ms
}
public class AudioOutDelayControl
{
public class PlayDelayConfig
{
public PlayDelayConfig()
{
Low = 200;
High = 400;
Max = 1000;
SpeedUpPerc = 5;
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
TempoChangeHQ = false;
#endif
}
public int Low { get ; set; } // ms: (Target) Audio player initilizes the delay with this value on Start and after flush and moves to it during corrections
public int High { get; set; } // ms: Audio player tries to keep the delay below this value.
public int Max { get; set; } // ms: Audio player guarantees that the delay never exceeds this value.
public int SpeedUpPerc { get; set; } // playback speed-up to catch up the stream
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
public bool TempoChangeHQ { get; set; }
#endif
public PlayDelayConfig Clone()
{
return new PlayDelayConfig
{
Low = Low,
High = High,
Max = Max,
SpeedUpPerc = SpeedUpPerc,
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
TempoChangeHQ = TempoChangeHQ,
#endif
};
}
}
}
// Consumes audio frames via Push(), optionally resizes and writes (OutWrite) them to the output to keep constant delay
// between output playback position (OutPos) and input stream position (advanced with each write).
// Assumes output is always playing.
public abstract class AudioOutDelayControl<T> : AudioOutDelayControl, IAudioOut<T>
{
readonly int sizeofT = Marshal.SizeOf(default(T));
abstract public int OutPos { get; }
abstract public void OutCreate(int frequency, int channels, int bufferSamples);
abstract public void OutStart();
abstract public void OutWrite(T[] data, int offsetSamples);
const int TEMPO_UP_SKIP_GROUP = 6;
private int frameSamples;
private int frameSize;
protected int bufferSamples;
protected int frequency;
private int clipWriteSamplePos;
private int playSamplePosPrev;
private int sourceTimeSamplesPrev;
private int playLoopCount;
PlayDelayConfig playDelayConfig;
protected int channels;
private bool started;
private bool flushed = true;
private int targetDelaySamples;
private int upperTargetDelaySamples; // correct if higher: gradually move to target via input frames resampling
private int maxDelaySamples; // set delay to this value if delay is higher
private const int NO_PUSH_TIMEOUT_MS = 100; // should be greater than Push() call interval
int lastPushTime = Environment.TickCount - NO_PUSH_TIMEOUT_MS;
protected readonly ILogger logger;
protected readonly string logPrefix;
private readonly bool debugInfo;
readonly bool processInService = false; // enqueue frame in Push() in process it in Service(), otherwise process directly in Push()
T[] zeroFrame;
T[] resampledFrame;
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
soundtouch.SoundTouch st;
#endif
AudioUtil.TempoUp<T> tempoUp;
bool tempoChangeHQ; // true if library is available
public AudioOutDelayControl(bool processInService, PlayDelayConfig playDelayConfig, ILogger logger, string logPrefix, bool debugInfo)
{
this.processInService = processInService;
// make sure that settings are not mutable
this.playDelayConfig = playDelayConfig.Clone();
this.logger = logger;
this.logPrefix = logPrefix;
this.debugInfo = debugInfo;
}
public int Lag { get { return (int)((this.clipWriteSamplePos - (this.started ? (float)this.playLoopCount * this.bufferSamples + this.OutPos : 0.0f)) * 1000 / frequency); } }
public bool IsFlushed
{
get { return !started || this.flushed; }
}
public bool IsPlaying
{
get { return !IsFlushed && (Environment.TickCount - lastPushTime < NO_PUSH_TIMEOUT_MS); }
}
public void Start(int frequency, int channels, int frameSamples)
{
//frequency = (int)(frequency * 1.2); // underrun test
//frequency = (int)(frequency / 1.2); // overrun test
this.frequency = frequency;
this.channels = channels;
// add 1 frame samples to make sure that we have something to play when delay set to 0
this.targetDelaySamples = playDelayConfig.Low * frequency / 1000 + frameSamples;
this.upperTargetDelaySamples = playDelayConfig.High * frequency / 1000 + frameSamples;
if (this.upperTargetDelaySamples < targetDelaySamples + 2 * frameSamples)
{
this.upperTargetDelaySamples = targetDelaySamples + 2 * frameSamples;
}
int resampleRampEndMs = playDelayConfig.Max;
this.maxDelaySamples = playDelayConfig.Max * frequency / 1000;
if (this.maxDelaySamples < this.upperTargetDelaySamples)
{
this.maxDelaySamples = this.upperTargetDelaySamples;
}
this.bufferSamples = 3 * this.maxDelaySamples; // make sure we have enough space
this.frameSamples = frameSamples;
this.frameSize = frameSamples * channels;
this.clipWriteSamplePos = this.targetDelaySamples;
if (this.framePool.Info != this.frameSize)
{
this.framePool.Init(this.frameSize);
}
this.zeroFrame = new T[this.frameSize];
this.resampledFrame = new T[this.frameSize];
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
if (this.playDelayConfig.TempoChangeHQ)
{
try
{
st = new soundtouch.SoundTouch();
st.Channels = (uint)channels;
st.SampleRate = (uint)frequency;
tempoChangeHQ = true;
}
catch (DllNotFoundException e)
{
logger.LogError("{0} SoundTouch library not found, disabling HQ tempo mode: {1}", this.logPrefix, e);
tempoChangeHQ = false;
}
}
#else
tempoChangeHQ = false;
#endif
if (!tempoChangeHQ)
{
tempoUp = new AudioUtil.TempoUp<T>();
}
OutCreate(frequency, channels, bufferSamples);
OutStart();
this.started = true;
this.logger.LogInfo("{0} Start: {1} bs={2} ch={3} f={4} tds={5} utds={6} mds={7} speed={8} tempo={9}", this.logPrefix, sizeofT == 2 ? "short" : "float", bufferSamples, channels, frequency, targetDelaySamples, upperTargetDelaySamples, maxDelaySamples, playDelayConfig.SpeedUpPerc, tempoChangeHQ ? "HQ" : "LQ");
}
Queue<T[]> frameQueue = new Queue<T[]>();
public const int FRAME_POOL_CAPACITY = 50;
PrimitiveArrayPool<T> framePool = new PrimitiveArrayPool<T>(FRAME_POOL_CAPACITY, "AudioOutDelayControl");
bool catchingUp = false;
bool processFrame(T[] frame, int playSamplePos)
{
var lagSamples = this.clipWriteSamplePos - playSamplePos;
if (!this.flushed)
{
if (lagSamples > maxDelaySamples)
{
if (this.debugInfo)
{
this.logger.LogDebug("{0} overrun {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
this.clipWriteSamplePos = playSamplePos + maxDelaySamples;
lagSamples = maxDelaySamples;
}
else if (lagSamples < 0)
{
if (this.debugInfo)
{
this.logger.LogDebug("{0} underrun {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
this.clipWriteSamplePos = playSamplePos + targetDelaySamples;
lagSamples = targetDelaySamples;
}
}
if (frame == null) // flush signalled
{
this.flushed = true;
if (this.debugInfo)
{
this.logger.LogDebug("{0} stream flush pause {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
if (catchingUp)
{
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
if (tempoChangeHQ)
{
st.Flush();
writeTempoHQ();
}
#endif
catchingUp = false;
if (this.debugInfo)
{
this.logger.LogDebug("{0} stream sync reset {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
}
return true;
}
else
{
if (this.flushed)
{
this.clipWriteSamplePos = playSamplePos + targetDelaySamples;
lagSamples = targetDelaySamples;
this.flushed = false;
if (this.debugInfo)
{
this.logger.LogDebug("{0} stream unpause {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
}
}
// starting catching up
if (lagSamples > upperTargetDelaySamples && !catchingUp)
{
if (!tempoChangeHQ)
{
tempoUp.Begin(channels, playDelayConfig.SpeedUpPerc, TEMPO_UP_SKIP_GROUP);
}
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
else
{
st.Clear();
var tempo = (float)(100 + playDelayConfig.SpeedUpPerc) / 100;
st.Tempo = tempo;
}
#endif
catchingUp = true;
if (this.debugInfo)
{
this.logger.LogDebug("{0} stream sync started {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
}
// finishing catching up
bool frameIsWritten = false; // first frame after switching from catching up requires special processing to flush TempoUp (the end of skipping wave removed if required)
if (lagSamples <= targetDelaySamples && catchingUp)
{
if (!tempoChangeHQ)
{
int skipSamples = tempoUp.End(frame);
int resampledLenSamples = frame.Length / channels - skipSamples;
Buffer.BlockCopy(frame, skipSamples * channels * sizeofT, resampledFrame, 0, resampledLenSamples * channels * sizeofT);
writeResampled(resampledFrame, resampledLenSamples);
frameIsWritten = true;
}
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
else
{
st.Flush();
writeTempoHQ();
st.Clear();
}
#endif
catchingUp = false;
if (this.debugInfo)
{
this.logger.LogDebug("{0} stream sync finished {1} {2} {3} {4} {5}", this.logPrefix, upperTargetDelaySamples, lagSamples, playSamplePos, this.clipWriteSamplePos, playSamplePos + targetDelaySamples);
}
}
if (frameIsWritten)
{
return false;
}
if (catchingUp)
{
if (!tempoChangeHQ)
{
int resampledLenSamples = tempoUp.Process(frame, resampledFrame);
writeResampled(resampledFrame, resampledLenSamples);
}
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
else
{
if (sizeofT == 2)
{
st.PutSamplesI16(frame as short[], (uint)(frame.Length / channels));
}
else
{
st.PutSamples(frame as float[], (uint)(frame.Length / channels));
}
lagSamples -= writeTempoHQ();
}
#endif
}
else
{
OutWrite(frame, this.clipWriteSamplePos % this.bufferSamples);
this.clipWriteSamplePos += frame.Length / this.channels;
}
return false;
}
// should be called in Update thread
public void Service()
{
if (this.started)
{
// cache PlayerPos
int sourceTimeSamples = OutPos;
// loop detection (pcmsetpositioncallback not called when clip loops)
if (sourceTimeSamples < sourceTimeSamplesPrev)
{
playLoopCount++;
}
sourceTimeSamplesPrev = sourceTimeSamples;
var playSamplePos = this.playLoopCount * this.bufferSamples + sourceTimeSamples;
if (processInService)
{
lock (this.frameQueue)
{
while (frameQueue.Count > 0)
{
var frame = frameQueue.Dequeue();
if (processFrame(frame, playSamplePos))
{
return; // flush signalled
}
framePool.Release(frame, frame.Length);
}
}
}
// clear played back buffer segment
var clearStart = this.playSamplePosPrev;
var clearMin = playSamplePos - this.bufferSamples;
if (clearStart < clearMin)
{
clearStart = clearMin;
}
// round up
var framesToClear = (playSamplePos - clearStart - 1) / this.frameSamples + 1;
for (var offset = playSamplePos - framesToClear * this.frameSamples; offset < playSamplePos; offset += this.frameSamples)
{
var o = offset % this.bufferSamples;
if (o < 0) o += this.bufferSamples;
OutWrite(this.zeroFrame, o);
}
this.playSamplePosPrev = playSamplePos;
}
}
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
int writeTempoHQ()
{
int resampledLenSamples;
if (sizeofT == 2)
{
resampledLenSamples = (int)st.ReceiveSamplesI16(resampledFrame as short[], (uint)(resampledFrame.Length / channels));
}
else
{
resampledLenSamples = (int)st.ReceiveSamples(resampledFrame as float[], (uint)(resampledFrame.Length / channels));
}
return writeResampled(resampledFrame, resampledLenSamples);
}
#endif
int writeResampled(T[] f, int resampledLenSamples)
{
// zero not used part of the buffer because SetData applies entire frame
// if this frame is the last, grabage may be played back
var tailSize = (f.Length - resampledLenSamples * channels) * sizeofT;
if (tailSize > 0) // it may be 0 what BlockCopy does not like
{
Buffer.BlockCopy(this.zeroFrame, 0, f, resampledLenSamples * channels * sizeofT, tailSize);
}
OutWrite(f, this.clipWriteSamplePos % this.bufferSamples);
this.clipWriteSamplePos += resampledLenSamples;
return resampledLenSamples;
}
// may be called on any thread
public void Push(T[] frame)
{
if (!this.started)
{
return;
}
if (frame.Length == 0)
{
return;
}
if (frame.Length != this.frameSize)
{
logger.LogError("{0} audio frames are not of size: {1} != {2}", this.logPrefix, frame.Length, this.frameSize);
return;
}
if (processInService)
{
T[] b = framePool.AcquireOrCreate();
Buffer.BlockCopy(frame, 0, b, 0, frame.Length * sizeofT);
lock (this.frameQueue)
{
this.frameQueue.Enqueue(b);
}
}
else
{
processFrame(frame, this.playLoopCount * this.bufferSamples + OutPos);
}
lastPushTime = Environment.TickCount;
}
public void Flush()
{
if (processInService)
{
lock (this.frameQueue)
{
this.frameQueue.Enqueue(null);
}
}
else
{
processFrame(null, this.playLoopCount * this.bufferSamples + OutPos);
}
}
virtual public void Stop()
{
#if PHOTON_VOICE_SOUND_TOUCH_ENABLE
if (st != null)
{
st.Dispose();
st = null;
}
#endif
this.started = false;
}
}
}