2022-06-29 14:45:17 +03:00

1121 lines
42 KiB
C#

using System;
namespace Photon.Voice
{
/// <summary>Collection of Audio Utility functions and classes.</summary>
public static partial class AudioUtil
{
/// <summary>Resample audio data so that the complete src buffer fits into dstCount samples in the dst buffer.</summary>
/// <remarks>
/// Primitive nearest-neighbor resampling for an arbitrary number of interleaved channels: every
/// destination frame is a copy of the source frame at the proportional position. All channel counts
/// share one loop; for mono and stereo it picks exactly the same source samples as a dedicated loop.
/// If dstCount is not a multiple of channels, the trailing partial frame in dst is left untouched.
/// </remarks>
/// <param name="src">Source buffer. Its whole length (in interleaved samples) is used as input.</param>
/// <param name="dst">Destination buffer. Must hold at least dstCount samples.</param>
/// <param name="dstCount">Total number of interleaved samples to produce in dst (frames * channels).</param>
/// <param name="channels">Number of channels in the signal (1=mono, 2=stereo). Must be > 0.</param>
public static void Resample<T>(T[] src, T[] dst, int dstCount, int channels)
{
    int dstFrames = dstCount / channels;
    for (int frame = 0; frame < dstFrames; frame++)
    {
        // The product is computed in 64 bits: frame * src.Length exceeds int.MaxValue
        // once both buffers reach roughly 46k samples, which used to yield a negative index.
        int srcPos = (int)((long)frame * src.Length / dstCount) * channels;
        int dstPos = frame * channels;
        for (int ch = 0; ch < channels; ch++)
        {
            dst[dstPos + ch] = src[srcPos + ch];
        }
    }
}
/// <summary>Resample a region of src into a region of dst using nearest-neighbor selection.</summary>
/// <remarks>
/// Every destination frame is a copy of the source frame at the proportional position inside the
/// source region. All channel counts share one loop. If dstCount is not a multiple of channels,
/// the trailing partial frame in dst is left untouched.
/// </remarks>
/// <param name="src">Source buffer with interleaved samples.</param>
/// <param name="srcOffset">Index in src of the first sample of the source region.</param>
/// <param name="srcCount">Total number of interleaved samples in the source region.</param>
/// <param name="dst">Destination buffer.</param>
/// <param name="dstOffset">Index in dst of the first sample to write.</param>
/// <param name="dstCount">Total number of interleaved samples to produce (frames * channels).</param>
/// <param name="channels">Number of channels in both buffers. Must be > 0.</param>
public static void Resample<T>(T[] src, int srcOffset, int srcCount, T[] dst, int dstOffset, int dstCount, int channels)
{
    int dstFrames = dstCount / channels;
    for (int frame = 0; frame < dstFrames; frame++)
    {
        // 64-bit product: frame * srcCount overflows int for regions of roughly 46k samples and more.
        int srcPos = srcOffset + (int)((long)frame * srcCount / dstCount) * channels;
        int dstPos = dstOffset + frame * channels;
        for (int ch = 0; ch < channels; ch++)
        {
            dst[dstPos + ch] = src[srcPos + ch];
        }
    }
}
/// <summary>Resample a region of src into a region of dst while mapping between channel counts.</summary>
/// <remarks>
/// Does not sum channel values but only maps channel to channel (it's not possible to apply math to generic type T):
/// <list type="bullet">
/// <item>mono to stereo: the mono sample is written to both destination channels;</item>
/// <item>more source than destination channels: the first dstChannels channels are copied, the rest dropped;</item>
/// <item>fewer source than destination channels (other than mono to stereo): the available channels are
/// copied and the remaining destination channels of each frame are left untouched;</item>
/// <item>equal channel counts: plain nearest-neighbor resampling.</item>
/// </list>
/// </remarks>
/// <param name="src">Source buffer with interleaved samples.</param>
/// <param name="srcOffset">Index in src of the first sample of the source region.</param>
/// <param name="srcCount">Total number of interleaved samples in the source region.</param>
/// <param name="srcChannels">Number of channels in src. Must be > 0.</param>
/// <param name="dst">Destination buffer.</param>
/// <param name="dstOffset">Index in dst of the first sample to write.</param>
/// <param name="dstCount">Total number of interleaved samples of the destination region (frames * dstChannels).</param>
/// <param name="dstChannels">Number of channels in dst. Must be > 0.</param>
public static void Resample<T>(T[] src, int srcOffset, int srcCount, int srcChannels, T[] dst, int dstOffset, int dstCount, int dstChannels)
{
    bool monoToStereo = srcChannels == 1 && dstChannels == 2;
    int copiedChannels = srcChannels < dstChannels ? srcChannels : dstChannels;
    int dstFrames = dstCount / dstChannels;
    for (int frame = 0; frame < dstFrames; frame++)
    {
        // Source frame index = frame * srcFrames / dstFrames, written with the sample counts.
        // Computed in 64 bits because frame * srcCount * dstChannels overflows int for
        // buffers of a few ten thousand samples.
        int srcPos = srcOffset + (int)((long)frame * srcCount * dstChannels / dstCount / srcChannels) * srcChannels;
        int dstPos = dstOffset + frame * dstChannels;
        if (monoToStereo)
        {
            T v = src[srcPos];
            dst[dstPos] = v;
            dst[dstPos + 1] = v;
        }
        else
        {
            for (int ch = 0; ch < copiedChannels; ch++)
            {
                dst[dstPos + ch] = src[srcPos + ch];
            }
        }
    }
}
/// <summary>Resample audio data so that the complete src buffer fits into dstCount samples in the dst buffer,
/// and convert short to float samples along the way.</summary>
/// <remarks>
/// Primitive nearest-neighbor resampling for an arbitrary number of interleaved channels. Each sample is
/// divided by short.MaxValue. All channel counts share one loop. If dstCount is not a multiple of
/// channels, the trailing partial frame in dst is left untouched.
/// </remarks>
/// <param name="src">Source buffer. Its whole length (in interleaved samples) is used as input.</param>
/// <param name="dst">Destination buffer. Must hold at least dstCount samples.</param>
/// <param name="dstCount">Total number of interleaved samples to produce in dst (frames * channels).</param>
/// <param name="channels">Number of channels in the signal (1=mono, 2=stereo). Must be > 0.</param>
public static void ResampleAndConvert(short[] src, float[] dst, int dstCount, int channels)
{
    const float scale = short.MaxValue;
    int dstFrames = dstCount / channels;
    for (int frame = 0; frame < dstFrames; frame++)
    {
        // 64-bit product: frame * src.Length overflows int for buffers of roughly 46k samples and more.
        int srcPos = (int)((long)frame * src.Length / dstCount) * channels;
        int dstPos = frame * channels;
        for (int ch = 0; ch < channels; ch++)
        {
            dst[dstPos + ch] = src[srcPos + ch] / scale;
        }
    }
}
/// <summary>Resample audio data so that the complete src buffer fits into dstCount samples in the dst buffer,
/// and convert float to short samples along the way.</summary>
/// <remarks>
/// Primitive nearest-neighbor resampling for an arbitrary number of interleaved channels. Each sample is
/// multiplied by short.MaxValue and cast to short; values are not clamped. All channel counts share one
/// loop. If dstCount is not a multiple of channels, the trailing partial frame in dst is left untouched.
/// </remarks>
/// <param name="src">Source buffer. Its whole length (in interleaved samples) is used as input.</param>
/// <param name="dst">Destination buffer. Must hold at least dstCount samples.</param>
/// <param name="dstCount">Total number of interleaved samples to produce in dst (frames * channels).</param>
/// <param name="channels">Number of channels in the signal (1=mono, 2=stereo). Must be > 0.</param>
public static void ResampleAndConvert(float[] src, short[] dst, int dstCount, int channels)
{
    const float scale = short.MaxValue;
    int dstFrames = dstCount / channels;
    for (int frame = 0; frame < dstFrames; frame++)
    {
        // 64-bit product: frame * src.Length overflows int for buffers of roughly 46k samples and more.
        int srcPos = (int)((long)frame * src.Length / dstCount) * channels;
        int dstPos = frame * channels;
        for (int ch = 0; ch < channels; ch++)
        {
            dst[dstPos + ch] = (short)(src[srcPos + ch] * scale);
        }
    }
}
/// <summary>Convert audio samples from float to short.</summary>
/// <remarks>Each sample is multiplied by short.MaxValue and cast to short; values are not clamped.</remarks>
/// <param name="src">Source buffer. Must contain at least dstCount samples.</param>
/// <param name="dst">Destination buffer. Must hold at least dstCount samples.</param>
/// <param name="dstCount">Number of samples (total, all channels) to convert.</param>
public static void Convert(float[] src, short[] dst, int dstCount)
{
    const float scale = short.MaxValue;
    int pos = 0;
    while (pos < dstCount)
    {
        float scaled = src[pos] * scale;
        dst[pos] = (short)scaled;
        pos++;
    }
}
/// <summary>Convert audio samples from short to float.</summary>
/// <remarks>Each sample is divided by short.MaxValue.</remarks>
/// <param name="src">Source buffer. Must contain at least dstCount samples.</param>
/// <param name="dst">Destination buffer. Must hold at least dstCount samples.</param>
/// <param name="dstCount">Number of samples (total, all channels) to convert.</param>
public static void Convert(short[] src, float[] dst, int dstCount)
{
    const float scale = short.MaxValue;
    int pos = 0;
    while (pos < dstCount)
    {
        short sample = src[pos];
        dst[pos] = sample / scale;
        pos++;
    }
}
/// <summary>Convert an interleaved buffer with any number of channels to interleaved stereo.</summary>
/// <remarks>
/// A mono source (srcChannels == 1) is duplicated into the Left and Right channels.
/// For all other sources the first two channels of every frame are taken and the rest are discarded.
/// The number of frames converted is determined by the length of dst.
/// </remarks>
/// <param name="src">Source buffer.</param>
/// <param name="dst">Destination buffer; it is filled pair by pair up to its length.</param>
/// <param name="srcChannels">Number of (interleaved) channels in src.</param>
public static void ForceToStereo<T>(T[] src, T[] dst, int srcChannels)
{
    bool hasSecondChannel = srcChannels > 1;
    int srcPos = 0;
    int dstPos = 0;
    while (dstPos < dst.Length - 1)
    {
        dst[dstPos] = src[srcPos];
        if (hasSecondChannel)
        {
            dst[dstPos + 1] = src[srcPos + 1];
        }
        else
        {
            dst[dstPos + 1] = src[srcPos];
        }
        srcPos += srcChannels;
        dstPos += 2;
    }
}
/// <summary>Build a short debug string from the first elements of an array.</summary>
/// <param name="x">Array to print.</param>
/// <param name="lim">Maximum number of leading elements to include.</param>
/// <returns>The included elements, each one preceded by "-".</returns>
internal static string tostr<T>(T[] x, int lim = 10)
{
    int count = x.Length < lim ? x.Length : lim;
    var text = new System.Text.StringBuilder();
    for (int k = 0; k < count; k++)
    {
        text.Append("-").Append(x[k]);
    }
    return text.ToString();
}
/// <summary>Speeds up an audio stream by dropping whole waves from it.</summary>
/// <remarks>
/// A "wave" boundary is detected whenever the sum of all channel values of a frame goes from
/// negative back to non-negative. Waves are counted, and out of every skipGroup * skipFactor
/// consecutive waves the first skipGroup ones are dropped (skipFactor = 100 / changePerc).
/// Only short and float samples are supported; the sample type is selected by the size of T.
/// The instance keeps its detection state between <see cref="Process"/> calls, so a stream can be
/// fed buffer by buffer.
/// </remarks>
public class TempoUp<T>
{
    readonly int sizeofT = System.Runtime.InteropServices.Marshal.SizeOf(default(T));
    int channels;
    int skipGroup;
    int skipFactor;
    // -1 after a negative frame has been seen, 0 otherwise.
    int sign = 0;
    int waveCnt;
    bool skipping;

    /// <summary>Resets the detection state and sets the parameters for the following calls.</summary>
    /// <param name="channels">Number of interleaved channels. Must be > 0.</param>
    /// <param name="changePerc">Share of waves to drop, in percent. The skip factor is the integer
    /// quotient 100 / changePerc, so 0 throws DivideByZeroException here and values above 100
    /// produce a zero factor that fails with DivideByZeroException at the first wave boundary.</param>
    /// <param name="skipGroup">Number of consecutive waves dropped at a time.</param>
    /// <exception cref="System.ArgumentOutOfRangeException">channels is less than 1.</exception>
    public void Begin(int channels, int changePerc, int skipGroup)
    {
        if (channels < 1)
        {
            throw new System.ArgumentOutOfRangeException("channels", "channels must be greater than 0");
        }
        this.channels = channels;
        this.skipFactor = 100 / changePerc;
        this.skipGroup = skipGroup;
        sign = 0;
        skipping = false;
        waveCnt = 0;
    }

    /// <summary>Copies the frames of s that are not being skipped to d.</summary>
    /// <param name="s">Source buffer with interleaved samples.</param>
    /// <param name="d">Destination buffer; must be able to hold all of s.</param>
    /// <returns>Number of frames (samples per channel) written to d.</returns>
    public int Process(T[] s, T[] d)
    {
        if (sizeofT == 2)
        {
            return processShort(s as short[], d as short[]);
        }
        else
        {
            return processFloat(s as float[], d as float[]);
        }
    }

    /// <summary>Returns the number of frames required to skip in order to complete the currently skipping wave.</summary>
    /// <param name="s">Buffer that follows the last processed one.</param>
    /// <returns>Number of leading frames of s to skip; 0 if nothing is being skipped or if the
    /// end of the skipped wave was not found inside s.</returns>
    public int End(T[] s)
    {
        if (!skipping)
        {
            return 0;
        }
        if (sizeofT == 2)
        {
            return endShort(s as short[]);
        }
        else
        {
            return endFloat(s as float[]);
        }
    }

    // Guards against use before Begin(): with channels == 0 the frame loops would never advance.
    void ensureBegun()
    {
        if (channels < 1)
        {
            throw new System.InvalidOperationException("TempoUp: Begin() must be called before processing");
        }
    }

    // Feeds the sign of one frame into the wave detector. Returns true if this frame starts a new
    // wave; in that case waveCnt and skipping are already updated. The caller resets 'sign'.
    bool waveStarted(bool negative)
    {
        if (negative)
        {
            sign = -1;
            return false;
        }
        if (sign >= 0)
        {
            return false;
        }
        waveCnt++;
        skipping = waveCnt % (skipGroup * skipFactor) < skipGroup;
        return true;
    }

    int processFloat(float[] s, float[] d)
    {
        ensureBegun();
        int dPos = 0;
        for (int i = 0; i < s.Length; i += channels)
        {
            // Sum over all channels of the frame decides the sign.
            float x = s[i];
            for (int j = 1; j < channels; j++)
            {
                x += s[i + j];
            }
            if (waveStarted(x < 0))
            {
                sign = 0;
            }
            if (!skipping)
            {
                for (int j = 0; j < channels; j++)
                {
                    d[dPos++] = s[i + j];
                }
            }
        }
        return dPos / channels;
    }

    /// <summary>Float implementation of <see cref="End"/>; does not check whether a wave is being skipped.</summary>
    /// <param name="s">Buffer that follows the last processed one.</param>
    /// <returns>Index of the frame at which skipping ends, or 0 if no such frame was found in s.</returns>
    public int endFloat(float[] s)
    {
        ensureBegun();
        for (int i = 0; i < s.Length; i += channels)
        {
            float x = s[i];
            for (int j = 1; j < channels; j++)
            {
                x += s[i + j];
            }
            if (waveStarted(x < 0))
            {
                if (!skipping)
                {
                    return i / channels;
                }
                sign = 0;
            }
        }
        return 0;
    }

    int processShort(short[] s, short[] d)
    {
        ensureBegun();
        int dPos = 0;
        for (int i = 0; i < s.Length; i += channels)
        {
            // Summed as int, so adding several short channels cannot overflow.
            int x = s[i];
            for (int j = 1; j < channels; j++)
            {
                x += s[i + j];
            }
            if (waveStarted(x < 0))
            {
                sign = 0;
            }
            if (!skipping)
            {
                for (int j = 0; j < channels; j++)
                {
                    d[dPos++] = s[i + j];
                }
            }
        }
        return dPos / channels;
    }

    /// <summary>Short implementation of <see cref="End"/>; does not check whether a wave is being skipped.</summary>
    /// <param name="s">Buffer that follows the last processed one.</param>
    /// <returns>Index of the frame at which skipping ends, or 0 if no such frame was found in s.</returns>
    public int endShort(short[] s)
    {
        ensureBegun();
        for (int i = 0; i < s.Length; i += channels)
        {
            int x = s[i];
            for (int j = 1; j < channels; j++)
            {
                x += s[i + j];
            }
            if (waveStarted(x < 0))
            {
                if (!skipping)
                {
                    return i / channels;
                }
                sign = 0;
            }
        }
        return 0;
    }
}
/// <summary>Audio processor that converts the sample rate of a stream.</summary>
/// <remarks>
/// Every incoming frame is stretched or squeezed to a fixed output frame size with
/// <see cref="AudioUtil.Resample{T}(T[], T[], int, int)"></see>. The same output array is reused
/// and returned on every call.
/// </remarks>
public class Resampler<T> : IProcessor<T>
{
    protected T[] frameResampled;
    int channels;

    /// <summary>Create a new Resampler instance.</summary>
    /// <param name="dstSize">Size of a destination frame (length of the output array). Determines the output rate.</param>
    /// <param name="channels">Number of audio channels expected in both input and output.</param>
    public Resampler(int dstSize, int channels)
    {
        this.frameResampled = new T[dstSize];
        this.channels = channels;
    }

    /// <summary>Resample one frame.</summary>
    /// <param name="buf">Input frame.</param>
    /// <returns>The internal output frame filled with the resampled data.</returns>
    public T[] Process(T[] buf)
    {
        AudioUtil.Resample(
            buf,
            this.frameResampled,
            this.frameResampled.Length,
            this.channels);
        return this.frameResampled;
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
/// <summary>Interface of an audio level meter.</summary>
public interface ILevelMeter
{
    /// <summary>Average amplitude value over the last half second.</summary>
    float CurrentAvgAmp
    {
        get;
    }

    /// <summary>Maximum amplitude value over the last half second.</summary>
    float CurrentPeakAmp
    {
        get;
    }

    /// <summary>Average of the <see cref="CurrentPeakAmp"></see> values since the last reset.</summary>
    float AccumAvgPeakAmp
    {
        get;
    }

    /// <summary>Reset <see cref="AccumAvgPeakAmp"></see>.</summary>
    void ResetAccumAvgPeakAmp();
}
/// <summary>Placeholder level meter: every reading is 0 and resetting does nothing.</summary>
public class LevelMeterDummy : ILevelMeter
{
    /// <summary>Always 0.</summary>
    public float CurrentAvgAmp
    {
        get { return 0; }
    }

    /// <summary>Always 0.</summary>
    public float CurrentPeakAmp
    {
        get { return 0; }
    }

    /// <summary>Always 0.</summary>
    public float AccumAvgPeakAmp
    {
        get { return 0; }
    }

    /// <summary>Does nothing.</summary>
    public void ResetAccumAvgPeakAmp()
    {
    }
}
/// <summary>
/// Base class of the audio level meters. Holds the half-second window state and exposes the
/// readings; the sample-type specific subclasses update the state in <see cref="Process"/>.
/// </summary>
public abstract class LevelMeter<T> : IProcessor<T>, ILevelMeter
{
    // sum of all values in buffer
    protected float ampSum;
    // max of values from start buffer to current pos
    protected float ampPeak;
    // number of values in the window (half a second of samples over all channels)
    protected int bufferSize;
    // window of previous values, used as a ring buffer
    protected float[] prevValues;
    // current position in prevValues
    protected int prevValuesHead;
    // sum and count of the peak values collected since the last reset
    protected float accumAvgPeakAmpSum;
    protected int accumAvgPeakAmpCount;
    // last completed peak value, in raw sample units
    protected float currentPeakAmp;
    // factor that converts raw sample units to the reported amplitude range; set by subclasses
    protected float norm;

    /// <summary>Allocates the window for half a second of audio.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    internal LevelMeter(int samplingRate, int numChannels)
    {
        int halfSecondOfValues = samplingRate * numChannels / 2; // 1/2 sec
        this.bufferSize = halfSecondOfValues;
        this.prevValues = new float[halfSecondOfValues];
    }

    /// <summary>Average amplitude over the window, normalized with norm.</summary>
    public float CurrentAvgAmp
    {
        get { return this.ampSum / this.bufferSize * this.norm; }
    }

    /// <summary>Last completed peak amplitude, normalized with norm.</summary>
    public float CurrentPeakAmp
    {
        get { return this.currentPeakAmp * this.norm; }
        protected set { this.currentPeakAmp = value / this.norm; }
    }

    /// <summary>Average of the peak values collected since the last reset, or 0 if there are none.</summary>
    public float AccumAvgPeakAmp
    {
        get
        {
            if (this.accumAvgPeakAmpCount == 0)
            {
                return 0;
            }
            return this.accumAvgPeakAmpSum / this.accumAvgPeakAmpCount * this.norm;
        }
    }

    /// <summary>Clears the accumulated peak average and the peak currently being tracked.</summary>
    public void ResetAccumAvgPeakAmp()
    {
        this.accumAvgPeakAmpSum = 0;
        this.accumAvgPeakAmpCount = 0;
        this.ampPeak = 0;
    }

    /// <summary>Feeds a buffer of samples into the meter.</summary>
    public abstract T[] Process(T[] buf);

    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
/// <summary>
/// Level meter for float samples.
/// </summary>
public class LevelMeterFloat : LevelMeter<float>
{
    /// <summary>Create new LevelMeterFloat instance.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    public LevelMeterFloat(int samplingRate, int numChannels) : base(samplingRate, numChannels)
    {
        norm = 1.0f;
    }

    /// <summary>Updates the running average and peak values with the samples of buf.</summary>
    /// <param name="buf">Samples to measure; the buffer is not modified.</param>
    /// <returns>The same buffer that was passed in.</returns>
    public override float[] Process(float[] buf)
    {
        for (int i = 0; i < buf.Length; i++)
        {
            float sample = buf[i];
            float amp = sample < 0 ? -sample : sample;

            // Slide the window: the new value replaces the oldest one in the running sum.
            int head = this.prevValuesHead;
            ampSum = ampSum + amp - this.prevValues[head];
            this.prevValues[head] = amp;

            if (ampPeak < amp)
            {
                ampPeak = amp;
            }

            // Each time the ring buffer is at its start, publish the peak and begin a new one.
            if (head == 0)
            {
                currentPeakAmp = ampPeak;
                ampPeak = 0;
                accumAvgPeakAmpSum += currentPeakAmp;
                accumAvgPeakAmpCount++;
            }

            this.prevValuesHead = (head + 1) % this.bufferSize;
        }
        return buf;
    }
}
/// <summary>
/// LevelMeter specialization for short audio.
/// </summary>
public class LevelMeterShort : LevelMeter<short>
{
    /// <summary>Create new LevelMeterShort instance.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    public LevelMeterShort(int samplingRate, int numChannels) : base(samplingRate, numChannels)
    {
        norm = 1.0f / short.MaxValue;
    }

    /// <summary>Updates the running average and peak values with the samples of buf.</summary>
    /// <param name="buf">Samples to measure; the buffer is not modified.</param>
    /// <returns>The same buffer that was passed in.</returns>
    public override short[] Process(short[] buf)
    {
        foreach (var v in buf)
        {
            // Take the absolute value as int: negating short.MinValue and casting the result back
            // to short wraps around to -32768, which would enter the sum as a negative amplitude.
            int a = v;
            if (a < 0)
            {
                a = -a;
            }
            // Slide the window: the new value replaces the oldest one in the running sum.
            ampSum = ampSum + a - this.prevValues[this.prevValuesHead];
            this.prevValues[this.prevValuesHead] = a;
            if (ampPeak < a)
            {
                ampPeak = a;
            }
            // Each time the ring buffer is at its start, publish the peak and begin a new one.
            if (this.prevValuesHead == 0)
            {
                currentPeakAmp = ampPeak;
                ampPeak = 0;
                accumAvgPeakAmpSum += currentPeakAmp;
                accumAvgPeakAmpCount++;
            }
            this.prevValuesHead = (this.prevValuesHead + 1) % this.bufferSize;
        }
        return buf;
    }
}
/// <summary>Interface of a voice activity detector.</summary>
public interface IVoiceDetector
{
    /// <summary>If true, voice detection enabled.</summary>
    bool On
    {
        get;
        set;
    }

    /// <summary>Voice detected as soon as signal level exceeds threshold.</summary>
    float Threshold
    {
        get;
        set;
    }

    /// <summary>If true, voice detected.</summary>
    bool Detected
    {
        get;
    }

    /// <summary>Last time when switched to detected state.</summary>
    DateTime DetectedTime
    {
        get;
    }

    /// <summary>Called when switched to detected state.</summary>
    event Action OnDetected;

    /// <summary>Keep detected state during this time after signal level dropped below threshold.</summary>
    int ActivityDelayMs
    {
        get;
        set;
    }
}
/// <summary>Calibration Utility for Voice Detector.</summary>
/// <remarks>
/// Using this audio processor, you can calibrate the <see cref="IVoiceDetector.Threshold"></see>.
/// The processor only counts the samples that pass through it; the level itself is read from the
/// level meter given to the constructor.
/// </remarks>
public class VoiceDetectorCalibration<T> : IProcessor<T>
{
    IVoiceDetector voiceDetector;
    ILevelMeter levelMeter;
    int valuesPerSec;
    /// <summary>True while a calibration is in progress.</summary>
    public bool IsCalibrating { get { return calibrateCount > 0; } }
    // Number of values (samples over all channels) still to pass before calibration completes.
    protected int calibrateCount;
    private Action<float> onCalibrated;
    /// <summary>Create new VoiceDetectorCalibration instance.</summary>
    /// <param name="voiceDetector">Voice Detector to calibrate.</param>
    /// <param name="levelMeter">Level Meter to look at for calibration.</param>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="channels">Number of channels in the audio signal.</param>
    public VoiceDetectorCalibration(IVoiceDetector voiceDetector, ILevelMeter levelMeter, int samplingRate, int channels)
    {
        this.valuesPerSec = samplingRate * channels;
        this.voiceDetector = voiceDetector;
        this.levelMeter = levelMeter;
    }
    /// <summary>Start calibration.</summary>
    /// <param name="durationMs">Duration of the calibration procedure (in milliseconds).</param>
    /// <param name="onCalibrated">Optional callback that is called after calibration is complete.</param>
    /// <remarks>
    /// This activates the Calibration process.
    /// It will reset the given LevelMeter's AccumAvgPeakAmp (accumulated average peak amplitude),
    /// and when the duration has passed, use it for the VoiceDetector's detection threshold.
    /// </remarks>
    public void Calibrate(int durationMs, Action<float> onCalibrated = null)
    {
        // 64-bit product: valuesPerSec * durationMs overflows int for durations of about 22 seconds
        // at 48 kHz stereo, which used to end the calibration immediately.
        long count = (long)valuesPerSec * durationMs / 1000;
        if (count > int.MaxValue)
        {
            count = int.MaxValue;
        }
        else if (count < int.MinValue)
        {
            count = int.MinValue;
        }
        this.calibrateCount = (int)count;
        this.onCalibrated = onCalibrated;
        levelMeter.ResetAccumAvgPeakAmp();
    }
    /// <summary>Counts down the calibration period and applies the new threshold when it has elapsed.</summary>
    /// <param name="buf">Audio buffer; it is only measured by length and passed through unchanged.</param>
    /// <returns>The same buffer that was passed in.</returns>
    public T[] Process(T[] buf)
    {
        if (this.calibrateCount != 0)
        {
            this.calibrateCount -= buf.Length;
            if (this.calibrateCount <= 0)
            {
                this.calibrateCount = 0;
                // The threshold is set to twice the average peak measured during calibration.
                this.voiceDetector.Threshold = levelMeter.AccumAvgPeakAmp * 2;
                if (this.onCalibrated != null) this.onCalibrated(this.voiceDetector.Threshold);
            }
        }
        return buf;
    }
    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
/// <summary>Placeholder voice detector: it is always off, never detects anything and ignores all settings.</summary>
public class VoiceDetectorDummy : IVoiceDetector
{
    /// <summary>Always false; assigned values are ignored.</summary>
    public bool On
    {
        get { return false; }
        set { }
    }

    /// <summary>Always 0; assigned values are ignored.</summary>
    public float Threshold
    {
        get { return 0; }
        set { }
    }

    /// <summary>Always false.</summary>
    public bool Detected
    {
        get { return false; }
    }

    /// <summary>Always 0; assigned values are ignored.</summary>
    public int ActivityDelayMs
    {
        get { return 0; }
        set { }
    }

    /// <summary>Never assigned by this class.</summary>
    public DateTime DetectedTime { get; private set; }

    // Explicit empty accessors avoid warning CS0067 (the event 'AudioUtil.VoiceDetectorDummy.OnDetected' is never used).
    /// <summary>Never raised; subscriptions are discarded.</summary>
    public event Action OnDetected
    {
        add { }
        remove { }
    }
}
/// <summary>
/// Simple voice activity detector triggered by signal level.
/// </summary>
/// <remarks>
/// This base class holds the settings and the detected state; the sample-type specific subclasses
/// compare the samples against the threshold in <see cref="Process"/>.
/// </remarks>
public abstract class VoiceDetector<T> : IProcessor<T>, IVoiceDetector
{
    /// <summary>If true, voice detection enabled.</summary>
    public bool On { get; set; }
    /// <summary>Voice detected as soon as signal level exceeds threshold.</summary>
    /// <remarks>Exposed in normalized units; stored internally in raw sample units (divided by norm).</remarks>
    public float Threshold { get { return threshold * norm; } set { threshold = value / norm; } }
    // Factor that converts raw sample units to the normalized range; set by subclasses.
    protected float norm;
    // Threshold in raw sample units.
    protected float threshold;
    bool detected;
    /// <summary>If true, voice detected.</summary>
    public bool Detected
    {
        get { return detected; }
        protected set
        {
            if (detected != value)
            {
                // The timestamp is updated on every change of state, in both directions.
                detected = value; DetectedTime = DateTime.Now;
                if (detected && OnDetected != null) OnDetected();
            }
        }
    }
    /// <summary>Time of the last change of <see cref="Detected"/>.</summary>
    public DateTime DetectedTime { get; private set; }
    /// <summary>Keep detected state during this time after signal level dropped below threshold.</summary>
    public int ActivityDelayMs
    {
        get { return this.activityDelay; }
        set
        {
            this.activityDelay = value;
            // 64-bit product: value * valuesCountPerSec overflows int for delays of about
            // 22 seconds at 48 kHz stereo. The result is clamped to the int range.
            long count = (long)value * valuesCountPerSec / 1000;
            if (count > int.MaxValue)
            {
                count = int.MaxValue;
            }
            else if (count < int.MinValue)
            {
                count = int.MinValue;
            }
            this.activityDelayValuesCount = (int)count;
        }
    }
    /// <summary>Called when switched to detected state.</summary>
    public event Action OnDetected;
    // Activity delay in milliseconds, as assigned through ActivityDelayMs.
    protected int activityDelay;
    // Number of consecutive values at or below the threshold.
    protected int autoSilenceCounter = 0;
    // Values (samples over all channels) per second.
    protected int valuesCountPerSec;
    // Activity delay expressed in values.
    protected int activityDelayValuesCount;
    /// <summary>Initializes the detector: enabled, with an activity delay of 500 ms.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    internal VoiceDetector(int samplingRate, int numChannels)
    {
        // Must be set before ActivityDelayMs, whose setter uses it.
        this.valuesCountPerSec = samplingRate * numChannels;
        this.ActivityDelayMs = 500;
        this.On = true;
    }
    /// <summary>Runs detection on a buffer of samples.</summary>
    public abstract T[] Process(T[] buf);
    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
/// <summary>Voice detector for float samples.</summary>
public class VoiceDetectorFloat : VoiceDetector<float>
{
    /// <summary>Create a new VoiceDetectorFloat instance.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    public VoiceDetectorFloat(int samplingRate, int numChannels) : base(samplingRate, numChannels)
    {
        norm = 1f;
    }

    /// <summary>Updates the detected state from the samples of the buffer.</summary>
    /// <param name="buffer">Samples to examine; the buffer is not modified.</param>
    /// <returns>The buffer itself if detection is off or voice is detected, otherwise null.</returns>
    public override float[] Process(float[] buffer)
    {
        if (!this.On)
        {
            return buffer;
        }

        for (int i = 0; i < buffer.Length; i++)
        {
            if (buffer[i] > this.threshold)
            {
                // A value above the threshold (re)starts the detected state.
                this.Detected = true;
                this.autoSilenceCounter = 0;
                continue;
            }
            this.autoSilenceCounter++;
        }

        // Drop the detected state once the silence has lasted longer than the activity delay.
        if (this.autoSilenceCounter > this.activityDelayValuesCount)
        {
            this.Detected = false;
        }

        if (Detected)
        {
            return buffer;
        }
        return null;
    }
}
/// <summary>Voice detector for short samples.</summary>
public class VoiceDetectorShort : VoiceDetector<short>
{
    /// <summary>Create a new VoiceDetectorShort instance.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="numChannels">Number of channels in the audio signal.</param>
    public VoiceDetectorShort(int samplingRate, int numChannels) : base(samplingRate, numChannels)
    {
        norm = 1.0f / short.MaxValue;
    }

    /// <summary>Updates the detected state from the samples of the buffer.</summary>
    /// <param name="buffer">Samples to examine; the buffer is not modified.</param>
    /// <returns>The buffer itself if detection is off or voice is detected, otherwise null.</returns>
    public override short[] Process(short[] buffer)
    {
        if (!this.On)
        {
            return buffer;
        }

        for (int i = 0; i < buffer.Length; i++)
        {
            if (buffer[i] > this.threshold)
            {
                // A value above the threshold (re)starts the detected state.
                this.Detected = true;
                this.autoSilenceCounter = 0;
                continue;
            }
            this.autoSilenceCounter++;
        }

        // Drop the detected state once the silence has lasted longer than the activity delay.
        if (this.autoSilenceCounter > this.activityDelayValuesCount)
        {
            this.Detected = false;
        }

        if (Detected)
        {
            return buffer;
        }
        return null;
    }
}
/// <summary>Utility Audio Processor Voice Detection Calibration.</summary>
/// <remarks>
/// Encapsulates level meter, voice detector and voice detector calibrator in single instance.
/// Supported sample types are float and short.
/// </remarks>
public class VoiceLevelDetectCalibrate<T> : IProcessor<T>
{
    /// <summary>The LevelMeter in use.</summary>
    public ILevelMeter LevelMeter { get; private set; }
    /// <summary>The VoiceDetector in use.</summary>
    public IVoiceDetector VoiceDetector { get; private set; }
    // The VoiceDetectorCalibration in use.
    VoiceDetectorCalibration<T> calibration;
    /// <summary>Create new VoiceLevelDetectCalibrate instance.</summary>
    /// <param name="samplingRate">Sampling rate of the audio signal (in Hz).</param>
    /// <param name="channels">Number of channels in the audio signal.</param>
    /// <exception cref="NotSupportedException">T is neither float nor short.</exception>
    public VoiceLevelDetectCalibrate(int samplingRate, int channels)
    {
        // The type is checked with typeof(T) rather than through a default element: for a reference
        // type the element is null, and asking it for its type failed with a NullReferenceException
        // before the intended error could be reported.
        if (typeof(T) == typeof(float))
        {
            LevelMeter = new LevelMeterFloat(samplingRate, channels);
            VoiceDetector = new VoiceDetectorFloat(samplingRate, channels);
        }
        else if (typeof(T) == typeof(short))
        {
            LevelMeter = new LevelMeterShort(samplingRate, channels);
            VoiceDetector = new VoiceDetectorShort(samplingRate, channels);
        }
        else
        {
            throw new NotSupportedException("VoiceLevelDetectCalibrate: type not supported: " + typeof(T));
        }
        calibration = new VoiceDetectorCalibration<T>(VoiceDetector, LevelMeter, samplingRate, channels);
    }
    /// <summary>Start calibration.</summary>
    /// <param name="durationMs">Duration of the calibration procedure (in milliseconds).</param>
    /// <param name="onCalibrated">Called when calibration is complete. Parameter is new threshold value.</param>
    /// <remarks>
    /// This activates the Calibration process.
    /// It will reset the given LevelMeter's AccumAvgPeakAmp (accumulated average peak amplitude),
    /// and when the duration has passed, use it for the VoiceDetector's detection threshold.
    /// </remarks>
    public void Calibrate(int durationMs, Action<float> onCalibrated = null)
    {
        calibration.Calibrate(durationMs, onCalibrated);
    }
    /// <summary>True while a calibration is in progress.</summary>
    public bool IsCalibrating { get { return calibration.IsCalibrating; } }
    /// <summary>Passes the buffer through the level meter, the calibration and the voice detector, in this order.</summary>
    /// <param name="buf">Audio buffer to process.</param>
    /// <returns>The result of the voice detector.</returns>
    public T[] Process(T[] buf)
    {
        buf = (LevelMeter as IProcessor<T>).Process(buf);
        buf = (calibration as IProcessor<T>).Process(buf);
        buf = (VoiceDetector as IProcessor<T>).Process(buf);
        return buf;
    }
    /// <summary>Disposes the level meter, the voice detector and the calibration.</summary>
    public void Dispose()
    {
        (LevelMeter as IProcessor<T>).Dispose();
        (VoiceDetector as IProcessor<T>).Dispose();
        calibration.Dispose();
    }
}
}
}