1
0
forked from cgvr/DeltaVR

cafe waiter npc and mic detect speech more

This commit is contained in:
2026-02-21 22:06:23 +02:00
parent dbd2cdca0d
commit 8a3f10a9a2
5 changed files with 113 additions and 25 deletions

View File

@@ -61,6 +61,7 @@ public class MicrophoneStand : MonoBehaviour
private void OnPlayerSpeechUpdated(string text) private void OnPlayerSpeechUpdated(string text)
{ {
outputText.text = text; outputText.text = text;
OnPlayerFinishedSpeaking?.Invoke();
} }
private void OnPlayerSpeechFinished(string playerText) private void OnPlayerSpeechFinished(string playerText)

View File

@@ -62,6 +62,7 @@ public class FMODWhisperBridge : MonoBehaviour
// activation flag // activation flag
private bool isRecordingActivated = false; private bool isRecordingActivated = false;
private bool _skipOneFeedFrame = false;
private void Awake() private void Awake()
{ {
@@ -188,13 +189,15 @@ public class FMODWhisperBridge : MonoBehaviour
// Wire events // Wire events
_stream.OnSegmentUpdated += (seg) => _stream.OnSegmentUpdated += (seg) =>
{ {
if (IsSpeechMeaningful(seg.Result)) string cleanedText = PostProcessInput(seg.Result);
OnWhisperSegmentUpdated?.Invoke(seg.Result); if (!string.IsNullOrEmpty(cleanedText))
OnWhisperSegmentUpdated?.Invoke(cleanedText);
}; };
_stream.OnSegmentFinished += (seg) => _stream.OnSegmentFinished += (seg) =>
{ {
if (IsSpeechMeaningful(seg.Result)) string cleanedText = PostProcessInput(seg.Result);
OnWhisperSegmentFinished?.Invoke(seg.Result); if (!string.IsNullOrEmpty(cleanedText))
OnWhisperSegmentFinished?.Invoke(cleanedText);
}; };
whisper.useVad = useVadInStream; whisper.useVad = useVadInStream;
@@ -202,15 +205,28 @@ public class FMODWhisperBridge : MonoBehaviour
_stream.StartStream(); _stream.StartStream();
_streamStarted = true; _streamStarted = true;
// Unpause loopback if it's meant to play only while active
if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle())
_playChannel.setPaused(false);
// Prepare temp arrays roughly 100ms of audio // --- NEW: Clear the ring buffer and reset read pointer ---
EnsureTmpCapacity((rate / 10) * _nativeChannels); // Pause loopback while we clear (optional, but avoids clicks)
if (playLoopback && _playChannel.hasHandle())
_playChannel.setPaused(true);
// Clear buffer bytes
ClearRecordRingBuffer();
// Reset our read pointer to the current write head
_core.getRecordPosition(recordDriverId, out _lastRecordPos);
// We'll skip feeding for one frame to guarantee a clean start
_skipOneFeedFrame = true;
// Unpause loopback if we want it active during recording
if (playLoopback && _playChannel.hasHandle() && (!loopbackOnlyWhenActive || isRecordingActivated))
_playChannel.setPaused(loopbackOnlyWhenActive ? false : _playChannel.getPaused(out var paused) == FMOD.RESULT.OK && paused ? false : false);
isRecordingActivated = true; isRecordingActivated = true;
Debug.Log("[FMOD→Whisper] Stream activated (Whisper started; FMOD was already recording)."); Debug.Log("[FMOD→Whisper] Stream activated (buffer cleared; reading from current head).");
} }
/// <summary> /// <summary>
@@ -269,36 +285,37 @@ public class FMODWhisperBridge : MonoBehaviour
IntPtr p1, p2; IntPtr p1, p2;
uint len1, len2; uint len1, len2;
var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2); var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
if (r != RESULT.OK) if (r != RESULT.OK)
{ {
// If lock fails, still advance last position to avoid spin
_lastRecordPos = recPos; _lastRecordPos = recPos;
return; return;
} }
try try
{ {
if (shouldFeed) if (shouldFeed && !_skipOneFeedFrame)
{ {
if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1); if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2); if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
} }
// else: just discard; were only keeping the ring fresh. // If skipping, we just discard this frame to ensure no stale data leaks.
} }
finally finally
{ {
_recSound.unlock(p1, p2, len1, len2); _recSound.unlock(p1, p2, len1, len2);
} }
if (_skipOneFeedFrame) _skipOneFeedFrame = false;
_lastRecordPos = recPos; _lastRecordPos = recPos;
} }
private bool IsSpeechMeaningful(string userText) private string PostProcessInput(string input)
{ {
return !string.IsNullOrEmpty(userText) return input.Replace("[silence]", "").Replace("[ Silence ]", "").Replace("BLANK_AUDIO", "").Replace("[", "").Replace("]", "").Trim();
&& !userText.Contains("BLANK_AUDIO")
&& !userText.Trim().Equals("[ Silence ]");
} }
private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen) private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
@@ -359,4 +376,55 @@ public class FMODWhisperBridge : MonoBehaviour
_recSound.clearHandle(); _recSound.clearHandle();
} }
} }
/// <summary>
/// Overwrites the recording sound's sample memory with zeros.
/// Does nothing when the sound has no handle or its PCM length is zero;
/// logs a warning and returns if the buffer cannot be locked.
/// </summary>
private void ClearRecordRingBuffer()
{
    if (!_recSound.hasHandle())
    {
        return;
    }

    if (_soundPcmLength == 0)
    {
        return;
    }

    // Byte size of the whole buffer: samples * channels * 2
    // (the factor 2 assumes 16-bit PCM, i.e. two bytes per sample).
    uint totalBytes = _soundPcmLength * (uint)_nativeChannels * 2;

    // Request a lock spanning the entire buffer, starting at byte offset 0.
    var lockResult = _recSound.@lock(
        0,
        totalBytes,
        out IntPtr firstRegion,
        out IntPtr secondRegion,
        out uint firstLength,
        out uint secondLength);

    if (lockResult != FMOD.RESULT.OK)
    {
        Debug.LogWarning($"[FMOD→Whisper] Could not lock ring buffer to clear: {lockResult}");
        return;
    }

    try
    {
        // Zero each region the lock handed back; empty regions are skipped.
        if (firstLength > 0)
        {
            ZeroMem(firstRegion, (int)firstLength);
        }

        if (secondLength > 0)
        {
            ZeroMem(secondRegion, (int)secondLength);
        }
    }
    finally
    {
        // Always release the lock, even if zeroing throws.
        _recSound.unlock(firstRegion, secondRegion, firstLength, secondLength);
    }
}
// Shared all-zero scratch block (16 KB) used as the copy source by ZeroMem,
// so clearing a buffer never allocates an array of the buffer's size.
private static readonly byte[] _zeroChunk = new byte[16 * 1024];

/// <summary>
/// Writes <paramref name="byteLen"/> zero bytes to unmanaged memory starting at
/// <paramref name="dst"/>, copying from the shared zero block in chunks.
/// A zero or negative <paramref name="byteLen"/> writes nothing.
/// </summary>
/// <param name="dst">Address of the first byte to clear.</param>
/// <param name="byteLen">Number of bytes to clear.</param>
private static void ZeroMem(IntPtr dst, int byteLen)
{
    for (int written = 0; written < byteLen;)
    {
        // Copy at most one full zero block per pass; the last pass may be shorter.
        int remaining = byteLen - written;
        int chunk = remaining < _zeroChunk.Length ? remaining : _zeroChunk.Length;

        Marshal.Copy(_zeroChunk, 0, dst + written, chunk);
        written += chunk;
    }
}
} }

View File

@@ -50,9 +50,9 @@ public class CafeWaiterNPC : NPCController
{ {
SpeakVoiceLine(0); SpeakVoiceLine(0);
fmodWhisperBridge.ActivateRecording();
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated; fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished; fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
fmodWhisperBridge.ActivateRecording();
notepad.SetActive(true); notepad.SetActive(true);
state = 1; state = 1;
@@ -76,7 +76,7 @@ public class CafeWaiterNPC : NPCController
private void OnPlayerSpeechFinished(string playerText) private void OnPlayerSpeechFinished(string playerText)
{ {
if (Time.time < lastPlayerVoiceUpdateTime + 0.5f) if (Time.time < lastPlayerVoiceUpdateTime + 1.0f)
{ {
return; return;
} }
@@ -85,23 +85,34 @@ public class CafeWaiterNPC : NPCController
if (state == 1) if (state == 1)
{ {
// Show transcription and ask whether it is correct // Show transcription and ask whether it is correct
fmodWhisperBridge.DeactivateRecording();
notepadText.text = playerText; notepadText.text = playerText;
notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f); notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f).OnComplete(() =>
{
fmodWhisperBridge.ActivateRecording();
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
});
SpeakVoiceLine(1); SpeakVoiceLine(1);
state = 2; state = 2;
} else if (state == 2) } else if (state == 2)
{ {
fmodWhisperBridge.DeactivateRecording();
// Flip notepad back // Flip notepad back
notepad.transform.DOLocalRotate(notepadOriginalRotation, 0.5f); notepad.transform.DOLocalRotate(notepadOriginalRotation, 0.5f);
// if player answered positively, bring food, otherwise ask again // if player answered positively, bring food, otherwise ask again
if (playerText.ToLower().Contains("yes")) if (playerText.ToLower().Contains("ye"))
{ {
SpeakVoiceLine(2); SpeakVoiceLine(2);
Invoke("BringFood", 1f); Invoke("BringFood", 1f);
state = 3; state = 3;
} else } else
{ {
SpeakVoiceLine(3); SpeakVoiceLine(3);
fmodWhisperBridge.ActivateRecording();
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
state = 1; state = 1;
} }
} }
@@ -113,6 +124,14 @@ public class CafeWaiterNPC : NPCController
if (state == 1) if (state == 1)
{ {
notepadText.text = playerText; notepadText.text = playerText;
// For now, when something is transcribed, treat it as player finished speaking
OnPlayerSpeechFinished(playerText);
}
// faster reaction to player answering yes/no
else if (state == 2)
{
OnPlayerSpeechFinished(playerText);
} }
} }

Binary file not shown.