forked from cgvr/DeltaVR
cafe waiter npc and mic detect speech more
This commit is contained in:
Binary file not shown.
@@ -61,6 +61,7 @@ public class MicrophoneStand : MonoBehaviour
|
|||||||
/// <summary>
/// Handles an updated piece of player speech text: displays it in
/// <c>outputText</c> and then raises <c>OnPlayerFinishedSpeaking</c>.
/// </summary>
/// <param name="text">The text to display.</param>
private void OnPlayerSpeechUpdated(string text)
{
    outputText.text = text;

    // Every update is also reported as "finished speaking"; this handler
    // does not wait for a separate end-of-speech signal.
    if (OnPlayerFinishedSpeaking != null)
    {
        OnPlayerFinishedSpeaking.Invoke();
    }
}
|
||||||
|
|
||||||
private void OnPlayerSpeechFinished(string playerText)
|
private void OnPlayerSpeechFinished(string playerText)
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ public class FMODWhisperBridge : MonoBehaviour
|
|||||||
|
|
||||||
// activation flag
|
// activation flag
|
||||||
private bool isRecordingActivated = false;
|
private bool isRecordingActivated = false;
|
||||||
|
private bool _skipOneFeedFrame = false;
|
||||||
|
|
||||||
private void Awake()
|
private void Awake()
|
||||||
{
|
{
|
||||||
@@ -188,13 +189,15 @@ public class FMODWhisperBridge : MonoBehaviour
|
|||||||
// Wire events
|
// Wire events
|
||||||
_stream.OnSegmentUpdated += (seg) =>
|
_stream.OnSegmentUpdated += (seg) =>
|
||||||
{
|
{
|
||||||
if (IsSpeechMeaningful(seg.Result))
|
string cleanedText = PostProcessInput(seg.Result);
|
||||||
OnWhisperSegmentUpdated?.Invoke(seg.Result);
|
if (!string.IsNullOrEmpty(cleanedText))
|
||||||
|
OnWhisperSegmentUpdated?.Invoke(cleanedText);
|
||||||
};
|
};
|
||||||
_stream.OnSegmentFinished += (seg) =>
|
_stream.OnSegmentFinished += (seg) =>
|
||||||
{
|
{
|
||||||
if (IsSpeechMeaningful(seg.Result))
|
string cleanedText = PostProcessInput(seg.Result);
|
||||||
OnWhisperSegmentFinished?.Invoke(seg.Result);
|
if (!string.IsNullOrEmpty(cleanedText))
|
||||||
|
OnWhisperSegmentFinished?.Invoke(cleanedText);
|
||||||
};
|
};
|
||||||
|
|
||||||
whisper.useVad = useVadInStream;
|
whisper.useVad = useVadInStream;
|
||||||
@@ -202,15 +205,28 @@ public class FMODWhisperBridge : MonoBehaviour
|
|||||||
_stream.StartStream();
|
_stream.StartStream();
|
||||||
_streamStarted = true;
|
_streamStarted = true;
|
||||||
|
|
||||||
// Unpause loopback if it's meant to play only while active
|
|
||||||
if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle())
|
|
||||||
_playChannel.setPaused(false);
|
|
||||||
|
|
||||||
// Prepare temp arrays roughly 100ms of audio
|
// --- NEW: Clear the ring buffer and reset read pointer ---
|
||||||
EnsureTmpCapacity((rate / 10) * _nativeChannels);
|
// Pause loopback while we clear (optional, but avoids clicks)
|
||||||
|
if (playLoopback && _playChannel.hasHandle())
|
||||||
|
_playChannel.setPaused(true);
|
||||||
|
|
||||||
|
// Clear buffer bytes
|
||||||
|
ClearRecordRingBuffer();
|
||||||
|
|
||||||
|
// Reset our read pointer to the current write head
|
||||||
|
_core.getRecordPosition(recordDriverId, out _lastRecordPos);
|
||||||
|
|
||||||
|
// We’ll skip feeding for one frame to guarantee a clean start
|
||||||
|
_skipOneFeedFrame = true;
|
||||||
|
|
||||||
|
// Unpause loopback if we want it active during recording
|
||||||
|
if (playLoopback && _playChannel.hasHandle() && (!loopbackOnlyWhenActive || isRecordingActivated))
|
||||||
|
_playChannel.setPaused(loopbackOnlyWhenActive ? false : _playChannel.getPaused(out var paused) == FMOD.RESULT.OK && paused ? false : false);
|
||||||
|
|
||||||
isRecordingActivated = true;
|
isRecordingActivated = true;
|
||||||
Debug.Log("[FMOD→Whisper] Stream activated (Whisper started; FMOD was already recording).");
|
Debug.Log("[FMOD→Whisper] Stream activated (buffer cleared; reading from current head).");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -269,36 +285,37 @@ public class FMODWhisperBridge : MonoBehaviour
|
|||||||
|
|
||||||
IntPtr p1, p2;
|
IntPtr p1, p2;
|
||||||
uint len1, len2;
|
uint len1, len2;
|
||||||
|
|
||||||
var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
|
var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
|
||||||
if (r != RESULT.OK)
|
if (r != RESULT.OK)
|
||||||
{
|
{
|
||||||
// If lock fails, still advance last position to avoid spin
|
|
||||||
_lastRecordPos = recPos;
|
_lastRecordPos = recPos;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (shouldFeed)
|
if (shouldFeed && !_skipOneFeedFrame)
|
||||||
{
|
{
|
||||||
if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
|
if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
|
||||||
if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
|
if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
|
||||||
}
|
}
|
||||||
// else: just discard; we’re only keeping the ring fresh.
|
// If skipping, we just discard this frame to ensure no stale data leaks.
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
_recSound.unlock(p1, p2, len1, len2);
|
_recSound.unlock(p1, p2, len1, len2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (_skipOneFeedFrame) _skipOneFeedFrame = false;
|
||||||
|
|
||||||
_lastRecordPos = recPos;
|
_lastRecordPos = recPos;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Cleans a raw transcription segment before it is forwarded to listeners.
/// Removes the markers "[silence]", "[ Silence ]" and "BLANK_AUDIO", then
/// strips any remaining square brackets and trims surrounding whitespace.
/// </summary>
/// <param name="input">Raw segment text; may be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="input"/> is
/// null, empty, or consists only of removed markers. Callers treat an empty
/// result as "nothing to report".
/// </returns>
private string PostProcessInput(string input)
{
    // Segment results are passed in straight from the stream callbacks;
    // tolerate a null/empty result instead of throwing inside the callback.
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Order matters: the whole markers are removed first, then any leftover
    // brackets (text inside other bracketed annotations is kept).
    return input
        .Replace("[silence]", "")
        .Replace("[ Silence ]", "")
        .Replace("BLANK_AUDIO", "")
        .Replace("[", "")
        .Replace("]", "")
        .Trim();
}
|
||||||
|
|
||||||
private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
|
private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
|
||||||
@@ -359,4 +376,55 @@ public class FMODWhisperBridge : MonoBehaviour
|
|||||||
_recSound.clearHandle();
|
_recSound.clearHandle();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Overwrites the entire recording sound buffer with zero bytes.
/// Returns without doing anything when the recording sound has no handle or
/// its PCM length is zero. If the buffer cannot be locked, a warning is
/// logged and the buffer is left untouched.
/// </summary>
private void ClearRecordRingBuffer()
{
    bool nothingToClear = !_recSound.hasHandle() || _soundPcmLength == 0;
    if (nothingToClear)
    {
        return;
    }

    // Size in bytes: PCM length x channel count x 2 bytes per 16-bit sample.
    uint bufferBytes = _soundPcmLength * (uint)_nativeChannels * 2;

    // Lock the whole buffer, starting at offset 0.
    var lockResult = _recSound.@lock(
        0,
        bufferBytes,
        out IntPtr firstPtr,
        out IntPtr secondPtr,
        out uint firstLen,
        out uint secondLen);

    if (lockResult != FMOD.RESULT.OK)
    {
        Debug.LogWarning($"[FMOD→Whisper] Could not lock ring buffer to clear: {lockResult}");
        return;
    }

    try
    {
        // The lock reports up to two regions; zero whichever are non-empty.
        // ZeroMem copies from a shared zero chunk, so no buffer-sized
        // temporary array is allocated here.
        if (firstLen > 0)
        {
            ZeroMem(firstPtr, (int)firstLen);
        }

        if (secondLen > 0)
        {
            ZeroMem(secondPtr, (int)secondLen);
        }
    }
    finally
    {
        // Always release the lock, even if zeroing throws.
        _recSound.unlock(firstPtr, secondPtr, firstLen, secondLen);
    }
}
|
||||||
|
|
||||||
|
// Shared all-zero source block (16 KB) that ZeroMem copies from, so clearing a
// large region never needs a region-sized temporary array.
private static readonly byte[] _zeroChunk = new byte[16 * 1024]; // 16 KB

/// <summary>
/// Writes <paramref name="byteLen"/> zero bytes to unmanaged memory starting
/// at <paramref name="dst"/>, copying from <c>_zeroChunk</c> in pieces of at
/// most <c>_zeroChunk.Length</c> bytes.
/// </summary>
/// <param name="dst">Start address of the memory to clear.</param>
/// <param name="byteLen">Number of bytes to clear; zero or negative values clear nothing.</param>
private static void ZeroMem(IntPtr dst, int byteLen)
{
    for (int written = 0; written < byteLen; )
    {
        int chunk = Math.Min(_zeroChunk.Length, byteLen - written);
        Marshal.Copy(_zeroChunk, 0, IntPtr.Add(dst, written), chunk);
        written += chunk;
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,9 +50,9 @@ public class CafeWaiterNPC : NPCController
|
|||||||
{
|
{
|
||||||
SpeakVoiceLine(0);
|
SpeakVoiceLine(0);
|
||||||
|
|
||||||
|
fmodWhisperBridge.ActivateRecording();
|
||||||
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||||
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||||
fmodWhisperBridge.ActivateRecording();
|
|
||||||
|
|
||||||
notepad.SetActive(true);
|
notepad.SetActive(true);
|
||||||
state = 1;
|
state = 1;
|
||||||
@@ -76,7 +76,7 @@ public class CafeWaiterNPC : NPCController
|
|||||||
|
|
||||||
private void OnPlayerSpeechFinished(string playerText)
|
private void OnPlayerSpeechFinished(string playerText)
|
||||||
{
|
{
|
||||||
if (Time.time < lastPlayerVoiceUpdateTime + 0.5f)
|
if (Time.time < lastPlayerVoiceUpdateTime + 1.0f)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -85,23 +85,34 @@ public class CafeWaiterNPC : NPCController
|
|||||||
if (state == 1)
|
if (state == 1)
|
||||||
{
|
{
|
||||||
// Show transcription and ask whether it is correct
|
// Show transcription and ask whether it is correct
|
||||||
|
fmodWhisperBridge.DeactivateRecording();
|
||||||
notepadText.text = playerText;
|
notepadText.text = playerText;
|
||||||
notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f);
|
notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f).OnComplete(() =>
|
||||||
|
{
|
||||||
|
fmodWhisperBridge.ActivateRecording();
|
||||||
|
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||||
|
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||||
|
});
|
||||||
SpeakVoiceLine(1);
|
SpeakVoiceLine(1);
|
||||||
state = 2;
|
state = 2;
|
||||||
} else if (state == 2)
|
} else if (state == 2)
|
||||||
{
|
{
|
||||||
|
fmodWhisperBridge.DeactivateRecording();
|
||||||
// Flip notepad back
|
// Flip notepad back
|
||||||
notepad.transform.DOLocalRotate(notepadOriginalRotation, 0.5f);
|
notepad.transform.DOLocalRotate(notepadOriginalRotation, 0.5f);
|
||||||
// if player answered positively, bring food, otherwise ask again
|
// if player answered positively, bring food, otherwise ask again
|
||||||
if (playerText.ToLower().Contains("yes"))
|
if (playerText.ToLower().Contains("ye"))
|
||||||
{
|
{
|
||||||
|
|
||||||
SpeakVoiceLine(2);
|
SpeakVoiceLine(2);
|
||||||
Invoke("BringFood", 1f);
|
Invoke("BringFood", 1f);
|
||||||
state = 3;
|
state = 3;
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
SpeakVoiceLine(3);
|
SpeakVoiceLine(3);
|
||||||
|
fmodWhisperBridge.ActivateRecording();
|
||||||
|
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||||
|
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||||
state = 1;
|
state = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -113,6 +124,14 @@ public class CafeWaiterNPC : NPCController
|
|||||||
if (state == 1)
|
if (state == 1)
|
||||||
{
|
{
|
||||||
notepadText.text = playerText;
|
notepadText.text = playerText;
|
||||||
|
|
||||||
|
// For now, when something is transcribed, treat it as player finished speaking
|
||||||
|
OnPlayerSpeechFinished(playerText);
|
||||||
|
}
|
||||||
|
// faster reaction to player answering yes/no
|
||||||
|
else if (state == 2)
|
||||||
|
{
|
||||||
|
OnPlayerSpeechFinished(playerText);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user