diff --git a/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity b/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity
index 653ed15f..e59fbf7c 100644
--- a/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity
+++ b/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:0631fb43a46acd8f647502cd91a9d48c6df78c38b3f8e0a6727dad534486e4bf
-size 68525490
+oid sha256:5f4fd0fdace577985445de3829614fe92721750ede29834c23c8c30d1a3f4b7d
+size 68526229
diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs
index c86c8e32..c7ec4473 100644
--- a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs
+++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs
@@ -5,18 +5,19 @@ using UnityEngine;
using FMOD;
using FMODUnity;
using Whisper; // WhisperManager, WhisperStream, WhisperResult
-using Whisper.Utils; // AudioChunk
+using Whisper.Utils; // AudioChunk
+using Debug = UnityEngine.Debug;
///
-/// Capture microphone with FMOD and feed chunks to WhisperStream (no Unity Microphone).
-/// Also (optionally) plays the recorded sound back via FMOD loopback.
+/// FMOD mic is initialized once (Start) and runs continuously in a ring buffer.
+/// A Whisper stream is created when ActivateRecording() is called; DeactivateRecording() stops it and drops the reference.
+/// Optional loopback can be paused/resumed instead of starting/stopping to avoid stalls.
///
-
public class FMODWhisperBridge : MonoBehaviour
{
[Header("Whisper")]
[SerializeField] private WhisperManager whisper; // assign in Inspector
- [SerializeField] private bool useVadInStream = false; // let WhisperStream do VAD or not
+ [SerializeField] private bool useVadInStream = true; // let WhisperStream do VAD or not
[Header("FMOD capture")]
[Tooltip("Recording device index (0 = default)")]
@@ -29,6 +30,8 @@ public class FMODWhisperBridge : MonoBehaviour
[Header("Loopback (monitor your voice)")]
public bool playLoopback = true;
+ [Tooltip("If true, loopback plays only while active; otherwise it’s always on.")]
+ public bool loopbackOnlyWhenActive = true;
[Range(0f, 2f)] public float loopbackVolume = 1.0f;
public delegate void OnWhisperSegmentUpdatedDelegate(string result);
@@ -55,7 +58,9 @@ public class FMODWhisperBridge : MonoBehaviour
// temp conversion buffer
private float[] _floatTmp = new float[0];
+ private short[] _shortOverlay;
+ // activation flag
private bool isRecordingActivated = false;
private void Awake()
@@ -66,85 +71,180 @@ public class FMODWhisperBridge : MonoBehaviour
private async void Start()
{
- // Query device info to get native rate/channels.
- // (FMOD: getRecordDriverInfo gives you system rate & speaker mode)
+ // -------------- FMOD initialize ONCE --------------
+ // Query device info
string name;
Guid guid;
SPEAKERMODE sm;
int smChannels;
DRIVER_STATE driverState;
- // signature: getRecordDriverInfo(id, out name, nameLen, out guid, out systemrate, out speakermode, out speakermodechannels, out driverState)
- _core.getRecordDriverInfo(recordDriverId, out name, 256, out guid, out _nativeRate, out sm, out smChannels, out driverState);
- _nativeChannels = channels > 0 ? channels : smChannels;
- UnityEngine.Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={_nativeRate} ch={_nativeChannels}");
- // Build a user sound buffer that FMOD will fill (OPENUSER | LOOP_NORMAL).
+ var res = _core.getRecordDriverInfo(
+ recordDriverId,
+ out name, 256,
+ out guid,
+ out _nativeRate,
+ out sm,
+ out smChannels,
+ out driverState
+ );
+
+ if (res != RESULT.OK)
+ {
+ Debug.LogError($"[FMOD→Whisper] getRecordDriverInfo failed: {res}");
+ return;
+ }
+
+ _nativeChannels = channels > 0 ? channels : smChannels;
+ int rate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate;
+ Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={rate} ch={_nativeChannels}");
+
+ // Build user sound (ring buffer) — multiple seconds
CREATESOUNDEXINFO ex = new CREATESOUNDEXINFO
{
cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO)),
numchannels = _nativeChannels,
- defaultfrequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate,
+ defaultfrequency = rate,
format = SOUND_FORMAT.PCM16,
- length = (uint)(((_nativeRate > 0 ? _nativeRate : desiredSampleRate) * _nativeChannels) * sizeof(short)) // seconds=1 (we loop)
+ length = (uint)(rate * _nativeChannels * sizeof(short) * bufferLengthSec)
};
- _core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound);
+ res = _core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound);
+ if (res != RESULT.OK)
+ {
+ Debug.LogError($"[FMOD→Whisper] createSound failed: {res}");
+ return;
+ }
+
_recSound.getLength(out _soundPcmLength, TIMEUNIT.PCM);
- // Start FMOD recording into that sound (looping ring buffer).
- _core.recordStart(recordDriverId, _recSound, true);
- UnityEngine.Debug.Log("[FMOD→Whisper] Recording started.");
+ // Start recording (looping)
+ res = _core.recordStart(recordDriverId, _recSound, true);
+ if (res != RESULT.OK)
+ {
+ Debug.LogError($"[FMOD→Whisper] recordStart failed: {res}");
+ _recSound.release();
+ _recSound.clearHandle();
+ return;
+ }
- // Optional loopback playback using FMOD (plays same sound ring buffer).
+ // Initialize record position to avoid a huge first delta
+ _core.getRecordPosition(recordDriverId, out _lastRecordPos);
+ Debug.Log("[FMOD→Whisper] Recording started.");
+
+ // Loopback channel (optional). Start once; pause when inactive if desired.
_core.getMasterChannelGroup(out _masterGroup);
if (playLoopback)
{
- _core.playSound(_recSound, _masterGroup, false, out _playChannel);
- _playChannel.setMode(MODE._2D);
- _playChannel.setVolume(loopbackVolume);
- UnityEngine.Debug.Log("[FMOD→Whisper] Loopback playback started.");
+ res = _core.playSound(_recSound, _masterGroup, false, out _playChannel);
+ if (res == RESULT.OK && _playChannel.hasHandle())
+ {
+ _playChannel.setMode(MODE._2D);
+ _playChannel.setVolume(loopbackVolume);
+ if (loopbackOnlyWhenActive) _playChannel.setPaused(true); // keep muted until Activate
+ Debug.Log("[FMOD→Whisper] Loopback playback ready.");
+ }
+ else
+ {
+ Debug.LogWarning($"[FMOD→Whisper] playSound failed or channel invalid: {res}");
+ }
}
- // Create Whisper stream WITHOUT MicrophoneRecord, just from (freq, channels).
- // We'll push AudioChunk manually.
- // NOTE: WhisperStream’s sliding window is governed by manager’s stepSec/keepSec/lengthSec.
- _stream = await whisper.CreateStream(ex.defaultfrequency, _nativeChannels);
- _stream.OnResultUpdated += (txt) =>
+ // No Whisper stream here. It will be created on ActivateRecording().
+ await System.Threading.Tasks.Task.Yield();
+ }
+
+ ///
+ /// Creates a fresh Whisper stream and starts feeding audio (FMOD already recording).
+ ///
+ public async void ActivateRecording()
+ {
+ if (isRecordingActivated)
{
- //OnWhisperResultProcessed?.Invoke(txt);
- };
+ Debug.Log("[FMOD→Whisper] ActivateRecording ignored (already active).");
+ return;
+ }
+
+ if (!_recSound.hasHandle())
+ {
+ Debug.LogError("[FMOD→Whisper] FMOD not initialized or recording not running.");
+ return;
+ }
+
+ int rate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate;
+
+ try
+ {
+ _stream = await whisper.CreateStream(rate, _nativeChannels);
+ }
+ catch (Exception e)
+ {
+ Debug.LogError($"[FMOD→Whisper] CreateStream exception: {e}");
+ _stream = null;
+ _streamStarted = false;
+ return;
+ }
+
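+ // Wire events. NOTE(review): isRecordingActivated is only set at the end of this method, so a second ActivateRecording() or a DeactivateRecording() arriving during the await above is not guarded against — TODO confirm.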
_stream.OnSegmentUpdated += (seg) =>
{
if (IsSpeechMeaningful(seg.Result))
- {
OnWhisperSegmentUpdated?.Invoke(seg.Result);
- }
};
_stream.OnSegmentFinished += (seg) =>
{
if (IsSpeechMeaningful(seg.Result))
- {
OnWhisperSegmentFinished?.Invoke(seg.Result);
- }
};
- // If you want Whisper to respect VAD, enable in manager or set useVad (manager controls stream params).
whisper.useVad = useVadInStream;
_stream.StartStream();
_streamStarted = true;
- // prepare temp arrays roughly 100ms of audio
- EnsureTmpCapacity((ex.defaultfrequency / 10) * _nativeChannels);
+ // Unpause loopback if it's meant to play only while active
+ if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle())
+ _playChannel.setPaused(false);
+
+ // Prepare temp arrays roughly 100ms of audio
+ EnsureTmpCapacity((rate / 10) * _nativeChannels);
+
+ isRecordingActivated = true;
+ Debug.Log("[FMOD→Whisper] Stream activated (Whisper started; FMOD was already recording).");
+ }
+
+ ///
+ /// Stops the Whisper stream and clears the reference to it (no explicit Dispose call); pauses loopback when loopbackOnlyWhenActive is set. FMOD keeps recording.
+ ///
+ public void DeactivateRecording()
+ {
+ if (!isRecordingActivated && !_streamStarted)
+ return;
+
+ isRecordingActivated = false;
+
+ // Pause loopback if it should only be active during recording
+ if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle())
+ _playChannel.setPaused(true);
+
+ // Tear down Whisper stream
+ if (_streamStarted)
+ {
+ try { _stream.StopStream(); } catch { /* ignore */ }
+ _streamStarted = false;
+ }
+ _stream = null;
+
+ Debug.Log("[FMOD→Whisper] Stream deactivated (Whisper stopped; FMOD still recording).");
}
private void Update()
{
- if (!isRecordingActivated) return;
+ // Always tick FMOD
if (_core.handle != IntPtr.Zero) _core.update();
- if (!_streamStarted || !_recSound.hasHandle()) return;
+ if (!_recSound.hasHandle()) return;
- // How many samples recorded since last frame?
+ // Compute how many samples recorded since last frame.
uint recPos;
_core.getRecordPosition(recordDriverId, out recPos);
@@ -152,23 +252,39 @@ public class FMODWhisperBridge : MonoBehaviour
? (recPos - _lastRecordPos)
: (recPos + _soundPcmLength - _lastRecordPos);
- if (deltaSamples == 0) return;
+ if (deltaSamples == 0)
+ {
+ // Even if 0, keep last pos
+ _lastRecordPos = recPos;
+ return;
+ }
- // We’ll read that region (16-bit) and convert to float[] [-1..1].
- // Calculate byte range to lock in sound buffer
- uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 16-bit = 2 bytes
+ // If not active, we *still* advance the ring (so we don't backlog data),
+ // but we *don't* push chunks to Whisper.
+ bool shouldFeed = isRecordingActivated && _streamStarted && _stream != null;
+
+ // Calculate byte range to lock (16-bit)
+ uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 2 bytes per sample
uint startBytes = _lastRecordPos * (uint)_nativeChannels * 2;
IntPtr p1, p2;
uint len1, len2;
- // Lock can wrap — FMOD splits into p1/p2.
- _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
+ var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
+ if (r != RESULT.OK)
+ {
+ // If lock fails, still advance last position to avoid spin
+ _lastRecordPos = recPos;
+ return;
+ }
try
{
- // Convert both parts to float and push to Whisper
- if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
- if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
+ if (shouldFeed)
+ {
+ if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
+ if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
+ }
+ // else: just discard; we’re only keeping the ring fresh.
}
finally
{
@@ -178,19 +294,11 @@ public class FMODWhisperBridge : MonoBehaviour
_lastRecordPos = recPos;
}
- public void ActivateRecording()
- {
- isRecordingActivated = true;
- }
-
- public void DeactivateRecording()
- {
- isRecordingActivated = false;
- }
-
private bool IsSpeechMeaningful(string userText)
{
- return !string.IsNullOrEmpty(userText) && !userText.Contains("BLANK_AUDIO") && !userText.Trim().Equals("[ Silence ]");
+ return !string.IsNullOrEmpty(userText)
+ && !userText.Contains("BLANK_AUDIO")
+ && !userText.Trim().Equals("[ Silence ]");
}
private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
@@ -198,26 +306,20 @@ public class FMODWhisperBridge : MonoBehaviour
int samples = (int)(byteLen / 2); // 2 bytes per sample
EnsureTmpCapacity(samples);
- // Marshal the 16-bit PCM into managed space
- // We pin a short[] overlay to avoid copying twice
- int shorts = samples;
- int byteCount = (int)byteLen;
+ EnsureShortOverlay(samples, out short[] sBuf);
+ Marshal.Copy(src, sBuf, 0, samples);
- // Use Marshal.Copy into a short[] then convert to float[-1..1]
- // (You can also unsafe copy for speed if needed.)
- EnsureShortOverlay(shorts, out short[] sBuf);
- Marshal.Copy(src, sBuf, 0, shorts);
-
- for (int i = 0; i < shorts; i++)
+ // Convert 16-bit PCM to float [-1..1]; no downmix is applied.
+ for (int i = 0; i < samples; i++)
{
- // 32768f avoids clipping at -32768
_floatTmp[i] = Mathf.Clamp(sBuf[i] / 32768f, -1f, 1f);
}
- // Build a chunk for WhisperStream; with VAD off, IsVoiceDetected=true is fine.
+ // TODO (optional): downmix to mono and/or run a light noise gate before feeding.
+ // For now all samples are fed as-is, with Channels = _nativeChannels:
var chunk = new AudioChunk
{
- Data = _floatTmp.AsSpan(0, shorts).ToArray(),
+ Data = _floatTmp.AsSpan(0, samples).ToArray(),
Frequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate,
Channels = _nativeChannels,
IsVoiceDetected = true
@@ -226,7 +328,6 @@ public class FMODWhisperBridge : MonoBehaviour
_stream.AddToStream(chunk);
}
- private short[] _shortOverlay;
private void EnsureShortOverlay(int samples, out short[] buf)
{
if (_shortOverlay == null || _shortOverlay.Length < samples)
@@ -242,13 +343,20 @@ public class FMODWhisperBridge : MonoBehaviour
private void OnDisable()
{
- if (_streamStarted)
- {
- _stream.StopStream();
- _streamStarted = false;
- }
+ // Stop Whisper (if active)
+ DeactivateRecording();
- if (_playChannel.hasHandle()) { _playChannel.stop(); _playChannel.clearHandle(); }
- if (_recSound.hasHandle()) { _core.recordStop(recordDriverId); _recSound.release(); _recSound.clearHandle(); }
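+ // Stop and release FMOD resources. NOTE(review): OnDisable also fires when the component is merely disabled, while the FMOD setup lives in Start(), which Unity runs once per instance — re-enabling will not restart capture; TODO confirm whether OnDestroy is the better place.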
+ if (_playChannel.hasHandle())
+ {
+ try { _playChannel.stop(); } catch { /* ignore */ }
+ _playChannel.clearHandle();
+ }
+ if (_recSound.hasHandle())
+ {
+ try { _core.recordStop(recordDriverId); } catch { /* ignore */ }
+ try { _recSound.release(); } catch { /* ignore */ }
+ _recSound.clearHandle();
+ }
}
}
diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/ReleasableButton.cs b/Assets/_PROJECT/Scripts/ModeGeneration/ReleasableButton.cs
index 832ee6ac..678eb5d9 100644
--- a/Assets/_PROJECT/Scripts/ModeGeneration/ReleasableButton.cs
+++ b/Assets/_PROJECT/Scripts/ModeGeneration/ReleasableButton.cs
@@ -5,6 +5,8 @@ public class ReleasableButton : MonoBehaviour
{
public delegate void OnButtonPressedDelegate();
public event OnButtonPressedDelegate OnButtonPressed;
+ public delegate void OnButtonReleasedDelegate();
+ public event OnButtonReleasedDelegate OnButtonReleased;
public Transform movableParts;
public float moveDuration = 0.25f;
@@ -37,7 +39,6 @@ public class ReleasableButton : MonoBehaviour
{
if (!isButtonDown && collider.gameObject.tag.EndsWith("Hand"))
{
- Debug.Log("collided with: " + collider.gameObject.name);
Activate();
OnButtonPressed?.Invoke();
}
@@ -47,8 +48,8 @@ public class ReleasableButton : MonoBehaviour
{
if (isButtonDown && collider.gameObject.tag.EndsWith("Hand"))
{
- Debug.Log("collider exited: " + collider.gameObject.name);
Deactivate();
+ OnButtonReleased?.Invoke();
}
}
diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs b/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs
index 6c647a3a..8e06528f 100644
--- a/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs
+++ b/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs
@@ -18,6 +18,7 @@ public class RadioTransmitter : XRGrabInteractable
void Start()
{
radioButton.OnButtonPressed += OnRadioButtonPressed;
+ radioButton.OnButtonReleased += OnRadioButtonReleased;
}
// Update is called once per frame
@@ -38,12 +39,20 @@ public class RadioTransmitter : XRGrabInteractable
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
AudioManager.Instance.PlayAttachedInstance(FMODEvents.Instance.RadioButton, gameObject);
fmodWhisperBridge.ActivateRecording();
- // TODO: deactivate when button is released
+ }
+
+ private void OnRadioButtonReleased()
+ {
+ fmodWhisperBridge.OnWhisperSegmentUpdated -= OnPlayerSpeechUpdated;
+ fmodWhisperBridge.OnWhisperSegmentFinished -= OnPlayerSpeechFinished;
+ AudioManager.Instance.PlayAttachedInstance(FMODEvents.Instance.RadioButton, gameObject);
+ fmodWhisperBridge.DeactivateRecording();
}
private void OnPlayerSpeechUpdated(string text)
{
computerScreen.text = text;
+ OnPlayerFinishedSpeaking?.Invoke();
}
private void OnPlayerSpeechFinished(string playerText)