From 6e9e5cb4d4ba2d205f6d9e4f0b3a5f9f463b20e8 Mon Sep 17 00:00:00 2001 From: henrisel Date: Thu, 12 Mar 2026 17:16:34 +0200 Subject: [PATCH] releasing radio button starts feeding silence into Whisper stream --- 3d-generation-pipeline/README.md | 3 - .../_PROJECT/Fonts/Handlee-Regular SDF.asset | 4 +- .../_PROJECT/Fonts/Quantico-Regular SDF.asset | 4 +- .../ModeGeneration/FMODWhisperBridge.cs | 59 +++++++++++++------ .../ShapeDetection/RadioTransmitter.cs | 5 +- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/3d-generation-pipeline/README.md b/3d-generation-pipeline/README.md index 8bf0d30d..5d0a4224 100644 --- a/3d-generation-pipeline/README.md +++ b/3d-generation-pipeline/README.md @@ -1,6 +1,5 @@ ### TODO * korralik teleportation minigame'idesse, kasutades TeleportProviderit, nagu button -* quest marker peaks jätma nähtavad noolekesed enda liikumise teele * teha build kus archery range'is spawnitud objektid * FMOD ChannelControl errorid * disable tutorial!!! @@ -17,12 +16,10 @@ * peab mängijale kuidagi selgitama, kuidas scale'ida prinditud objekte * prinditud objekti scale'imisele min ja max size limiit * soft particle'id, et ei clipiks - * kui lased lahti raadio nupu, siis inputti enam juurde ei lasta * uks võiks käia ainult väljapoole * seest tühi shape config liiga raske, sest printer unreliable? * 5 configit piisavalt * archery range: - * kui lähed mikrofoni rangeist välja, siis inputti enam juurde ei lasta * võiks jääda kordama viimast instruktsiooni, kui mängija ei progressi edasi * kui archery range läbi, võiks npc öelda "good job, try again..." * UFO möödalendamise animation/cutscene alguses kui ütleb "delta attacked by ufos" diff --git a/Assets/_PROJECT/Fonts/Handlee-Regular SDF.asset b/Assets/_PROJECT/Fonts/Handlee-Regular SDF.asset index 3adee157..3959ddfc 100644 --- a/Assets/_PROJECT/Fonts/Handlee-Regular SDF.asset +++ b/Assets/_PROJECT/Fonts/Handlee-Regular SDF.asset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b44c887685103ad63c4304b34eb596a32268c15e0fc1834f225140f88cf3d3c -size 2718509 +oid sha256:6636dab1549af7c947be5182e96421aa56e053dd323e8cd8e1426f5eb5c2e637 +size 2719569 diff --git a/Assets/_PROJECT/Fonts/Quantico-Regular SDF.asset b/Assets/_PROJECT/Fonts/Quantico-Regular SDF.asset index 67c42292..c52cd5a8 100644 --- a/Assets/_PROJECT/Fonts/Quantico-Regular SDF.asset +++ b/Assets/_PROJECT/Fonts/Quantico-Regular SDF.asset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e7b93b54901e7d731e6e381bae63b627626f44e22fbead4f41ce56cedaf44603 -size 2120747 +oid sha256:7e0d6602dbeea3cf40398d760116f65cc8d46425caf060424729658ab8dbe82c +size 2142997 diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs index 19b58f4f..50f6f0bc 100644 --- a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs +++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs @@ -179,6 +179,12 @@ public class FMODWhisperBridge : MonoBehaviour int rate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate; + if (_stream != null) + { + try { _stream.StopStream(); } catch { } + _streamStarted = false; + } + try { _stream = await whisper.CreateStream(rate, _nativeChannels); @@ -247,16 +253,6 @@ public class FMODWhisperBridge : MonoBehaviour // Pause loopback if it should only be active during recording if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle()) _playChannel.setPaused(true); - - // Tear down Whisper stream - if (_streamStarted) - { - try { _stream.StopStream(); } catch { /* ignore */ } - _streamStarted = false; - } - _stream = null; - - Debug.Log("[FMOD→Whisper] Stream deactivated (Whisper stopped; FMOD still recording)."); } /// @@ -303,10 +299,6 @@ public class FMODWhisperBridge : MonoBehaviour return; } - // If not active, we *still* advance the ring (so we don't backlog data), - // but we *don't* push chunks to Whisper. - bool shouldFeed = isRecordingActivated && _streamStarted && _stream != null; - // Calculate byte range to lock (16-bit) uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 2 bytes per sample uint startBytes = _lastRecordPos * (uint)_nativeChannels * 2; @@ -323,7 +315,6 @@ public class FMODWhisperBridge : MonoBehaviour try { - float rmsAccumulator = 0f; int rmsSampleCount = 0; @@ -345,13 +336,24 @@ public class FMODWhisperBridge : MonoBehaviour currentVolumeRms = Mathf.Lerp(currentVolumeRms, rms, 1f - Mathf.Pow(1f - volumeSmoothing, Time.deltaTime * 60f)); } - // 2) Feed audio to Whisper ONLY when active - if (shouldFeed && !_skipOneFeedFrame) + // 2) Feed audio to Whisper + if (_streamStarted && _stream != null) { - if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1); - if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2); + if (isRecordingActivated && !_skipOneFeedFrame) + { + // Feed real mic + if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1); + if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2); + } + else + { + // Feed same-size silence instead + if (len1 > 0) FeedSilentChunk(len1); + if (len2 > 0) FeedSilentChunk(len2); + } } + // If skipping, we just discard this frame to ensure no stale data leaks. } finally @@ -370,6 +372,25 @@ public class FMODWhisperBridge : MonoBehaviour return input.Replace("[silence]", "").Replace("[ Silence ]", "").Replace("BLANK_AUDIO", "").Replace("[", "").Replace("]", "").Trim(); } + private void FeedSilentChunk(uint byteLen) + { + int samples = (int)(byteLen / 2); + EnsureTmpCapacity(samples); + + // fill existing _floatTmp with zeros (efficient) + Array.Clear(_floatTmp, 0, samples); + + var chunk = new AudioChunk + { + Data = _floatTmp.AsSpan(0, samples).ToArray(), + Frequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate, + Channels = _nativeChannels, + IsVoiceDetected = false + }; + + _stream.AddToStream(chunk); + } + private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen) { int samples = (int)(byteLen / 2); // 2 bytes per sample diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs b/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs index 00538023..33cd389c 100644 --- a/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs +++ b/Assets/_PROJECT/Scripts/ModeGeneration/ShapeDetection/RadioTransmitter.cs @@ -76,13 +76,13 @@ public class RadioTransmitter : XRGrabInteractable private void OnRadioButtonReleased() { + fmodWhisperBridge.DeactivateRecording(); AudioManager.Instance.PlayAttachedInstance(FMODEvents.Instance.RadioButton, gameObject); } private void OnPlayerSpeechUpdated(string text) { computerScreen.text = text; - //OnPlayerFinishedSpeaking?.Invoke(); } private void OnPlayerSpeechFinished(string playerText) @@ -93,8 +93,7 @@ public class RadioTransmitter : XRGrabInteractable fmodWhisperBridge.OnWhisperSegmentUpdated -= OnPlayerSpeechUpdated; fmodWhisperBridge.OnWhisperSegmentFinished -= OnPlayerSpeechFinished; - fmodWhisperBridge.DeactivateRecording(); - + particleEmission.enabled = false; } }