From cb4d4036e7fc63fe366a68c41f149f74f0b383a4 Mon Sep 17 00:00:00 2001 From: henrisel Date: Wed, 14 Jan 2026 18:02:41 +0200 Subject: [PATCH] microphone recording working with FMOD!!! --- Assets/DefaultPrefabObjects.asset | 4 +- .../ModelGeneration/MicrophoneStand 1.prefab | 36 +-- .../ModelGenerationManager.prefab | 76 +----- .../_PROJECT/Scenes/DeltaBuilding_base.unity | 4 +- .../ArcheryRange/MicrophoneStand.cs | 44 +--- .../Scripts/ModeGeneration/FMODMicLoopback.cs | 122 +++++++++ .../ModeGeneration/FMODMicLoopback.cs.meta | 11 + .../ModeGeneration/FMODWhisperBridge.cs | 238 ++++++++++++++++++ .../ModeGeneration/FMODWhisperBridge.cs.meta | 11 + .../{MicrophoneTesting.cs => MicLoopback.cs} | 2 +- ...oneTesting.cs.meta => MicLoopback.cs.meta} | 0 ProjectSettings/AudioManager.asset | 2 +- 12 files changed, 410 insertions(+), 140 deletions(-) create mode 100644 Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs create mode 100644 Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs.meta create mode 100644 Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs create mode 100644 Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs.meta rename Assets/_PROJECT/Scripts/ModeGeneration/{MicrophoneTesting.cs => MicLoopback.cs} (98%) rename Assets/_PROJECT/Scripts/ModeGeneration/{MicrophoneTesting.cs.meta => MicLoopback.cs.meta} (100%) diff --git a/Assets/DefaultPrefabObjects.asset b/Assets/DefaultPrefabObjects.asset index 5f81ef69..3ad2f828 100644 --- a/Assets/DefaultPrefabObjects.asset +++ b/Assets/DefaultPrefabObjects.asset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b824dfcc0800631ab60fd7291056035798563881fb4f7e6e620e65ef9342109c -size 2411 +oid sha256:4fb1f320c2d0ae3f8352cbbdb2b998905794490b709402e89d63cd78f7ed26dc +size 2328 diff --git a/Assets/_PROJECT/Prefabs/ModelGeneration/MicrophoneStand 1.prefab b/Assets/_PROJECT/Prefabs/ModelGeneration/MicrophoneStand 1.prefab index b84f80db..75470cb4 100644 --- a/Assets/_PROJECT/Prefabs/ModelGeneration/MicrophoneStand 1.prefab +++ b/Assets/_PROJECT/Prefabs/ModelGeneration/MicrophoneStand 1.prefab @@ -13,7 +13,6 @@ GameObject: - component: {fileID: 7203143526414218131} - component: {fileID: 2568780590385977406} - component: {fileID: 1523408157143897080} - - component: {fileID: 2918895218826581840} m_Layer: 0 m_Name: MicrophoneStand 1 m_TagString: Untagged @@ -35,7 +34,7 @@ Transform: m_Children: - {fileID: 1850758373829337931} m_Father: {fileID: 0} - m_RootOrder: 0 + m_RootOrder: -1 m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} --- !u!33 &4763701867718457502 MeshFilter: @@ -120,41 +119,10 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: 357101b2cface4943b04dfd25d4944e3, type: 3} m_Name: m_EditorClassIdentifier: - whisper: {fileID: 0} - microphoneRecord: {fileID: 2918895218826581840} - microphoneDevice: outputText: {fileID: 0} microphoneOffStatus: {fileID: 8977839985090371394} microphoneOnStatus: {fileID: 6537061652288108950} ---- !u!114 &2918895218826581840 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_CorrespondingSourceObject: {fileID: 0} - m_PrefabInstance: {fileID: 0} - m_PrefabAsset: {fileID: 0} - m_GameObject: {fileID: 4291579148315658230} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 3bc03a4c19604ea394e364f8fc632928, type: 3} - m_Name: - m_EditorClassIdentifier: - maxLengthSec: 60 - loop: 0 - frequency: 16000 - chunksLengthSec: 0.5 - echo: 1 - useVad: 1 - vadUpdateRateSec: 0.1 - vadContextSec: 30 - vadLastSec: 1.25 - vadThd: 1 - vadFreqThd: 100 - vadIndicatorImage: {fileID: 0} - vadStop: 0 - dropVadPart: 1 - vadStopTime: 3 - microphoneDropdown: {fileID: 0} - microphoneDefaultLabel: Default microphone + fmodWhisperBridge: {fileID: 0} --- !u!1 &5819798980962142350 GameObject: m_ObjectHideFlags: 0 diff --git a/Assets/_PROJECT/Prefabs/ModelGeneration/ModelGenerationManager.prefab b/Assets/_PROJECT/Prefabs/ModelGeneration/ModelGenerationManager.prefab index fb371030..5c0f4c2a 100644 --- a/Assets/_PROJECT/Prefabs/ModelGeneration/ModelGenerationManager.prefab +++ b/Assets/_PROJECT/Prefabs/ModelGeneration/ModelGenerationManager.prefab @@ -13,8 +13,7 @@ GameObject: - component: {fileID: 3363453355800186393} - component: {fileID: 2166102850012183631} - component: {fileID: 8356602476881383464} - - component: {fileID: 4280129837482332964} - - component: {fileID: 8105025160921831064} + - component: {fileID: 5682436338090300270} m_Layer: 0 m_Name: ModelGenerationManager m_TagString: Untagged @@ -64,6 +63,8 @@ MonoBehaviour: INVOKEAI_BASE_URL: http://ltat-cgvr9.domenis.ut.ee:9090 DEFAULT_QUEUE_ID: default MODEL_KEY: 81d45960-08a0-4b8c-a48b-e7d73b21bfe2 + promptSuffix: ', single object, front and side fully visible, realistic style, + plain neutral background, clear details, soft studio lighting, true-to-scale' --- !u!114 &2166102850012183631 MonoBehaviour: m_ObjectHideFlags: 0 @@ -110,7 +111,7 @@ MonoBehaviour: useVad: 1 tokensTimestamps: 0 audioCtx: 0 ---- !u!114 &4280129837482332964 +--- !u!114 &5682436338090300270 MonoBehaviour: m_ObjectHideFlags: 0 m_CorrespondingSourceObject: {fileID: 0} @@ -119,65 +120,14 @@ MonoBehaviour: m_GameObject: {fileID: 154411548685861447} m_Enabled: 1 m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: a2836e36774ca1c4bbbee976e17b649c, type: 3} + m_Script: {fileID: 11500000, guid: a264cb5321d9f6741a2625b8c63c34a7, type: 3} m_Name: m_EditorClassIdentifier: - _componentIndexCache: 0 - _addedNetworkObject: {fileID: 8105025160921831064} - _networkObjectCache: {fileID: 8105025160921831064} - _synchronizeParent: 0 - _packing: - Position: 1 - Rotation: 1 - Scale: 0 - _interpolation: 2 - _extrapolation: 2 - _enableTeleport: 0 - _teleportThreshold: 1 - _clientAuthoritative: 1 - _sendToOwner: 1 - _synchronizePosition: 1 - _positionSnapping: - X: 0 - Y: 0 - Z: 0 - _synchronizeRotation: 1 - _rotationSnapping: - X: 0 - Y: 0 - Z: 0 - _synchronizeScale: 1 - _scaleSnapping: - X: 0 - Y: 0 - Z: 0 ---- !u!114 &8105025160921831064 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_CorrespondingSourceObject: {fileID: 0} - m_PrefabInstance: {fileID: 0} - m_PrefabAsset: {fileID: 0} - m_GameObject: {fileID: 154411548685861447} - m_Enabled: 1 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 26b716c41e9b56b4baafaf13a523ba2e, type: 3} - m_Name: - m_EditorClassIdentifier: - k__BackingField: 0 - k__BackingField: 0 - k__BackingField: {fileID: 0} - _networkBehaviours: - - {fileID: 4280129837482332964} - k__BackingField: {fileID: 0} - k__BackingField: [] - _isNetworked: 1 - _isGlobal: 0 - _initializeOrder: 0 - _defaultDespawnType: 0 - NetworkObserver: {fileID: 0} - k__BackingField: 0 - k__BackingField: 0 - _scenePathHash: 0 - k__BackingField: 0 - k__BackingField: 17646158845367820466 - _sceneNetworkObjects: [] + whisper: {fileID: 8356602476881383464} + useVadInStream: 0 + recordDriverId: 0 + desiredSampleRate: 48000 + channels: 1 + bufferLengthSec: 5 + playLoopback: 0 + loopbackVolume: 1 diff --git a/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity b/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity index 6aef7a14..f1407ca9 100644 --- a/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity +++ b/Assets/_PROJECT/Scenes/DeltaBuilding_base.unity @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5cb70d3f641c5491e6d2b2c98ad2399fa7650033ed3a9aca15944e96377e6d72 -size 68524704 +oid sha256:7ab55f63f6e08277d728144e389597009723cfe416181f05f7388bb56f032ee0 +size 68553549 diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/ArcheryRange/MicrophoneStand.cs b/Assets/_PROJECT/Scripts/ModeGeneration/ArcheryRange/MicrophoneStand.cs index bc868408..848831f2 100644 --- a/Assets/_PROJECT/Scripts/ModeGeneration/ArcheryRange/MicrophoneStand.cs +++ b/Assets/_PROJECT/Scripts/ModeGeneration/ArcheryRange/MicrophoneStand.cs @@ -1,36 +1,19 @@ using TMPro; using UnityEngine; -using Whisper; -using Whisper.Utils; public class MicrophoneStand : MonoBehaviour { - public WhisperManager whisper; - public MicrophoneRecord microphoneRecord; - public string microphoneDevice; public TextMeshProUGUI outputText; - private WhisperStream stream; - - [SerializeField] - private string textOutput; - public GameObject microphoneOffStatus; public GameObject microphoneOnStatus; + public FMODWhisperBridge fmodWhisperBridge; + // Start is called before the first frame update - async void Start() + void Start() { - Debug.Log("Mic devices: " + string.Join(", ", Microphone.devices)); - Debug.Log("Using mic device: " + microphoneDevice); - microphoneRecord.SelectedMicDevice = microphoneDevice; - - // This causes about 1 sec long freeze, has to be done once at the start of the game - microphoneRecord.StartRecord(); - - stream = await whisper.CreateStream(microphoneRecord); - stream.OnResultUpdated += OnWhisperResult; - //stream.StartStream(); + fmodWhisperBridge.OnWhisperResultProcessed += OnWhisperResult; microphoneOffStatus.SetActive(true); microphoneOnStatus.SetActive(false); @@ -51,10 +34,7 @@ public class MicrophoneStand : MonoBehaviour { microphoneOffStatus.SetActive(false); microphoneOnStatus.SetActive(true); - - stream.StartStream(); - //microphoneRecord.StartRecord(); - Debug.Log("Whisper stream started."); + fmodWhisperBridge.ActivateRecording(); } } @@ -65,27 +45,17 @@ public class MicrophoneStand : MonoBehaviour { microphoneOffStatus.SetActive(true); microphoneOnStatus.SetActive(false); - - stream.StopStream(); - //microphoneRecord.StopRecord(); - textOutput = outputText.text; + fmodWhisperBridge.DeactivateRecording(); } } private void OnWhisperResult(string result) { - Debug.Log("Whisper result processed: " + result); outputText.text = result; } - private void OnDestroy() - { - microphoneRecord.StopRecord(); - Destroy(gameObject); - } - public string GetTextOutput() { - return textOutput; + return outputText.text; } } diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs b/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs new file mode 100644 index 00000000..9f2c0596 --- /dev/null +++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs @@ -0,0 +1,122 @@ + +using FMOD; +using FMODUnity; +using System.Runtime.InteropServices; +using UnityEngine; + +public class FMODMicLoopback : MonoBehaviour +{ + private uint LATENCY_MS = 50; + private uint DRIFT_MS = 1; + + private uint samplesRecorded, samplesPlayed = 0; + private int nativeRate, nativeChannels = 0; + private uint recSoundLength = 0; + uint lastPlayPos = 0; + uint lastRecordPos = 0; + private uint driftThreshold = 0; + private uint desiredLatency = 0; + private uint adjustLatency = 0; + private int actualLatency = 0; + uint minRecordDelta = 0xFFFFFFFF; + + private FMOD.CREATESOUNDEXINFO exInfo = new FMOD.CREATESOUNDEXINFO(); + + private FMOD.Sound recSound; + private FMOD.Channel channel; + + // Start is called before the first frame update + void Start() + { + /* + Determine latency in samples. + */ + FMODUnity.RuntimeManager.CoreSystem.getRecordDriverInfo(0, out _, 0, out _, out nativeRate, out _, out nativeChannels, out _); + + driftThreshold = (uint)(nativeRate * DRIFT_MS) / 1000; + desiredLatency = (uint)(nativeRate * LATENCY_MS) / 1000; + adjustLatency = desiredLatency; + actualLatency = (int)desiredLatency; + + /* + Create user sound to record into, then start recording. + */ + exInfo.cbsize = Marshal.SizeOf(typeof(FMOD.CREATESOUNDEXINFO)); + exInfo.numchannels = nativeChannels; + exInfo.format = FMOD.SOUND_FORMAT.PCM16; + exInfo.defaultfrequency = nativeRate; + exInfo.length = (uint)(nativeRate * sizeof(short) * nativeChannels); + + FMODUnity.RuntimeManager.CoreSystem.createSound("", FMOD.MODE.LOOP_NORMAL | FMOD.MODE.OPENUSER, ref exInfo, out recSound); + + FMODUnity.RuntimeManager.CoreSystem.recordStart(0, recSound, true); + + recSound.getLength(out recSoundLength, FMOD.TIMEUNIT.PCM); + } + + // Update is called once per frame + void Update() + { + /* + Determine how much has been recorded since we last checked + */ + uint recordPos = 0; + FMODUnity.RuntimeManager.CoreSystem.getRecordPosition(0, out recordPos); + + uint recordDelta = (recordPos >= lastRecordPos) ? (recordPos - lastRecordPos) : (recordPos + recSoundLength - lastRecordPos); + lastRecordPos = recordPos; + samplesRecorded += recordDelta; + + if (recordDelta != 0 && (recordDelta < minRecordDelta)) + { + minRecordDelta = recordDelta; // Smallest driver granularity seen so far + adjustLatency = (recordDelta <= desiredLatency) ? desiredLatency : recordDelta; // Adjust our latency if driver granularity is high + } + + /* + Delay playback until our desired latency is reached. + */ + if (!channel.hasHandle() && samplesRecorded >= adjustLatency) + { + FMODUnity.RuntimeManager.CoreSystem.getMasterChannelGroup(out FMOD.ChannelGroup mCG); + FMODUnity.RuntimeManager.CoreSystem.playSound(recSound, mCG, false, out channel); + } + + /* + Determine how much has been played since we last checked. + */ + if (channel.hasHandle()) + { + uint playPos = 0; + channel.getPosition(out playPos, FMOD.TIMEUNIT.PCM); + + uint playDelta = (playPos >= lastPlayPos) ? (playPos - lastPlayPos) : (playPos + recSoundLength - lastPlayPos); + lastPlayPos = playPos; + samplesPlayed += playDelta; + + // Compensate for any drift. + int latency = (int)(samplesRecorded - samplesPlayed); + actualLatency = (int)((0.97f * actualLatency) + (0.03f * latency)); + + int playbackRate = nativeRate; + if (actualLatency < (int)(adjustLatency - driftThreshold)) + { + // Playback position is catching up to the record position, slow playback down by 2% + playbackRate = nativeRate - (nativeRate / 50); + } + + else if (actualLatency > (int)(adjustLatency + driftThreshold)) + { + // Playback is falling behind the record position, speed playback up by 2% + playbackRate = nativeRate + (nativeRate / 50); + } + + channel.setFrequency((float)playbackRate); + } + } + + private void OnDestroy() + { + recSound.release(); + } +} diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs.meta b/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs.meta new file mode 100644 index 00000000..2156c04a --- /dev/null +++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 12623af3ff8b7df4abbaacd753e43555 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs new file mode 100644 index 00000000..bc3fe3a1 --- /dev/null +++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs @@ -0,0 +1,238 @@ + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using FMOD; +using FMODUnity; +using Whisper; // WhisperManager, WhisperStream, WhisperResult +using Whisper.Utils; // AudioChunk + +/// +/// Capture microphone with FMOD and feed chunks to WhisperStream (no Unity Microphone). +/// Also (optionally) plays the recorded sound back via FMOD loopback. +/// + +public class FMODWhisperBridge : MonoBehaviour +{ + [Header("Whisper")] + [SerializeField] private WhisperManager whisper; // assign in Inspector + [SerializeField] private bool useVadInStream = false; // let WhisperStream do VAD or not + + [Header("FMOD capture")] + [Tooltip("Recording device index (0 = default)")] + public int recordDriverId = 0; + [Tooltip("Set 48000 on Quest; falls back to device rate automatically")] + public int desiredSampleRate = 48000; + [Tooltip("Mono recommended for Whisper")] + public int channels = 1; + [Range(1, 10)] public int bufferLengthSec = 5; + + [Header("Loopback (monitor your voice)")] + public bool playLoopback = true; + [Range(0f, 2f)] public float loopbackVolume = 1.0f; + + public delegate void OnWhisperResultProcessedDelegate(string result); + public event OnWhisperResultProcessedDelegate OnWhisperResultProcessed; + + // FMOD + private FMOD.System _core; + private Sound _recSound; + private Channel _playChannel; + private ChannelGroup _masterGroup; + private uint _soundPcmLength; // in samples + private int _nativeRate; + private int _nativeChannels; + + // ring-buffer tracking + private uint _lastRecordPos = 0; + + // Whisper + private WhisperStream _stream; + private bool _streamStarted; + + // temp conversion buffer + private float[] _floatTmp = new float[0]; + + private bool isRecordingActivated = false; + + private void Awake() + { + if (!whisper) whisper = FindObjectOfType(); + _core = RuntimeManager.CoreSystem; // FMOD core system + } + + private async void Start() + { + // Query device info to get native rate/channels. + // (FMOD: getRecordDriverInfo gives you system rate & speaker mode) + string name; + Guid guid; + SPEAKERMODE sm; + int smChannels; + DRIVER_STATE driverState; + // signature: getRecordDriverInfo(id, out name, nameLen, out guid, out systemrate, out speakermode, out speakermodechannels, out driverState) + _core.getRecordDriverInfo(recordDriverId, out name, 256, out guid, out _nativeRate, out sm, out smChannels, out driverState); + _nativeChannels = channels > 0 ? channels : smChannels; + UnityEngine.Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={_nativeRate} ch={_nativeChannels}"); + + // Build a user sound buffer that FMOD will fill (OPENUSER | LOOP_NORMAL). + CREATESOUNDEXINFO ex = new CREATESOUNDEXINFO + { + cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO)), + numchannels = _nativeChannels, + defaultfrequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate, + format = SOUND_FORMAT.PCM16, + length = (uint)(((_nativeRate > 0 ? _nativeRate : desiredSampleRate) * _nativeChannels) * sizeof(short)) // seconds=1 (we loop) + }; + + _core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound); + _recSound.getLength(out _soundPcmLength, TIMEUNIT.PCM); + + // Start FMOD recording into that sound (looping ring buffer). + _core.recordStart(recordDriverId, _recSound, true); + UnityEngine.Debug.Log("[FMOD→Whisper] Recording started."); + + // Optional loopback playback using FMOD (plays same sound ring buffer). + _core.getMasterChannelGroup(out _masterGroup); + if (playLoopback) + { + _core.playSound(_recSound, _masterGroup, false, out _playChannel); + _playChannel.setMode(MODE._2D); + _playChannel.setVolume(loopbackVolume); + UnityEngine.Debug.Log("[FMOD→Whisper] Loopback playback started."); + } + + // Create Whisper stream WITHOUT MicrophoneRecord, just from (freq, channels). + // We'll push AudioChunk manually. + // NOTE: WhisperStream’s sliding window is governed by manager’s stepSec/keepSec/lengthSec. + _stream = await whisper.CreateStream(ex.defaultfrequency, _nativeChannels); + _stream.OnResultUpdated += (txt) => + { + //OnWhisperResultProcessed?.Invoke(txt); + //UnityEngine.Debug.Log($"[Whisper] result updated: {txt}"); + }; + _stream.OnSegmentUpdated += (seg) => + { + OnWhisperResultProcessed?.Invoke(seg.Result); + //UnityEngine.Debug.Log($"[Whisper] Seg finished: {seg.Result}"); + }; + + // If you want Whisper to respect VAD, enable in manager or set useVad (manager controls stream params). + whisper.useVad = useVadInStream; + + _stream.StartStream(); + _streamStarted = true; + + // prepare temp arrays roughly 100ms of audio + EnsureTmpCapacity((ex.defaultfrequency / 10) * _nativeChannels); + } + + private void Update() + { + if (!isRecordingActivated) return; + if (_core.handle != IntPtr.Zero) _core.update(); + if (!_streamStarted || !_recSound.hasHandle()) return; + + // How many samples recorded since last frame? + uint recPos; + _core.getRecordPosition(recordDriverId, out recPos); + + uint deltaSamples = (recPos >= _lastRecordPos) + ? (recPos - _lastRecordPos) + : (recPos + _soundPcmLength - _lastRecordPos); + + if (deltaSamples == 0) return; + + // We’ll read that region (16-bit) and convert to float[] [-1..1]. + // Calculate byte range to lock in sound buffer + uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 16-bit = 2 bytes + uint startBytes = _lastRecordPos * (uint)_nativeChannels * 2; + + IntPtr p1, p2; + uint len1, len2; + // Lock can wrap — FMOD splits into p1/p2. + _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2); + + try + { + // Convert both parts to float and push to Whisper + if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1); + if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2); + } + finally + { + _recSound.unlock(p1, p2, len1, len2); + } + + _lastRecordPos = recPos; + } + + public void ActivateRecording() + { + isRecordingActivated = true; + } + + public void DeactivateRecording() + { + isRecordingActivated = false; + } + + private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen) + { + int samples = (int)(byteLen / 2); // 2 bytes per sample + EnsureTmpCapacity(samples); + + // Marshal the 16-bit PCM into managed space + // We pin a short[] overlay to avoid copying twice + int shorts = samples; + int byteCount = (int)byteLen; + + // Use Marshal.Copy into a short[] then convert to float[-1..1] + // (You can also unsafe copy for speed if needed.) + EnsureShortOverlay(shorts, out short[] sBuf); + Marshal.Copy(src, sBuf, 0, shorts); + + for (int i = 0; i < shorts; i++) + { + // 32768f avoids clipping at -32768 + _floatTmp[i] = Mathf.Clamp(sBuf[i] / 32768f, -1f, 1f); + } + + // Build a chunk for WhisperStream; with VAD off, IsVoiceDetected=true is fine. + var chunk = new AudioChunk + { + Data = _floatTmp.AsSpan(0, shorts).ToArray(), + Frequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate, + Channels = _nativeChannels, + IsVoiceDetected = true + }; + + _stream.AddToStream(chunk); + } + + private short[] _shortOverlay; + private void EnsureShortOverlay(int samples, out short[] buf) + { + if (_shortOverlay == null || _shortOverlay.Length < samples) + _shortOverlay = new short[Mathf.NextPowerOfTwo(samples)]; + buf = _shortOverlay; + } + + private void EnsureTmpCapacity(int samples) + { + if (_floatTmp == null || _floatTmp.Length < samples) + _floatTmp = new float[Mathf.NextPowerOfTwo(samples)]; + } + + private void OnDisable() + { + if (_streamStarted) + { + _stream.StopStream(); + _streamStarted = false; + } + + if (_playChannel.hasHandle()) { _playChannel.stop(); _playChannel.clearHandle(); } + if (_recSound.hasHandle()) { _core.recordStop(recordDriverId); _recSound.release(); _recSound.clearHandle(); } + } +} diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs.meta b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs.meta new file mode 100644 index 00000000..9f01251c --- /dev/null +++ b/Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: a264cb5321d9f6741a2625b8c63c34a7 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/MicrophoneTesting.cs b/Assets/_PROJECT/Scripts/ModeGeneration/MicLoopback.cs similarity index 98% rename from Assets/_PROJECT/Scripts/ModeGeneration/MicrophoneTesting.cs rename to Assets/_PROJECT/Scripts/ModeGeneration/MicLoopback.cs index 37eb8cc8..e943495c 100644 --- a/Assets/_PROJECT/Scripts/ModeGeneration/MicrophoneTesting.cs +++ b/Assets/_PROJECT/Scripts/ModeGeneration/MicLoopback.cs @@ -2,7 +2,7 @@ using UnityEngine; [RequireComponent(typeof(AudioSource))] -public class MicrophoneTesting : MonoBehaviour +public class MicLoopback : MonoBehaviour { [Header("Mic settings")] [Tooltip("Leave empty for default device")] diff --git a/Assets/_PROJECT/Scripts/ModeGeneration/MicrophoneTesting.cs.meta b/Assets/_PROJECT/Scripts/ModeGeneration/MicLoopback.cs.meta similarity index 100% rename from Assets/_PROJECT/Scripts/ModeGeneration/MicrophoneTesting.cs.meta rename to Assets/_PROJECT/Scripts/ModeGeneration/MicLoopback.cs.meta diff --git a/ProjectSettings/AudioManager.asset b/ProjectSettings/AudioManager.asset index 061963e3..cf53a0f2 100644 --- a/ProjectSettings/AudioManager.asset +++ b/ProjectSettings/AudioManager.asset @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c360e8f38f20f70b2e82ee45d8bfaaa8f2e88decfc868628077ea60eea9fbd2f +oid sha256:0ff21b07816dd928d7f83a8149d7a2783d4ad7c3a34408840877ced6f9d051e7 size 463