forked from cgvr/DeltaVR
Compare commits
4 Commits
efd42d3907
...
8a3f10a9a2
| Author | SHA1 | Date | |
|---|---|---|---|
| 8a3f10a9a2 | |||
| dbd2cdca0d | |||
| 1a29f785b8 | |||
| c968e6bed4 |
@@ -1,6 +1,7 @@
|
||||
### TODO
|
||||
* klaviatuuriga võiks saada teleportida mängijat interactionite algustesse
|
||||
* quest marker peaks jätma nähtavad noolekesed enda liikumise teele
|
||||
* korralik teleportation minigame'idesse, kasutades TeleportProviderit, nagu button
|
||||
* igal pool, kus mängija peab rääkima, peaks olema:
|
||||
* helilained räägitava heli amplituudi põhjal
|
||||
* "loading" dünaamiline ikoon kui mängija ootab protsessimise järel
|
||||
|
||||
@@ -8,212 +8,8 @@ GameObject:
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 8461444523124913307}
|
||||
- component: {fileID: 3572526038880022669}
|
||||
- component: {fileID: 3766976745949113692}
|
||||
- component: {fileID: 1047001759896168042}
|
||||
- component: {fileID: 4197516043068701935}
|
||||
- component: {fileID: 8992144926730888769}
|
||||
- component: {fileID: 2113851326702507156}
|
||||
m_Layer: 0
|
||||
m_Name: TargetUFO
|
||||
m_TagString: Untagged
|
||||
m_Icon: {fileID: 0}
|
||||
m_NavMeshLayer: 0
|
||||
m_StaticEditorFlags: 0
|
||||
m_IsActive: 1
|
||||
--- !u!4 &8461444523124913307
|
||||
Transform:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
|
||||
m_LocalPosition: {x: 1.398, y: 1.1, z: 8.292}
|
||||
m_LocalScale: {x: 1, y: 1, z: 1}
|
||||
m_ConstrainProportionsScale: 0
|
||||
m_Children:
|
||||
- {fileID: 7933779445518253981}
|
||||
m_Father: {fileID: 0}
|
||||
m_RootOrder: -1
|
||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||
--- !u!114 &3572526038880022669
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 3b311379c72a5ae4b8936e3b7283dd7a, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
_componentIndexCache: 0
|
||||
_addedNetworkObject: {fileID: 1047001759896168042}
|
||||
_networkObjectCache: {fileID: 1047001759896168042}
|
||||
pointsText: {fileID: 1237163760934993282, guid: 105635d7165dacd47956f38546d4a2ea,
|
||||
type: 3}
|
||||
endPosition: {x: 0, y: 0, z: 0}
|
||||
forwardSpeed: 2
|
||||
--- !u!54 &3766976745949113692
|
||||
Rigidbody:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
serializedVersion: 4
|
||||
m_Mass: 1
|
||||
m_Drag: 0
|
||||
m_AngularDrag: 0
|
||||
m_CenterOfMass: {x: 0, y: 0, z: 0}
|
||||
m_InertiaTensor: {x: 1, y: 1, z: 1}
|
||||
m_InertiaRotation: {x: 0, y: 0, z: 0, w: 1}
|
||||
m_IncludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_ExcludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_ImplicitCom: 1
|
||||
m_ImplicitTensor: 1
|
||||
m_UseGravity: 0
|
||||
m_IsKinematic: 1
|
||||
m_Interpolate: 0
|
||||
m_Constraints: 126
|
||||
m_CollisionDetection: 0
|
||||
--- !u!114 &1047001759896168042
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: 26b716c41e9b56b4baafaf13a523ba2e, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
<IsNested>k__BackingField: 0
|
||||
<ComponentIndex>k__BackingField: 0
|
||||
<PredictedSpawn>k__BackingField: {fileID: 0}
|
||||
_networkBehaviours:
|
||||
- {fileID: 3572526038880022669}
|
||||
- {fileID: 4197516043068701935}
|
||||
- {fileID: 7294680161384397297}
|
||||
- {fileID: 3316891016740450456}
|
||||
- {fileID: 8062271296867124751}
|
||||
- {fileID: 8618473685913268443}
|
||||
<ParentNetworkObject>k__BackingField: {fileID: 0}
|
||||
<ChildNetworkObjects>k__BackingField: []
|
||||
_isNetworked: 1
|
||||
_isGlobal: 0
|
||||
_initializeOrder: 0
|
||||
_defaultDespawnType: 1
|
||||
NetworkObserver: {fileID: 0}
|
||||
<PrefabId>k__BackingField: 9
|
||||
<SpawnableCollectionId>k__BackingField: 0
|
||||
_scenePathHash: 0
|
||||
<SceneId>k__BackingField: 0
|
||||
<AssetPathHash>k__BackingField: 12713957617246967544
|
||||
_sceneNetworkObjects: []
|
||||
--- !u!114 &4197516043068701935
|
||||
MonoBehaviour:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_Enabled: 1
|
||||
m_EditorHideFlags: 0
|
||||
m_Script: {fileID: 11500000, guid: a2836e36774ca1c4bbbee976e17b649c, type: 3}
|
||||
m_Name:
|
||||
m_EditorClassIdentifier:
|
||||
_componentIndexCache: 1
|
||||
_addedNetworkObject: {fileID: 1047001759896168042}
|
||||
_networkObjectCache: {fileID: 1047001759896168042}
|
||||
_synchronizeParent: 0
|
||||
_packing:
|
||||
Position: 1
|
||||
Rotation: 1
|
||||
Scale: 0
|
||||
_interpolation: 2
|
||||
_extrapolation: 2
|
||||
_enableTeleport: 0
|
||||
_teleportThreshold: 1
|
||||
_clientAuthoritative: 1
|
||||
_sendToOwner: 1
|
||||
_synchronizePosition: 1
|
||||
_positionSnapping:
|
||||
X: 0
|
||||
Y: 0
|
||||
Z: 0
|
||||
_synchronizeRotation: 1
|
||||
_rotationSnapping:
|
||||
X: 0
|
||||
Y: 0
|
||||
Z: 0
|
||||
_synchronizeScale: 1
|
||||
_scaleSnapping:
|
||||
X: 0
|
||||
Y: 0
|
||||
Z: 0
|
||||
--- !u!136 &8992144926730888769
|
||||
CapsuleCollider:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_Material: {fileID: 0}
|
||||
m_IncludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_ExcludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_LayerOverridePriority: 0
|
||||
m_IsTrigger: 0
|
||||
m_ProvidesContacts: 0
|
||||
m_Enabled: 1
|
||||
serializedVersion: 2
|
||||
m_Radius: 1.07
|
||||
m_Height: 0.97
|
||||
m_Direction: 1
|
||||
m_Center: {x: 0.16, y: -0.51, z: 0}
|
||||
--- !u!65 &2113851326702507156
|
||||
BoxCollider:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
m_GameObject: {fileID: 93219228833127587}
|
||||
m_Material: {fileID: 0}
|
||||
m_IncludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_ExcludeLayers:
|
||||
serializedVersion: 2
|
||||
m_Bits: 0
|
||||
m_LayerOverridePriority: 0
|
||||
m_IsTrigger: 0
|
||||
m_ProvidesContacts: 0
|
||||
m_Enabled: 1
|
||||
serializedVersion: 3
|
||||
m_Size: {x: 4.5, y: 0.5, z: 3.77}
|
||||
m_Center: {x: 0.12, y: -0.24, z: 0.06}
|
||||
--- !u!1 &1503281482029840645
|
||||
GameObject:
|
||||
m_ObjectHideFlags: 0
|
||||
m_CorrespondingSourceObject: {fileID: 0}
|
||||
m_PrefabInstance: {fileID: 0}
|
||||
m_PrefabAsset: {fileID: 0}
|
||||
serializedVersion: 6
|
||||
m_Component:
|
||||
- component: {fileID: 7933779445518253981}
|
||||
- component: {fileID: 7294680161384397297}
|
||||
- component: {fileID: 3337587390139541606}
|
||||
- component: {fileID: 627565954663756710}
|
||||
m_Layer: 0
|
||||
m_Name: SportModel
|
||||
m_TagString: Untagged
|
||||
|
||||
Binary file not shown.
@@ -61,6 +61,7 @@ public class MicrophoneStand : MonoBehaviour
|
||||
/// <summary>
/// Receives a transcription update, shows it in <c>outputText</c>, and then
/// raises <c>OnPlayerFinishedSpeaking</c> when that event has subscribers.
/// </summary>
/// <param name="text">Transcribed text to display.</param>
private void OnPlayerSpeechUpdated(string text)
{
    outputText.text = text;

    // Snapshot the handler list once, then invoke only when it is non-empty.
    var finishedHandlers = OnPlayerFinishedSpeaking;
    if (finishedHandlers != null)
    {
        finishedHandlers.Invoke();
    }
}
|
||||
|
||||
private void OnPlayerSpeechFinished(string playerText)
|
||||
|
||||
@@ -5,18 +5,19 @@ using UnityEngine;
|
||||
using FMOD;
|
||||
using FMODUnity;
|
||||
using Whisper; // WhisperManager, WhisperStream, WhisperResult
|
||||
using Whisper.Utils; // AudioChunk
|
||||
using Whisper.Utils;
|
||||
using Debug = UnityEngine.Debug; // AudioChunk
|
||||
|
||||
/// <summary>
|
||||
/// Capture microphone with FMOD and feed chunks to WhisperStream (no Unity Microphone).
|
||||
/// Also (optionally) plays the recorded sound back via FMOD loopback.
|
||||
/// FMOD mic is initialized once (Start) and runs continuously in a ring buffer.
|
||||
/// Whisper stream is created when ActivateRecording() is called and disposed on DeactivateRecording().
|
||||
/// Optional loopback can be paused/resumed instead of starting/stopping to avoid stalls.
|
||||
/// </summary>
|
||||
|
||||
public class FMODWhisperBridge : MonoBehaviour
|
||||
{
|
||||
[Header("Whisper")]
|
||||
[SerializeField] private WhisperManager whisper; // assign in Inspector
|
||||
[SerializeField] private bool useVadInStream = false; // let WhisperStream do VAD or not
|
||||
[SerializeField] private bool useVadInStream = true; // let WhisperStream do VAD or not
|
||||
|
||||
[Header("FMOD capture")]
|
||||
[Tooltip("Recording device index (0 = default)")]
|
||||
@@ -29,6 +30,8 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
|
||||
[Header("Loopback (monitor your voice)")]
|
||||
public bool playLoopback = true;
|
||||
[Tooltip("If true, loopback plays only while active; otherwise it’s always on.")]
|
||||
public bool loopbackOnlyWhenActive = true;
|
||||
[Range(0f, 2f)] public float loopbackVolume = 1.0f;
|
||||
|
||||
public delegate void OnWhisperSegmentUpdatedDelegate(string result);
|
||||
@@ -55,8 +58,11 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
|
||||
// temp conversion buffer
|
||||
private float[] _floatTmp = new float[0];
|
||||
private short[] _shortOverlay;
|
||||
|
||||
// activation flag
|
||||
private bool isRecordingActivated = false;
|
||||
private bool _skipOneFeedFrame = false;
|
||||
|
||||
private void Awake()
|
||||
{
|
||||
@@ -66,85 +72,195 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
|
||||
private async void Start()
|
||||
{
|
||||
// Query device info to get native rate/channels.
|
||||
// (FMOD: getRecordDriverInfo gives you system rate & speaker mode)
|
||||
// -------------- FMOD initialize ONCE --------------
|
||||
// Query device info
|
||||
string name;
|
||||
Guid guid;
|
||||
SPEAKERMODE sm;
|
||||
int smChannels;
|
||||
DRIVER_STATE driverState;
|
||||
// signature: getRecordDriverInfo(id, out name, nameLen, out guid, out systemrate, out speakermode, out speakermodechannels, out driverState)
|
||||
_core.getRecordDriverInfo(recordDriverId, out name, 256, out guid, out _nativeRate, out sm, out smChannels, out driverState);
|
||||
_nativeChannels = channels > 0 ? channels : smChannels;
|
||||
UnityEngine.Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={_nativeRate} ch={_nativeChannels}");
|
||||
|
||||
// Build a user sound buffer that FMOD will fill (OPENUSER | LOOP_NORMAL).
|
||||
var res = _core.getRecordDriverInfo(
|
||||
recordDriverId,
|
||||
out name, 256,
|
||||
out guid,
|
||||
out _nativeRate,
|
||||
out sm,
|
||||
out smChannels,
|
||||
out driverState
|
||||
);
|
||||
|
||||
if (res != RESULT.OK)
|
||||
{
|
||||
Debug.LogError($"[FMOD→Whisper] getRecordDriverInfo failed: {res}");
|
||||
return;
|
||||
}
|
||||
|
||||
_nativeChannels = channels > 0 ? channels : smChannels;
|
||||
int rate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate;
|
||||
Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={rate} ch={_nativeChannels}");
|
||||
|
||||
// Build user sound (ring buffer) — multiple seconds
|
||||
CREATESOUNDEXINFO ex = new CREATESOUNDEXINFO
|
||||
{
|
||||
cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO)),
|
||||
numchannels = _nativeChannels,
|
||||
defaultfrequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate,
|
||||
defaultfrequency = rate,
|
||||
format = SOUND_FORMAT.PCM16,
|
||||
length = (uint)(((_nativeRate > 0 ? _nativeRate : desiredSampleRate) * _nativeChannels) * sizeof(short)) // seconds=1 (we loop)
|
||||
length = (uint)(rate * _nativeChannels * sizeof(short) * bufferLengthSec)
|
||||
};
|
||||
|
||||
_core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound);
|
||||
res = _core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound);
|
||||
if (res != RESULT.OK)
|
||||
{
|
||||
Debug.LogError($"[FMOD→Whisper] createSound failed: {res}");
|
||||
return;
|
||||
}
|
||||
|
||||
_recSound.getLength(out _soundPcmLength, TIMEUNIT.PCM);
|
||||
|
||||
// Start FMOD recording into that sound (looping ring buffer).
|
||||
_core.recordStart(recordDriverId, _recSound, true);
|
||||
UnityEngine.Debug.Log("[FMOD→Whisper] Recording started.");
|
||||
// Start recording (looping)
|
||||
res = _core.recordStart(recordDriverId, _recSound, true);
|
||||
if (res != RESULT.OK)
|
||||
{
|
||||
Debug.LogError($"[FMOD→Whisper] recordStart failed: {res}");
|
||||
_recSound.release();
|
||||
_recSound.clearHandle();
|
||||
return;
|
||||
}
|
||||
|
||||
// Optional loopback playback using FMOD (plays same sound ring buffer).
|
||||
// Initialize record position to avoid a huge first delta
|
||||
_core.getRecordPosition(recordDriverId, out _lastRecordPos);
|
||||
Debug.Log("[FMOD→Whisper] Recording started.");
|
||||
|
||||
// Loopback channel (optional). Start once; pause when inactive if desired.
|
||||
_core.getMasterChannelGroup(out _masterGroup);
|
||||
if (playLoopback)
|
||||
{
|
||||
_core.playSound(_recSound, _masterGroup, false, out _playChannel);
|
||||
_playChannel.setMode(MODE._2D);
|
||||
_playChannel.setVolume(loopbackVolume);
|
||||
UnityEngine.Debug.Log("[FMOD→Whisper] Loopback playback started.");
|
||||
res = _core.playSound(_recSound, _masterGroup, false, out _playChannel);
|
||||
if (res == RESULT.OK && _playChannel.hasHandle())
|
||||
{
|
||||
_playChannel.setMode(MODE._2D);
|
||||
_playChannel.setVolume(loopbackVolume);
|
||||
if (loopbackOnlyWhenActive) _playChannel.setPaused(true); // keep muted until Activate
|
||||
Debug.Log("[FMOD→Whisper] Loopback playback ready.");
|
||||
}
|
||||
else
|
||||
{
|
||||
Debug.LogWarning($"[FMOD→Whisper] playSound failed or channel invalid: {res}");
|
||||
}
|
||||
}
|
||||
|
||||
// Create Whisper stream WITHOUT MicrophoneRecord, just from (freq, channels).
|
||||
// We'll push AudioChunk manually.
|
||||
// NOTE: WhisperStream’s sliding window is governed by manager’s stepSec/keepSec/lengthSec.
|
||||
_stream = await whisper.CreateStream(ex.defaultfrequency, _nativeChannels);
|
||||
_stream.OnResultUpdated += (txt) =>
|
||||
// No Whisper stream here. It will be created on ActivateRecording().
|
||||
await System.Threading.Tasks.Task.Yield();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates a fresh Whisper stream and starts feeding audio (FMOD already recording).
|
||||
/// </summary>
|
||||
public async void ActivateRecording()
|
||||
{
|
||||
if (isRecordingActivated)
|
||||
{
|
||||
//OnWhisperResultProcessed?.Invoke(txt);
|
||||
};
|
||||
Debug.Log("[FMOD→Whisper] ActivateRecording ignored (already active).");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_recSound.hasHandle())
|
||||
{
|
||||
Debug.LogError("[FMOD→Whisper] FMOD not initialized or recording not running.");
|
||||
return;
|
||||
}
|
||||
|
||||
int rate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate;
|
||||
|
||||
try
|
||||
{
|
||||
_stream = await whisper.CreateStream(rate, _nativeChannels);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Debug.LogError($"[FMOD→Whisper] CreateStream exception: {e}");
|
||||
_stream = null;
|
||||
_streamStarted = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Wire events
|
||||
_stream.OnSegmentUpdated += (seg) =>
|
||||
{
|
||||
if (IsSpeechMeaningful(seg.Result))
|
||||
{
|
||||
OnWhisperSegmentUpdated?.Invoke(seg.Result);
|
||||
}
|
||||
string cleanedText = PostProcessInput(seg.Result);
|
||||
if (!string.IsNullOrEmpty(cleanedText))
|
||||
OnWhisperSegmentUpdated?.Invoke(cleanedText);
|
||||
};
|
||||
_stream.OnSegmentFinished += (seg) =>
|
||||
{
|
||||
if (IsSpeechMeaningful(seg.Result))
|
||||
{
|
||||
OnWhisperSegmentFinished?.Invoke(seg.Result);
|
||||
}
|
||||
string cleanedText = PostProcessInput(seg.Result);
|
||||
if (!string.IsNullOrEmpty(cleanedText))
|
||||
OnWhisperSegmentFinished?.Invoke(cleanedText);
|
||||
};
|
||||
|
||||
// If you want Whisper to respect VAD, enable in manager or set useVad (manager controls stream params).
|
||||
whisper.useVad = useVadInStream;
|
||||
|
||||
_stream.StartStream();
|
||||
_streamStarted = true;
|
||||
|
||||
// prepare temp arrays roughly 100ms of audio
|
||||
EnsureTmpCapacity((ex.defaultfrequency / 10) * _nativeChannels);
|
||||
|
||||
// --- NEW: Clear the ring buffer and reset read pointer ---
|
||||
// Pause loopback while we clear (optional, but avoids clicks)
|
||||
if (playLoopback && _playChannel.hasHandle())
|
||||
_playChannel.setPaused(true);
|
||||
|
||||
// Clear buffer bytes
|
||||
ClearRecordRingBuffer();
|
||||
|
||||
// Reset our read pointer to the current write head
|
||||
_core.getRecordPosition(recordDriverId, out _lastRecordPos);
|
||||
|
||||
// We’ll skip feeding for one frame to guarantee a clean start
|
||||
_skipOneFeedFrame = true;
|
||||
|
||||
// Unpause loopback if we want it active during recording
|
||||
if (playLoopback && _playChannel.hasHandle() && (!loopbackOnlyWhenActive || isRecordingActivated))
|
||||
_playChannel.setPaused(loopbackOnlyWhenActive ? false : _playChannel.getPaused(out var paused) == FMOD.RESULT.OK && paused ? false : false);
|
||||
|
||||
isRecordingActivated = true;
|
||||
Debug.Log("[FMOD→Whisper] Stream activated (buffer cleared; reading from current head).");
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
/// Stops feeding audio to Whisper and drops the current Whisper stream.
/// The FMOD recording is left untouched and keeps running.
/// </summary>
/// <remarks>
/// Does nothing when neither <c>isRecordingActivated</c> nor
/// <c>_streamStarted</c> is set. The stream is only stopped and its reference
/// cleared; no dispose call is made here.
/// NOTE(review): ActivateRecording sets both flags only after its awaited
/// CreateStream call returns, so a call made during that await is ignored by
/// the guard below — confirm whether callers can hit that window.
/// </remarks>
public void DeactivateRecording()
{
    if (!isRecordingActivated && !_streamStarted)
    {
        return;
    }

    isRecordingActivated = false;

    // Loopback that is tied to the active state gets paused rather than stopped.
    if (playLoopback && loopbackOnlyWhenActive && _playChannel.hasHandle())
    {
        _playChannel.setPaused(true);
    }

    // Tear down the Whisper stream.
    if (_streamStarted)
    {
        try
        {
            _stream?.StopStream();
        }
        catch (Exception e)
        {
            // Still best-effort: teardown continues, but the failure is reported
            // instead of being silently discarded.
            Debug.LogWarning($"[FMOD→Whisper] StopStream failed during deactivation: {e.Message}");
        }

        _streamStarted = false;
    }

    _stream = null;

    Debug.Log("[FMOD→Whisper] Stream deactivated (Whisper stopped; FMOD still recording).");
}
|
||||
|
||||
private void Update()
|
||||
{
|
||||
if (!isRecordingActivated) return;
|
||||
// Always tick FMOD
|
||||
if (_core.handle != IntPtr.Zero) _core.update();
|
||||
if (!_streamStarted || !_recSound.hasHandle()) return;
|
||||
if (!_recSound.hasHandle()) return;
|
||||
|
||||
// How many samples recorded since last frame?
|
||||
// Compute how many samples recorded since last frame.
|
||||
uint recPos;
|
||||
_core.getRecordPosition(recordDriverId, out recPos);
|
||||
|
||||
@@ -152,45 +268,54 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
? (recPos - _lastRecordPos)
|
||||
: (recPos + _soundPcmLength - _lastRecordPos);
|
||||
|
||||
if (deltaSamples == 0) return;
|
||||
if (deltaSamples == 0)
|
||||
{
|
||||
// Even if 0, keep last pos
|
||||
_lastRecordPos = recPos;
|
||||
return;
|
||||
}
|
||||
|
||||
// We’ll read that region (16-bit) and convert to float[] [-1..1].
|
||||
// Calculate byte range to lock in sound buffer
|
||||
uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 16-bit = 2 bytes
|
||||
// If not active, we *still* advance the ring (so we don't backlog data),
|
||||
// but we *don't* push chunks to Whisper.
|
||||
bool shouldFeed = isRecordingActivated && _streamStarted && _stream != null;
|
||||
|
||||
// Calculate byte range to lock (16-bit)
|
||||
uint bytesToRead = deltaSamples * (uint)_nativeChannels * 2; // 2 bytes per sample
|
||||
uint startBytes = _lastRecordPos * (uint)_nativeChannels * 2;
|
||||
|
||||
IntPtr p1, p2;
|
||||
uint len1, len2;
|
||||
// Lock can wrap — FMOD splits into p1/p2.
|
||||
_recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
|
||||
|
||||
var r = _recSound.@lock(startBytes, bytesToRead, out p1, out p2, out len1, out len2);
|
||||
if (r != RESULT.OK)
|
||||
{
|
||||
_lastRecordPos = recPos;
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
// Convert both parts to float and push to Whisper
|
||||
if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
|
||||
if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
|
||||
if (shouldFeed && !_skipOneFeedFrame)
|
||||
{
|
||||
if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
|
||||
if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
|
||||
}
|
||||
// If skipping, we just discard this frame to ensure no stale data leaks.
|
||||
}
|
||||
finally
|
||||
{
|
||||
_recSound.unlock(p1, p2, len1, len2);
|
||||
}
|
||||
|
||||
if (_skipOneFeedFrame) _skipOneFeedFrame = false;
|
||||
|
||||
_lastRecordPos = recPos;
|
||||
|
||||
}
|
||||
|
||||
public void ActivateRecording()
|
||||
private string PostProcessInput(string input)
|
||||
{
|
||||
isRecordingActivated = true;
|
||||
}
|
||||
|
||||
public void DeactivateRecording()
|
||||
{
|
||||
isRecordingActivated = false;
|
||||
}
|
||||
|
||||
private bool IsSpeechMeaningful(string userText)
|
||||
{
|
||||
return !string.IsNullOrEmpty(userText) && !userText.Contains("BLANK_AUDIO") && !userText.Trim().Equals("[ Silence ]");
|
||||
return input.Replace("[silence]", "").Replace("[ Silence ]", "").Replace("BLANK_AUDIO", "").Replace("[", "").Replace("]", "").Trim();
|
||||
}
|
||||
|
||||
private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
|
||||
@@ -198,26 +323,20 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
int samples = (int)(byteLen / 2); // 2 bytes per sample
|
||||
EnsureTmpCapacity(samples);
|
||||
|
||||
// Marshal the 16-bit PCM into managed space
|
||||
// We pin a short[] overlay to avoid copying twice
|
||||
int shorts = samples;
|
||||
int byteCount = (int)byteLen;
|
||||
EnsureShortOverlay(samples, out short[] sBuf);
|
||||
Marshal.Copy(src, sBuf, 0, samples);
|
||||
|
||||
// Use Marshal.Copy into a short[] then convert to float[-1..1]
|
||||
// (You can also unsafe copy for speed if needed.)
|
||||
EnsureShortOverlay(shorts, out short[] sBuf);
|
||||
Marshal.Copy(src, sBuf, 0, shorts);
|
||||
|
||||
for (int i = 0; i < shorts; i++)
|
||||
// Convert to float [-1..1] (no downmix change from your original)
|
||||
for (int i = 0; i < samples; i++)
|
||||
{
|
||||
// 32768f avoids clipping at -32768
|
||||
_floatTmp[i] = Mathf.Clamp(sBuf[i] / 32768f, -1f, 1f);
|
||||
}
|
||||
|
||||
// Build a chunk for WhisperStream; with VAD off, IsVoiceDetected=true is fine.
|
||||
// TODO (optional): downmix to mono and/or run a light gate before feeding.
|
||||
// For now we keep your original behavior:
|
||||
var chunk = new AudioChunk
|
||||
{
|
||||
Data = _floatTmp.AsSpan(0, shorts).ToArray(),
|
||||
Data = _floatTmp.AsSpan(0, samples).ToArray(),
|
||||
Frequency = (_nativeRate > 0) ? _nativeRate : desiredSampleRate,
|
||||
Channels = _nativeChannels,
|
||||
IsVoiceDetected = true
|
||||
@@ -226,7 +345,6 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
_stream.AddToStream(chunk);
|
||||
}
|
||||
|
||||
private short[] _shortOverlay;
|
||||
private void EnsureShortOverlay(int samples, out short[] buf)
|
||||
{
|
||||
if (_shortOverlay == null || _shortOverlay.Length < samples)
|
||||
@@ -242,13 +360,71 @@ public class FMODWhisperBridge : MonoBehaviour
|
||||
|
||||
private void OnDisable()
|
||||
{
|
||||
if (_streamStarted)
|
||||
// Stop Whisper (if active)
|
||||
DeactivateRecording();
|
||||
|
||||
// Stop/purge FMOD resources (since object is going away)
|
||||
if (_playChannel.hasHandle())
|
||||
{
|
||||
_stream.StopStream();
|
||||
_streamStarted = false;
|
||||
try { _playChannel.stop(); } catch { /* ignore */ }
|
||||
_playChannel.clearHandle();
|
||||
}
|
||||
if (_recSound.hasHandle())
|
||||
{
|
||||
try { _core.recordStop(recordDriverId); } catch { /* ignore */ }
|
||||
try { _recSound.release(); } catch { /* ignore */ }
|
||||
_recSound.clearHandle();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void ClearRecordRingBuffer()
|
||||
{
|
||||
if (!_recSound.hasHandle() || _soundPcmLength == 0) return;
|
||||
|
||||
uint totalBytes = _soundPcmLength * (uint)_nativeChannels * 2; // PCM16
|
||||
IntPtr p1, p2;
|
||||
uint len1, len2;
|
||||
|
||||
// Lock the whole buffer (start=0, length=totalBytes)
|
||||
var r = _recSound.@lock(0, totalBytes, out p1, out p2, out len1, out len2);
|
||||
if (r != FMOD.RESULT.OK)
|
||||
{
|
||||
Debug.LogWarning($"[FMOD→Whisper] Could not lock ring buffer to clear: {r}");
|
||||
return;
|
||||
}
|
||||
|
||||
if (_playChannel.hasHandle()) { _playChannel.stop(); _playChannel.clearHandle(); }
|
||||
if (_recSound.hasHandle()) { _core.recordStop(recordDriverId); _recSound.release(); _recSound.clearHandle(); }
|
||||
try
|
||||
{
|
||||
if (len1 > 0)
|
||||
{
|
||||
// zero p1
|
||||
// We’ll reuse a static zero array to avoid allocating huge buffers repeatedly
|
||||
ZeroMem(p1, (int)len1);
|
||||
}
|
||||
if (len2 > 0)
|
||||
{
|
||||
ZeroMem(p2, (int)len2);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_recSound.unlock(p1, p2, len1, len2);
|
||||
}
|
||||
}
|
||||
|
||||
// Shared block of zero bytes used as the copy source by ZeroMem, so clearing a
// native buffer never needs a managed array of the same size.
private static readonly byte[] _zeroChunk = new byte[16 * 1024]; // 16 KB

/// <summary>
/// Writes zeros over <paramref name="byteLen"/> bytes of unmanaged memory
/// beginning at <paramref name="dst"/>, copying from <c>_zeroChunk</c> in
/// pieces of at most 16 KB.
/// </summary>
/// <param name="dst">Address of the first byte to clear.</param>
/// <param name="byteLen">Number of bytes to clear; zero or negative values clear nothing.</param>
private static void ZeroMem(IntPtr dst, int byteLen)
{
    for (int written = 0; written < byteLen;)
    {
        int remaining = byteLen - written;
        int step = remaining < _zeroChunk.Length ? remaining : _zeroChunk.Length;

        Marshal.Copy(_zeroChunk, 0, IntPtr.Add(dst, written), step);
        written += step;
    }
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,11 +3,15 @@ using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Threading.Tasks;
|
||||
using Unity.XR.CoreUtils;
|
||||
using UnityEngine;
|
||||
using UnityEngine.InputSystem;
|
||||
using UnityEngine.SceneManagement;
|
||||
|
||||
public class ModelGenerationUtils : MonoBehaviour
|
||||
{
|
||||
public static ModelGenerationUtils Instance { get; private set; }
|
||||
public Transform[] minigamePoints;
|
||||
|
||||
private void Awake()
|
||||
{
|
||||
@@ -23,9 +27,38 @@ public class ModelGenerationUtils : MonoBehaviour
|
||||
/// <summary>
/// Keyboard shortcuts intended for expo/demo use: digit keys 1-3 move the
/// XR origin to <c>minigamePoints[0..2]</c>, and digit key 0 reloads the
/// active scene.
/// </summary>
void Update()
{
    // No keyboard present (e.g., Quest standalone without a paired keyboard)
    Keyboard keyboard = Keyboard.current;
    if (keyboard == null)
    {
        return;
    }

    if (keyboard.digit1Key.wasPressedThisFrame)
    {
        TeleportPlayerToMinigame(0);
    }

    if (keyboard.digit2Key.wasPressedThisFrame)
    {
        TeleportPlayerToMinigame(1);
    }

    if (keyboard.digit3Key.wasPressedThisFrame)
    {
        TeleportPlayerToMinigame(2);
    }

    if (keyboard.digit0Key.wasPressedThisFrame)
    {
        string currentSceneName = SceneManager.GetActiveScene().name;
        SceneManager.LoadScene(currentSceneName);
    }
}

/// <summary>
/// Moves the XR origin to the position of <c>minigamePoints[index]</c>.
/// Logs a warning and does nothing when that point is not assigned or no
/// XROrigin can be found, instead of throwing from Update.
/// </summary>
/// <param name="index">Zero-based index into <c>minigamePoints</c>.</param>
private void TeleportPlayerToMinigame(int index)
{
    // "== null" is used on purpose: it goes through Unity's overloaded
    // equality, which also treats destroyed objects as null.
    bool pointMissing = minigamePoints == null
        || index < 0
        || index >= minigamePoints.Length
        || minigamePoints[index] == null;
    if (pointMissing)
    {
        // Fully qualified because this file imports both System.Diagnostics and UnityEngine.
        UnityEngine.Debug.LogWarning($"[ModelGenerationUtils] No minigame point assigned at index {index}.");
        return;
    }

    XROrigin xrOrigin = FindObjectOfType<XROrigin>(includeInactive: true);
    if (xrOrigin == null)
    {
        UnityEngine.Debug.LogWarning("[ModelGenerationUtils] No XROrigin found; cannot teleport player.");
        return;
    }

    xrOrigin.transform.position = minigamePoints[index].position;
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate model by starting a new Python subprocess
|
||||
* NOT USED IN LATEST VERSION
|
||||
|
||||
@@ -50,9 +50,9 @@ public class CafeWaiterNPC : NPCController
|
||||
{
|
||||
SpeakVoiceLine(0);
|
||||
|
||||
fmodWhisperBridge.ActivateRecording();
|
||||
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||
fmodWhisperBridge.ActivateRecording();
|
||||
|
||||
notepad.SetActive(true);
|
||||
state = 1;
|
||||
@@ -76,7 +76,7 @@ public class CafeWaiterNPC : NPCController
|
||||
|
||||
private void OnPlayerSpeechFinished(string playerText)
|
||||
{
|
||||
if (Time.time < lastPlayerVoiceUpdateTime + 0.5f)
|
||||
if (Time.time < lastPlayerVoiceUpdateTime + 1.0f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -85,23 +85,34 @@ public class CafeWaiterNPC : NPCController
|
||||
if (state == 1)
|
||||
{
|
||||
// Show transcription and ask whether it is correct
|
||||
fmodWhisperBridge.DeactivateRecording();
|
||||
notepadText.text = playerText;
|
||||
notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f);
|
||||
notepad.transform.DOLocalRotate(notepadFlippedRotation, 0.5f).OnComplete(() =>
|
||||
{
|
||||
fmodWhisperBridge.ActivateRecording();
|
||||
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||
});
|
||||
SpeakVoiceLine(1);
|
||||
state = 2;
|
||||
} else if (state == 2)
|
||||
{
|
||||
fmodWhisperBridge.DeactivateRecording();
|
||||
// Flip notepad back
|
||||
notepad.transform.DOLocalRotate(notepadOriginalRotation, 0.5f);
|
||||
// if player answered positively, bring food, otherwise ask again
|
||||
if (playerText.ToLower().Contains("yes"))
|
||||
if (playerText.ToLower().Contains("ye"))
|
||||
{
|
||||
|
||||
SpeakVoiceLine(2);
|
||||
Invoke("BringFood", 1f);
|
||||
state = 3;
|
||||
} else
|
||||
{
|
||||
SpeakVoiceLine(3);
|
||||
fmodWhisperBridge.ActivateRecording();
|
||||
fmodWhisperBridge.OnWhisperSegmentUpdated += OnPlayerSpeechUpdated;
|
||||
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||
state = 1;
|
||||
}
|
||||
}
|
||||
@@ -113,6 +124,14 @@ public class CafeWaiterNPC : NPCController
|
||||
if (state == 1)
|
||||
{
|
||||
notepadText.text = playerText;
|
||||
|
||||
// For now, when something is transcribed, treat it as player finished speaking
|
||||
OnPlayerSpeechFinished(playerText);
|
||||
}
|
||||
// faster reaction to player answering yes/no
|
||||
else if (state == 2)
|
||||
{
|
||||
OnPlayerSpeechFinished(playerText);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@ public class ReleasableButton : MonoBehaviour
|
||||
{
|
||||
public delegate void OnButtonPressedDelegate();
|
||||
public event OnButtonPressedDelegate OnButtonPressed;
|
||||
public delegate void OnButtonReleasedDelegate();
|
||||
public event OnButtonReleasedDelegate OnButtonReleased;
|
||||
|
||||
public Transform movableParts;
|
||||
public float moveDuration = 0.25f;
|
||||
@@ -37,7 +39,6 @@ public class ReleasableButton : MonoBehaviour
|
||||
{
|
||||
if (!isButtonDown && collider.gameObject.tag.EndsWith("Hand"))
|
||||
{
|
||||
Debug.Log("collided with: " + collider.gameObject.name);
|
||||
Activate();
|
||||
OnButtonPressed?.Invoke();
|
||||
}
|
||||
@@ -47,8 +48,8 @@ public class ReleasableButton : MonoBehaviour
|
||||
{
|
||||
if (isButtonDown && collider.gameObject.tag.EndsWith("Hand"))
|
||||
{
|
||||
Debug.Log("collider exited: " + collider.gameObject.name);
|
||||
Deactivate();
|
||||
OnButtonReleased?.Invoke();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ public class RadioTransmitter : XRGrabInteractable
|
||||
/// <summary>
/// Subscribes this transmitter's press and release handlers to the
/// corresponding events of <c>radioButton</c>.
/// </summary>
void Start()
{
    var button = radioButton;

    button.OnButtonPressed += OnRadioButtonPressed;
    button.OnButtonReleased += OnRadioButtonReleased;
}
|
||||
|
||||
// Update is called once per frame
|
||||
@@ -38,12 +39,20 @@ public class RadioTransmitter : XRGrabInteractable
|
||||
fmodWhisperBridge.OnWhisperSegmentFinished += OnPlayerSpeechFinished;
|
||||
AudioManager.Instance.PlayAttachedInstance(FMODEvents.Instance.RadioButton, gameObject);
|
||||
fmodWhisperBridge.ActivateRecording();
|
||||
// TODO: deactivate when button is released
|
||||
}
|
||||
|
||||
/// <summary>
/// Handler for the radio button's release event: detaches the speech
/// callbacks from the Whisper bridge, plays the radio button sound on this
/// object, and deactivates recording.
/// </summary>
private void OnRadioButtonReleased()
{
    var bridge = fmodWhisperBridge;

    // Detach the callbacks first; statement order matches the original exactly.
    bridge.OnWhisperSegmentUpdated -= OnPlayerSpeechUpdated;
    bridge.OnWhisperSegmentFinished -= OnPlayerSpeechFinished;

    AudioManager.Instance.PlayAttachedInstance(FMODEvents.Instance.RadioButton, gameObject);

    bridge.DeactivateRecording();
}
|
||||
|
||||
/// <summary>
/// Receives a transcription update, shows it on <c>computerScreen</c>, and
/// then raises <c>OnPlayerFinishedSpeaking</c> when that event has subscribers.
/// </summary>
/// <param name="text">Transcribed text to display.</param>
private void OnPlayerSpeechUpdated(string text)
{
    computerScreen.text = text;

    // Snapshot the handler list once, then invoke only when it is non-empty.
    var finishedHandlers = OnPlayerFinishedSpeaking;
    if (finishedHandlers != null)
    {
        finishedHandlers.Invoke();
    }
}
|
||||
|
||||
private void OnPlayerSpeechFinished(string playerText)
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user