forked from cgvr/DeltaVR
microphone recording working with FMOD!!!
This commit is contained in:
Binary file not shown.
@@ -13,7 +13,6 @@ GameObject:
|
|||||||
- component: {fileID: 7203143526414218131}
|
- component: {fileID: 7203143526414218131}
|
||||||
- component: {fileID: 2568780590385977406}
|
- component: {fileID: 2568780590385977406}
|
||||||
- component: {fileID: 1523408157143897080}
|
- component: {fileID: 1523408157143897080}
|
||||||
- component: {fileID: 2918895218826581840}
|
|
||||||
m_Layer: 0
|
m_Layer: 0
|
||||||
m_Name: MicrophoneStand 1
|
m_Name: MicrophoneStand 1
|
||||||
m_TagString: Untagged
|
m_TagString: Untagged
|
||||||
@@ -35,7 +34,7 @@ Transform:
|
|||||||
m_Children:
|
m_Children:
|
||||||
- {fileID: 1850758373829337931}
|
- {fileID: 1850758373829337931}
|
||||||
m_Father: {fileID: 0}
|
m_Father: {fileID: 0}
|
||||||
m_RootOrder: 0
|
m_RootOrder: -1
|
||||||
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
|
||||||
--- !u!33 &4763701867718457502
|
--- !u!33 &4763701867718457502
|
||||||
MeshFilter:
|
MeshFilter:
|
||||||
@@ -120,41 +119,10 @@ MonoBehaviour:
|
|||||||
m_Script: {fileID: 11500000, guid: 357101b2cface4943b04dfd25d4944e3, type: 3}
|
m_Script: {fileID: 11500000, guid: 357101b2cface4943b04dfd25d4944e3, type: 3}
|
||||||
m_Name:
|
m_Name:
|
||||||
m_EditorClassIdentifier:
|
m_EditorClassIdentifier:
|
||||||
whisper: {fileID: 0}
|
|
||||||
microphoneRecord: {fileID: 2918895218826581840}
|
|
||||||
microphoneDevice:
|
|
||||||
outputText: {fileID: 0}
|
outputText: {fileID: 0}
|
||||||
microphoneOffStatus: {fileID: 8977839985090371394}
|
microphoneOffStatus: {fileID: 8977839985090371394}
|
||||||
microphoneOnStatus: {fileID: 6537061652288108950}
|
microphoneOnStatus: {fileID: 6537061652288108950}
|
||||||
--- !u!114 &2918895218826581840
|
fmodWhisperBridge: {fileID: 0}
|
||||||
MonoBehaviour:
|
|
||||||
m_ObjectHideFlags: 0
|
|
||||||
m_CorrespondingSourceObject: {fileID: 0}
|
|
||||||
m_PrefabInstance: {fileID: 0}
|
|
||||||
m_PrefabAsset: {fileID: 0}
|
|
||||||
m_GameObject: {fileID: 4291579148315658230}
|
|
||||||
m_Enabled: 1
|
|
||||||
m_EditorHideFlags: 0
|
|
||||||
m_Script: {fileID: 11500000, guid: 3bc03a4c19604ea394e364f8fc632928, type: 3}
|
|
||||||
m_Name:
|
|
||||||
m_EditorClassIdentifier:
|
|
||||||
maxLengthSec: 60
|
|
||||||
loop: 0
|
|
||||||
frequency: 16000
|
|
||||||
chunksLengthSec: 0.5
|
|
||||||
echo: 1
|
|
||||||
useVad: 1
|
|
||||||
vadUpdateRateSec: 0.1
|
|
||||||
vadContextSec: 30
|
|
||||||
vadLastSec: 1.25
|
|
||||||
vadThd: 1
|
|
||||||
vadFreqThd: 100
|
|
||||||
vadIndicatorImage: {fileID: 0}
|
|
||||||
vadStop: 0
|
|
||||||
dropVadPart: 1
|
|
||||||
vadStopTime: 3
|
|
||||||
microphoneDropdown: {fileID: 0}
|
|
||||||
microphoneDefaultLabel: Default microphone
|
|
||||||
--- !u!1 &5819798980962142350
|
--- !u!1 &5819798980962142350
|
||||||
GameObject:
|
GameObject:
|
||||||
m_ObjectHideFlags: 0
|
m_ObjectHideFlags: 0
|
||||||
|
|||||||
@@ -13,8 +13,7 @@ GameObject:
|
|||||||
- component: {fileID: 3363453355800186393}
|
- component: {fileID: 3363453355800186393}
|
||||||
- component: {fileID: 2166102850012183631}
|
- component: {fileID: 2166102850012183631}
|
||||||
- component: {fileID: 8356602476881383464}
|
- component: {fileID: 8356602476881383464}
|
||||||
- component: {fileID: 4280129837482332964}
|
- component: {fileID: 5682436338090300270}
|
||||||
- component: {fileID: 8105025160921831064}
|
|
||||||
m_Layer: 0
|
m_Layer: 0
|
||||||
m_Name: ModelGenerationManager
|
m_Name: ModelGenerationManager
|
||||||
m_TagString: Untagged
|
m_TagString: Untagged
|
||||||
@@ -64,6 +63,8 @@ MonoBehaviour:
|
|||||||
INVOKEAI_BASE_URL: http://ltat-cgvr9.domenis.ut.ee:9090
|
INVOKEAI_BASE_URL: http://ltat-cgvr9.domenis.ut.ee:9090
|
||||||
DEFAULT_QUEUE_ID: default
|
DEFAULT_QUEUE_ID: default
|
||||||
MODEL_KEY: 81d45960-08a0-4b8c-a48b-e7d73b21bfe2
|
MODEL_KEY: 81d45960-08a0-4b8c-a48b-e7d73b21bfe2
|
||||||
|
promptSuffix: ', single object, front and side fully visible, realistic style,
|
||||||
|
plain neutral background, clear details, soft studio lighting, true-to-scale'
|
||||||
--- !u!114 &2166102850012183631
|
--- !u!114 &2166102850012183631
|
||||||
MonoBehaviour:
|
MonoBehaviour:
|
||||||
m_ObjectHideFlags: 0
|
m_ObjectHideFlags: 0
|
||||||
@@ -110,7 +111,7 @@ MonoBehaviour:
|
|||||||
useVad: 1
|
useVad: 1
|
||||||
tokensTimestamps: 0
|
tokensTimestamps: 0
|
||||||
audioCtx: 0
|
audioCtx: 0
|
||||||
--- !u!114 &4280129837482332964
|
--- !u!114 &5682436338090300270
|
||||||
MonoBehaviour:
|
MonoBehaviour:
|
||||||
m_ObjectHideFlags: 0
|
m_ObjectHideFlags: 0
|
||||||
m_CorrespondingSourceObject: {fileID: 0}
|
m_CorrespondingSourceObject: {fileID: 0}
|
||||||
@@ -119,65 +120,14 @@ MonoBehaviour:
|
|||||||
m_GameObject: {fileID: 154411548685861447}
|
m_GameObject: {fileID: 154411548685861447}
|
||||||
m_Enabled: 1
|
m_Enabled: 1
|
||||||
m_EditorHideFlags: 0
|
m_EditorHideFlags: 0
|
||||||
m_Script: {fileID: 11500000, guid: a2836e36774ca1c4bbbee976e17b649c, type: 3}
|
m_Script: {fileID: 11500000, guid: a264cb5321d9f6741a2625b8c63c34a7, type: 3}
|
||||||
m_Name:
|
m_Name:
|
||||||
m_EditorClassIdentifier:
|
m_EditorClassIdentifier:
|
||||||
_componentIndexCache: 0
|
whisper: {fileID: 8356602476881383464}
|
||||||
_addedNetworkObject: {fileID: 8105025160921831064}
|
useVadInStream: 0
|
||||||
_networkObjectCache: {fileID: 8105025160921831064}
|
recordDriverId: 0
|
||||||
_synchronizeParent: 0
|
desiredSampleRate: 48000
|
||||||
_packing:
|
channels: 1
|
||||||
Position: 1
|
bufferLengthSec: 5
|
||||||
Rotation: 1
|
playLoopback: 0
|
||||||
Scale: 0
|
loopbackVolume: 1
|
||||||
_interpolation: 2
|
|
||||||
_extrapolation: 2
|
|
||||||
_enableTeleport: 0
|
|
||||||
_teleportThreshold: 1
|
|
||||||
_clientAuthoritative: 1
|
|
||||||
_sendToOwner: 1
|
|
||||||
_synchronizePosition: 1
|
|
||||||
_positionSnapping:
|
|
||||||
X: 0
|
|
||||||
Y: 0
|
|
||||||
Z: 0
|
|
||||||
_synchronizeRotation: 1
|
|
||||||
_rotationSnapping:
|
|
||||||
X: 0
|
|
||||||
Y: 0
|
|
||||||
Z: 0
|
|
||||||
_synchronizeScale: 1
|
|
||||||
_scaleSnapping:
|
|
||||||
X: 0
|
|
||||||
Y: 0
|
|
||||||
Z: 0
|
|
||||||
--- !u!114 &8105025160921831064
|
|
||||||
MonoBehaviour:
|
|
||||||
m_ObjectHideFlags: 0
|
|
||||||
m_CorrespondingSourceObject: {fileID: 0}
|
|
||||||
m_PrefabInstance: {fileID: 0}
|
|
||||||
m_PrefabAsset: {fileID: 0}
|
|
||||||
m_GameObject: {fileID: 154411548685861447}
|
|
||||||
m_Enabled: 1
|
|
||||||
m_EditorHideFlags: 0
|
|
||||||
m_Script: {fileID: 11500000, guid: 26b716c41e9b56b4baafaf13a523ba2e, type: 3}
|
|
||||||
m_Name:
|
|
||||||
m_EditorClassIdentifier:
|
|
||||||
<IsNested>k__BackingField: 0
|
|
||||||
<ComponentIndex>k__BackingField: 0
|
|
||||||
<PredictedSpawn>k__BackingField: {fileID: 0}
|
|
||||||
_networkBehaviours:
|
|
||||||
- {fileID: 4280129837482332964}
|
|
||||||
<ParentNetworkObject>k__BackingField: {fileID: 0}
|
|
||||||
<ChildNetworkObjects>k__BackingField: []
|
|
||||||
_isNetworked: 1
|
|
||||||
_isGlobal: 0
|
|
||||||
_initializeOrder: 0
|
|
||||||
_defaultDespawnType: 0
|
|
||||||
NetworkObserver: {fileID: 0}
|
|
||||||
<PrefabId>k__BackingField: 0
|
|
||||||
<SpawnableCollectionId>k__BackingField: 0
|
|
||||||
_scenePathHash: 0
|
|
||||||
<SceneId>k__BackingField: 0
|
|
||||||
<AssetPathHash>k__BackingField: 17646158845367820466
|
|
||||||
_sceneNetworkObjects: []
|
|
||||||
|
|||||||
Binary file not shown.
@@ -1,36 +1,19 @@
|
|||||||
using TMPro;
|
using TMPro;
|
||||||
using UnityEngine;
|
using UnityEngine;
|
||||||
using Whisper;
|
|
||||||
using Whisper.Utils;
|
|
||||||
|
|
||||||
public class MicrophoneStand : MonoBehaviour
|
public class MicrophoneStand : MonoBehaviour
|
||||||
{
|
{
|
||||||
public WhisperManager whisper;
|
|
||||||
public MicrophoneRecord microphoneRecord;
|
|
||||||
public string microphoneDevice;
|
|
||||||
public TextMeshProUGUI outputText;
|
public TextMeshProUGUI outputText;
|
||||||
|
|
||||||
private WhisperStream stream;
|
|
||||||
|
|
||||||
[SerializeField]
|
|
||||||
private string textOutput;
|
|
||||||
|
|
||||||
public GameObject microphoneOffStatus;
|
public GameObject microphoneOffStatus;
|
||||||
public GameObject microphoneOnStatus;
|
public GameObject microphoneOnStatus;
|
||||||
|
|
||||||
|
public FMODWhisperBridge fmodWhisperBridge;
|
||||||
|
|
||||||
// Start is called before the first frame update
|
// Start is called before the first frame update
|
||||||
async void Start()
|
void Start()
|
||||||
{
|
{
|
||||||
Debug.Log("Mic devices: " + string.Join(", ", Microphone.devices));
|
fmodWhisperBridge.OnWhisperResultProcessed += OnWhisperResult;
|
||||||
Debug.Log("Using mic device: " + microphoneDevice);
|
|
||||||
microphoneRecord.SelectedMicDevice = microphoneDevice;
|
|
||||||
|
|
||||||
// This causes about 1 sec long freeze, has to be done once at the start of the game
|
|
||||||
microphoneRecord.StartRecord();
|
|
||||||
|
|
||||||
stream = await whisper.CreateStream(microphoneRecord);
|
|
||||||
stream.OnResultUpdated += OnWhisperResult;
|
|
||||||
//stream.StartStream();
|
|
||||||
|
|
||||||
microphoneOffStatus.SetActive(true);
|
microphoneOffStatus.SetActive(true);
|
||||||
microphoneOnStatus.SetActive(false);
|
microphoneOnStatus.SetActive(false);
|
||||||
@@ -51,10 +34,7 @@ public class MicrophoneStand : MonoBehaviour
|
|||||||
{
|
{
|
||||||
microphoneOffStatus.SetActive(false);
|
microphoneOffStatus.SetActive(false);
|
||||||
microphoneOnStatus.SetActive(true);
|
microphoneOnStatus.SetActive(true);
|
||||||
|
fmodWhisperBridge.ActivateRecording();
|
||||||
stream.StartStream();
|
|
||||||
//microphoneRecord.StartRecord();
|
|
||||||
Debug.Log("Whisper stream started.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,27 +45,17 @@ public class MicrophoneStand : MonoBehaviour
|
|||||||
{
|
{
|
||||||
microphoneOffStatus.SetActive(true);
|
microphoneOffStatus.SetActive(true);
|
||||||
microphoneOnStatus.SetActive(false);
|
microphoneOnStatus.SetActive(false);
|
||||||
|
fmodWhisperBridge.DeactivateRecording();
|
||||||
stream.StopStream();
|
|
||||||
//microphoneRecord.StopRecord();
|
|
||||||
textOutput = outputText.text;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void OnWhisperResult(string result)
|
private void OnWhisperResult(string result)
|
||||||
{
|
{
|
||||||
Debug.Log("Whisper result processed: " + result);
|
|
||||||
outputText.text = result;
|
outputText.text = result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void OnDestroy()
|
|
||||||
{
|
|
||||||
microphoneRecord.StopRecord();
|
|
||||||
Destroy(gameObject);
|
|
||||||
}
|
|
||||||
|
|
||||||
public string GetTextOutput()
|
public string GetTextOutput()
|
||||||
{
|
{
|
||||||
return textOutput;
|
return outputText.text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
122
Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs
Normal file
122
Assets/_PROJECT/Scripts/ModeGeneration/FMODMicLoopback.cs
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
|
||||||
|
using FMOD;
|
||||||
|
using FMODUnity;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using UnityEngine;
|
||||||
|
|
||||||
|
/// <summary>
/// Records from an FMOD record driver into a one-second looping user sound and plays that
/// same sound back on the master channel group. Playback is delayed until the desired
/// latency has been recorded, and the playback frequency is nudged by 2% up or down to
/// keep the measured record-to-playback latency close to the target.
/// </summary>
public class FMODMicLoopback : MonoBehaviour
{
    // Record driver used for capture and for record-position queries.
    private const int RecordDriverId = 0;

    // Target latency between recording and playback, in milliseconds.
    private const uint LatencyMs = 50;

    // Allowed latency deviation before the playback rate is adjusted, in milliseconds.
    private const uint DriftMs = 1;

    private uint samplesRecorded, samplesPlayed = 0;
    private int nativeRate, nativeChannels = 0;
    private uint recSoundLength = 0;
    private uint lastPlayPos = 0;
    private uint lastRecordPos = 0;
    private uint driftThreshold = 0;
    private uint desiredLatency = 0;
    private uint adjustLatency = 0;
    private int actualLatency = 0;
    private uint minRecordDelta = 0xFFFFFFFF;

    private FMOD.CREATESOUNDEXINFO exInfo = new FMOD.CREATESOUNDEXINFO();

    // Cached so teardown does not have to go through RuntimeManager again.
    private FMOD.System core;
    private FMOD.Sound recSound;
    private FMOD.Channel channel;

    /// <summary>
    /// Queries the record driver, creates the looping user sound and starts recording into it.
    /// Disables this component if any of those steps fails, so <see cref="Update"/> never runs
    /// against an invalid sound.
    /// </summary>
    void Start()
    {
        core = FMODUnity.RuntimeManager.CoreSystem;

        /*
            Determine latency in samples.
        */
        FMOD.RESULT result = core.getRecordDriverInfo(RecordDriverId, out _, 0, out _, out nativeRate, out _, out nativeChannels, out _);
        if (result != FMOD.RESULT.OK || nativeRate <= 0 || nativeChannels <= 0)
        {
            UnityEngine.Debug.LogError($"[FMODMicLoopback] No usable record driver #{RecordDriverId} (result={result}, rate={nativeRate}, channels={nativeChannels}).");
            enabled = false;
            return;
        }

        driftThreshold = (uint)(nativeRate * DriftMs) / 1000;
        desiredLatency = (uint)(nativeRate * LatencyMs) / 1000;
        adjustLatency = desiredLatency;
        actualLatency = (int)desiredLatency;

        /*
            Create user sound to record into, then start recording.
        */
        exInfo.cbsize = Marshal.SizeOf(typeof(FMOD.CREATESOUNDEXINFO));
        exInfo.numchannels = nativeChannels;
        exInfo.format = FMOD.SOUND_FORMAT.PCM16;
        exInfo.defaultfrequency = nativeRate;
        // One second of 16-bit PCM; the sound loops and acts as a ring buffer.
        exInfo.length = (uint)(nativeRate * sizeof(short) * nativeChannels);

        result = core.createSound("", FMOD.MODE.LOOP_NORMAL | FMOD.MODE.OPENUSER, ref exInfo, out recSound);
        if (result != FMOD.RESULT.OK)
        {
            UnityEngine.Debug.LogError($"[FMODMicLoopback] createSound failed: {result}");
            recSound.clearHandle();
            enabled = false;
            return;
        }

        result = core.recordStart(RecordDriverId, recSound, true);
        if (result != FMOD.RESULT.OK)
        {
            UnityEngine.Debug.LogError($"[FMODMicLoopback] recordStart failed: {result}");
            recSound.release();
            recSound.clearHandle();
            enabled = false;
            return;
        }

        recSound.getLength(out recSoundLength, FMOD.TIMEUNIT.PCM);
    }

    /// <summary>
    /// Tracks how many samples were recorded and played since the previous frame, starts
    /// playback once enough audio has been recorded, and compensates for drift by changing
    /// the playback frequency.
    /// </summary>
    void Update()
    {
        if (!recSound.hasHandle())
        {
            return;
        }

        /*
            Determine how much has been recorded since we last checked
        */
        if (core.getRecordPosition(RecordDriverId, out uint recordPos) != FMOD.RESULT.OK)
        {
            return;
        }

        // The record position wraps at the end of the looping sound.
        uint recordDelta = (recordPos >= lastRecordPos) ? (recordPos - lastRecordPos) : (recordPos + recSoundLength - lastRecordPos);
        lastRecordPos = recordPos;
        samplesRecorded += recordDelta;

        if (recordDelta != 0 && (recordDelta < minRecordDelta))
        {
            minRecordDelta = recordDelta; // Smallest driver granularity seen so far
            adjustLatency = (recordDelta <= desiredLatency) ? desiredLatency : recordDelta; // Adjust our latency if driver granularity is high
        }

        /*
            Delay playback until our desired latency is reached.
        */
        if (!channel.hasHandle() && samplesRecorded >= adjustLatency)
        {
            core.getMasterChannelGroup(out FMOD.ChannelGroup mCG);
            core.playSound(recSound, mCG, false, out channel);
        }

        /*
            Determine how much has been played since we last checked.
        */
        if (channel.hasHandle())
        {
            uint playPos = 0;
            channel.getPosition(out playPos, FMOD.TIMEUNIT.PCM);

            uint playDelta = (playPos >= lastPlayPos) ? (playPos - lastPlayPos) : (playPos + recSoundLength - lastPlayPos);
            lastPlayPos = playPos;
            samplesPlayed += playDelta;

            // Compensate for any drift (exponentially smoothed latency estimate).
            int latency = (int)(samplesRecorded - samplesPlayed);
            actualLatency = (int)((0.97f * actualLatency) + (0.03f * latency));

            int playbackRate = nativeRate;
            if (actualLatency < (int)(adjustLatency - driftThreshold))
            {
                // Playback position is catching up to the record position, slow playback down by 2%
                playbackRate = nativeRate - (nativeRate / 50);
            }
            else if (actualLatency > (int)(adjustLatency + driftThreshold))
            {
                // Playback is falling behind the record position, speed playback up by 2%
                playbackRate = nativeRate + (nativeRate / 50);
            }

            channel.setFrequency((float)playbackRate);
        }
    }

    /// <summary>
    /// Stops playback and recording before releasing the sound they both reference.
    /// </summary>
    private void OnDestroy()
    {
        if (channel.hasHandle())
        {
            channel.stop();
            channel.clearHandle();
        }

        if (recSound.hasHandle())
        {
            core.recordStop(RecordDriverId);
            recSound.release();
            recSound.clearHandle();
        }
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: 12623af3ff8b7df4abbaacd753e43555
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
238
Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs
Normal file
238
Assets/_PROJECT/Scripts/ModeGeneration/FMODWhisperBridge.cs
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using UnityEngine;
|
||||||
|
using FMOD;
|
||||||
|
using FMODUnity;
|
||||||
|
using Whisper; // WhisperManager, WhisperStream, WhisperResult
|
||||||
|
using Whisper.Utils; // AudioChunk
|
||||||
|
|
||||||
|
/// <summary>
/// Captures the microphone with FMOD (no Unity <c>Microphone</c>) into a one-second looping
/// PCM16 sound and, while recording is activated, converts the newly recorded samples to
/// floats and pushes them to a <see cref="WhisperStream"/> as <see cref="AudioChunk"/>s.
/// Optionally plays the recorded sound back through FMOD as a loopback monitor.
/// Finished segments are reported through <see cref="OnWhisperResultProcessed"/>.
/// </summary>
public class FMODWhisperBridge : MonoBehaviour
{
    // The capture sound is PCM16, i.e. two bytes per sample.
    private const uint BytesPerSample = 2;

    // Maximum driver-name length requested from FMOD.
    private const int DriverNameLength = 256;

    [Header("Whisper")]
    [SerializeField] private WhisperManager whisper; // assign in Inspector
    [SerializeField] private bool useVadInStream = false; // let WhisperStream do VAD or not

    [Header("FMOD capture")]
    [Tooltip("Recording device index (0 = default)")]
    public int recordDriverId = 0;
    [Tooltip("Set 48000 on Quest; falls back to device rate automatically")]
    public int desiredSampleRate = 48000;
    [Tooltip("Mono recommended for Whisper")]
    public int channels = 1;
    // NOTE(review): this value is not applied anywhere in this class; the ring buffer is
    // always one second long. Kept because it is a serialized public field.
    [Range(1, 10)] public int bufferLengthSec = 5;

    [Header("Loopback (monitor your voice)")]
    public bool playLoopback = true;
    [Range(0f, 2f)] public float loopbackVolume = 1.0f;

    public delegate void OnWhisperResultProcessedDelegate(string result);

    /// <summary>Raised with the text of every segment the Whisper stream reports.</summary>
    public event OnWhisperResultProcessedDelegate OnWhisperResultProcessed;

    // FMOD
    private FMOD.System _core;
    private Sound _recSound;
    private Channel _playChannel;
    private ChannelGroup _masterGroup;
    private uint _soundPcmLength; // in samples
    private int _nativeRate;
    private int _nativeChannels;
    private int _sampleRate; // device rate, or desiredSampleRate when the device reports none

    // ring-buffer tracking
    private uint _lastRecordPos = 0;

    // Whisper
    private WhisperStream _stream;
    private bool _streamStarted;

    // reusable buffer for marshalling PCM16 out of the FMOD sound
    private short[] _pcmScratch;

    private bool isRecordingActivated = false;

    private void Awake()
    {
        if (!whisper) whisper = FindObjectOfType<WhisperManager>();
        _core = RuntimeManager.CoreSystem; // FMOD core system
    }

    /// <summary>
    /// Starts FMOD capture (and optional loopback), then creates and starts the Whisper stream.
    /// Disables the component when capture or the stream cannot be set up.
    /// </summary>
    private async void Start()
    {
        if (!whisper)
        {
            UnityEngine.Debug.LogError("[FMOD→Whisper] No WhisperManager assigned or found in the scene.");
            enabled = false;
            return;
        }

        if (!TryStartCapture())
        {
            enabled = false;
            return;
        }

        if (playLoopback)
        {
            StartLoopback();
        }

        // Set the VAD flag before creating the stream so it is already in place when the
        // manager builds the stream's parameters.
        // NOTE(review): presumably CreateStream snapshots useVad; confirm against WhisperManager.
        whisper.useVad = useVadInStream;

        // Create Whisper stream WITHOUT MicrophoneRecord, just from (freq, channels).
        // Chunks are pushed manually from Update.
        _stream = await whisper.CreateStream(_sampleRate, _nativeChannels);

        // The component may have been disabled or destroyed while awaiting; OnDisable has
        // then already released the capture sound and there is nothing left to feed.
        if (this == null || !_recSound.hasHandle())
        {
            return;
        }

        if (_stream == null)
        {
            UnityEngine.Debug.LogError("[FMOD→Whisper] Failed to create Whisper stream.");
            enabled = false; // triggers OnDisable, which releases the FMOD resources
            return;
        }

        _stream.OnSegmentUpdated += (seg) =>
        {
            OnWhisperResultProcessed?.Invoke(seg.Result);
        };

        _stream.StartStream();
        _streamStarted = true;
    }

    /// <summary>
    /// While recording is activated, reads the samples recorded since the previous frame
    /// from the ring buffer and feeds them to the Whisper stream.
    /// </summary>
    private void Update()
    {
        if (!isRecordingActivated) return;
        // NOTE(review): FMOD Studio may already update the core system each frame; confirm
        // whether this explicit update is needed.
        if (_core.handle != IntPtr.Zero) _core.update();
        if (!_streamStarted || !_recSound.hasHandle()) return;

        // How many samples recorded since last frame?
        if (_core.getRecordPosition(recordDriverId, out uint recPos) != RESULT.OK) return;

        uint deltaSamples = (recPos >= _lastRecordPos)
            ? (recPos - _lastRecordPos)
            : (recPos + _soundPcmLength - _lastRecordPos); // record position wrapped

        if (deltaSamples == 0) return;

        // Byte range of the newly recorded region inside the PCM16 sound buffer.
        uint bytesToRead = deltaSamples * (uint)_nativeChannels * BytesPerSample;
        uint startBytes = _lastRecordPos * (uint)_nativeChannels * BytesPerSample;

        // Lock can wrap — FMOD splits the region into p1/p2.
        RESULT lockResult = _recSound.@lock(startBytes, bytesToRead, out IntPtr p1, out IntPtr p2, out uint len1, out uint len2);
        if (lockResult != RESULT.OK)
        {
            UnityEngine.Debug.LogWarning($"[FMOD→Whisper] Sound lock failed: {lockResult}");
            _lastRecordPos = recPos; // skip the unreadable region instead of retrying it forever
            return;
        }

        try
        {
            // Convert both parts to float and push to Whisper
            if (len1 > 0) CopyPcm16ToFloatAndFeed(p1, len1);
            if (len2 > 0) CopyPcm16ToFloatAndFeed(p2, len2);
        }
        finally
        {
            _recSound.unlock(p1, p2, len1, len2);
        }

        _lastRecordPos = recPos;
    }

    /// <summary>
    /// Starts feeding recorded audio to Whisper. The read cursor is moved to the current
    /// record position so audio captured while recording was deactivated is not fed.
    /// </summary>
    public void ActivateRecording()
    {
        if (!isRecordingActivated
            && _recSound.hasHandle()
            && _core.getRecordPosition(recordDriverId, out uint recPos) == RESULT.OK)
        {
            _lastRecordPos = recPos;
        }

        isRecordingActivated = true;
    }

    /// <summary>Stops feeding recorded audio to Whisper; FMOD keeps recording.</summary>
    public void DeactivateRecording()
    {
        isRecordingActivated = false;
    }

    /// <summary>
    /// Queries the record driver, creates the looping capture sound and starts recording.
    /// </summary>
    /// <returns><c>true</c> when FMOD is recording into the capture sound.</returns>
    private bool TryStartCapture()
    {
        // signature: getRecordDriverInfo(id, out name, nameLen, out guid, out systemrate, out speakermode, out speakermodechannels, out driverState)
        RESULT result = _core.getRecordDriverInfo(recordDriverId, out string name, DriverNameLength, out _, out _nativeRate, out _, out int smChannels, out _);
        if (result != RESULT.OK)
        {
            UnityEngine.Debug.LogError($"[FMOD→Whisper] Record driver #{recordDriverId} is not available: {result}");
            return false;
        }

        _nativeChannels = channels > 0 ? channels : smChannels;
        _sampleRate = (_nativeRate > 0) ? _nativeRate : desiredSampleRate;
        if (_sampleRate <= 0 || _nativeChannels <= 0)
        {
            UnityEngine.Debug.LogError($"[FMOD→Whisper] Invalid capture format: rate={_sampleRate} ch={_nativeChannels}");
            return false;
        }

        UnityEngine.Debug.Log($"[FMOD→Whisper] Using input device #{recordDriverId}: \"{name}\" rate={_nativeRate} ch={_nativeChannels}");

        // Build a user sound buffer that FMOD will fill (OPENUSER | LOOP_NORMAL).
        CREATESOUNDEXINFO ex = new CREATESOUNDEXINFO
        {
            cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO)),
            numchannels = _nativeChannels,
            defaultfrequency = _sampleRate,
            format = SOUND_FORMAT.PCM16,
            length = (uint)(_sampleRate * _nativeChannels) * BytesPerSample // seconds=1 (we loop)
        };

        result = _core.createSound("", MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESAMPLE, ref ex, out _recSound);
        if (result != RESULT.OK)
        {
            UnityEngine.Debug.LogError($"[FMOD→Whisper] createSound failed: {result}");
            _recSound.clearHandle();
            return false;
        }

        _recSound.getLength(out _soundPcmLength, TIMEUNIT.PCM);

        // Start FMOD recording into that sound (looping ring buffer).
        result = _core.recordStart(recordDriverId, _recSound, true);
        if (result != RESULT.OK)
        {
            UnityEngine.Debug.LogError($"[FMOD→Whisper] recordStart failed: {result}");
            _recSound.release();
            _recSound.clearHandle();
            return false;
        }

        UnityEngine.Debug.Log("[FMOD→Whisper] Recording started.");
        return true;
    }

    /// <summary>Plays the capture sound on the master channel group as a voice monitor.</summary>
    private void StartLoopback()
    {
        _core.getMasterChannelGroup(out _masterGroup);

        RESULT result = _core.playSound(_recSound, _masterGroup, false, out _playChannel);
        if (result != RESULT.OK)
        {
            UnityEngine.Debug.LogWarning($"[FMOD→Whisper] Loopback playback failed: {result}");
            _playChannel.clearHandle();
            return;
        }

        _playChannel.setMode(MODE._2D);
        _playChannel.setVolume(loopbackVolume);
        UnityEngine.Debug.Log("[FMOD→Whisper] Loopback playback started.");
    }

    /// <summary>
    /// Copies <paramref name="byteLen"/> bytes of PCM16 from <paramref name="src"/>, converts
    /// them to floats and adds them to the Whisper stream as one chunk.
    /// </summary>
    /// <param name="src">Pointer to locked PCM16 data inside the capture sound.</param>
    /// <param name="byteLen">Number of bytes readable at <paramref name="src"/>.</param>
    private void CopyPcm16ToFloatAndFeed(IntPtr src, uint byteLen)
    {
        int samples = (int)(byteLen / BytesPerSample);

        if (_pcmScratch == null || _pcmScratch.Length < samples)
        {
            _pcmScratch = new short[Mathf.NextPowerOfTwo(samples)];
        }

        Marshal.Copy(src, _pcmScratch, 0, samples);

        // The chunk needs its own array, so convert straight into it.
        float[] data = new float[samples];
        for (int i = 0; i < samples; i++)
        {
            // Dividing by 32768 maps the full short range into [-1, 1) without clipping.
            data[i] = _pcmScratch[i] / 32768f;
        }

        // Every chunk is marked as voiced; no voice detection is done in this class.
        var chunk = new AudioChunk
        {
            Data = data,
            Frequency = _sampleRate,
            Channels = _nativeChannels,
            IsVoiceDetected = true
        };

        _stream.AddToStream(chunk);
    }

    /// <summary>
    /// Stops the Whisper stream, loopback playback and FMOD recording, and releases the
    /// capture sound. Setup happens only in Start, so nothing here is recreated when the
    /// component is enabled again.
    /// </summary>
    private void OnDisable()
    {
        if (_streamStarted)
        {
            _stream.StopStream();
            _streamStarted = false;
        }

        if (_playChannel.hasHandle()) { _playChannel.stop(); _playChannel.clearHandle(); }
        if (_recSound.hasHandle()) { _core.recordStop(recordDriverId); _recSound.release(); _recSound.clearHandle(); }
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: a264cb5321d9f6741a2625b8c63c34a7
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
using UnityEngine;
|
using UnityEngine;
|
||||||
|
|
||||||
[RequireComponent(typeof(AudioSource))]
|
[RequireComponent(typeof(AudioSource))]
|
||||||
public class MicrophoneTesting : MonoBehaviour
|
public class MicLoopback : MonoBehaviour
|
||||||
{
|
{
|
||||||
[Header("Mic settings")]
|
[Header("Mic settings")]
|
||||||
[Tooltip("Leave empty for default device")]
|
[Tooltip("Leave empty for default device")]
|
||||||
Binary file not shown.
Reference in New Issue
Block a user