1
0
forked from cgvr/DeltaVR

use Whisper in streaming mode

This commit is contained in:
2025-12-31 14:13:25 +02:00
parent 7bc58a48d0
commit 0c8c55f293
5 changed files with 84 additions and 111 deletions

View File

@@ -1,4 +1,3 @@
using System.Diagnostics;
using TMPro;
using Unity.XR.CoreUtils;
using UnityEngine;
@@ -9,17 +8,14 @@ public class VoiceTranscriptionBox : MonoBehaviour
{
public Material activeMaterial;
public Material inactiveMaterial;
public Material loadingMaterial;
private MeshRenderer meshRenderer;
private bool isLoading;
public WhisperManager whisper;
public MicrophoneRecord microphoneRecord;
public TextMeshProUGUI outputText;
private string _buffer;
private WhisperStream stream;
private string lastTextOutput;
public string LastTextOutput
@@ -30,19 +26,16 @@ public class VoiceTranscriptionBox : MonoBehaviour
}
}
private void Awake()
{
isLoading = false;
whisper.OnNewSegment += OnNewSegment;
microphoneRecord.OnRecordStop += OnRecordStop;
}
// Start is called before the first frame update
void Start()
async void Start()
{
meshRenderer = GetComponent<MeshRenderer>();
// This causes a freeze of about 1 second, so it has to be done once at the start of the game
microphoneRecord.StartRecord();
stream = await whisper.CreateStream(microphoneRecord);
stream.OnResultUpdated += OnWhisperResult;
}
// Update is called once per frame
@@ -53,17 +46,12 @@ public class VoiceTranscriptionBox : MonoBehaviour
void OnTriggerEnter(Collider other)
{
if (isLoading)
{
return;
}
KbmController controller = other.GetComponent<KbmController>();
XROrigin playerOrigin = other.GetComponent<XROrigin>();
if (controller != null || playerOrigin != null)
{
meshRenderer.material = activeMaterial;
microphoneRecord.StartRecord();
stream.StartStream();
}
}
@@ -73,40 +61,20 @@ public class VoiceTranscriptionBox : MonoBehaviour
XROrigin playerOrigin = other.GetComponent<XROrigin>();
if (controller != null | playerOrigin != null)
{
microphoneRecord.StopRecord();
meshRenderer.material = loadingMaterial;
isLoading = true;
stream.StopStream();
meshRenderer.material = inactiveMaterial;
}
}
private async void OnRecordStop(AudioChunk recordedAudio)
private void OnWhisperResult(string result)
{
_buffer = "";
var sw = new Stopwatch();
sw.Start();
var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
if (res == null)
return;
var time = sw.ElapsedMilliseconds;
var rate = recordedAudio.Length / (time * 0.001f);
UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");
var text = res.Result;
lastTextOutput = text;
outputText.text = text;
meshRenderer.material = inactiveMaterial;
isLoading = false;
lastTextOutput = result;
outputText.text = result;
}
private void OnNewSegment(WhisperSegment segment)
private void OnDestroy()
{
_buffer += segment.Text;
UnityEngine.Debug.Log(_buffer + "...");
microphoneRecord.StopRecord();
Destroy(gameObject);
}
}