1
0
forked from cgvr/DeltaVR

transcribe voice to text with Whisper base model

This commit is contained in:
2025-12-15 15:30:15 +02:00
parent 0c026078d0
commit 9c7536d1d4
8 changed files with 188 additions and 2 deletions

View File

@@ -0,0 +1,92 @@
using System.Diagnostics;
using TMPro;
using UnityEngine;
using Whisper;
using Whisper.Utils;
/// <summary>
/// Trigger volume used to test voice transcription: recording starts when a
/// <c>KbmController</c> enters the trigger and stops when it leaves; the text
/// returned by Whisper for the recorded audio is written to <see cref="outputText"/>.
/// </summary>
public class VoiceTranscriptionTestBox : MonoBehaviour
{
    /// <summary>Material applied while a controller is inside the trigger.</summary>
    public Material activeMaterial;

    /// <summary>Material applied once the controller has left the trigger.</summary>
    public Material inactiveMaterial;

    private MeshRenderer meshRenderer;

    /// <summary>Whisper manager used to transcribe the recorded audio.</summary>
    public WhisperManager whisper;

    /// <summary>Microphone recorder that is started/stopped by the trigger.</summary>
    public MicrophoneRecord microphoneRecord;

    /// <summary>Label that receives the final transcription text.</summary>
    public TextMeshProUGUI outputText;

    // Accumulates the segment texts reported while a transcription is running.
    private string _buffer;

    private void Awake()
    {
        // Cached in Awake (not Start) so the renderer is ready before any trigger callback uses it.
        meshRenderer = GetComponent<MeshRenderer>();

        whisper.OnNewSegment += OnNewSegment;
        whisper.OnProgress += OnProgressHandler;
        microphoneRecord.OnRecordStop += OnRecordStop;
    }

    private void OnDestroy()
    {
        // Mirror the subscriptions made in Awake; otherwise the publishers keep this
        // destroyed component alive and continue to invoke its handlers.
        if (whisper != null)
        {
            whisper.OnNewSegment -= OnNewSegment;
            whisper.OnProgress -= OnProgressHandler;
        }

        if (microphoneRecord != null)
        {
            microphoneRecord.OnRecordStop -= OnRecordStop;
        }
    }

    void OnTriggerEnter(Collider other)
    {
        // Only objects carrying a KbmController activate the box.
        if (other.GetComponent<KbmController>() == null)
        {
            return;
        }

        meshRenderer.material = activeMaterial;
        microphoneRecord.StartRecord();
    }

    private void OnTriggerExit(Collider other)
    {
        if (other.GetComponent<KbmController>() == null)
        {
            return;
        }

        meshRenderer.material = inactiveMaterial;
        microphoneRecord.StopRecord();
    }

    /// <summary>
    /// Transcribes the audio chunk delivered when recording stops, logs the elapsed
    /// time and the audio-length-to-processing-time ratio, and shows the resulting text.
    /// </summary>
    /// <param name="recordedAudio">Audio chunk passed by <c>MicrophoneRecord.OnRecordStop</c>.</param>
    private async void OnRecordStop(AudioChunk recordedAudio)
    {
        _buffer = "";
        var sw = Stopwatch.StartNew();

        // async void: no caller can await this method, so failures are handled here.
        try
        {
            var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
            sw.Stop();

            // The component may have been destroyed while the transcription was running.
            if (res == null || this == null)
            {
                return;
            }

            var time = sw.ElapsedMilliseconds;
            var rate = recordedAudio.Length / (time * 0.001f);
            UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");

            if (outputText != null)
            {
                outputText.text = res.Result;
            }
        }
        catch (System.Exception e)
        {
            UnityEngine.Debug.LogException(e);
        }
    }

    /// <summary>Logs the progress value reported by Whisper.</summary>
    private void OnProgressHandler(int progress)
    {
        UnityEngine.Debug.Log($"Progress: {progress}%");
    }

    /// <summary>Appends the new segment's text to the running buffer and logs the buffer.</summary>
    private void OnNewSegment(WhisperSegment segment)
    {
        _buffer += segment.Text;
        UnityEngine.Debug.Log(_buffer + "...");
    }
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: d28857190597d9a46a8ddf3cf902cc81
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

View File

@@ -0,0 +1,57 @@
using System.Diagnostics;
using UnityEngine;
using Whisper;
using Whisper.Utils;
/// <summary>
/// Transcribes each audio chunk delivered by <see cref="microphoneRecord"/> when
/// recording stops, using <see cref="whisper"/>, and logs the resulting text.
/// </summary>
public class VoiceTranscription : MonoBehaviour
{
    /// <summary>Whisper manager used to transcribe the recorded audio.</summary>
    public WhisperManager whisper;

    /// <summary>Microphone recorder whose stop event triggers a transcription.</summary>
    public MicrophoneRecord microphoneRecord;

    // Accumulates the segment texts reported while a transcription is running.
    private string _buffer;

    private void Awake()
    {
        whisper.OnNewSegment += OnNewSegment;
        microphoneRecord.OnRecordStop += OnRecordStop;
    }

    private void OnDestroy()
    {
        // Mirror the subscriptions made in Awake; otherwise the publishers keep this
        // destroyed component alive and continue to invoke its handlers.
        if (whisper != null)
        {
            whisper.OnNewSegment -= OnNewSegment;
        }

        if (microphoneRecord != null)
        {
            microphoneRecord.OnRecordStop -= OnRecordStop;
        }
    }

    /// <summary>
    /// Transcribes the audio chunk delivered when recording stops and logs the elapsed
    /// time, the audio-length-to-processing-time ratio, and the transcribed text.
    /// </summary>
    /// <param name="recordedAudio">Audio chunk passed by <c>MicrophoneRecord.OnRecordStop</c>.</param>
    private async void OnRecordStop(AudioChunk recordedAudio)
    {
        _buffer = "";
        var sw = Stopwatch.StartNew();

        // async void: no caller can await this method, so failures are handled here.
        try
        {
            var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
            sw.Stop();

            if (res == null)
            {
                return;
            }

            var time = sw.ElapsedMilliseconds;
            var rate = recordedAudio.Length / (time * 0.001f);
            UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");
            UnityEngine.Debug.Log(res.Result);
        }
        catch (System.Exception e)
        {
            UnityEngine.Debug.LogException(e);
        }
    }

    /// <summary>Appends the new segment's text to the running buffer and logs the buffer.</summary>
    private void OnNewSegment(WhisperSegment segment)
    {
        _buffer += segment.Text;
        UnityEngine.Debug.Log(_buffer + "...");
    }
}

View File

@@ -0,0 +1,11 @@
fileFormatVersion: 2
guid: af83274dbfe8bab4599dda694e2545c2
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant: