forked from cgvr/DeltaVR
transcribe voice to text with Whisper base model
This commit is contained in:
8
Assets/StreamingAssets/Whisper.meta
Normal file
8
Assets/StreamingAssets/Whisper.meta
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: 23fe3883e9cc804429bc54fb860d18f1
|
||||||
|
folderAsset: yes
|
||||||
|
DefaultImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
BIN
Assets/StreamingAssets/Whisper/ggml-base.bin
Normal file
BIN
Assets/StreamingAssets/Whisper/ggml-base.bin
Normal file
Binary file not shown.
7
Assets/StreamingAssets/Whisper/ggml-base.bin.meta
Normal file
7
Assets/StreamingAssets/Whisper/ggml-base.bin.meta
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: f6c028f06eda5904eae3f7a7418b8416
|
||||||
|
DefaultImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
Binary file not shown.
@@ -0,0 +1,92 @@
|
|||||||
|
using System.Diagnostics;
|
||||||
|
using TMPro;
|
||||||
|
using UnityEngine;
|
||||||
|
using Whisper;
|
||||||
|
using Whisper.Utils;
|
||||||
|
|
||||||
|
/// <summary>
/// Trigger volume used to test voice transcription: while a <c>KbmController</c>
/// is inside the trigger the microphone records, and when it leaves, the recording
/// is transcribed with Whisper and the result is written to <see cref="outputText"/>.
/// </summary>
public class VoiceTranscriptionTestBox : MonoBehaviour
{
    // Materials applied to this object's MeshRenderer while recording / idle.
    public Material activeMaterial;
    public Material inactiveMaterial;

    private MeshRenderer meshRenderer;

    public WhisperManager whisper;
    public MicrophoneRecord microphoneRecord;
    public TextMeshProUGUI outputText;

    // Accumulates the segment texts of the transcription currently in progress.
    private string _buffer;

    /// <summary>
    /// Caches the renderer and subscribes to the Whisper and microphone events.
    /// </summary>
    private void Awake()
    {
        // Cached here rather than in Start(): Unity still delivers trigger messages
        // to a disabled component, whose Start() never runs, which would leave
        // meshRenderer null when OnTriggerEnter fires.
        meshRenderer = GetComponent<MeshRenderer>();

        whisper.OnNewSegment += OnNewSegment;
        whisper.OnProgress += OnProgressHandler;

        microphoneRecord.OnRecordStop += OnRecordStop;
    }

    /// <summary>
    /// Removes the handlers added in <see cref="Awake"/> so the event sources do not
    /// keep this object alive or call into it after it has been destroyed.
    /// </summary>
    private void OnDestroy()
    {
        if (whisper != null)
        {
            whisper.OnNewSegment -= OnNewSegment;
            whisper.OnProgress -= OnProgressHandler;
        }

        if (microphoneRecord != null)
        {
            microphoneRecord.OnRecordStop -= OnRecordStop;
        }
    }

    /// <summary>
    /// Starts recording and switches to the active material when an object carrying
    /// a <c>KbmController</c> enters the trigger.
    /// </summary>
    /// <param name="other">Collider that entered the trigger.</param>
    private void OnTriggerEnter(Collider other)
    {
        KbmController controller = other.GetComponent<KbmController>();
        if (controller == null)
        {
            return;
        }

        meshRenderer.material = activeMaterial;
        microphoneRecord.StartRecord();
    }

    /// <summary>
    /// Stops recording and switches to the inactive material when an object carrying
    /// a <c>KbmController</c> leaves the trigger.
    /// </summary>
    /// <param name="other">Collider that left the trigger.</param>
    private void OnTriggerExit(Collider other)
    {
        KbmController controller = other.GetComponent<KbmController>();
        if (controller == null)
        {
            return;
        }

        meshRenderer.material = inactiveMaterial;
        microphoneRecord.StopRecord();
    }

    /// <summary>
    /// Transcribes the finished recording, logs how long it took, and shows the
    /// resulting text on <see cref="outputText"/>.
    /// </summary>
    /// <param name="recordedAudio">Audio captured by the microphone recorder.</param>
    private async void OnRecordStop(AudioChunk recordedAudio)
    {
        _buffer = "";

        var sw = Stopwatch.StartNew();

        var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
        if (res == null)
        {
            return;
        }

        // Rate is recording length divided by processing time in seconds
        // (presumably Length is in seconds, giving a real-time factor — confirm).
        var time = sw.ElapsedMilliseconds;
        var rate = recordedAudio.Length / (time * 0.001f);
        UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");

        // The await may complete after this object or the label has been destroyed
        // (e.g. on scene unload); skip the UI write in that case.
        if (this == null || outputText == null)
        {
            return;
        }

        outputText.text = res.Result;
    }

    /// <summary>Logs the progress value reported by Whisper.</summary>
    /// <param name="progress">Progress value, logged as a percentage.</param>
    private void OnProgressHandler(int progress)
    {
        UnityEngine.Debug.Log($"Progress: {progress}%");
    }

    /// <summary>
    /// Appends a newly reported segment to the running buffer and logs the buffer.
    /// </summary>
    /// <param name="segment">Segment reported by Whisper.</param>
    private void OnNewSegment(WhisperSegment segment)
    {
        _buffer += segment.Text;
        UnityEngine.Debug.Log(_buffer + "...");
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: d28857190597d9a46a8ddf3cf902cc81
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
57
Assets/_PROJECT/Scripts/Audio/VoiceTranscription.cs
Normal file
57
Assets/_PROJECT/Scripts/Audio/VoiceTranscription.cs
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
using System.Diagnostics;
|
||||||
|
using UnityEngine;
|
||||||
|
using Whisper;
|
||||||
|
using Whisper.Utils;
|
||||||
|
|
||||||
|
/// <summary>
/// Transcribes every finished microphone recording with Whisper and writes the
/// result, along with timing information, to the Unity log.
/// </summary>
public class VoiceTranscription : MonoBehaviour
{
    public WhisperManager whisper;
    public MicrophoneRecord microphoneRecord;

    // Accumulates the segment texts of the transcription currently in progress.
    private string _buffer;

    /// <summary>
    /// Subscribes to the Whisper segment event and the microphone stop event.
    /// </summary>
    private void Awake()
    {
        whisper.OnNewSegment += OnNewSegment;

        microphoneRecord.OnRecordStop += OnRecordStop;
    }

    /// <summary>
    /// Removes the handlers added in <see cref="Awake"/> so the event sources do not
    /// keep this object alive or call into it after it has been destroyed.
    /// </summary>
    private void OnDestroy()
    {
        if (whisper != null)
        {
            whisper.OnNewSegment -= OnNewSegment;
        }

        if (microphoneRecord != null)
        {
            microphoneRecord.OnRecordStop -= OnRecordStop;
        }
    }

    /// <summary>
    /// Transcribes the finished recording and logs the elapsed time, the processing
    /// rate, and the transcribed text.
    /// </summary>
    /// <param name="recordedAudio">Audio captured by the microphone recorder.</param>
    private async void OnRecordStop(AudioChunk recordedAudio)
    {
        _buffer = "";

        var sw = Stopwatch.StartNew();

        var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
        if (res == null)
        {
            return;
        }

        // Rate is recording length divided by processing time in seconds
        // (presumably Length is in seconds, giving a real-time factor — confirm).
        var time = sw.ElapsedMilliseconds;
        var rate = recordedAudio.Length / (time * 0.001f);
        UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");

        UnityEngine.Debug.Log(res.Result);
    }

    /// <summary>
    /// Appends a newly reported segment to the running buffer and logs the buffer.
    /// </summary>
    /// <param name="segment">Segment reported by Whisper.</param>
    private void OnNewSegment(WhisperSegment segment)
    {
        _buffer += segment.Text;
        UnityEngine.Debug.Log(_buffer + "...");
    }
}
|
||||||
11
Assets/_PROJECT/Scripts/Audio/VoiceTranscription.cs.meta
Normal file
11
Assets/_PROJECT/Scripts/Audio/VoiceTranscription.cs.meta
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
fileFormatVersion: 2
|
||||||
|
guid: af83274dbfe8bab4599dda694e2545c2
|
||||||
|
MonoImporter:
|
||||||
|
externalObjects: {}
|
||||||
|
serializedVersion: 2
|
||||||
|
defaultReferences: []
|
||||||
|
executionOrder: 0
|
||||||
|
icon: {instanceID: 0}
|
||||||
|
userData:
|
||||||
|
assetBundleName:
|
||||||
|
assetBundleVariant:
|
||||||
Reference in New Issue
Block a user