forked from cgvr/DeltaVR
use Whisper in streaming mode
This commit is contained in:
Binary file not shown.
@@ -64,18 +64,6 @@ public class InvokeAiClient : MonoBehaviour
|
||||
return JObject.Parse(json);
|
||||
}
|
||||
|
||||
/// <summary>
/// Requests the URL record of a stored image and returns its <c>image_url</c> entry.
/// </summary>
/// <param name="imageName">Image name; it is URI-escaped before being placed in the request path.</param>
/// <returns>The string stored under <c>image_url</c> in the JSON response body.</returns>
private async Task<string> GetImageUrl(string imageName)
{
    string escapedName = Uri.EscapeDataString(imageName);
    string endpoint = "/api/v1/images/i/" + escapedName + "/urls";
    UnityEngine.Debug.Log($"Get image URL: {endpoint}");

    using (HttpResponseMessage response = await httpClient.GetAsync(endpoint).ConfigureAwait(false))
    {
        // Any non-success status code is surfaced as an exception here.
        response.EnsureSuccessStatusCode();

        string payload = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        return JObject.Parse(payload).Value<string>("image_url");
    }
}
|
||||
|
||||
|
||||
private async Task<JObject> WaitForCompletion(string batchId, int timeoutSeconds = 300)
|
||||
{
|
||||
@@ -449,6 +437,17 @@ public class InvokeAiClient : MonoBehaviour
|
||||
return graph;
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up the URL of an image by name via the <c>/api/v1/images/i/{name}/urls</c> endpoint.
/// </summary>
/// <param name="imageName">Image name; escaped with <see cref="Uri.EscapeDataString(string)"/> before use.</param>
/// <returns>The value of the <c>image_url</c> property of the JSON response.</returns>
private async Task<string> GetImageUrl(string imageName)
{
    var urlsPath = $"/api/v1/images/i/{Uri.EscapeDataString(imageName)}/urls";

    using (var response = await httpClient.GetAsync(urlsPath).ConfigureAwait(false))
    {
        // Fail fast on a non-success HTTP status instead of parsing an error body.
        response.EnsureSuccessStatusCode();

        var body = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        JObject parsed = JObject.Parse(body);
        return parsed.Value<string>("image_url");
    }
}
|
||||
|
||||
|
||||
private async Task<string> GenerateImageUrl(JObject arguments)
|
||||
{
|
||||
@@ -510,9 +509,7 @@ public class InvokeAiClient : MonoBehaviour
|
||||
if (string.IsNullOrEmpty(imageName))
|
||||
continue;
|
||||
|
||||
// Resolve relative URL for the image (API-dependent)
|
||||
string imageRelativeUrl = await GetImageUrl(imageName);
|
||||
return imageRelativeUrl;
|
||||
return await GetImageUrl(imageName);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -530,8 +527,9 @@ public class InvokeAiClient : MonoBehaviour
|
||||
["model_key"] = MODEL_KEY,
|
||||
};
|
||||
|
||||
UnityEngine.Debug.Log("Starting image generation...");
|
||||
string imageUrl = await GenerateImageUrl(args);
|
||||
|
||||
UnityEngine.Debug.Log("Image URL ready: " + imageUrl);
|
||||
|
||||
var req = new HttpRequestMessage(HttpMethod.Get, imageUrl);
|
||||
using var resp = await httpClient.SendAsync(req, HttpCompletionOption.ResponseHeadersRead);
|
||||
|
||||
@@ -94,6 +94,7 @@ public class TrellisClient : MonoBehaviour
|
||||
{
|
||||
downloadResponse.EnsureSuccessStatusCode();
|
||||
var bytes = await downloadResponse.Content.ReadAsByteArrayAsync();
|
||||
Debug.Log($"Downloaded {bytes.Length} bytes");
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
using System.Diagnostics;
|
||||
using TMPro;
|
||||
using Unity.XR.CoreUtils;
|
||||
using UnityEngine;
|
||||
@@ -9,17 +8,14 @@ public class VoiceTranscriptionBox : MonoBehaviour
|
||||
{
|
||||
public Material activeMaterial;
|
||||
public Material inactiveMaterial;
|
||||
public Material loadingMaterial;
|
||||
|
||||
private MeshRenderer meshRenderer;
|
||||
private bool isLoading;
|
||||
|
||||
|
||||
public WhisperManager whisper;
|
||||
public MicrophoneRecord microphoneRecord;
|
||||
public TextMeshProUGUI outputText;
|
||||
|
||||
private string _buffer;
|
||||
private WhisperStream stream;
|
||||
|
||||
private string lastTextOutput;
|
||||
public string LastTextOutput
|
||||
@@ -30,19 +26,16 @@ public class VoiceTranscriptionBox : MonoBehaviour
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Clears the loading flag and subscribes this component to the Whisper
/// new-segment event and the microphone record-stop event.
/// </summary>
private void Awake()
{
    this.isLoading = false;

    // Segment text is accumulated by OnNewSegment.
    this.whisper.OnNewSegment += this.OnNewSegment;

    // A finished recording is handed to OnRecordStop.
    this.microphoneRecord.OnRecordStop += this.OnRecordStop;
}
|
||||
|
||||
/// <summary>
/// Start is called before the first frame update. Caches the mesh renderer,
/// starts microphone capture, then creates the Whisper stream and routes its
/// result updates to <see cref="OnWhisperResult"/>.
/// </summary>
/// <remarks>
/// Only the <c>async</c> header is kept: the body awaits stream creation, so a
/// plain <c>void Start()</c> header cannot own it.
/// </remarks>
async void Start()
{
    meshRenderer = GetComponent<MeshRenderer>();

    // This causes about 1 sec long freeze, has to be done once at the start of the game
    microphoneRecord.StartRecord();

    stream = await whisper.CreateStream(microphoneRecord);
    if (stream == null)
    {
        // NOTE(review): presumably CreateStream yields null when the model is not ready — confirm against WhisperManager.
        UnityEngine.Debug.LogError("VoiceTranscriptionBox: Whisper stream could not be created.");
        return;
    }

    stream.OnResultUpdated += OnWhisperResult;
}
|
||||
|
||||
// Update is called once per frame
|
||||
@@ -53,17 +46,12 @@ public class VoiceTranscriptionBox : MonoBehaviour
|
||||
|
||||
/// <summary>
/// Switches the box to its active material and starts recording and streaming
/// when a player object enters the trigger.
/// </summary>
/// <param name="other">
/// Collider that entered; it is acted on only when it carries a
/// <c>KbmController</c> or an <c>XROrigin</c> component.
/// </param>
void OnTriggerEnter(Collider other)
{
    // Ignore new entries while a previous transcription is still pending.
    if (isLoading)
    {
        return;
    }

    KbmController controller = other.GetComponent<KbmController>();
    XROrigin playerOrigin = other.GetComponent<XROrigin>();
    if (controller == null && playerOrigin == null)
    {
        return;
    }

    meshRenderer.material = activeMaterial;
    microphoneRecord.StartRecord();

    // The stream is assigned only after an await in Start, so a trigger that
    // fires before that completes would otherwise dereference null.
    if (stream != null)
    {
        stream.StartStream();
    }
}
|
||||
|
||||
@@ -73,40 +61,20 @@ public class VoiceTranscriptionBox : MonoBehaviour
|
||||
XROrigin playerOrigin = other.GetComponent<XROrigin>();
|
||||
if (controller != null | playerOrigin != null)
|
||||
{
|
||||
microphoneRecord.StopRecord();
|
||||
meshRenderer.material = loadingMaterial;
|
||||
isLoading = true;
|
||||
stream.StopStream();
|
||||
meshRenderer.material = inactiveMaterial;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Transcribes a finished recording with a single Whisper call, logs how long
/// it took, and publishes the text to <c>lastTextOutput</c> and the output label.
/// </summary>
/// <param name="recordedAudio">Recording whose data, frequency and channel count are passed to Whisper.</param>
private async void OnRecordStop(AudioChunk recordedAudio)
{
    _buffer = "";

    // Fully qualified so the call does not depend on a using directive for System.Diagnostics.
    var sw = System.Diagnostics.Stopwatch.StartNew();

    var res = await whisper.GetTextAsync(recordedAudio.Data, recordedAudio.Frequency, recordedAudio.Channels);
    if (res != null)
    {
        var time = sw.ElapsedMilliseconds;
        var rate = recordedAudio.Length / (time * 0.001f);
        UnityEngine.Debug.Log($"Time: {time} ms\nRate: {rate:F1}x");

        var text = res.Result;
        lastTextOutput = text;
        outputText.text = text;
    }

    // Always leave the loading state. Returning early on a null result would
    // keep isLoading set, and OnTriggerEnter would then ignore every later entry.
    meshRenderer.material = inactiveMaterial;
    isLoading = false;
}

/// <summary>
/// Receives an updated streaming transcription and publishes it to
/// <c>lastTextOutput</c> and the output label.
/// </summary>
/// <param name="result">Text delivered by the stream's result-updated event.</param>
private void OnWhisperResult(string result)
{
    lastTextOutput = result;
    outputText.text = result;
}
|
||||
|
||||
/// <summary>
/// Appends a newly decoded segment to the running buffer and logs the buffer so far.
/// </summary>
/// <param name="segment">Segment whose <c>Text</c> is appended.</param>
private void OnNewSegment(WhisperSegment segment)
{
    _buffer += segment.Text;
    UnityEngine.Debug.Log(_buffer + "...");
}

/// <summary>
/// Stops microphone capture and detaches this component's event handlers when
/// it is destroyed.
/// </summary>
private void OnDestroy()
{
    if (microphoneRecord != null)
    {
        microphoneRecord.StopRecord();
        microphoneRecord.OnRecordStop -= OnRecordStop;
    }

    // Detach the remaining handlers so the publishers no longer hold a
    // reference to this component. Removing a handler that was never added is a no-op.
    if (whisper != null)
    {
        whisper.OnNewSegment -= OnNewSegment;
    }

    if (stream != null)
    {
        stream.OnResultUpdated -= OnWhisperResult;
    }

    // NOTE(review): destroying the GameObject from inside OnDestroy looks redundant
    // unless only this component is being removed — kept as in the original; confirm intent.
    Destroy(gameObject);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user