Closed4
Whisper APIをUnityで動かす
概要
- Whisper API(OpenAIの音声認識API)を触ってみよう!
参考
- https://qiita.com/mfuji3326/items/2553e6d44616ca315844
- と、同じ方の他のQiita記事
- https://synamon.hatenablog.com/entry/openai_api_unity
公式リファレンス
- https://platform.openai.com/docs/api-reference/audio
試行錯誤メモ
- あまりUnityでのWebRequestの本質を理解していないのでつらい
headerの"Content-Type"について
- headerに"Content-Type"を明示的に指定する書き方(下記でコメントアウトしている方)だと400エラーになった。"Authorization"だけを指定する書き方なら成功する
// NG: this variant (explicit "Content-Type" header) resulted in 400 Bad Request.
// NOTE(review): presumably the hand-written value lacks the multipart boundary
// parameter that UnityWebRequest.Post generates for the form - confirm against the Unity docs.
// var headers = new Dictionary<string, string>
// {
// { "Authorization", "Bearer " + _apiKey },
// { "Content-Type", "multipart/form-data" }
// };
// OK: send only the Authorization header.
var headers = new Dictionary<string, string>
{
{ "Authorization", "Bearer " + _apiKey }
};
- 途中で以下のような箇所があるからか?
// NOTE(review): the 4th argument looks like the content type of this single file part,
// not the Content-Type header of the whole request - confirm against the MultipartFormFileSection docs.
formData.Add(new MultipartFormFileSection("file", fileBytes, Path.GetFileName(FilePath), "multipart/form-data"));
- エラー内容
UnityWebRequestException: HTTP/1.1 400 Bad Request
{
"error": {
"message": "Could not parse multipart form",
"type": "invalid_request_error",
"param": null,
"code": null
}
}
WebRequestの中身の作成について
- WWWFormを使う方法と、IMultipartFormSectionのリスト(MultipartFormDataSection / MultipartFormFileSection)を使う方法の、どちらでも可能
// Read the whole audio file into memory; it is sent as the "file" part of the form.
byte[] fileBytes = await File.ReadAllBytesAsync(FilePath, cancellationToken: token);
// Option A (WWWForm): handled correctly, same as option B below.
// WWWForm form = new();
// form.AddField("model", ModelName);
// form.AddBinaryData("file", fileBytes, Path.GetFileName(FilePath), "multipart/form-data");
// Option B (list of IMultipartFormSection): handled correctly, same as option A above.
List<IMultipartFormSection> form = new();
form.Add(new MultipartFormDataSection("model", ModelName));
form.Add(new MultipartFormFileSection("file", fileBytes, Path.GetFileName(FilePath), "multipart/form-data"));
実装はこちら
- WhisperAPIConnection
using System;
using Cysharp.Threading.Tasks;
using Models.WhisperAPI;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using UnityEngine;
using UnityEngine.Networking;
/// <summary>
/// Sends a fixed audio file to the OpenAI Whisper transcription endpoint
/// and returns the deserialized response.
/// </summary>
public class WhisperAPIConnection
{
    private const string ApiUrl = "https://api.openai.com/v1/audio/transcriptions";
    private const string FilePath = "Assets/Audio/WhatToBuyForDinner.m4a";
    private const string ModelName = "whisper-1";

    private readonly string _apiKey;

    /// <summary>Creates a connection that authenticates with the given API key.</summary>
    /// <param name="apiKey">Key sent as the bearer token of every request.</param>
    public WhisperAPIConnection(string apiKey)
    {
        _apiKey = apiKey;
    }

    /// <summary>
    /// Uploads the audio file at <c>FilePath</c> as a multipart form and returns the transcription.
    /// </summary>
    /// <param name="token">Cancels both the file read and the web request.</param>
    /// <returns>The response body deserialized into <see cref="WhisperAPIResponseModel"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The request finished with a non-success result, or the response body could not be deserialized.
    /// </exception>
    public async UniTask<WhisperAPIResponseModel> RequestAsync(CancellationToken token)
    {
        byte[] fileBytes = await File.ReadAllBytesAsync(FilePath, cancellationToken: token);

        // A WWWForm (AddField / AddBinaryData) carrying the same two parts works equally well.
        // NOTE(review): "multipart/form-data" is passed as the content type of the file part;
        // an audio MIME type would be more accurate, but the value is kept because the API accepts it.
        List<IMultipartFormSection> form = new()
        {
            new MultipartFormDataSection("model", ModelName),
            new MultipartFormFileSection("file", fileBytes, Path.GetFileName(FilePath), "multipart/form-data")
        };

        using UnityWebRequest request = UnityWebRequest.Post(ApiUrl, form);

        // Do NOT set "Content-Type" here: doing so made the API answer
        // 400 "Could not parse multipart form". Only the bearer token is added.
        request.SetRequestHeader("Authorization", "Bearer " + _apiKey);

        // The awaiter may itself throw (e.g. UnityWebRequestException) on HTTP errors;
        // the check below is a safety net covering every non-success result.
        await request.SendWebRequest().ToUniTask(cancellationToken: token);

        if (request.result != UnityWebRequest.Result.Success)
        {
            string failure = $"Whisper API request failed ({request.result}): {request.error}\n{request.downloadHandler?.text}";
            Debug.LogError(failure);
            throw new InvalidOperationException(failure);
        }

        string responseString = request.downloadHandler.text;
        WhisperAPIResponseModel responseObject = JsonUtility.FromJson<WhisperAPIResponseModel>(responseString);
        if (responseObject == null)
        {
            // Guard so callers get a descriptive error instead of a NullReferenceException.
            throw new InvalidOperationException("Whisper API returned a body that could not be deserialized: " + responseString);
        }

        Debug.Log("WhisperAPI: " + responseObject.text);
        return responseObject;
    }
}
- WhisperAPIResponseModel
using System;
namespace Models.WhisperAPI
{
    /// <summary>
    /// Plain data holder for the Whisper API response.
    /// Kept as a serializable class with a public field so that
    /// <c>JsonUtility.FromJson</c> can fill it in.
    /// </summary>
    [System.Serializable]
    public class WhisperAPIResponseModel
    {
        /// <summary>Value of the response's <c>text</c> field.</summary>
        public string text;
    }
}
- APIを呼び出すコンポーネント(UnityのScene上のGameObjectにアタッチして使う)。なお、結果はUnity上のUIテキストに反映している
using UnityEngine;
using System.Threading;
using Models.WhisperAPI;
using UnityEngine.UI;
/// <summary>
/// Scene component that calls the Whisper API once on start-up
/// and shows the transcription in a UI text element.
/// </summary>
public class WhisperRequestCaller : MonoBehaviour
{
    [SerializeField] private Text displayText;

    // Cancelled in OnDestroy so an in-flight request does not outlive this component.
    private readonly CancellationTokenSource _cts = new();
    private WhisperAPIConnection _whisperConnection;

    // Unity message; must stay "async void", so every exception is handled inside.
    async void Start()
    {
        _whisperConnection = new(Constants.API_KEY);

        try
        {
            WhisperAPIResponseModel responseModel = await _whisperConnection.RequestAsync(_cts.Token);
            displayText.text = responseModel.text;
        }
        catch (System.OperationCanceledException)
        {
            // The component was destroyed before the response arrived; nothing to display.
        }
        catch (System.Exception e)
        {
            // Without this, a failure inside an async void method is easy to miss.
            Debug.LogException(e, this);
        }
    }

    // Stops the pending request and releases the token source when the component goes away.
    private void OnDestroy()
    {
        _cts.Cancel();
        _cts.Dispose();
    }
}
このスクラップは2023/03/12にクローズされました