👏
Flask を使い、Apple M1 上で MediaRecorder により録音した音声を Whisper API に渡そうとした際に、非常にハマった。
Open Interpreter に実装を頼んだが、なかなか解決しなかった。音声入力はデバッグが難しい。Selenium で工夫すればいけるかもしれない。
アップロードされたファイルを受け取ったら、保存する前に seek(0) を実行して、ストリームの読み取り位置を先頭に戻す:
recorded_file.stream.seek(0)
io モジュール(io.BytesIO など)を使えば、いちいちローカルにファイルを保存しなくても良いはずだが、それは後で検証する。
from flask import Flask, jsonify, render_template
from flask import request
import base64
import requests
import os
import openai
# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Read the OpenAI API key from the environment instead of hard-coding it.
openai.api_key = os.environ.get("OPENAI_API_KEY")
@app.route('/record_and_convert', methods=['POST'])
def record_and_convert():
    """Transcribe an uploaded audio recording with the "whisper-1" model.

    Reads the uploaded file from the ``audio`` field of the POST request,
    saves it to ``file.webm`` in the working directory, and sends that file
    to ``openai.Audio.transcribe``.

    Returns:
        A JSON response of the form ``{"speech": <transcribed text>}``.
    """
    recorded_file = request.files['audio']
    # Rewind the upload stream so that save() writes the recording starting
    # from its first byte.
    recorded_file.stream.seek(0)
    recorded_file.save('file.webm')
    # NOTE(review): the fixed file name is shared by every request, so
    # concurrent uploads would overwrite each other -- confirm whether this
    # endpoint is ever called concurrently.
    # The context manager guarantees the handle is closed even if the
    # transcription call raises.
    with open("file.webm", "rb") as audio_file:
        print(audio_file)
        speech = openai.Audio.transcribe("whisper-1", audio_file)
    print(speech)
    return jsonify({'speech': speech.text})
# Start the Flask server only when this file is run directly as a script.
if __name__ == "__main__":
    # Bind to 0.0.0.0 (all interfaces) with debug mode switched off.
    app.run(debug=False, host="0.0.0.0")
<!-- Starts a microphone recording (click handler is attached in the script below). -->
<button class="btn waves-effect waves-light" id="record-button">Start Recording</button>
<!-- Initially disabled; the script enables it once a recording has started. -->
<button class="btn waves-effect waves-light" disabled="" id="stop-button">Stop Recording and Upload</button>
<!-- The script writes the "speech" value returned by /record_and_convert into this textarea. -->
<textarea id="speech-text"></textarea>
<!-- NOTE(review): no handler for format-button / formatted-text is visible in this snippet; presumably wired up elsewhere. -->
<button class="btn waves-effect waves-light" id="format-button">Format as medical record</button>
<div id="formatted-text"></div>
<script>
// Recorder for the current session; stays undefined until recording starts.
var mediaRecorder = undefined;
// Audio chunks collected from the recorder's 'dataavailable' events.
var audioChunks = new Array();
// Start a new microphone recording when the record button is clicked.
document.getElementById('record-button').addEventListener('click', function() {
  // Ignore clicks while a recording is already running; otherwise a second
  // recorder would push its chunks into the same shared buffer.
  if (mediaRecorder && mediaRecorder.state === 'recording') {
    return;
  }
  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(function(stream) {
      // Begin with an empty buffer. A previous recorder may have delivered
      // its final chunk after the buffer was last cleared, and that stale
      // chunk must not be prepended to the new recording.
      audioChunks = [];
      mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
      mediaRecorder.addEventListener('dataavailable', function(event) {
        // Skip empty chunks; they add nothing to the recording.
        if (event.data && event.data.size > 0) {
          audioChunks.push(event.data);
        }
      });
      // Timeslice of 1000 ms: a 'dataavailable' event fires about once per second.
      mediaRecorder.start(1000);
      document.getElementById('stop-button').disabled = false;
    })
    .catch(function(error) {
      // Reached when microphone access is refused or the recorder cannot be
      // created (e.g. the requested mimeType is not supported).
      console.error('Error:', error);
    });
});
// Stop the current recording and upload the finished audio for transcription.
document.getElementById('stop-button').addEventListener('click', function() {
  // Nothing to do if no recording was ever started or it has already ended.
  if (!mediaRecorder || mediaRecorder.state === 'inactive') {
    return;
  }
  var recorder = mediaRecorder;

  // MediaRecorder.stop() is asynchronous: the last 'dataavailable' event is
  // dispatched after stop() returns, followed by a 'stop' event. Building the
  // Blob only once 'stop' fires ensures the final chunk is included.
  recorder.addEventListener('stop', function() {
    let recordedBlob = new Blob(audioChunks, { type: 'audio/webm' });
    console.log(recordedBlob);
    // The chunks now live in the Blob; clear the shared buffer.
    audioChunks = [];

    // Release the microphone so the browser stops capturing audio.
    recorder.stream.getTracks().forEach(function(track) {
      track.stop();
    });

    let audioFile = new File([recordedBlob], 'recored.webm', { type: "audio/webm" });
    var formData = new FormData();
    // The server reads the upload from the 'audio' form field.
    formData.append('audio', audioFile);
    console.log(audioFile);

    fetch('/record_and_convert', {
      method: 'POST',
      body: formData
    })
      .then(response => {
        console.log('Response:', response);
        // Treat HTTP error statuses as failures instead of trying to parse
        // an error page as JSON.
        if (!response.ok) {
          throw new Error('Upload failed with status ' + response.status);
        }
        return response.json();
      })
      .then(data => {
        console.log('Data:', data);
        document.getElementById('speech-text').value = data.speech;
      })
      .catch(error => {
        console.error('Error:', error);
      });
  }, { once: true });

  recorder.stop();
  document.getElementById('stop-button').disabled = true;
});
</script>
Discussion