Open2023/03/30にコメント追加4

ChatGPTに音声入力してVOICEVOXで返答してもらう【python】

みやぎ 2023/03/30に更新

ChatGPTに音声入力してVOICEVOXで返答してもらいたい
ってことで出来たコード

エラーが出たらChatGPTに聞いて直す、を繰り返して出来た超初心者のコードなので、そこだけご注意ください。
Altキーを押して話しかけると、冥鳴ひまりさんが答えてくれます。「終了」と言うとプログラムが終了します。

openai.api_key = ""
には自分のキーを入れてください
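また、VOICEVOXエンジンをバックグラウンドで起動しておいてください（このコードは 127.0.0.1:50021 に接続します）。
あと、色々なパッケージ（keyboard、openai、PyAudio、requests、SpeechRecognition、langchain など）をコマンドでインストールしましたが、詳しくは覚えていないので、エラーが出たらChatGPTに聞いてください。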

import io
import json
import keyboard
import openai
import pyaudio
import queue
import requests
import speech_recognition as sr
import sys
import threading
import time
import wave

from langchain import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import (
    HumanMessage,
    SystemMessage
)

from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

# OpenAI API key. Left empty in the published script; fill in your own key
# before running.
openai.api_key = ""

class PyAudioContextManager(pyaudio.PyAudio):
    """PyAudio subclass that can be used in a ``with`` statement.

    Leaving the ``with`` block calls ``terminate()`` on the instance.
    """

    def __enter__(self):
        # Hand the instance itself to the ``as`` target.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Runs on normal exit and on exceptions alike. Returns None, so an
        # exception raised inside the ``with`` block is not suppressed.
        self.terminate()


class Voicevox:
    """Small client that asks a VOICEVOX engine for speech and plays it.

    The engine is reached over HTTP at ``http://{host}:{port}``; it must
    already be running when ``speak`` is called.
    """

    def __init__(self, host="127.0.0.1", port=50021):
        """Remember where the VOICEVOX engine is listening.

        Args:
            host: Host name or IP address of the engine.
            port: TCP port of the engine.
        """
        self.host = host
        self.port = port

    def speak(self, text=None, speaker=14):  # speaker 14: Meimei Himari
        """Synthesize ``text`` with VOICEVOX and play it, blocking until done.

        Args:
            text: Text to speak. ``None`` or an empty string is a no-op.
            speaker: Integer id of the VOICEVOX voice to use.

        Raises:
            requests.HTTPError: If the engine answers either request with an
                error status.
        """
        # Nothing to say: avoid sending a request the engine cannot answer.
        if not text:
            return

        params = (
            ("text", text),
            ("speaker", speaker),  # voice id, passed as an int
        )
        base_url = f"http://{self.host}:{self.port}"

        # Step 1: build the synthesis query for this text and speaker.
        init_q = requests.post(f"{base_url}/audio_query", params=params)
        init_q.raise_for_status()

        # Step 2: turn the query into WAV data.
        res = requests.post(
            f"{base_url}/synthesis",
            headers={"Content-Type": "application/json"},
            params=params,
            data=json.dumps(init_q.json()),
        )
        # Fail here with a clear HTTP error instead of later, when an error
        # body would be handed to the WAV parser.
        res.raise_for_status()

        # Keep the WAV data in memory; no temporary file is written.
        audio = io.BytesIO(res.content)

        # Use the context-managed PyAudio instance for playback so that it is
        # the one (and only one) that gets terminated on exit.
        with wave.open(audio, 'rb') as f, PyAudioContextManager() as p:

            def _callback(in_data, frame_count, time_info, status):
                # Feed the next chunk of frames to the output stream.
                # NOTE(review): relies on PyAudio ending the stream once a
                # short/empty buffer is returned at end of file - confirm
                # against the PyAudio callback-mode documentation.
                data = f.readframes(frame_count)
                return (data, pyaudio.paContinue)

            stream = p.open(format=p.get_format_from_width(width=f.getsampwidth()),
                            channels=f.getnchannels(),
                            rate=f.getframerate(),
                            output=True,
                            stream_callback=_callback)
            try:
                # Play the voice and wait until the stream goes inactive.
                stream.start_stream()
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                # Release the stream even if playback was interrupted.
                stream.close()

def voicevox_speak(ai_response):
    """Speak the ``content`` of a chat response through a default Voicevox client."""
    spoken_text = ai_response.content
    Voicevox().speak(spoken_text)

# System prompt (Japanese): tells the model to act as the character
# Meimei Himari, to answer in a condescending tone, and to use "私" as its
# first-person pronoun.
system_settings = "あなたは冥鳴ひまりです。上から目線の口調で回答してください。第一人称は、私です。"
# Conversation history passed to the chat model on every turn. It starts with
# the system prompt; process_input appends each user message and each reply.
messages = [SystemMessage(content=system_settings)]

def process_input(input_queue, exit_event):
    """Consume recognized utterances, query the chat model, and speak the reply.

    Runs until ``exit_event`` is set. Saying "終了" sets the event and ends the
    loop. Every other utterance is appended to the module-level ``messages``
    history, sent to the chat model, printed, and spoken via VOICEVOX.

    Args:
        input_queue: Queue of ``(input_type, input_data)`` tuples.
        exit_event: ``threading.Event`` used to signal shutdown to all threads.
    """
    # The client does not depend on the conversation, so build it once
    # instead of once per utterance.
    chat = ChatOpenAI(temperature=0)

    while not exit_event.is_set():
        try:
            # Time out regularly so a set exit_event is noticed.
            _input_type, input_data = input_queue.get(timeout=1)
        except queue.Empty:
            continue

        if input_data == "終了":
            print("プログラムを終了します。")
            exit_event.set()
            # Simulate an Alt key press so the voice thread, which is waiting
            # on that key, wakes up and sees the exit event.
            keyboard.press('alt')
            keyboard.release('alt')
            break

        messages.append(HumanMessage(content=input_data))
        try:
            ai_response = chat(messages)
        except Exception as e:
            # Thread boundary: an uncaught error here would end this worker
            # silently and leave the program unresponsive. Report it, drop
            # the unanswered message from the history, and keep listening.
            messages.pop()
            print(f"ChatGPTへの問い合わせに失敗しました。エラー: {e}")
            continue

        # Record the reply right away so the history stays complete even if
        # speech playback fails below.
        messages.append(ai_response)
        print(f"冥鳴ひまり: {ai_response.content}")

        try:
            voicevox_speak(ai_response)
        except Exception as e:
            # The reply was already printed; a playback failure should not
            # end the conversation.
            print(f"音声の再生に失敗しました。エラー: {e}")

def voice_input(input_queue, exit_event):
    """Record speech whenever Alt is pressed and enqueue the recognized text.

    Registers an "alt" hotkey whose handler records from the microphone,
    recognizes the audio as Japanese with ``recognize_google``, and puts
    ``('voice', text)`` on ``input_queue``. The function then keeps prompting
    and calling ``keyboard.wait("alt")`` until ``exit_event`` is set.

    Args:
        input_queue: Queue that receives ``('voice', text)`` tuples.
        exit_event: ``threading.Event`` that signals shutdown.
    """
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    def on_alt_key():
        # Do nothing once shutdown has been requested.
        if exit_event.is_set():
            return

        print("あなた（音声入力）: しばらくお待ちください...")
        with microphone as mic_source:
            recognizer.adjust_for_ambient_noise(mic_source)
            captured = recognizer.listen(mic_source)

        try:
            recognized = recognizer.recognize_google(captured, language="ja-JP")
        except sr.UnknownValueError:
            print("音声を認識できませんでした。もう一度お試しください。")
        except sr.RequestError as err:
            print(f"音声認識サービスに問題が発生しました。エラー: {err}")
        else:
            # Only reached when recognition succeeded.
            input_queue.put(('voice', recognized))
            print(f"音声入力: {recognized}")

    keyboard.add_hotkey("alt", on_alt_key)

    while True:
        if exit_event.is_set():
            break
        print("altを押して音声入力を開始してください...")
        keyboard.wait("alt")
    
def main():
    """Start the voice-input and input-processing threads and wait for both."""
    input_queue = queue.Queue()
    exit_event = threading.Event()
    shared_args = (input_queue, exit_event)

    # Voice input thread: listens for the hotkey and produces recognized text.
    voice_thread = threading.Thread(target=voice_input, args=shared_args, daemon=True)
    voice_thread.start()

    # Input processing thread: consumes the text and talks to the chat model.
    process_thread = threading.Thread(target=process_input, args=shared_args, daemon=True)
    process_thread.start()

    # Wait for the processing thread first, then for the voice thread.
    for worker in (process_thread, voice_thread):
        worker.join()

# Start the assistant only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()

みやぎ 2023/03/30に更新

以下参考にしたもの

chatgpt→VOICEVOX

https://youtu.be/pIkSHVZdcfY

みやぎ 2023/03/30に更新

chatgptに記憶を持たせる
（`messages` リストに会話履歴を追加しているだけなので、上手くいっているかは分かりません）
LangChainは上手く使えていない気がします（ConversationChain と ConversationBufferMemory は import しているだけで、実際には使っていません）。

https://note.com/npaka/n/n403fc29a02c7

みやぎ 2023/03/30に更新

マイク入力→ChatGPT

ここはChatGPTに作ってもらったような気もする