🔖
RAG機能付きチャットボットを作ろう-7_RAGの回答作成

2024/12/09に公開
 TL;DR
前回の記事では、promptに質問文を入れて、検索結果を返す関数を作成しました。本稿では、

- 検索結果を読み込ませて、回答する

を行います。

 実装
 主な変更点
主な変更点は以下です。

- RAG用の回答生成の関数の作成

 コード
import streamlit as st
from openai import OpenAI
from dotenv import load_dotenv
import os
import chromadb
from langchain_chroma import Chroma
import openai
from pydantic import BaseModel
from typing import List
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import glob

# Load environment variables from the .env file
load_dotenv(".env")


# Initialise the OpenAI API client.
# os.environ[...] raises KeyError when OPENAI_API_KEY is not set.

client = OpenAI(
    api_key=os.environ['OPENAI_API_KEY']
    )


# Text splitter definition: chunks of up to 512 units with an overlap of 100,
# measured with len(); the separators are literal strings, not regular
# expressions (is_separator_regex=False).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=100,
    length_function=len,
    separators=["\n", " ", ".", ",", ";", ":", "(", ")", "[", "]", "{", "}", "<", ">", '"', "'", "、", "。", "，", "；", "：", "（", "）", "【", "】", "「", "」", "『", "』", "〈", "〉", "《", "》", "“", "”"],
    is_separator_regex=False,
)


# Embeddings used by the vector DB
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Initialise Chroma with the embeddings defined above
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)


def pdf_to_vector():
    """Embed the PDFs in ./pdf_docs that have not been stored yet.

    Paths already listed in ``pdf_list_stored.txt`` are skipped. Every other
    ``*.pdf`` directly inside ``./pdf_docs`` is loaded, split with the
    module-level ``text_splitter`` and added to the module-level
    ``vector_store``. The bookkeeping file is rewritten before returning,
    including when processing one of the PDFs raises.

    Returns:
        None.
    """
    path = "./pdf_docs"
    list_file = "pdf_list_stored.txt"

    # Paths embedded by earlier runs, one per line; blank lines are ignored
    pdf_list = []
    if os.path.exists(list_file):
        with open(list_file, "r") as f:
            pdf_list = [line.strip() for line in f if line.strip()]

    # Only the top level of the folder is scanned. To include sub-folders use
    # glob.glob(path + "/**/*.pdf", recursive=True); without recursive=True a
    # "**" in the pattern matches the same names as a single "*".
    pdf_list_current = glob.glob(path + "/*.pdf")
    # sorted() makes the processing order repeatable
    pdf_list_new = sorted(set(pdf_list_current) - set(pdf_list))

    try:
        for pdf in pdf_list_new:
            chunks = PyMuPDFLoader(pdf).load_and_split(text_splitter)
            # Nothing to store when the PDF yields no chunks
            if chunks:
                vector_store.add_documents(chunks)
            pdf_list.append(pdf)
    finally:
        # Written even if a PDF fails, so the ones already added to the
        # vector store are not embedded a second time on the next run
        with open(list_file, "w") as f:
            f.writelines(pdf + "\n" for pdf in pdf_list)

## Retriever used for vector search
# Built from vector_store with search_type "mmr"; search_kwargs asks for
# k=1 result out of fetch_k=10 candidates.
retriever = vector_store.as_retriever(
    search_type="mmr", search_kwargs={"k": 1, "fetch_k": 10}
)
       

# Return the chatbot's reply for a single prompt.
# Takes the OpenAI API client and the prompt text.
def default_chat(client, prompt):
    """Send ``prompt`` to the chat model and return the text of its reply.

    Args:
        client: OpenAI API client; ``client.chat.completions.create`` is
            called once.
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the response.
    """
    conversation = [
        {"role": "system", "content": "You are AI assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # pick whichever model you prefer
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


# Answer a prompt using the documents the retriever returns for it
def chat_with_retriever(client, retriever, prompt):
    """Answer ``prompt`` from the documents found by ``retriever``.

    Args:
        client: OpenAI API client; ``client.chat.completions.create`` is
            called once.
        retriever: Object whose ``invoke(prompt)`` returns the documents the
            answer should be based on.
        prompt: The user's question. It is used both as the search query and
            as the final user message.

    Returns:
        The message content of the first choice in the response.
    """
    retrieved_docs = retriever.invoke(prompt)
    # Pass the plain text of each hit. Interpolating the list itself would
    # insert its Python repr (escaped newlines, metadata, wrapper syntax)
    # into the prompt. Items without a page_content attribute fall back to str().
    context = "\n\n".join(
        getattr(doc, "page_content", str(doc)) for doc in retrieved_docs
    )
    RAG_response = client.chat.completions.create(
        model="gpt-4o-mini", # pick whichever model you prefer
        messages=[
            {"role": "system", "content": """
             You are AI assistant.
             User will give you documents and question.
             You need to find the answer in the documents.
             You should NOT answer besides the documents.
             If you can not find the answer, you can ask the user for more information.
             """},
            {"role": "user", "content": f"""
             You are given document as below.
             {context}
             """},
            # Scripted assistant turn acknowledging the document before the question
            {"role": "assistant", "content": "I will find the answer in the document."},
            {"role": "user", "content": prompt}
        ]
    )
    return RAG_response.choices[0].message.content

# The chat history lives in Streamlit's session state.
# Start with an empty list when no history has been stored yet.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

# Render the chat history kept in the session state
def display_chat_history():
    """Render every entry of ``st.session_state.chat_history`` as a bubble.

    Entries whose ``role`` is ``"user"`` get a grey bubble labelled
    "ユーザー"; every other role gets a blue bubble labelled "チャットボット".
    """
    # Imported here so the function does not depend on a file-level import
    import html

    for chat in st.session_state.chat_history:
        if chat["role"] == "user":
            # Grey background
            background, speaker = "#f0f0f0", "ユーザー"
        else:
            # Blue background
            background, speaker = "#cfe2ff", "チャットボット"
        # The text comes from the user or the model and is rendered with
        # unsafe_allow_html=True, so escape it to keep it from being
        # interpreted as markup.
        body = html.escape(str(chat["content"]))
        st.markdown(
            # Rounded corners on a coloured background
            f'<div style="background-color: {background}; border-radius: 10px; padding: 10px;">'
            f"{speaker}: {body}"
            '</div>', unsafe_allow_html=True)

st.title('RAG機能付きチャットボットを作ろう')
st.write('streamlitを使ったUIの作成')

# Show the conversation so far
display_chat_history()

prompt = st.text_area('プロンプト入力欄', )


def _record_turn(question, answer_fn):
    """Answer ``question`` with ``answer_fn`` and append the exchange to the history."""
    # A blank question is not sent to the model or the retriever
    if not question.strip():
        st.warning('プロンプトを入力してください')
        return
    reply = answer_fn(question)
    # The user's input is stored with role "user"
    st.session_state.chat_history.append({"role": "user", "content": question})
    # The reply is stored with role "system"; display_chat_history renders
    # every non-"user" role as the chatbot
    st.session_state.chat_history.append({"role": "system", "content": reply})
    # Rerun so the new exchange is shown by display_chat_history
    st.rerun()


# Three columns, each holding one action button
button1, button2, button3 = st.columns(3)
if button1.button('チャット'):
    _record_turn(prompt, lambda text: default_chat(client, text))
if button2.button('RAG'):
    _record_turn(prompt, lambda text: chat_with_retriever(client, retriever, text))

if button3.button('PDFをベクトル化'):
    pdf_to_vector()
    st.rerun()

 RAG用の回答生成の関数の作成
RAG用の回答生成の関数を作成します。

まずsystemプロンプトに、提供するDocumentに基づいて回答するように指示を出します。

次に、userロールのメッセージとしてDocument(Retrieverの検索結果)を渡し、その内容をモデルに提示します。

その後、assistantがDocumentを受け取った、という履歴を擬似的に作成しています。

最後に、userからの質問を受け取り、回答を返します。
# Answer a prompt using the documents the retriever returns for it
def chat_with_retriever(client, retriever, prompt):
    """Answer ``prompt`` from the documents found by ``retriever``.

    Args:
        client: OpenAI API client; ``client.chat.completions.create`` is
            called once.
        retriever: Object whose ``invoke(prompt)`` returns the documents the
            answer should be based on.
        prompt: The user's question. It is used both as the search query and
            as the final user message.

    Returns:
        The message content of the first choice in the response.
    """
    retrieved_docs = retriever.invoke(prompt)
    # Pass the plain text of each hit. Interpolating the list itself would
    # insert its Python repr (escaped newlines, metadata, wrapper syntax)
    # into the prompt. Items without a page_content attribute fall back to str().
    context = "\n\n".join(
        getattr(doc, "page_content", str(doc)) for doc in retrieved_docs
    )
    RAG_response = client.chat.completions.create(
        model="gpt-4o-mini", # pick whichever model you prefer
        messages=[
            {"role": "system", "content": """
             You are AI assistant.
             User will give you documents and question.
             You need to find the answer in the documents.
             You should NOT answer besides the documents.
             If you can not find the answer, you can ask the user for more information.
             """},
            {"role": "user", "content": f"""
             You are given document as below.
             {context}
             """},
            # Scripted assistant turn acknowledging the document before the question
            {"role": "assistant", "content": "I will find the answer in the document."},
            {"role": "user", "content": prompt}
        ]
    )
    return RAG_response.choices[0].message.content

この関数は、画面の「RAG」ボタンを押すと実行されるようにしています。
では、
streamlit run main.py
を実行して、動作を確認してみましょう。

次のような画面がでれば成功です。

 まとめ
以上で、RAG機能付きチャットボットの作成が完了しました。

プロンプトの調整や、retrieverの設定を変えれば、様々な用途に使えるかと思います。

次回は、回答のまとめを楽にするために構造化出力(Structured Output)を追加していきます。

 リンク
RAG機能付きチャットボットを作ろう-1:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_01
RAG機能付きチャットボットを作ろう-2:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_02
RAG機能付きチャットボットを作ろう-3:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_03
RAG機能付きチャットボットを作ろう-4:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_04
RAG機能付きチャットボットを作ろう-5:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_05
RAG機能付きチャットボットを作ろう-6:

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_06
RAG機能付きチャットボットを作ろう-7: (完成版)

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_07
RAG機能付きチャットボットを作ろう-8: (Structured Outputを追加)

https://zenn.dev/bluetang/articles/chatbot_with_lc_st_chromadb_08
Discussion

ログインするとコメントできます