📘

LlamaとGitHubで実現するリポジトリQ&A機能

2024/09/08に公開

Amazon CodeCatalyst や Cursor のようなリポジトリのコードに対して Q&A する仕組みを作ってみた

前提

最低限、以下は必要(pipreqs より取得)

pip install openai
pip install streamlit
pip install faiss-cpu
pip install llama-index
pip install llama-index-readers-github

コード例

・LLM: OpenAI
・Vector Store: FAISS
・ChatBot: Streamlit
・Repository: GitHub

import streamlit as st
import openai
import faiss
from llama_index.core import load_index_from_storage, VectorStoreIndex, StorageContext, Settings
from llama_index.llms.openai import OpenAI
from llama_index.readers.github import GithubRepositoryReader, GithubClient
from llama_index.vector_stores.faiss import FaissVectorStore

# OpenAI設定
openai.api_key = "" # APIキー
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    max_tokens=16000,
)
# Vector Store設定
d = 1536
faiss_index = faiss.IndexFlatL2(d)
# GitHubクライアントの設定
github_token = "" # トークン
github_client = GithubClient(github_token=github_token, verbose=True)
# タイトル
st.title("GitHub Q&A with Llama")
# GitHubリポジトリ情報入力用のフォーム
with st.form(key="repo_form"):
    owner = st.text_input("Owner", value="Flupinochan")
    repo = st.text_input("Repository", value="zenn-content")
    branch = st.text_input("Branch", value="master")
    filter_directories_input = st.text_input("EXCLUDE Filter Directories (comma-separated)", value="books,images")
    filter_file_extensions_input = st.text_input("INCLUDE Filter File Extensions (comma-separated)", value=".md,.ts,.tsx,.py")
    submit_button = st.form_submit_button("Submit")
# セッション初期化
if "query_engine" not in st.session_state:
    st.session_state.query_engine = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
# Submitボタン実行時の処理
if submit_button:
    with st.spinner("Initializing index..."):
        try:
            filter_directories = [dir.strip() for dir in filter_directories_input.split(",") if dir.strip()]
            filter_file_extensions = [ext.strip() for ext in filter_file_extensions_input.split(",") if ext.strip()]
            reader = GithubRepositoryReader(
                github_client=github_client,
                owner=owner,
                repo=repo,
                use_parser=True,
                verbose=False,
                filter_directories=(filter_directories, GithubRepositoryReader.FilterType.EXCLUDE),
                filter_file_extensions=(filter_file_extensions, GithubRepositoryReader.FilterType.INCLUDE),
                concurrent_requests=5,
                retries=5,
                timeout=300,
            )
            documents = reader.load_data(branch=branch) # GitHubリポジトリを読み込む
            vector_store = FaissVectorStore(faiss_index=faiss_index)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) # Vectorに保存/Index作成
            index.storage_context.persist()
            vector_store = FaissVectorStore.from_persist_dir("./storage")
            storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir="./storage") # カレントディレクトリstorageに永久保存
            st.session_state.query_engine = load_index_from_storage(storage_context=storage_context).as_query_engine(streaming=True, similarity_top_k=1)
            st.success("Index initialized successfully.")
        except Exception as e:
            st.error(f"An error occurred: {e}")

# チャットメッセージの入力欄と送信ボタン
user_message = st.text_input("Enter your message:")
if st.button("Send"):
    if user_message:
        if st.session_state.query_engine:
            with st.spinner("Processing your message..."):
                try:
                    instruction = "以下の内容について、長文で良いので詳細かつ丁寧に最低でも2000字以上でMarkdown形式で回答してください。"
                    final_prompt = f"{instruction}\n{user_message}"
                    streaming_response = st.session_state.query_engine.query(final_prompt) # 質問実行
                    placeholder = st.empty()
                    chat_history = st.session_state.get("chat_history", [])
                    # ストリーミング出力の処理
                    full_text = ""
                    for text in streaming_response.response_gen:
                        full_text += text
                        placeholder.write(full_text)
                    chat_history.append(full_text)
                    st.session_state.chat_history = chat_history
                except Exception as e:
                    st.error(f"An error occurred while querying: {e}")
        else:
            st.write("Please submit the form to initialize the index.")
    else:
        st.write("Please enter a message.")

実行例

指定した GitHub リポジトリを読み込む

質問する





P.S. GitHub REST API で一覧取得する方法と大差なかった…

GitHubで編集を提案

Discussion