🤖

Azure OpenAI x LangChain サンプル集

2023/04/19に公開

こんにちは、たけです〜
AzureのGPT4のWaitlistが通ったので、LangChainと組み合わせたサンプルをいくつか書こうと思う。

何か質問があったらTwitter(@take_btc)までどうぞ

事前設定

requirements.txt
openai
langchain
tiktoken

faiss-cpu

OpenAIと似てるが、いくつか追加で設定しないといけない項目がある。

import openai

# Azure OpenAI needs more configuration than the plain OpenAI API:
# the API type, a resource-specific endpoint, an API version, and a key.
openai.api_type = "azure"
# Replace <???> with the name of your Azure OpenAI resource.
openai.api_base = "https://<???>.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
# Replace "???" with the API key of your Azure OpenAI resource.
openai.api_key = "???"

チャット

基本的なチャット形式の対話を実現するサンプル。チャット履歴を渡すこともできる。
get_openai_callbackを使えば使ったトークンやコストを取得することができる。

※2023/04/19時点ではバグで全部0となってしまうようだ。

import openai
from langchain.callbacks import get_openai_callback

from langchain.chat_models import AzureChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage


# Run one multi-turn chat against an Azure OpenAI deployment and report the
# token usage / cost gathered by the callback.
with get_openai_callback() as callback:
    chat_model = AzureChatOpenAI(
        client=None,
        deployment_name="gpt-35-turbo",
        openai_api_base=openai.api_base,
        openai_api_version=openai.api_version or "",
        openai_api_key=openai.api_key or "",
        temperature=0,
        request_timeout=180,
    )
    # Conversation so far: a system primer, one earlier Q/A exchange, and the
    # new user turn the model should respond to.
    conversation = [
        SystemMessage(
            content="""You are great teacher. Please answer the following question."""
        ),
        HumanMessage(content="What is the meaning of life?"),
        AIMessage(content="42"),
        HumanMessage(content="Today is a good day."),
    ]
    content = chat_model(conversation)
    # NOTE(review): as of 2023/04/19 these counters may all read 0 on Azure
    # (known upstream bug mentioned in the article).
    for label, value in [
        ("successful_requests: ", callback.successful_requests),
        ("total_tokens: ", callback.total_tokens),
        ("prompt_tokens: ", callback.prompt_tokens),
        ("completion_tokens: ", callback.completion_tokens),
        ("total_cost: ", callback.total_cost),
        ("content", content),
    ]:
        print(label, value)

要約

map_reduce形式でlong_textの内容を要約する。
長文を細かく区切って、それぞれについて要約(map_prompt)した後に、それぞれの要約の要約(combine_prompt)を行う。

import openai

from langchain import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain.chat_models import AzureChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain


# Placeholder for the long document to be summarized.
long_text = "some long document"

summarizer_llm = AzureChatOpenAI(
    client=None,
    deployment_name="gpt-35-turbo",
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version or "",
    openai_api_key=openai.api_key or "",
    temperature=0,
    request_timeout=180,
)

# Split the text into ~1000-character chunks and wrap each one as a Document.
splitter = CharacterTextSplitter(chunk_size=1000)
chunks = [Document(page_content=piece) for piece in splitter.split_text(long_text)]

# The same instruction serves both passes: summarizing each chunk (map) and
# summarizing the collected summaries (combine).
SUMMARIZE_TEMPLATE = """Please summarize the following sentences.

{text}"""
map_prompt = PromptTemplate(input_variables=["text"], template=SUMMARIZE_TEMPLATE)
combine_prompt = PromptTemplate(input_variables=["text"], template=SUMMARIZE_TEMPLATE)

summarize_chain = load_summarize_chain(
    summarizer_llm,
    chain_type="map_reduce",
    map_prompt=map_prompt,
    combine_prompt=combine_prompt,
    verbose=True,
)

# Execute the chain and report token usage / cost from the callback.
with get_openai_callback() as callback:
    content = summarize_chain.run(chunks)
    for label, value in [
        ("successful_requests: ", callback.successful_requests),
        ("total_tokens: ", callback.total_tokens),
        ("prompt_tokens: ", callback.prompt_tokens),
        ("completion_tokens: ", callback.completion_tokens),
        ("total_cost: ", callback.total_cost),
        ("content", content),
    ]:
        print(label, value)

Q&A

map_reduce形式で長いドキュメントに対する検索をするサンプル。履歴を使うこともできる。
結果には回答に使用したソースも含まれている。

import openai

from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.callbacks import get_openai_callback

from langchain.chat_models import AzureChatOpenAI

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader


# Chat model shared by both chains below (question condensing and answering).
# The sample previously built a second, identical AzureChatOpenAI instance
# that was never used; one instance is sufficient.
chat = AzureChatOpenAI(
    client=None,
    deployment_name="gpt-35-turbo",
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version or "",
    openai_api_key=openai.api_key or "",
    temperature=0,
    request_timeout=180,
)

# Load the source document and split it into 1000-character chunks.
loader = TextLoader("source.txt")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embed every chunk and index the vectors in an in-memory FAISS store.
embeddings = OpenAIEmbeddings(
    client=None,
    document_model_name="text-embedding-ada-002",
    chunk_size=1,
    openai_api_key=openai.api_key,
)
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# map_reduce QA chain: answers the question from the retrieved chunks.
doc_chain = load_qa_chain(
    chat,
    chain_type="map_reduce",
)
# Rewrites (question + chat history) into a single standalone question.
condense_question_chain = LLMChain(llm=chat, prompt=CONDENSE_QUESTION_PROMPT)
qa = ConversationalRetrievalChain(
    retriever=db.as_retriever(),
    combine_docs_chain=doc_chain,
    question_generator=condense_question_chain,
    return_source_documents=True,  # include the chunks used for the answer
    verbose=True,
)

# Ask a question with prior chat history, reporting token usage / cost.
with get_openai_callback() as cb:
    content = qa(
        {
            "question": "主人公の年齢は?",
            # History is a list of (question, answer) tuples.
            "chat_history": [("主人公の性別は?", "男")],
            # Return results whose search distance is 0.9 or greater.
            "vectordbkwargs": {"search_distance": 0.9},
        }
    )
    print("successful_requests: ", cb.successful_requests)
    print("total_tokens: ", cb.total_tokens)
    print("prompt_tokens: ", cb.prompt_tokens)
    print("completion_tokens: ", cb.completion_tokens)
    print("total_cost: ", cb.total_cost)
    print("content", content)

Discussion