Storing documents in Momento Vector Index with langchain and llama_index

Use the Zendesk loader:

from llama_index import download_loader

ZendeskReader = download_loader("ZendeskReader")
loader = ZendeskReader(zendesk_subdomain="your_zendesk", locale="ja")

and insert the Documents obtained from loader.load_langchain_documents() into Momento Vector Index.

Document.metadata["id"] の値をintからstrに変換しないとSDK Failedエラー。
momento.errors.exceptions.UnknownException: UnknownException(message='SDK Failed to process the request.', error_code=<MomentoErrorCode.UNKNOWN_ERROR: 15>, transport_details=None, message_wrapper='Unknown error has occurred')
This worked:
docs_payload = []
for d in documents:
    d.metadata["id"] = str(d.metadata["id"])
    docs_payload.append(d)

vector_db = MomentoVectorIndex.from_documents(
    docs_payload,
    OpenAIEmbeddings(),
    index_name="test",
)

When upserting via langchain, the documents seem to end up duplicated in the index? One way to keep re-runs from piling up duplicates is to drop and recreate the index before loading, as in the sketch below.
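A minimal sketch using the momento SDK directly (the reset_index helper name and the 1536-dimension default for OpenAI's ada-002 embeddings are my assumptions, and MOMENTO_API_KEY is assumed to be set in the environment):

from momento import (
    CredentialProvider,
    PreviewVectorIndexClient,
    VectorIndexConfigurations,
)


def reset_index(index_name: str, num_dimensions: int = 1536) -> None:
    # Recreate the index so each load starts from an empty state.
    client = PreviewVectorIndexClient(
        VectorIndexConfigurations.Default.latest(),
        CredentialProvider.from_environment_variable("MOMENTO_API_KEY"),
    )
    client.delete_index(index_name)  # a "not found" response on the first run can be ignored
    client.create_index(index_name, num_dimensions)

Calling reset_index("zendesk_smartmat") before load_index() keeps the index contents deterministic across runs.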

The whole thing looks like this:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import MomentoVectorIndex
from llama_index import download_loader


def load_index():
    ZendeskReader = download_loader("ZendeskReader")
    loader = ZendeskReader(zendesk_subdomain="your_zendesk", locale="ja")
    # load data from Zendesk
    docs = loader.load_langchain_documents()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    documents = text_splitter.split_documents(docs)
    docs_payload = []
    for d in documents:
        d.metadata["id"] = str(
            d.metadata["id"]
        )  # need to convert to string for momento
        docs_payload.append(d)
    vector_db = MomentoVectorIndex.from_documents(
        docs_payload,
        OpenAIEmbeddings(),
        index_name="zendesk_smartmat",
    )


if __name__ == "__main__":
    load_index()

With the momento python sdk v1.11.0 update, the index can now be created as follows, without converting the ids to strings:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import MomentoVectorIndex
from llama_index import download_loader

# ZENDESK_SUBDOMAIN, LOCALE, and INDEX_NAME are configuration constants defined elsewhere.


def load_index():
    ZendeskReader = download_loader("ZendeskReader")
    loader = ZendeskReader(zendesk_subdomain=ZENDESK_SUBDOMAIN, locale=LOCALE)
    # load data from Zendesk
    docs = loader.load_langchain_documents()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(docs)
    # as of momento python sdk v1.11.0 the int metadata ids no longer need to be stringified
    MomentoVectorIndex.from_documents(
        documents,
        OpenAIEmbeddings(),
        index_name=INDEX_NAME,
    )


if __name__ == "__main__":
    load_index()
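Once the index is loaded, it can also be queried through langchain. A minimal sketch of a query-only script (the query string is a made-up example, and the MomentoVectorIndex constructor arguments reflect my reading of the langchain Momento integration, so double-check them against your version; MOMENTO_API_KEY is assumed to be set):

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import MomentoVectorIndex
from momento import (
    CredentialProvider,
    PreviewVectorIndexClient,
    VectorIndexConfigurations,
)

vector_db = MomentoVectorIndex(
    embedding=OpenAIEmbeddings(),
    client=PreviewVectorIndexClient(
        VectorIndexConfigurations.Default.latest(),
        CredentialProvider.from_environment_variable("MOMENTO_API_KEY"),
    ),
    index_name="zendesk_smartmat",
)

# print the top 3 most similar chunks for a sample query
for doc in vector_db.similarity_search("How do I reset my device?", k=3):
    print(doc.page_content[:80])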