Open3
Cloudflare Vectorize で遊ぶ
TODO: ここにやってることを書く
まずは cloudflare/ai のモデルを使って Embedding Vector を作る
基本的にはここを参照しつつ、 Deno でその REST API を叩く
import type { } from "npm:@cloudflare/workers-types@4.20240524.0";
// Cloudflare credentials, read from environment variables through Deno.env.
// The trailing `!` only silences the type checker: nothing here verifies that
// the variables are actually set before they are interpolated into requests.
const CF_API_TOKEN = Deno.env.get('CLOUDFLARE_AI_API_TOKEN')!;
const CF_ACCOUNT_ID = Deno.env.get('CLOUDFLARE_ACCOUNT_ID')!;
/**
 * Shape this file assumes for the JSON body returned by the embedding request
 * issued in getEmbeddingVectors. The response is never validated at runtime.
 */
interface EmbeddingResponse {
  /** Payload produced by the model. */
  result: {
    /** NOTE(review): presumably the dimensions of `data` — confirm against the API docs. */
    shape: Array<number>;
    /** One numeric vector per entry. */
    data: Array<Array<number>>;
  };
  /** Success flag of the response envelope. */
  success: boolean;
  /** Error entries of the envelope; element shape is not modelled here. */
  errors: Array<any>;
  /** Message entries of the envelope; element shape is not modelled here. */
  messages: Array<any>;
}
/**
 * POSTs `args` as JSON to the Cloudflare `ai/run/{model}` REST endpoint of the
 * configured account and resolves to the parsed JSON response body.
 *
 * The HTTP status is not inspected: whatever JSON the server returns is handed
 * back to the caller as-is.
 *
 * @param model Model identifier, appended to the URL path unchanged.
 * @param args  Value serialized with JSON.stringify and sent as the request body.
 * @returns The parsed JSON body (untyped).
 */
async function runCfAi(model: string, args: any) {
  const url = `https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/ai/run/${model}`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      'Authorization': `Bearer ${CF_API_TOKEN}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(args),
  });

  return response.json();
}
/**
 * Requests embeddings for `args.text` from the `@cf/baai/bge-base-en-v1.5`
 * model via runCfAi.
 *
 * @param args Object whose `text` array is forwarded unchanged as the request body.
 * @returns The parsed response, typed as EmbeddingResponse without runtime validation.
 */
async function getEmbeddingVectors(args: { text: string[] }): Promise<EmbeddingResponse> {
  const response: EmbeddingResponse = await runCfAi('@cf/baai/bge-base-en-v1.5', args);
  return response;
}
// Sample sentences to embed.
const texts = [
  'This is a story about an orange cloud',
  'This is a story about a llama',
  'This is a story about a hugging emoji',
];

// Embed every sample in a single request and dump the raw response.
const res = await getEmbeddingVectors({ text: texts });
console.log(res);
Cloudflare Vectorize を使って、vector の保存とクエリ実行を実装する。
import type { } from "npm:@cloudflare/workers-types@4.20240524.0";
// Cloudflare credentials, read from environment variables through Deno.env.
// The trailing `!` only silences the type checker: nothing here verifies that
// the variables are actually set before they are interpolated into requests.
const CF_API_TOKEN = Deno.env.get('CLOUDFLARE_AI_API_TOKEN')!;
const CF_ACCOUNT_ID = Deno.env.get('CLOUDFLARE_ACCOUNT_ID')!;
// Name of the Vectorize index; runCfVectorize interpolates it into the request URL.
const INDEX_NAME = 'embeddings-index';
/**
 * Shape this file assumes for the JSON body returned by the embedding request
 * issued in getEmbeddingVectors. The response is never validated at runtime.
 */
interface EmbeddingResponse {
  /** Payload produced by the model. */
  result: {
    /** NOTE(review): presumably the dimensions of `data` — confirm against the API docs. */
    shape: Array<number>;
    /** One numeric vector per entry; the script below pairs `data[i]` with the i-th input text. */
    data: Array<Array<number>>;
  };
  /** Success flag of the response envelope. */
  success: boolean;
  /** Error entries of the envelope; element shape is not modelled here. */
  errors: Array<any>;
  /** Message entries of the envelope; element shape is not modelled here. */
  messages: Array<any>;
}
/**
 * Runs a Cloudflare AI model over the REST API: POSTs `args` as JSON to
 * `ai/run/{model}` under the configured account and resolves to the parsed
 * JSON response body.
 *
 * The HTTP status is not inspected: whatever JSON the server returns is handed
 * back to the caller as-is.
 *
 * @param model Model identifier, appended to the URL path unchanged.
 * @param args  Value serialized with JSON.stringify and sent as the request body.
 * @returns The parsed JSON body (untyped).
 */
async function runCfAi(model: string, args: any) {
  const url = `https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/ai/run/${model}`;
  const requestHeaders = {
    'Authorization': `Bearer ${CF_API_TOKEN}`,
    'Content-Type': 'application/json',
  };

  const response = await fetch(url, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(args),
  });

  return response.json();
}
/**
 * Requests embeddings for `args.text` from the `@cf/baai/bge-base-en-v1.5`
 * model via runCfAi.
 *
 * @param args Object whose `text` array is forwarded unchanged as the request body.
 * @returns The parsed response, typed as EmbeddingResponse without runtime validation.
 */
async function getEmbeddingVectors(args: { text: string[] }): Promise<EmbeddingResponse> {
  const embeddings: EmbeddingResponse = await runCfAi('@cf/baai/bge-base-en-v1.5', args);
  return embeddings;
}
/**
 * POSTs to `vectorize/indexes/{INDEX_NAME}/{method}` under the configured
 * account and resolves to the parsed JSON response body.
 *
 * When `ndjson` is true (the default) `args` is treated as an array: each
 * element is JSON-serialized onto its own line and the body is sent as
 * `application/x-ndjson`. Otherwise `args` is serialized as one JSON document
 * and sent as `application/json`.
 *
 * The HTTP status is not inspected.
 *
 * @param method Endpoint name appended to the index URL (this file uses
 *               'upsert', 'insert' and 'query').
 * @param args   Request payload; must have a `map` method when `ndjson` is true.
 * @returns The parsed JSON body (untyped).
 */
async function runCfVectorize(method: string, args: any, { ndjson = true }: {
  ndjson?: boolean,
} = {}) {
  const url = `https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/vectorize/indexes/${INDEX_NAME}/${method}`;

  let contentType: string;
  let payload: string;
  if (ndjson) {
    // Newline-delimited JSON: one serialized element per line.
    contentType = 'application/x-ndjson';
    payload = args.map((arg: any) => JSON.stringify(arg)).join('\n');
  } else {
    contentType = 'application/json';
    payload = JSON.stringify(args);
  }

  const response = await fetch(url, {
    method: "POST",
    headers: {
      'Authorization': `Bearer ${CF_API_TOKEN}`,
      'Content-Type': contentType,
    },
    body: payload,
  });

  return response.json();
}
/**
 * Sends `vectors` to the index's `upsert` endpoint as NDJSON via runCfVectorize.
 *
 * @param vectors Vectors to send, one per NDJSON line.
 * @returns The parsed JSON response (untyped).
 */
async function upsertVectors(vectors: VectorizeVector[]) {
  const requestOptions = { ndjson: true };
  return runCfVectorize('upsert', vectors, requestOptions);
}
/**
 * Sends `vectors` to the index's `insert` endpoint as NDJSON via runCfVectorize.
 *
 * @param vectors Vectors to send, one per NDJSON line.
 * @returns The parsed JSON response (untyped).
 */
async function insertVectors(vectors: VectorizeVector[]) {
  const requestOptions = { ndjson: true };
  return runCfVectorize('insert', vectors, requestOptions);
}
/**
 * Queries the index: merges `options` with the query vector and POSTs the
 * result to the `query` endpoint as a plain JSON document (not NDJSON).
 *
 * @param vectors Components of the single query vector; sent as the `vector` field.
 * @param options Query options spread into the request body ahead of `vector`.
 * @returns The parsed JSON response, typed as VectorizeMatches without runtime
 *          validation. NOTE(review): whether the REST response matches
 *          VectorizeMatches exactly (rather than wrapping it in an envelope)
 *          is not checked here — confirm against the API docs.
 */
async function queryVectors(
  vectors: number[],
  options: VectorizeQueryOptions
): Promise<VectorizeMatches> {
  // `vector` comes last so it overrides any same-named key carried by `options`.
  const requestBody = { ...options, vector: vectors };
  const found: VectorizeMatches = await runCfVectorize('query', requestBody, { ndjson: false });
  return found;
}
// Sample sentences to embed.
const texts = [
  'This is a story about an orange cloud',
  'This is a story about a llama',
  'This is a story about a hugging emoji',
];

// Embed every sample in a single request.
const res = await getEmbeddingVectors({ text: texts });

// Pair each embedding with its array position (stringified, used as the id)
// and keep the source sentence as metadata.
// NOTE(review): `vectors` is built here but never passed to insertVectors or
// upsertVectors in this snippet — confirm the index is populated elsewhere.
const vectors: VectorizeVector[] = res.result.data.map((values, index) => ({
  id: index.toString(),
  values,
  metadata: { title: texts[index] },
}));

// Embed the search phrase and look up its nearest neighbours in the index.
const userQuery = 'orange cloud';
const queryVector = await getEmbeddingVectors({ text: [userQuery] });
const query = queryVector.result.data[0];
const matches = await queryVectors(query, {
  topK: 3,
  returnValues: false,
  returnMetadata: false,
});
console.log(matches);
注意する点として、 insert/upsert が受け付ける Content-Type は `application/json` ではなく `application/x-ndjson` なので、ndjson、つまり 1 行につき 1 つの JSON を改行区切りで並べた形式にフォーマットする必要がある。
すべてのエンドポイントがそうではない（たとえば query は通常の `application/json` を受け付ける）ので、気をつける。