配列の要素に codeblock を発見したら改行でまとめる関数を考える
どういう環境?
- discord.js で ChatGPT を使ったボットを作ってる
- `stream: true` と `response_format: { type: "json_object" }` を使用している
- 返答は `{ "message": ["返答が入る"] }` の `message` に入るが、system prompt である程度縛っても下記のようになってしまうので、それを直す。
こういう response になってしまっていて codeblock が壊れるので、codeblock が始まったら終わるまでは改行コードで結合して buffer すればいいはず。以下、1 つ目が現状の response、2 つ目が期待する結果。
{
"message": [
"```css",
"div {", " color: blue;", "}",
"```",
"Some text",
"```html",
"<p>Hello</p>",
"```"
]
}
{
"message": [
"```css\ndiv {\n color: blue;\n}\n```",
"Some text",
"```html\n<p>Hello</p>\n```"
]
}
ChatGPT と会話しながらとりあえず作ったものを好きな形に成形した。
generator を使ったり、`for of` で状態を持ったりしてみた。
これになった
import { isSingleLine, isStart, isEnd } from "./internal";
/**
 * Merges the lines of each fenced code block into a single newline-joined
 * element, leaving every other element untouched.
 *
 * A block opens on a line accepted by `isStart` (that is not already a
 * complete single-line block) and closes on the first following line accepted
 * by `isEnd`. While a block is open, every line is treated as block content,
 * including lines that themselves look like a complete single-line block, so
 * buffered content is never discarded.
 *
 * @param input - Message fragments in order.
 * @param lines - Lines of an already-open code block to continue from. A
 *   non-empty array means "currently inside a block". Not mutated.
 * @returns The fragments with each code block collapsed into one element. A
 *   block that is still open when the input ends is flushed as a final element.
 */
export function formatCodeBlocks(input: string[], lines: string[] = []): string[] {
  const output: string[] = [];
  // Copy so the caller's array (or the shared default) is never mutated.
  let pending: string[] = [...lines];

  for (const line of input) {
    if (pending.length > 0) {
      // Inside a code block: keep buffering until the closing fence arrives.
      pending.push(line);
      if (isEnd(line)) {
        output.push(pending.join("\n"));
        pending = [];
      }
      continue;
    }

    if (!isSingleLine(line) && isStart(line)) {
      // Opening fence: start buffering a new block.
      pending = [line];
      continue;
    }

    // Plain text, or a code block that is already complete on one line.
    output.push(line);
  }

  // Flush a block that never saw its closing fence instead of dropping it.
  if (pending.length > 0) {
    output.push(pending.join("\n"));
  }
  return output;
}
/**
 * Tells whether a line both starts and ends with a triple-backtick fence
 * while not being just the bare fence itself.
 *
 * @param line - A single message fragment.
 * @returns `true` when the line is fenced on both sides.
 */
export function isSingleLine(line: string): boolean {
  const fence = "```";
  if (line === fence) {
    return false;
  }
  return line.startsWith(fence) && line.endsWith(fence);
}
/**
 * Tells whether a line begins with a triple-backtick fence, with or without
 * text after it.
 *
 * @param line - A single message fragment.
 * @returns `true` when the first three characters are backticks.
 */
export function isStart(line: string): boolean {
  const fence = "```";
  return line.slice(0, fence.length) === fence;
}
/**
 * Tells whether a line is exactly a bare triple-backtick fence.
 *
 * @param line - A single message fragment.
 * @returns `true` only for the three-character string of backticks.
 */
export function isEnd(line: string): boolean {
  const fence = "```";
  return line.length === fence.length && line.startsWith(fence);
}
テストコード
import { formatCodeBlocks } from ".";
// Spec for formatCodeBlocks. The triple-backtick fences in the fixtures are
// restored here; without them the string literals were split across lines and
// the file did not parse.
describe("formatCodeBlocks", () => {
  it("handles basic code block", () => {
    const input = ["```css", "body {", " background-color: red;", "}", "```"];
    const expected = ["```css\nbody {\n background-color: red;\n}\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles single line code block with start and end tags", () => {
    const input = ["```html <div></div>```"];
    const expected = ["```html <div></div>```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles multiple code blocks", () => {
    const input = ["```css", "div {", " color: blue;", "}", "```", "Some text", "```html", "<p>Hello</p>", "```"];
    const expected = ["```css\ndiv {\n color: blue;\n}\n```", "Some text", "```html\n<p>Hello</p>\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles plain text only", () => {
    const input = ["Just some text.", "More text here."];
    const expected = ["Just some text.", "More text here."];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles empty input", () => {
    const input: string[] = [];
    const expected: string[] = [];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles code block without language specification", () => {
    const input = ["```", "Here is some generic code", "```"];
    const expected = ["```\nHere is some generic code\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles code block without language but with single line content", () => {
    const input = ["```", "Single line content", "```"];
    const expected = ["```\nSingle line content\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });
});
rxjs の operator ではこれにした
import { filter, map, scan, share } from "rxjs/operators";
import { isEnd, isSingleLine, isStart } from "./internal";
import type { OperatorFunction } from "rxjs";
/**
 * Accumulator shape used by the code-block operator.
 *
 * - `lines`: the fragments gathered for the value currently being built.
 * - `collecting`: whether a fenced code block is still open.
 */
type State = {
  lines: string[];
  collecting: boolean;
};

/** Starting accumulator: nothing buffered and no open code block. */
const defaultState: State = { lines: [], collecting: false };
/**
 * RxJS operator that turns a stream of message fragments into a stream where
 * each fenced code block arrives as one newline-joined value.
 *
 * A block opens on a line accepted by `isStart` (that is not already a
 * complete single-line block) and closes on the first following line accepted
 * by `isEnd`. Nothing is emitted while a block is open; the whole block is
 * emitted when its closing fence arrives. Plain lines and single-line code
 * blocks pass through one-to-one.
 *
 * Known limitation: states with `collecting === true` are filtered out and
 * nothing is emitted on completion, so a block that never receives its
 * closing fence is not emitted at all.
 *
 * @returns An operator mapping a stream of lines to a stream of formatted
 *   messages.
 */
export const formatCodeBlocksOperator = (): OperatorFunction<string, string> => (source) => {
  // NOTE(review): only one downstream pipeline subscribes to `shared` here, so
  // share() may be unnecessary - confirm multicast expectations before removing.
  const shared = source.pipe(share());
  return shared.pipe(
    scan((state, line) => {
      if (state.collecting) {
        // Inside a block every line is content, even one that looks like a
        // complete single-line block; only a bare closing fence ends it.
        return {
          collecting: !isEnd(line),
          lines: [...state.lines, line],
        };
      }
      if (!isSingleLine(line) && isStart(line)) {
        // Opening fence: start a fresh buffer.
        return {
          collecting: true,
          lines: [line],
        };
      }
      // Plain text or a single-line code block: emit it alone. The previous
      // state's lines were already emitted, so they must not be carried over.
      return {
        collecting: false,
        lines: [line],
      };
    }, defaultState),
    // Hold back intermediate states while a block is still open.
    filter((state) => !state.collecting),
    map((state) => state.lines.join("\n")),
  );
};
テストコード
import { from, firstValueFrom } from "rxjs";
import { toArray } from "rxjs/operators";
import { formatCodeBlocksOperator } from "./operator";
// Spec for formatCodeBlocksOperator. The triple-backtick fences in the
// fixtures are restored here; without them the string literals were split
// across lines and the file did not parse.
describe("formatCodeBlocksOperator", () => {
  it("handles single line code block with start and end tags", async () => {
    const input = ["```html <div></div>```"];
    const expected = ["```html <div></div>```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles basic code block", async () => {
    const input = ["```css", "body {", " background-color: red;", "}", "```"];
    const expected = ["```css\nbody {\n background-color: red;\n}\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles multiple code blocks", async () => {
    const input = ["```css", "div {", " color: blue;", "}", "```", "Some text", "```html", "<p>Hello</p>", "```"];
    const expected = ["```css\ndiv {\n color: blue;\n}\n```", "Some text", "```html\n<p>Hello</p>\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles plain text only", async () => {
    const input = ["Just some text.", "More text here."];
    const expected = ["Just some text.", "More text here."];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles empty input", async () => {
    const input: string[] = [];
    const expected: string[] = [];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles code block without language specification", async () => {
    const input = ["```", "Here is some generic code", "```"];
    const expected = ["```\nHere is some generic code\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });
});
今のところ nested な codeblock には対応してない(discord が対応してないから)ので depth を考慮できたらもっといいと思う。
あと、もっとこうしたらいいと思うってのがあったら教えてください。