Closed6

配列の要素に codeblock を発見したら改行でまとめる関数を考える

naporitannaporitan

どういう環境?

  • discord.js で ChatGPT を使ったボットを作ってる
  • stream: true と response_format: { type: "json_object" } を併用している
  • { "message": ["返答が入る"] }

message に返答が入るが、system prompt である程度縛っても codeblock が 1 行ずつ別の要素に分割されてしまうので、それを直す。

codeblock が効いていない Discord のメッセージ

naporitannaporitan

こういう response になってしまっていて codeblock が壊れるので、codeblock が始まったら終わるまでは改行コードで結合して buffer すればいいはず。

{
  "message": [
    "```css",
    "div {", "  color: blue;", "}",
    "```",
    "Some text",
    "```html",
    "<p>Hello</p>",
    "```"
  ]
}
{
  "message": [
    "```css\ndiv {\n  color: blue;\n}\n```",
    "Some text",
    "```html\n<p>Hello</p>\n```"
  ]
}
naporitannaporitan

これになった

import { isSingleLine, isStart, isEnd } from "./internal";

/**
 * Collapses every fenced code block that is spread over several array
 * elements into a single newline-joined element.
 *
 * Elements outside a code block, and elements that already hold a complete
 * single-line code block, are passed through unchanged. A block that is still
 * open when the input ends is emitted with the lines collected so far.
 *
 * @param input - Message fragments, one element per line.
 * @param lines - Lines of a code block that is already open before `input`
 *   starts; a non-empty array means "currently inside a block".
 * @returns The fragments with each code block merged into one element.
 */
export function formatCodeBlocks(input: string[], lines: string[] = []): string[] {
  const output: string[] = [];
  // Copy so the caller's array is never mutated.
  let buffer = [...lines];

  for (const line of input) {
    if (buffer.length === 0) {
      // Outside a block: only a fence that does not also close on the same
      // line opens a new block; everything else is emitted as-is.
      if (isStart(line) && !isSingleLine(line)) {
        buffer.push(line);
      } else {
        output.push(line);
      }
      continue;
    }

    // Inside a block: always keep the line so buffered content is never lost.
    buffer.push(line);
    // A bare fence closes the block. A line that itself starts and ends with a
    // fence also closes it, instead of discarding what was buffered.
    if (isEnd(line) || isSingleLine(line)) {
      output.push(buffer.join("\n"));
      buffer = [];
    }
  }

  // Flush an unterminated block rather than dropping it.
  if (buffer.length > 0) output.push(buffer.join("\n"));
  return output;
}

/**
 * Tells whether a line is a complete code block by itself: it both starts and
 * ends with a triple-backtick fence. A bare fence on its own does not count.
 */
export function isSingleLine(line: string): boolean {
  const fence = "```";
  if (line === fence) return false;
  return line.startsWith(fence) && line.endsWith(fence);
}
/**
 * Tells whether a line begins with a triple-backtick fence.
 * How the line ends is not checked here.
 */
export function isStart(line: string): boolean {
  const fence = "```";
  return line.slice(0, fence.length) === fence;
}
/** Tells whether a line is exactly a bare triple-backtick fence. */
export function isEnd(line: string): boolean {
  const fence = "```";
  return line.length === fence.length && line.startsWith(fence);
}
テストコード

import { formatCodeBlocks } from ".";

// Fixtures carry literal ``` fences: formatCodeBlocks only merges lines that
// sit between a fence-prefixed line and a bare ``` line, so without the fences
// the inputs would come back unmerged.
describe("formatCodeBlocks", () => {
  it("handles basic code block", () => {
    const input = ["```css", "body {", " background-color: red;", "}", "```"];
    const expected = ["```css\nbody {\n background-color: red;\n}\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles single line code block with start and end tags", () => {
    const input = ["```html <div></div> ```"];
    const expected = ["```html <div></div> ```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles multiple code blocks", () => {
    const input = ["```css", "div {", " color: blue;", "}", "```", "Some text", "```html", "<p>Hello</p>", "```"];
    const expected = ["```css\ndiv {\n color: blue;\n}\n```", "Some text", "```html\n<p>Hello</p>\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles plain text only", () => {
    const input = ["Just some text.", "More text here."];
    const expected = ["Just some text.", "More text here."];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles empty input", () => {
    const input: string[] = [];
    const expected: string[] = [];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles code block without language specification", () => {
    const input = ["```", "Here is some generic code", "```"];
    const expected = ["```\nHere is some generic code\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });

  it("handles code block without language but with single line content", () => {
    const input = ["```", "Single line content", "```"];
    const expected = ["```\nSingle line content\n```"];
    expect(formatCodeBlocks(input)).toEqual(expected);
  });
});

naporitannaporitan

rxjs の operator ではこれにした

import { filter, map, scan, share } from "rxjs/operators";

import { isEnd, isSingleLine, isStart } from "./internal";

import type { OperatorFunction } from "rxjs";

/** Accumulator threaded through `scan` while incoming lines are regrouped. */
interface State {
  /** True while an opened code block has not been closed yet. */
  collecting: boolean;
  /** Lines gathered for the element that is joined and emitted next. */
  lines: string[];
}

/** Seed accumulator: outside any code block, with nothing buffered. */
const defaultState: State = { collecting: false, lines: [] };

/**
 * RxJS operator that regroups a stream of lines so that each fenced code block
 * is emitted as one newline-joined string, while every other line is emitted
 * on its own.
 *
 * A block is emitted when the line that closes it arrives. States with
 * `collecting: true` are filtered out, so nothing is emitted for a block that
 * is still open when the source completes.
 *
 * @returns An operator mapping a stream of lines to a stream of merged fragments.
 */
export const formatCodeBlocksOperator = (): OperatorFunction<string, string> => (source) => {
  const shared = source.pipe(share());

  return shared.pipe(
    scan((state: State, line: string): State => {
      if (state.collecting) {
        // Inside a block: append the line, and close the block on a bare fence
        // or on a line that itself starts and ends with a fence.
        const closes = isSingleLine(line) || isEnd(line);
        return { collecting: !closes, lines: [...state.lines, line] };
      }

      // Outside a block: always start from a fresh buffer. Reusing
      // `state.lines` here would repeat the previously emitted text in the
      // next emission.
      const opens = isStart(line) && !isSingleLine(line);
      return { collecting: opens, lines: [line] };
    }, defaultState),
    // Hold back intermediate states until the open block has been closed.
    filter((state) => !state.collecting),
    map((state) => state.lines.join("\n")),
  );
};
テストコード

import { from, firstValueFrom } from "rxjs";
import { toArray } from "rxjs/operators";

import { formatCodeBlocksOperator } from "./operator";

// Fixtures carry literal ``` fences: the operator only merges lines that sit
// between a fence-prefixed line and a bare ``` line, so without the fences the
// inputs would be emitted line by line.
describe("formatCodeBlocksOperator", () => {
  it("handles single line code block with start and end tags", async () => {
    const input = ["```html <div></div> ```"];
    const expected = ["```html <div></div> ```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles basic code block", async () => {
    const input = ["```css", "body {", " background-color: red;", "}", "```"];
    const expected = ["```css\nbody {\n background-color: red;\n}\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles multiple code blocks", async () => {
    const input = ["```css", "div {", " color: blue;", "}", "```", "Some text", "```html", "<p>Hello</p>", "```"];
    const expected = ["```css\ndiv {\n color: blue;\n}\n```", "Some text", "```html\n<p>Hello</p>\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles plain text only", async () => {
    const input = ["Just some text.", "More text here."];
    const expected = ["Just some text.", "More text here."];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles empty input", async () => {
    const input: string[] = [];
    const expected: string[] = [];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });

  it("handles code block without language specification", async () => {
    const input = ["```", "Here is some generic code", "```"];
    const expected = ["```\nHere is some generic code\n```"];
    const result = await firstValueFrom(from(input).pipe(formatCodeBlocksOperator(), toArray()));
    expect(result).toEqual(expected);
  });
});

naporitannaporitan

今のところ nested な codeblock には対応してない(discord が対応してないから)ので depth を考慮できたらもっといいと思う。
あと、もっとこうしたらいいと思うってのがあったら教えてください。

このスクラップは6ヶ月前にクローズされました