👋

ChatGPTで正規表現を自動生成しよう

2023/06/21に公開

regex

はじめに

修正や追加等はコメントまたはGitHubで編集リクエストをお待ちしております。

実装編

最近話題のFunction callingで正規表現を生成します。

関数を定義する

関数の説明を書きます。関数の名前、説明、仕様、引数の名前と役割など
functions のtypeには、jsonschemaの定義を書きます。

functions = [
    {
        "name": "check_regex",
        "description": "正規表現が正しいかどうかを判定する関数。戻り値がTrueの場合は正規表現は正しいと判断する、Falseの場合は正しくないと判断する。",
        "parameters": {
            "type": "object",
            "properties": {
                "regex": {
                    "type": "string",
                    "description": "正規表現",
                },
                "test_strs": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "テストする文字列の配列",
                },
                "result_with_test_strs": {
                    "type": "array",
                    "items": {"type": "boolean"},
                    "description": "テストする文字列に対する推定している結果の配列",
                },
            },
            "required": ["regex", "test_strs", "result_with_test_strs"],
        },
    }
]

# 上で定義した関数を実装する

def check_regex(regex, test_strs, result_with_test_strs):
    import re
    result = []
    for test_str in test_strs:
        if re.match(regex, test_str):
            result.append(True)
        else:
            result.append(False)
    return result == result_with_test_strs

ChatGPTで正規表現を生成する

成功するまで繰り返します。

import openai
from dotenv import load_dotenv
import os
import json

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
messages = []
flag = True
model_name = "gpt-3.5-turbo-0613"
try:
    while True:
        if flag:
            question = input(">> ")
            if question == "exit":
                break
            messages.append({"role": "user", "content": question})
        else:
            flag = True
        responses = openai.ChatCompletion.create(
            model=model_name,
            messages=messages,
            # ここで関数を定義した配列を渡す
            functions=functions,
            # autoにすると自動で関数を呼ぶかどうかを判断してくれる
            function_call="auto",
        )
        message_tmp = responses["choices"][0]["message"]
        # function_callがある場合は関数を呼び出す
        if message_tmp.get("function_call"):
            try:
                arguments = json.loads(message_tmp["function_call"]["arguments"])
            except json.decoder.JSONDecodeError:
                flag = False
                continue
            # 関数を呼び出す
            function_response = check_regex(
                regex=arguments["regex"],
                test_strs=arguments["test_strs"],
                result_with_test_strs=arguments["result_with_test_strs"],
            )
            if function_response:
                print(
                    arguments["regex"],
                    arguments["test_strs"],
                    arguments["result_with_test_strs"],
                )
                flag = True
            # 関数の戻り値を追加する
            messages.append(
                {
                    "role": "function",
                    "name": message_tmp["function_call"]["name"],
                    # 関数の戻り値はstrに変換する
                    "content": str(function_response),
                }
            )

            if not function_response:
                messages.append(
                    {
                        "role": "user",
                        "content": "正規表現が間違っているようです。もう一度入力してください。",
                    }
                )
                flag = False
        else:
            messages.append({"role": "assistant", "content": message_tmp["content"]})
            messages.append({"role": "user", "content": "testして"})
            flag = False
except KeyboardInterrupt:
    pass

コード全文

import openai
from dotenv import load_dotenv
import os
import json

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
messages = []
flag = True
model_name = "gpt-3.5-turbo-0613"

functions = [
    {
        "name": "check_regex",
        "description": "正規表現が正しいかどうかを判定する関数。戻り値がTrueの場合は正規表現は正しいと判断する、Falseの場合は正しくないと判断する。",
        "parameters": {
            "type": "object",
            "properties": {
                "regex": {
                    "type": "string",
                    "description": "正規表現",
                },
                "test_strs": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "テストする文字列の配列",
                },
                "result_with_test_strs": {
                    "type": "array",
                    "items": {"type": "boolean"},
                    "description": "テストする文字列に対する推定している結果の配列",
                },
            },
            "required": ["regex", "test_strs", "result_with_test_strs"],
        },
    }
]

# 上で定義した関数を実装する

def check_regex(regex, test_strs, result_with_test_strs):
    import re

    result = []
    for test_str in test_strs:
        if re.match(regex, test_str):
            result.append(True)
        else:
            result.append(False)
    return result == result_with_test_strs


try:
    while True:
        if flag:
            question = input(">> ")
            if question == "exit":
                break
            messages.append({"role": "user", "content": question})
        else:
            flag = True
        responses = openai.ChatCompletion.create(
            model=model_name,
            messages=messages,
            functions=functions,
            # autoにすると自動で関数を呼ぶかどうかを判断してくれる
            function_call="auto",
        )
        message_tmp = responses["choices"][0]["message"]
        # function_callがある場合は関数を呼び出す
        if message_tmp.get("function_call"):
            try:
                arguments = json.loads(message_tmp["function_call"]["arguments"])
            except json.decoder.JSONDecodeError:
                flag = False
                continue
            function_response = check_regex(
                regex=arguments["regex"],
                test_strs=arguments["test_strs"],
                result_with_test_strs=arguments["result_with_test_strs"],
            )
            if function_response:
                print(
                    arguments["regex"],
                    arguments["test_strs"],
                    arguments["result_with_test_strs"],
                )
                flag = True
            messages.append(
                {
                    "role": "function",
                    "name": message_tmp["function_call"]["name"],
                    "content": str(function_response),
                }
            )

            if not function_response:
                messages.append(
                    {
                        "role": "user",
                        "content": "正規表現が間違っているようです。もう一度入力してください。",
                    }
                )
                flag = False
        else:
            messages.append({"role": "assistant", "content": message_tmp["content"]})
            messages.append({"role": "user", "content": "testして"})
            flag = False
except KeyboardInterrupt:
    pass

まとめ

今回はChatGPTを使って正規表現の生成を試してみました。
Function callingを使うことで、実装は簡単に、動作は安定という良いとこ取りができました。

GitHubで編集を提案