Closed7
できる限り RFC3986 に準拠した URI 抽出用正規表現
忙しいですか?こちらです
/(?<scheme>[a-zA-Z]([a-zA-Z0-9+.-])*):(?<hier_part>\/\/((?<userinfo>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:)*@)?(?<host>\[(?<ipv6address>(((?<h16_0>([0-9a-fA-F]{1,4})):){6}(?<ls32_0>((?<h16_1>([0-9a-fA-F]{1,4})):(?<h16_2>([0-9a-fA-F]{1,4}))|(?<ipv4address_0>((?<dec_octet_0>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_1>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_2>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_3>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|::((?<h16_3>([0-9a-fA-F]{1,4})):){5}(?<ls32_1>((?<h16_4>([0-9a-fA-F]{1,4})):(?<h16_5>([0-9a-fA-F]{1,4}))|(?<ipv4address_1>((?<dec_octet_4>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_5>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_6>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_7>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|((?<h16_6>([0-9a-fA-F]{1,4})))?::((?<h16_7>([0-9a-fA-F]{1,4})):){4}(?<ls32_2>((?<h16_8>([0-9a-fA-F]{1,4})):(?<h16_9>([0-9a-fA-F]{1,4}))|(?<ipv4address_2>((?<dec_octet_8>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_9>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_10>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_11>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_10>([0-9a-fA-F]{1,4})):){0,1}(?<h16_11>([0-9a-fA-F]{1,4})))?::((?<h16_12>([0-9a-fA-F]{1,4})):){3}(?<ls32_3>((?<h16_13>([0-9a-fA-F]{1,4})):(?<h16_14>([0-9a-fA-F]{1,4}))|(?<ipv4address_3>((?<dec_octet_12>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_13>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_14>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_15>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_15>([0-9a-fA-F]{1,4})):){0,2}(?<h16_16>([0-9a-fA-F]{1,4})))?::((?<h16_17>([0-9a-fA-F]{1,4})):){2}(?<ls32_4>((?<h16_18>([0-9a-fA-F]{1,4})):(?<h16_19>([0-9a-fA-F]{1,4}))|(?<ipv4address_4>((?<dec_octet_16>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_17>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_18>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_19>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_20>([0-9a-fA-F]{1,4})):){0,3}(?<h16_21>([0-9a-fA-F]{1,4})))?::(?<h16_22>([0-9a-fA-F]{1,4})):(?<ls32_5>((?<h16_23>([0-9a-fA-F]{1,4})):(?<h16_24>([0-9a-fA-F]{1,4}))|(?<ipv4address_5>((?<dec_octet_20>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_21>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_22>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_23>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_25>([0-9a-fA-F]{1,4})):){0,4}(?<h16_26>([0-9a-fA-F]{1,4})))?::(?<ls32_6>((?<h16_27>([0-9a-fA-F]{1,4})):(?<h16_28>([0-9a-fA-F]{1,4}))|(?<ipv4address_6>((?<dec_octet_24>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_25>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_26>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_27>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_29>([0-9a-fA-F]{1,4})):){0,5}(?<h16_30>([0-9a-fA-F]{1,4})))?::(?<h16_31>([0-9a-fA-F]{1,4}))|(((?<h16_32>([0-9a-fA-F]{1,4})):){0,6}(?<h16_33>([0-9a-fA-F]{1,4})))?::))\]|(?<ipv4address_7>((?<dec_octet_28>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_29>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_30>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_31>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))|(?<reg_name>([a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=])*))(:(?<port>[0-9]*))?(?<path>(?<path_abempty>(\/(?<segment_path_abempty>((?<pchar_segment_path_abempty>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*)|(?<path_absolute>(\/(?<segment_nz_path_absolute>((?<pchar_segment_nz_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_absolute>((?<pchar_segment_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))+?))|(?<path_noscheme>((?<segment_nz_nc_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|@)(\/(?<segment_path_noscheme>((?<pchar_segment_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?<path_rootless>((?<segment_nz_path_rootless>((?<pchar_segment_nz_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_rootless>((?<pchar_segment_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?:)))(\?(?<query>((?<pchar_query>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?(#(?<fragment>((?<pchar_fragment>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?/
過去のもの
1
/(?<scheme>[a-zA-Z]([a-zA-Z0-9+.-])*):(?<hier_part>\/\/((?<userinfo>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:)*@)?(?<host>((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?<reg_name>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=])*)(:(?<port>[0-9]*))?(?<path>(?<path_abempty>\/(?<segment_path_abempty>(?<pchar_segment_path_abempty>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*)*|(?<path_absolute>\/(?<segment_nz_path_absolute>(?<pchar_segment_nz_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+(\/(?<segment_path_absolute>(?<pchar_segment_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*)+?)|(?<path_noscheme>(?<segment_nz_nc_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|@)(\/(?<segment_path_noscheme>(?<pchar_segment_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*)*)|(?<path_rootless>(?<segment_nz_path_rootless>(?<pchar_segment_nz_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+(\/(?<segment_path_rootless>(?<pchar_segment_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*)*)|(?:)))(\?(?<query>(?<pchar_query>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*)?(#(?<fragment>(?<pchar_fragment>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*)?/
2
/(?<scheme>[a-zA-Z]([a-zA-Z0-9+.-])*):(?<hier_part>\/\/((?<userinfo>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:)*@)?(?<host>((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?<reg_name>([a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=])*))(:(?<port>[0-9]*))?(?<path>(?<path_abempty>(\/(?<segment_path_abempty>((?<pchar_segment_path_abempty>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*)|(?<path_absolute>(\/(?<segment_nz_path_absolute>((?<pchar_segment_nz_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_absolute>((?<pchar_segment_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))+?))|(?<path_noscheme>((?<segment_nz_nc_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|@)(\/(?<segment_path_noscheme>((?<pchar_segment_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?<path_rootless>((?<segment_nz_path_rootless>((?<pchar_segment_nz_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_rootless>((?<pchar_segment_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?:)))(\?(?<query>((?<pchar_query>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?(#(?<fragment>((?<pchar_fragment>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?/
解説
文書によると URI は
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
らしい
私は ABNF が読めないのでノリで生成しました.
生成に使ったコードが以下の通りです.
const source = (/** @type {any[]} */ ...r) => r.map(x => typeof x === "string" ? x : x instanceof RegExp ? x.source : String(x)).join("");
const group = (name, /** @type {(string | RegExp)[]} */ ...r) => {
return "(" + (name ? "?<" + name + ">" : "") + r.map(x => source(x)).join("|") + ")";
}
const opt = (/** @type {string} */ r) => source(r) + "?";
export const gen_delims = new RegExp("[:/?#[]@]");
export const sub_delims = new RegExp("[!$&'()*+,;=]");
export const reserved = new RegExp(group("reserved", gen_delims, sub_delims));
export const unserved = new RegExp("[a-zA-Z0-9._~-]");
export const pct_encoded = new RegExp("%[0-9a-fA-F]{2}");
export const scheme = new RegExp(group("scheme", "[a-zA-Z]" + group("", "[a-zA-Z0-9+.-]") + "*"));
export const userinfo = new RegExp(group("userinfo", unserved, pct_encoded, sub_delims, ":") + "*");
export const reg_name = new RegExp(group("reg_name", unserved, pct_encoded, sub_delims) + "*");
export const ipv4 = /((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])/;
export const host = new RegExp(group("host", ipv4, reg_name));
export const port = new RegExp(group("port", "[0-9]*"));
export const authority = new RegExp(opt(group("", source(userinfo) + "@")) + source(host) + opt(group("", ":" + source(port))));
export const pchar = (/** @type {string} */ name) => new RegExp(group("pchar_" + name, unserved, pct_encoded, sub_delims, ":", "@"));
export const segment = (/** @type {string} */ name) => new RegExp(group("segment_" + name, pchar("segment_" + name)) + "*");
export const segment_nz = (/** @type {string} */ name) => new RegExp(group("segment_nz_" + name, pchar("segment_nz_" + name)) + "+");
export const segment_nz_nc = (/** @type {string} */ name) => new RegExp(group("segment_nz_nc_" + name, unserved, pct_encoded, sub_delims, "@"));
export const path_abempty = new RegExp(group("path_abempty", "/" + source(segment("path_abempty"))) + "*");
export const path_absolute = new RegExp(group("path_absolute", "/" + opt(source(segment_nz("path_absolute")) + group("", "/" + source(segment("path_absolute"))) + "+")));
export const path_noscheme = new RegExp(group("path_noscheme", source(segment_nz_nc("path_noscheme")) + group("", "/" + source(segment("path_noscheme"))) + "*"));
export const path_rootless = new RegExp(group("path_rootless", source(segment_nz("path_rootless")) + group("", "/" + source(segment("path_rootless"))) + "*"));
export const path = new RegExp(group("path", path_abempty, path_absolute, path_noscheme, path_rootless, "(?:)"));
export const query = new RegExp(group("query", pchar("query"), "/", "\\?") + "*");
export const fragment = new RegExp(group("fragment", pchar("fragment"), "/", "\\?") + "*");
export const hier_part = new RegExp(group("hier_part", "//" + source(authority) + source(path)));
export const absolute_uri = new RegExp(source(scheme) + ":" + source(hier_part) + opt(group("", "\\?" + query)));
export const uri = new RegExp(source(scheme) + ":" + source(hier_part) + opt(group("", "\\?" + source(query))) + opt(group("", "#" + source(fragment))));
まあひどい.
scheme の正規表現で redos 攻撃受けそうな感じあるけど諦めてる.
group がいっぱいついているのでホスト名を抽出したい時とか,結構嬉しいかも?
改善できそうなところがあるので帰ったら書く
なんか group の処理がおかしかったので修正.IPv4 アドレス以外の部分は多分これでよし.
const source = (/** @type {any[]} */ ...r) => r.map(x => typeof x === "string" ? x : x instanceof RegExp ? x.source : String(x)).join("");
const group = (name, /** @type {(string | RegExp)[]} */ ...r) => {
return "(" + (name ? "?<" + name + ">" : "") + r.map(x => source(x)).join("|") + ")";
}
const opt = (/** @type {string} */ r) => source(r) + "?";
export const gen_delims = new RegExp("[:/?#[]@]");
export const sub_delims = new RegExp("[!$&'()*+,;=]");
export const reserved = new RegExp(group("reserved", gen_delims, sub_delims));
export const unserved = new RegExp("[a-zA-Z0-9._~-]");
export const pct_encoded = new RegExp("%[0-9a-fA-F]{2}");
export const scheme = new RegExp(group("scheme", "[a-zA-Z]" + group("", "[a-zA-Z0-9+.-]") + "*"));
export const userinfo = new RegExp(group("userinfo", unserved, pct_encoded, sub_delims, ":") + "*");
export const reg_name = new RegExp(group("reg_name", group("", unserved, pct_encoded, sub_delims) + "*"));
export const ipv4 = /((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])/;
export const host = new RegExp(group("host", ipv4, reg_name));
export const port = new RegExp(group("port", "[0-9]*"));
export const authority = new RegExp(opt(group("", source(userinfo) + "@")) + source(host) + opt(group("", ":" + source(port))));
export const pchar = (/** @type {string} */ name) => new RegExp(group("pchar_" + name, unserved, pct_encoded, sub_delims, ":", "@"));
export const segment = (/** @type {string} */ name) => new RegExp(group("segment_" + name, group("", pchar("segment_" + name)) + "*"));
export const segment_nz = (/** @type {string} */ name) => new RegExp(group("segment_nz_" + name, group("", pchar("segment_nz_" + name)) + "+"));
export const segment_nz_nc = (/** @type {string} */ name) => new RegExp(group("segment_nz_nc_" + name, unserved, pct_encoded, sub_delims, "@"));
export const path_abempty = new RegExp(group("path_abempty", group("", "/" + source(segment("path_abempty"))) + "*"));
export const path_absolute = new RegExp(group("path_absolute", group("", "/" + opt(source(segment_nz("path_absolute")) + group("", "/" + source(segment("path_absolute"))) + "+"))));
export const path_noscheme = new RegExp(group("path_noscheme", group("", source(segment_nz_nc("path_noscheme")) + group("", "/" + source(segment("path_noscheme"))) + "*")));
export const path_rootless = new RegExp(group("path_rootless", group("", source(segment_nz("path_rootless")) + group("", "/" + source(segment("path_rootless"))) + "*")));
export const path = new RegExp(group("path", path_abempty, path_absolute, path_noscheme, path_rootless, "(?:)"));
export const query = new RegExp(group("query", group("", pchar("query"), "/", "\\?") + "*"));
export const fragment = new RegExp(group("fragment", group("", pchar("fragment"), "/", "\\?") + "*"));
export const hier_part = new RegExp(group("hier_part", "//" + source(authority) + source(path)));
export const absolute_uri = new RegExp(source(scheme) + ":" + source(hier_part) + opt(group("", "\\?" + query)));
export const uri = new RegExp(source(scheme) + ":" + source(hier_part) + opt(group("", "\\?" + source(query))) + opt(group("", "#" + source(fragment))));
/(?<scheme>[a-zA-Z]([a-zA-Z0-9+.-])*):(?<hier_part>\/\/((?<userinfo>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:)*@)?(?<host>((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?<reg_name>([a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=])*))(:(?<port>[0-9]*))?(?<path>(?<path_abempty>(\/(?<segment_path_abempty>((?<pchar_segment_path_abempty>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*)|(?<path_absolute>(\/(?<segment_nz_path_absolute>((?<pchar_segment_nz_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_absolute>((?<pchar_segment_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))+?))|(?<path_noscheme>((?<segment_nz_nc_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|@)(\/(?<segment_path_noscheme>((?<pchar_segment_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?<path_rootless>((?<segment_nz_path_rootless>((?<pchar_segment_nz_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_rootless>((?<pchar_segment_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?:)))(\?(?<query>((?<pchar_query>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?(#(?<fragment>((?<pchar_fragment>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?/
実行結果:
❯ node .
[
'https://zenn.dev/yuimaru/scraps/8ce132bc8aa6c9?k=v&i#unko',
'https',
's',
'//zenn.dev/yuimaru/scraps/8ce132bc8aa6c9',
undefined,
undefined,
'zenn.dev',
undefined,
undefined,
undefined,
undefined,
undefined,
'zenn.dev',
'v',
undefined,
undefined,
'/yuimaru/scraps/8ce132bc8aa6c9',
'/yuimaru/scraps/8ce132bc8aa6c9',
'/8ce132bc8aa6c9',
'8ce132bc8aa6c9',
'9',
'9',
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
undefined,
'?k=v&i',
'k=v&i',
'i',
'i',
'#unko',
'unko',
'o',
'o',
index: 0,
input: 'https://zenn.dev/yuimaru/scraps/8ce132bc8aa6c9?k=v&i#unko',
groups: [Object: null prototype] {
scheme: 'https',
hier_part: '//zenn.dev/yuimaru/scraps/8ce132bc8aa6c9',
userinfo: undefined,
host: 'zenn.dev',
reg_name: 'zenn.dev',
port: undefined,
path: '/yuimaru/scraps/8ce132bc8aa6c9',
path_abempty: '/yuimaru/scraps/8ce132bc8aa6c9',
segment_path_abempty: '8ce132bc8aa6c9',
pchar_segment_path_abempty: '9',
path_absolute: undefined,
segment_nz_path_absolute: undefined,
pchar_segment_nz_path_absolute: undefined,
segment_path_absolute: undefined,
pchar_segment_path_absolute: undefined,
path_noscheme: undefined,
segment_nz_nc_path_noscheme: undefined,
segment_path_noscheme: undefined,
pchar_segment_path_noscheme: undefined,
path_rootless: undefined,
segment_nz_path_rootless: undefined,
pchar_segment_nz_path_rootless: undefined,
segment_path_rootless: undefined,
pchar_segment_path_rootless: undefined,
query: 'k=v&i',
pchar_query: 'i',
fragment: 'unko',
pchar_fragment: 'o'
}
]
クソの IPv4 は帰ったら書く
クソの IPv4 アドレスのついでにクソの IPv6 アドレスを追加して 4829 文字の正規表現が爆誕!
生成に使ったもの
// @ts-check
const source = (/** @type {any[]} */ ...r) =>
r
.map((x) =>
typeof x === "string" ? x : x instanceof RegExp ? x.source : String(x),
)
.join("");
const group = (name, /** @type {(string | RegExp)[]} */ ...r) => {
return (
"(" +
(name ? "?<" + name + ">" : "") +
r.map((x) => source(x)).join("|") +
")"
);
};
const opt = (/** @type {string} */ r) => source(r) + "?";
export const gen_delims = new RegExp("[:/?#[]@]");
export const sub_delims = new RegExp("[!$&'()*+,;=]");
export const reserved = new RegExp(group("reserved", gen_delims, sub_delims));
export const unserved = new RegExp("[a-zA-Z0-9._~-]");
export const pct_encoded = new RegExp("%[0-9a-fA-F]{2}");
export const scheme = new RegExp(
group("scheme", "[a-zA-Z]" + group("", "[a-zA-Z0-9+.-]") + "*"),
);
export const userinfo = new RegExp(
group("userinfo", unserved, pct_encoded, sub_delims, ":") + "*",
);
export const reg_name = new RegExp(
group("reg_name", group("", unserved, pct_encoded, sub_delims) + "*"),
);
let dec_octetC = 0;
export const dec_octet = () =>
new RegExp(
group(
"dec_octet_" + (dec_octetC++).toString(),
group(
"",
new RegExp("[0-9]"),
new RegExp("[1-9][0-9]"),
new RegExp("1[0-9][0-9]"),
new RegExp("2[0-4][0-9]"),
new RegExp("25[0-5]"),
),
),
);
let ipv4addressC = 0;
export const ipv4address = () =>
new RegExp(
group(
"ipv4address_" + (ipv4addressC++).toString(),
group(
"",
source(dec_octet()) +
"\\." +
source(dec_octet()) +
"\\." +
source(dec_octet()) +
"\\." +
source(dec_octet()),
),
),
);
let h16C = 0;
export const h16 = () =>
new RegExp(
group("h16_" + (h16C++).toString(), group("", "[0-9a-fA-F]{1,4}")),
);
let ls32C = 0;
export const ls32 = () =>
new RegExp(
group(
"ls32_" + (ls32C++).toString(),
group("", source(h16()) + ":" + source(h16()), ipv4address()),
),
);
export const ipv6address = new RegExp(
group(
"ipv6address",
group(
"",
group("", source(h16()) + ":") + "{6}" + source(ls32()),
"::" + group("", source(h16()) + ":") + "{5}" + source(ls32()),
opt(group("", h16())) +
"::" +
group("", source(h16()) + ":") +
"{4}" +
source(ls32()),
opt(group("", group("", source(h16()) + ":") + "{0,1}" + source(h16()))) +
"::" +
group("", source(h16()) + ":") +
"{3}" +
source(ls32()),
opt(group("", group("", source(h16()) + ":") + "{0,2}" + source(h16()))) +
"::" +
group("", source(h16()) + ":") +
"{2}" +
source(ls32()),
opt(group("", group("", source(h16()) + ":") + "{0,3}" + source(h16()))) +
"::" +
source(h16()) +
":" +
source(ls32()),
opt(group("", group("", source(h16()) + ":") + "{0,4}" + source(h16()))) +
"::" +
source(ls32()),
opt(group("", group("", source(h16()) + ":") + "{0,5}" + source(h16()))) +
"::" +
source(h16()),
opt(group("", group("", source(h16()) + ":") + "{0,6}" + source(h16()))) +
"::",
),
),
);
export const ipliteral = new RegExp("\\[" + source(ipv6address) + "\\]");
export const host = new RegExp(
group("host", ipliteral, ipv4address(), reg_name),
);
export const port = new RegExp(group("port", "[0-9]*"));
export const authority = new RegExp(
opt(group("", source(userinfo) + "@")) +
source(host) +
opt(group("", ":" + source(port))),
);
export const pchar = (/** @type {string} */ name) =>
new RegExp(
group("pchar_" + name, unserved, pct_encoded, sub_delims, ":", "@"),
);
export const segment = (/** @type {string} */ name) =>
new RegExp(
group("segment_" + name, group("", pchar("segment_" + name)) + "*"),
);
export const segment_nz = (/** @type {string} */ name) =>
new RegExp(
group("segment_nz_" + name, group("", pchar("segment_nz_" + name)) + "+"),
);
export const segment_nz_nc = (/** @type {string} */ name) =>
new RegExp(
group("segment_nz_nc_" + name, unserved, pct_encoded, sub_delims, "@"),
);
export const path_abempty = new RegExp(
group("path_abempty", group("", "/" + source(segment("path_abempty"))) + "*"),
);
export const path_absolute = new RegExp(
group(
"path_absolute",
group(
"",
"/" +
opt(
source(segment_nz("path_absolute")) +
group("", "/" + source(segment("path_absolute"))) +
"+",
),
),
),
);
export const path_noscheme = new RegExp(
group(
"path_noscheme",
group(
"",
source(segment_nz_nc("path_noscheme")) +
group("", "/" + source(segment("path_noscheme"))) +
"*",
),
),
);
export const path_rootless = new RegExp(
group(
"path_rootless",
group(
"",
source(segment_nz("path_rootless")) +
group("", "/" + source(segment("path_rootless"))) +
"*",
),
),
);
export const path = new RegExp(
group(
"path",
path_abempty,
path_absolute,
path_noscheme,
path_rootless,
"(?:)",
),
);
export const query = new RegExp(
group("query", group("", pchar("query"), "/", "\\?") + "*"),
);
export const fragment = new RegExp(
group("fragment", group("", pchar("fragment"), "/", "\\?") + "*"),
);
export const hier_part = new RegExp(
group("hier_part", "//" + source(authority) + source(path)),
);
export const absolute_uri = new RegExp(
source(scheme) + ":" + source(hier_part) + opt(group("", "\\?" + query)),
);
export const uri = new RegExp(
source(scheme) +
":" +
source(hier_part) +
opt(group("", "\\?" + source(query))) +
opt(group("", "#" + source(fragment))),
);
成果物
/(?<scheme>[a-zA-Z]([a-zA-Z0-9+.-])*):(?<hier_part>\/\/((?<userinfo>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:)*@)?(?<host>\[(?<ipv6address>(((?<h16_0>([0-9a-fA-F]{1,4})):){6}(?<ls32_0>((?<h16_1>([0-9a-fA-F]{1,4})):(?<h16_2>([0-9a-fA-F]{1,4}))|(?<ipv4address_0>((?<dec_octet_0>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_1>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_2>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_3>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|::((?<h16_3>([0-9a-fA-F]{1,4})):){5}(?<ls32_1>((?<h16_4>([0-9a-fA-F]{1,4})):(?<h16_5>([0-9a-fA-F]{1,4}))|(?<ipv4address_1>((?<dec_octet_4>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_5>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_6>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_7>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|((?<h16_6>([0-9a-fA-F]{1,4})))?::((?<h16_7>([0-9a-fA-F]{1,4})):){4}(?<ls32_2>((?<h16_8>([0-9a-fA-F]{1,4})):(?<h16_9>([0-9a-fA-F]{1,4}))|(?<ipv4address_2>((?<dec_octet_8>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_9>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_10>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_11>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_10>([0-9a-fA-F]{1,4})):){0,1}(?<h16_11>([0-9a-fA-F]{1,4})))?::((?<h16_12>([0-9a-fA-F]{1,4})):){3}(?<ls32_3>((?<h16_13>([0-9a-fA-F]{1,4})):(?<h16_14>([0-9a-fA-F]{1,4}))|(?<ipv4address_3>((?<dec_octet_12>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_13>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_14>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_15>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_15>([0-9a-fA-F]{1,4})):){0,2}(?<h16_16>([0-9a-fA-F]{1,4})))?::((?<h16_17>([0-9a-fA-F]{1,4})):){2}(?<ls32_4>((?<h16_18>([0-9a-fA-F]{1,4})):(?<h16_19>([0-9a-fA-F]{1,4}))|(?<ipv4address_4>((?<dec_octet_16>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_17>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_18>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_19>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_20>([0-9a-fA-F]{1,4})):){0,3}(?<h16_21>([0-9a-fA-F]{1,4})))?::(?<h16_22>([0-9a-fA-F]{1,4})):(?<ls32_5>((?<h16_23>([0-9a-fA-F]{1,4})):(?<h16_24>([0-9a-fA-F]{1,4}))|(?<ipv4address_5>((?<dec_octet_20>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_21>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_22>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_23>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_25>([0-9a-fA-F]{1,4})):){0,4}(?<h16_26>([0-9a-fA-F]{1,4})))?::(?<ls32_6>((?<h16_27>([0-9a-fA-F]{1,4})):(?<h16_28>([0-9a-fA-F]{1,4}))|(?<ipv4address_6>((?<dec_octet_24>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_25>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_26>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_27>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))))|(((?<h16_29>([0-9a-fA-F]{1,4})):){0,5}(?<h16_30>([0-9a-fA-F]{1,4})))?::(?<h16_31>([0-9a-fA-F]{1,4}))|(((?<h16_32>([0-9a-fA-F]{1,4})):){0,6}(?<h16_33>([0-9a-fA-F]{1,4})))?::))\]|(?<ipv4address_7>((?<dec_octet_28>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_29>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_30>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))\.(?<dec_octet_31>([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))))|(?<reg_name>([a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=])*))(:(?<port>[0-9]*))?(?<path>(?<path_abempty>(\/(?<segment_path_abempty>((?<pchar_segment_path_abempty>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*)|(?<path_absolute>(\/(?<segment_nz_path_absolute>((?<pchar_segment_nz_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_absolute>((?<pchar_segment_path_absolute>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))+?))|(?<path_noscheme>((?<segment_nz_nc_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|@)(\/(?<segment_path_noscheme>((?<pchar_segment_path_noscheme>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?<path_rootless>((?<segment_nz_path_rootless>((?<pchar_segment_nz_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))+)(\/(?<segment_path_rootless>((?<pchar_segment_path_rootless>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@))*))*))|(?:)))(\?(?<query>((?<pchar_query>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?(#(?<fragment>((?<pchar_fragment>[a-zA-Z0-9._~-]|%[0-9a-fA-F]{2}|[!$&'()*+,;=]|:|@)|\/|\?)*))?/
もはや group がいっぱい付いていてお得!wぐらいのノリでしか使えない気がする.
泣いていいかな,これ.
仕様守れているかはもうわかりません.寝るか
このスクラップは2024/01/17にクローズされました