😀

apacheのrequest_dateをawkでdatetimeフォーマット形式に変換する

2022/11/28に公開

awkって便利ですよね。
apacheログのrequest_dateって意外と自力でパースすると面倒臭かったのでメモ
(正規表現は自信がないのでこっちのほうがはやいんじゃねぇ?)ってツッコミ大歓迎です。

access_log.awk
# Convert Apache access-log lines (common/combined log format) into TSV:
#   request datetime (YYYY-MM-DD HH:MM:SS) <TAB> method <TAB> path <TAB> status
#
# Fields used (default whitespace field splitting):
#   $4 = "[dd/Mon/yyyy:HH:MM:SS"   date and time, with a leading "["
#   $5 = "+zzzz]"                  UTC offset (not part of the output)
#   $6 = "\"METHOD"                request method, with a leading double quote
#   $7 = request path
#   $9 = status code
{
    # $4 alone carries the date and time, so dropping its leading "[" is
    # enough. The offset in $5 is deliberately not appended: splitting it
    # off on "+" afterwards fails for negative offsets such as -0500.
    stamp = substr($4, 2)

    # dd/Mon/yyyy:HH:MM:SS -> day, month, year, hour, minute, second
    split(stamp, part, "[/:]")
    day    = part[1]
    month  = part[2]
    year   = part[3]
    hour   = part[4]
    minute = part[5]
    second = part[6]

    # Map the English month name to a zero-padded number. Only the first
    # three letters are compared, case-insensitively, so "Jan", "JAN" and
    # "January" all become "01". The position must fall on a 3-letter
    # boundary of the table to avoid matches that straddle two names.
    # Unrecognised values are left untouched.
    pos = index("JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDEC", toupper(substr(month, 1, 3)))
    if (length(month) >= 3 && pos % 3 == 1)
        month = sprintf("%02d", (pos + 2) / 3)

    request_date = year "-" month "-" day " " hour ":" minute ":" second

    # $6 still has the opening quote of the request line ("GET ...); strip
    # it so the TSV field does not contain an unbalanced double quote.
    method = $6
    sub(/^"/, "", method)
    path = $7
    code = $9

    # TSV output is easy to import elsewhere; change the separator to taste.
    print request_date "\t" method "\t" path "\t" code
}

使い方

# Requests for images, CSS and JS are excluded. The filter looks only at the
# request path ($7) and requires a literal ".ext" at the end of the path or
# just before a query string, so unrelated lines are not dropped by accident.
awk '$7 !~ /\.(jpg|png|gif|css|js)([?]|$)/' /path/to/access_log | awk -f access_log.awk

Discussion