💽

TerraformでCloudTrailのAthenaのテーブルを作る、日付とリージョンのパーティション付き

2024/08/02に公開

前準備

  • この辺りを参考にCloudTrailのイベントをS3に貯めます

https://docs.aws.amazon.com/ja_jp/awscloudtrail/latest/userguide/cloudtrail-tutorial.html
https://zenn.dev/issy/articles/zenn-cloudtrail-overview

コード

Terraform

# Athenaデータベースの作成
resource "aws_athena_database" "cloudtrail" {
  name          = "cloudtrail"
  bucket        = "クエリ結果を保存するバケットを指定する"
  force_destroy = true
}

data "template_file" "cloudtrail" {
  template = file("${path.module}/cloudtrail.tpl.sql")
  vars = { # DDL側に渡す変数
    table_name = "cloudtrail"
    account_id = "XXXXXXX"
    s3_bucket  = "CloudTrailのイベントが入っているバケットを指定する"
    region     = "ap-northeast-1"
  }
}

resource "aws_athena_named_query" "cloudtrail" {
  name      = "cloudtrail"
  database  = aws_athena_database.cloudtrail.name
  query     = data.template_file.cloudtrail.rendered
  workgroup = aws_athena_workgroup.main.id
}

DDL

  • GUIからcloudTrailの設定を行うとテーブルを作成するDDLが発行される
    • マルチリージョンのイベント取得を設定し、日付(YYYY/MM/DD)とリージョンに対してパーティション化した
  • パーティション化にははまりどころがある
    • 例えば'storage.location.template'='s3://${s3_bucket}/AWSLogs/${account_id}/CloudTrail/$${region}/$${timestamp}/'における書き方
      • ${s3_bucket}${account_id}/はTerraform側から渡す変数
      • $${region}$${timestamp}はパーティション、$が2つです
CREATE EXTERNAL TABLE ${table_name} (
    eventversion string COMMENT 'from deserializer',
    useridentity struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>,ec2roledelivery:string,webidfederationdata:map<string,string>>> COMMENT 'from deserializer',
    eventtime string COMMENT 'from deserializer',
    eventsource string COMMENT 'from deserializer',
    eventname string COMMENT 'from deserializer',
    awsregion string COMMENT 'from deserializer',
    sourceipaddress string COMMENT 'from deserializer',
    useragent string COMMENT 'from deserializer',
    errorcode string COMMENT 'from deserializer',
    errormessage string COMMENT 'from deserializer',
    requestparameters string COMMENT 'from deserializer',
    responseelements string COMMENT 'from deserializer',
    additionaleventdata string COMMENT 'from deserializer',
    requestid string COMMENT 'from deserializer',
    eventid string COMMENT 'from deserializer',
    resources array<struct<arn:string,accountid:string,type:string>> COMMENT 'from deserializer',
    eventtype string COMMENT 'from deserializer',
    apiversion string COMMENT 'from deserializer',
    readonly string COMMENT 'from deserializer',
    recipientaccountid string COMMENT 'from deserializer',
    serviceeventdetails string COMMENT 'from deserializer',
    sharedeventid string COMMENT 'from deserializer',
    vpcendpointid string COMMENT 'from deserializer',
    tlsdetails struct<tlsversion:string,ciphersuite:string,clientprovidedhostheader:string> COMMENT 'from deserializer'
)
PARTITIONED BY (
    region string,
    `timestamp` string
)
ROW FORMAT SERDE
    'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS INPUTFORMAT
    'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
    's3://${s3_bucket}/AWSLogs/${account_id}/CloudTrail/'
TBLPROPERTIES (
    'classification'='cloudtrail',
    'projection.enabled'='true',
    'projection.region.type'='enum',
    'projection.region.values'='ap-northeast-1,ap-northeast-2,ap-northeast-3,ap-south-1,ap-southeast-1,ap-southeast-2,ca-central-1,eu-central-1,eu-north-1,eu-west-1,eu-west-2,eu-west-3,sa-east-1,us-east-1,us-east-2,us-west-1,us-west-2',
    'projection.timestamp.format'='yyyy/MM/dd',
    'projection.timestamp.interval'='1',
    'projection.timestamp.interval.unit'='DAYS',
    'projection.timestamp.range'='2013/01/01,NOW',
    'projection.timestamp.type'='date',
    'storage.location.template'='s3://${s3_bucket}/AWSLogs/${account_id}/CloudTrail/$${region}/$${timestamp}/'
);

クエリするときの例

  • 1ヶ月分のログを対象

コンソールで

  • 保存したクエリペインでワークグループを指定して該当のクエリのIDを押すとエディタにSQLが展開される
    • 最初右すぎて気が付かなかった

東京リージョンでAPIコールが失敗したとき

SELECT
    substr(CAST(date_add('hour', 9, from_iso8601_timestamp(eventTime)) AS varchar), 1, 19) AS eventTimeJST,
    eventName,
    userIdentity.arn AS arn,
    userIdentity.userName AS userName,
    errorCode,
    errorMessage,
    requestParameters
FROM
    cloudtrail
WHERE
    region = 'ap-northeast-1' 
    AND timestamp BETWEEN '2024/07/01' AND '2024/08/01' 
    AND errorCode IN ('AccessDenied', 'UnauthorizedOperation', 'Client.UnauthorizedOperation')

東京リージョン以外でのリソースの作成を調べる

SELECT *
FROM cloudtrail
WHERE
    region != 'ap-northeast-1'
    AND timestamp BETWEEN '2024/08/01' AND '2024/08/06' 
    AND eventname LIKE 'Create%'
Livesense Engineers

Discussion