Closed4

AWS Lambda で S3 に定期的に CSV を出力する with Terraform

9sako69sako6

Snowflake の外部ステージとして S3 を使っている。
S3 以降のデータロードはなんとかなったので、データソースから S3 にデータをファイルで出力する処理の実験をする。

9sako69sako6

data_integrate_lambda module として AWS Lambda を作る。

main.tf
# Provider requirements for this module.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # Declared explicitly because this module zips the Lambda source with the
    # `archive_file` data source, which is provided by hashicorp/archive.
    archive = {
      source  = "hashicorp/archive"
      version = "~> 2.0"
    }
  }
}

# IAM role assumed by the data_integration Lambda function.
resource "aws_iam_role" "data_integration_lambda" {
  name = "data_integration_lambda_role"

  # Trust policy: allows the Lambda service to assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = ""
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "lambda.amazonaws.com" }
      },
    ]
  })
}

# Zips lambda.js from this module's directory into the deployment package.
data "archive_file" "lambda" {
  output_path = "${path.module}/lambda_function_payload.zip"
  source_file = "${path.module}/lambda.js"
  type        = "zip"
}

# Lambda function built from the zipped lambda.js. The target bucket name and
# region are handed to the handler through environment variables.
resource "aws_lambda_function" "data_integration" {
  function_name = "data_integration"
  role          = aws_iam_role.data_integration_lambda.arn
  handler       = "lambda.handler"
  runtime       = "nodejs18.x"

  # Take the package path from the archive data source rather than repeating
  # the literal, so the path is defined in exactly one place and the
  # dependency on the archive is explicit.
  filename         = data.archive_file.lambda.output_path
  source_code_hash = data.archive_file.lambda.output_base64sha256

  environment {
    variables = {
      S3_BUCKET_NAME = var.s3_bucket_name
      S3_REGION      = var.s3_region
    }
  }
}

# Policy allowing s3:PutObject on every object key under the given bucket ARN.
resource "aws_iam_policy" "lambda_s3_put" {
  name = "LambdaS3PutPolicy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["s3:PutObject"]
        Resource = "${var.s3_bucket_arn}/*"
      },
    ]
  })
}

# Attaches the S3 put policy to the Lambda role.
resource "aws_iam_role_policy_attachment" "lambda_s3_put" {
  role       = aws_iam_role.data_integration_lambda.name
  policy_arn = aws_iam_policy.lambda_s3_put.arn
}

# Attaches the AWS managed basic execution policy so the function is allowed to
# create its log group/stream and write log events to CloudWatch Logs. Without
# it the handler's console.log / console.error output is not delivered.
resource "aws_iam_role_policy_attachment" "lambda_basic_execution" {
  role       = aws_iam_role.data_integration_lambda.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
}


variables.tf
# ARN of the destination bucket; interpolated into the IAM policy resource.
variable "s3_bucket_arn" {
  description = "The ARN of the S3 bucket"
  type        = string
}

# Name of the destination bucket; passed to the Lambda as S3_BUCKET_NAME.
variable "s3_bucket_name" {
  description = "The name of the S3 bucket"
  type        = string
}

# Region of the destination bucket; passed to the Lambda as S3_REGION.
variable "s3_region" {
  description = "The region of the S3 bucket"
  type        = string
}

lambda.js
const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');


exports.handler = async (event) => {
  const bucketName = process.env.S3_BUCKET_NAME;
  const region = process.env.S3_REGION;

  if (!bucketName || !region) {
    throw new Error('environment variables are required');
  }

  const s3Client = new S3Client({ region });
  const objectKey = 'example.csv';
  const csvContent = 'Name, Age\nAlice, 24\nBob, 30';

  try {
    const params = {
      Bucket: bucketName,
      Key: objectKey,
      Body: csvContent,
      ContentType: 'text/csv'
    };

    const command = new PutObjectCommand(params);
    const response = await s3Client.send(command);
    console.log('Success', response);
    return response;
  } catch (err) {
    console.error('Error', err);
    throw err;
  }
};
9sako69sako6

EventBridge を使って定期実行する。
EventBridge は以前の Amazon CloudWatch Events の進化系らしい。そのためリソース名は aws_cloudwatch_event_* のままになっている。試しに1分ごとに出力するようにする。

event_bridge.tf
# Schedule rule that fires at a fixed rate of once per minute.
resource "aws_cloudwatch_event_rule" "default" {
  schedule_expression = "rate(1 minute)"
  name                = "lambda_schedule"
}

# Makes the data_integration Lambda function the target of the schedule rule.
resource "aws_cloudwatch_event_target" "default" {
  target_id = "TargetFunction"
  arn       = aws_lambda_function.data_integration.arn
  rule      = aws_cloudwatch_event_rule.default.name
}

# Allows the events service to invoke the function, scoped to the schedule
# rule's ARN.
resource "aws_lambda_permission" "default" {
  function_name = aws_lambda_function.data_integration.function_name
  action        = "lambda:InvokeFunction"
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.default.arn
  statement_id  = "AllowExecutionFromCloudWatch"
}

lambda.js でファイル名にタイムスタンプをつけるように修正した。定期的に出力されているのを確認できた。

このスクラップは5ヶ月前にクローズされました