🪣

AWS S3にGeoJSONをPUTしたらPMTilesに変換する仕組みをCDKで記述する

に公開

Architecture

CDK Stack

LambdaのメモリはvCPUが2になるライン(1770MB)に設定したけど、実際にどれくらいパフォーマンスへ影響しているかは測定していない。

import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as s3n from 'aws-cdk-lib/aws-s3-notifications';
import * as path from 'path';

/**
 * Stack that converts GeoJSON files uploaded to S3 into PMTiles archives.
 *
 * Resources:
 * - an input bucket that receives `.geojson` objects,
 * - an output bucket that receives the generated `.pmtiles` objects,
 * - a container-image Lambda function (built from `../lambda`) that is invoked
 *   directly by the input bucket's "object created" notifications.
 *
 * Both buckets use `RemovalPolicy.DESTROY` with `autoDeleteObjects`, so
 * deleting the stack also deletes every stored object.
 */
export class AutoTilerStack extends cdk.Stack {
	/**
	 * @param scope Parent construct.
	 * @param id Construct id of the stack.
	 * @param props Optional standard stack properties.
	 */
	constructor(scope: Construct, id: string, props?: cdk.StackProps) {
		super(scope, id, props);

		// S3 buckets for input GeoJSON and output PMTiles
		const geojsonBucket = new s3.Bucket(this, 'GeojsonBucket', {
			removalPolicy: cdk.RemovalPolicy.DESTROY,
			autoDeleteObjects: true,
		});

		const pmtilesBucket = new s3.Bucket(this, 'PmtilesBucket', {
			removalPolicy: cdk.RemovalPolicy.DESTROY,
			autoDeleteObjects: true,
		});

		// Lambda function to run tippecanoe for conversion
		const tippecanoeLambda = new lambda.DockerImageFunction(
			this,
			'TippecanoeLambda',
			{
				code: lambda.DockerImageCode.fromImageAsset(
					path.join(__dirname, '../lambda'),
				),
				// Maximum Lambda timeout; tile generation can be slow for large inputs.
				timeout: cdk.Duration.minutes(15),
				// Chosen with the aim of getting 2 vCPUs; the actual performance
				// effect has not been measured.
				memorySize: 1770,
				environment: {
					// Read by the handler to decide where the PMTiles file is uploaded.
					OUTPUT_BUCKET: pmtilesBucket.bucketName,
				},
				architecture: lambda.Architecture.ARM_64,
			},
		);

		// Grant least-privilege permissions: the function only reads source
		// objects and only writes (never reads or deletes) converted objects.
		geojsonBucket.grantRead(tippecanoeLambda);
		pmtilesBucket.grantPut(tippecanoeLambda);

		// Configure S3 event notification to trigger Lambda directly
		geojsonBucket.addEventNotification(
			s3.EventType.OBJECT_CREATED,
			new s3n.LambdaDestination(tippecanoeLambda),
			{ suffix: '.geojson' }, // Only trigger for .geojson files
		);

		// Output the bucket names
		new cdk.CfnOutput(this, 'GeojsonBucketName', {
			value: geojsonBucket.bucketName,
			description: 'Name of the S3 bucket for GeoJSON files',
		});

		new cdk.CfnOutput(this, 'PMTilesBucketName', {
			value: pmtilesBucket.bucketName,
			description: 'Name of the S3 bucket for PMTiles files',
		});
	}
}

Lambda code

tippecanoeが利用出来ることを想定したLambdaコード

import * as AWS from 'aws-sdk';
import { S3Event, S3EventRecord } from 'aws-lambda';
import { spawn, ChildProcess } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';

const s3 = new AWS.S3();

/**
 * Lambda entry point: converts the GeoJSON object named in an S3 event into a
 * PMTiles file and uploads it to the bucket given by `OUTPUT_BUCKET`.
 *
 * Only the first record of the event is processed; any further records are
 * reported with a warning. The output key is `<base name>.pmtiles` at the root
 * of the output bucket, so source objects that share a base name under
 * different prefixes overwrite each other's output.
 *
 * @param event S3 notification event delivered to the function.
 * @returns A summary object with `statusCode` 200 and a JSON `body`.
 * @throws Error when the event has no records, `OUTPUT_BUCKET` is unset, the
 *   source object has no body, or download/conversion/upload fails.
 */
export const handler = async (
	event: S3Event,
): Promise<{ statusCode: number; body: string }> => {
	console.log('Received event:', JSON.stringify(event, null, 2));

	// Process only the first record (we expect only one per invocation)
	const record: S3EventRecord | undefined = event.Records?.[0];
	if (record === undefined) {
		throw new Error('S3 event contains no records');
	}
	if (event.Records.length > 1) {
		console.warn(
			`Event contains ${event.Records.length} records; only the first is processed`,
		);
	}

	// Get the output bucket from environment variables
	const dstBucket = process.env.OUTPUT_BUCKET;
	if (!dstBucket) {
		throw new Error('OUTPUT_BUCKET environment variable is not set');
	}

	// Get the S3 bucket and key from the event.
	// Keys arrive URL-encoded with spaces represented as '+'.
	const srcBucket = record.s3.bucket.name;
	const srcKey = decodeURIComponent(record.s3.object.key.replace(/\+/g, ' '));

	// Create temporary directory for processing (after validation, so an
	// invalid invocation leaves nothing behind).
	const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'geojson-'));
	const localGeojsonPath = path.join(tmpDir, 'input.geojson');
	const outputDir = path.join(tmpDir, 'output');

	try {
		// Create output directory
		fs.mkdirSync(outputDir, { recursive: true });

		// Download the GeoJSON file from S3
		console.log(`Downloading ${srcKey} from ${srcBucket}`);
		const geojsonObject = await s3
			.getObject({
				Bucket: srcBucket,
				Key: srcKey,
			})
			.promise();

		if (geojsonObject.Body === undefined) {
			throw new Error(`S3 object ${srcBucket}/${srcKey} has no body`);
		}

		// Write the GeoJSON to a local file
		fs.writeFileSync(localGeojsonPath, geojsonObject.Body as Buffer);

		// Extract the base name without extension; it is used both as the
		// output file name and as the tile layer name.
		const baseName = path.basename(srcKey, path.extname(srcKey));

		// Run tippecanoe to convert GeoJSON to PMTiles
		console.log('Running tippecanoe');
		await runTippecanoe(localGeojsonPath, outputDir, baseName);

		// Upload the PMTiles file to S3
		console.log('Uploading PMTiles to S3');
		const pmtilesPath = path.join(outputDir, `${baseName}.pmtiles`);
		await uploadPMTiles(pmtilesPath, dstBucket, '');

		return {
			statusCode: 200,
			body: JSON.stringify({
				message: 'GeoJSON successfully converted to PMTiles',
				source: srcKey,
				destination: `${baseName}.pmtiles`,
			}),
		};
	} catch (error) {
		// Log for CloudWatch, then rethrow so the invocation is marked failed.
		console.error('Error:', error);
		throw error;
	} finally {
		// Clean up temporary files; /tmp can persist across warm invocations.
		try {
			fs.rmSync(tmpDir, { recursive: true, force: true });
		} catch (err) {
			console.error('Error cleaning up temporary files:', err);
		}
	}
};

/**
 * Runs the `tippecanoe` CLI to convert a GeoJSON file into
 * `<outputDir>/<baseName>.pmtiles`.
 *
 * @param inputFile Path of the local GeoJSON file.
 * @param outputDir Directory that receives the PMTiles file; created if missing.
 * @param baseName Output file name without extension, also used as layer name.
 * @returns A promise that resolves when tippecanoe exits with code 0.
 * @throws Error (as a rejection) when the process cannot be started or exits
 *   with a non-zero (or null) code.
 */
async function runTippecanoe(
	inputFile: string,
	outputDir: string,
	baseName: string,
): Promise<void> {
	// Only the output directory itself is needed; tippecanoe writes a single file.
	fs.mkdirSync(outputDir, { recursive: true });
	const outputFile = path.join(outputDir, `${baseName}.pmtiles`);

	return new Promise((resolve, reject) => {
		const tippecanoe = spawn('tippecanoe', [
			'-o',
			outputFile,
			'-z',
			'14', // Maximum zoom level
			'-M',
			'1000000', // max filesize of each tile
			'-l',
			baseName, // Layer name
			inputFile,
		]);

		// Without this listener a spawn failure (e.g. binary not found) would
		// surface as an unhandled 'error' event instead of a rejection.
		tippecanoe.on('error', (err) => {
			reject(new Error(`failed to start tippecanoe: ${err.message}`));
		});

		setupProcessListeners(tippecanoe, 'tippecanoe', (code) => {
			if (code !== 0) {
				reject(new Error(`tippecanoe process exited with code ${code}`));
				return;
			}

			resolve();
		});
	});
}

/**
 * Wires logging and completion callbacks onto a child process.
 *
 * Each chunk from stdout is forwarded to `console.log` and each chunk from
 * stderr to `console.error`, prefixed with `name`. Streams that are absent
 * are skipped.
 *
 * @param process Child process to observe.
 * @param name Label used as the log prefix.
 * @param onClose Callback registered for the process's 'close' event.
 */
function setupProcessListeners(
	process: ChildProcess,
	name: string,
	onClose: (code: number | null) => void,
): void {
	const { stdout, stderr } = process;

	if (stdout) {
		stdout.on('data', (chunk) => {
			console.log(`${name} stdout: ${chunk}`);
		});
	}

	if (stderr) {
		stderr.on('data', (chunk) => {
			console.error(`${name} stderr: ${chunk}`);
		});
	}

	process.on('close', onClose);
}

/**
 * Uploads a local PMTiles file to S3.
 *
 * The file is streamed from disk rather than read into memory, so large
 * archives do not have to fit in the function's memory.
 *
 * @param pmtilesPath Path of the local PMTiles file.
 * @param bucket Destination bucket name.
 * @param prefix Key prefix; when empty the object is stored at the bucket
 *   root, otherwise under `<prefix>/<file name>`.
 * @returns A promise that resolves once the upload has completed.
 * @throws Error (as a rejection) when the file cannot be read or the upload fails.
 */
async function uploadPMTiles(
	pmtilesPath: string,
	bucket: string,
	prefix: string,
): Promise<void> {
	const fileName = path.basename(pmtilesPath);
	// Use the same path format as in the response
	const s3Key = prefix === '' ? fileName : `${prefix}/${fileName}`;

	// `upload` accepts a stream of unknown length and switches to multipart
	// upload for large bodies.
	await s3
		.upload({
			Bucket: bucket,
			Key: s3Key,
			Body: fs.createReadStream(pmtilesPath),
			ContentType: 'application/octet-stream',
		})
		.promise();

	console.log(`Uploaded ${s3Key} to ${bucket}`);
}

memo: StepFunctions

最初は変換タスクをStepFunctionsとして定義してみようとしていて、Eventの引数が少し違ったのでメモ

import * as AWS from 'aws-sdk';
import { S3NotificationEvent } from 'aws-lambda';
import { spawn, ChildProcess } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';

const s3 = new AWS.S3();

export const handler = async (event: S3NotificationEvent): Promise<any> => {
	console.log('Received event:', JSON.stringify(event, null, 2));

	// Get the S3 bucket and key from the event
	const srcBucket = event.detail.bucket.name;
	const srcKey = decodeURIComponent(
		event.detail.object.key.replace(/\+/g, ' '),
	);
// 以下略

StepFunctions内のLambdaだと渡ってくるEventの形が少し違い、バケット名やキーは `event.Records[0].s3` ではなく `event.detail` 以下に入っている。S3NotificationEventでひとまず型定義は一致したけど、正しい型がある気がする。

MIERUNEのZennブログ

Discussion