🐕
RustでAWSのAthenaで一覧を大量に楽して取得したい
目的
ユニークビジョン株式会社 Advent Calendar 2024の12/2の記事です。
RustでAWSのS3で一覧を大量に楽して取得したい
RustでAWSのDynamoDBで一覧を大量に楽して取得したい
本記事はそのAthena版です。上記のS3版・DynamoDB版と同じやり方で、ページネーションの結果をStreamとして扱います。
コード
Cargo.toml
[package]
name = "athena_fold"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1"
aws-config = "1.5.10"
aws-sdk-athena = "1.53.0"
aws-smithy-types-convert = { version = "0.60.2", features = ["convert-streams"] }
futures = "0.3.31"
futures-util = "0.3.31"
thiserror = "2"
tokio = { version="1.41.1", features=["macros", "rt-multi-thread", "fs", "time"] }
main.rs
use aws_config::BehaviorVersion;
use aws_sdk_athena::operation::get_query_results::GetQueryResultsOutput;
use aws_smithy_types_convert::stream::PaginationStreamExt;
use futures::Stream;
use futures_util::stream::TryStreamExt;
/// Error type returned by the Athena helpers in this file.
///
/// `Debug`, `Display` and `std::error::Error` are all derived; the `Display`
/// text comes from the `#[error(...)]` attribute on each variant.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// A `GetQueryResults` call failed inside the AWS SDK.
    ///
    /// `#[from]` generates the `From<SdkError<GetQueryResultsError>>` impl,
    /// so the SDK error can be converted with `?`, `.into()` or `Error::from`.
    #[error("GetQueryResultsError {0}")]
    GetQueryResultsError(
        #[from]
        aws_sdk_athena::error::SdkError<
            aws_sdk_athena::operation::get_query_results::GetQueryResultsError,
        >,
    ),
}
fn get_query_results_stream(
client: &aws_sdk_athena::Client,
execution_id: &str,
) -> impl Stream<Item = Result<GetQueryResultsOutput, Error>> {
client
.get_query_results()
.set_query_execution_id(Some(execution_id.to_owned()))
.into_paginator()
.send()
.into_stream_03x()
.err_into()
}
/// Sample entry point: walks every result page of one Athena query execution
/// and turns each row into a `Vec<String>` of its cell values.
///
/// # Errors
/// Returns the first error yielded by the result stream (propagated via `?`).
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
    let client = aws_sdk_athena::Client::new(&config);

    // Placeholder ID; replace with a real query execution ID.
    let query_execution_id = "xxxxx".to_string();
    let mut pages = get_query_results_stream(&client, &query_execution_id);

    while let Some(page) = pages.try_next().await? {
        // Pages without a result set carry nothing to process.
        if let Some(result_set) = page.result_set() {
            for row in result_set.rows() {
                // A missing cell value is represented as an empty string.
                // The row is only materialised here; the sample does not use
                // it further, so this is where real processing would go.
                let _line: Vec<String> = row
                    .data()
                    .iter()
                    .map(|datum| datum.var_char_value().unwrap_or("").to_string())
                    .collect();
            }
        }
    }

    Ok(())
}
Discussion