Closed8
Dify での生成 AI アプリケーション構築ワークショップ by AWSをやる
CFnで環境構築と、Bedrockでモデルの有効化
作ってるものはVPC, EC2くらい。
UserDataでgit/Dockerのインストールからdocker-compose upまで実行している。

UserData
UserData:
Fn::Base64: |
#!/bin/bash
# First-boot provisioning for the Dify workshop instance:
#   1. install git and Docker (retrying on failure),
#   2. start/enable Docker and add the login users to the docker group,
#   3. install the docker-compose v2.28.1 binary,
#   4. clone Dify at 0.9.1-fix1 into /opt/dify and start its compose stack.
#
# `set -e` is intentionally NOT used: some steps (e.g. adding ssm-user to the
# docker group) are best-effort. Steps whose failure makes the rest pointless
# are checked explicitly and abort the script.
# NOTE: `$var` is used instead of the braced form on purpose, so the script
# stays intact if the template is ever wrapped in Fn::Sub.

# Print a message to stderr and stop provisioning.
die() {
  echo "$*" >&2
  exit 1
}

# --- Package installation (retry: the package mirror may be briefly unreachable) ---
max_attempts=5
attempt_num=1
success=false
while [[ "$success" == false ]] && (( attempt_num <= max_attempts )); do
  if sudo dnf install -y git docker; then
    echo "dnf install succeeded"
    success=true
  else
    echo "dnf install $attempt_num failed. trying again..."
    sleep 3
    ((attempt_num++))
  fi
done
# Without git and docker nothing below can work, so stop here instead of
# cascading through a series of confusing follow-up errors.
[[ "$success" == true ]] || die "dnf install failed after $max_attempts attempts; aborting"

# --- Docker daemon and group membership ---
sudo systemctl start docker
sudo gpasswd -a ec2-user docker
# NOTE(review): ssm-user may not exist yet at first boot (presumably it is
# created on the first Session Manager login) - failure here is non-fatal.
sudo gpasswd -a ssm-user docker
sudo chgrp docker /var/run/docker.sock
sudo service docker restart
sudo systemctl enable docker

# --- docker-compose binary ---
# -f makes curl fail on HTTP errors instead of saving the error page as the binary.
sudo curl -fL "https://github.com/docker/compose/releases/download/v2.28.1/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose \
  || die "failed to download docker-compose"
sudo chmod +x /usr/local/bin/docker-compose
# -f keeps the step idempotent if the link already exists.
sudo ln -sf /usr/local/bin/docker-compose /usr/bin/docker-compose

# --- Dify checkout and startup ---
# `clone --branch` checks out 0.9.1-fix1 directly, replacing the former
# clone + checkout + (redundant) pull of the same ref.
sudo git clone --branch 0.9.1-fix1 https://github.com/langgenius/dify.git /opt/dify \
  || die "failed to clone dify"
cd /opt/dify/docker || die "missing /opt/dify/docker"
# Use the upstream defaults as the compose environment.
sudo cp .env.example .env || die "failed to create .env"
docker-compose up -d
初期設定
この辺は画面に沿って進める。
バージョン違いのせいか、微妙に画面が違ったりするが、そのまま進める。
とりあえずここでEC2に入ってみる。
AmazonSSMManagedInstanceCoreが付与されているのでstart-sessionでログインできる。
動いているコンテナは以下
[root@ip-192-168-0-153 dify]# docker container ls
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
a63e6182672d nginx:latest "sh -c 'cp /docker-e…" 16 minutes ago Up 16 minutes 0.0.0.0:80->80/tcp, :::80->80/tcp, 0.0.0.0:443->443/tcp, :::443->443/tcp docker-nginx-1
6d7781449334 langgenius/dify-api:0.9.1-fix1 "/bin/bash /entrypoi…" 16 minutes ago Up 16 minutes 5001/tcp docker-worker-1
f01fd92bba03 langgenius/dify-api:0.9.1-fix1 "/bin/bash /entrypoi…" 16 minutes ago Up 16 minutes 5001/tcp docker-api-1
b7f270c3a6ca postgres:15-alpine "docker-entrypoint.s…" 16 minutes ago Up 16 minutes (healthy) 5432/tcp docker-db-1
ccb44147d465 langgenius/dify-web:0.9.1-fix1 "/bin/sh ./entrypoin…" 16 minutes ago Up 16 minutes 3000/tcp docker-web-1
0993492cd43f semitechnologies/weaviate:1.19.0 "/bin/weaviate --hos…" 16 minutes ago Up 16 minutes docker-weaviate-1
29036cce0a37 langgenius/dify-sandbox:0.2.9 "/main" 16 minutes ago Up 16 minutes (healthy) docker-sandbox-1
b50bcdb85c2e ubuntu/squid:latest "sh -c 'cp /docker-e…" 16 minutes ago Up 16 minutes 3128/tcp docker-ssrf_proxy-1
0048aba15fd0 redis:6-alpine "docker-entrypoint.s…" 16 minutes ago Up 16 minutes (healthy) 6379/tcp docker-redis-1
[root@ip-192-168-0-153 dify]#
一応リポジトリに絵もあった。
チャットbotの開発を進める
いったん一通り動かした。
- 基本的にシンプルで色々便利そう。ワークフロー使い勝手良さそう。頑張ってスクリプト作ってRAGもどき作ってた時代とは大違い
- だけどapp管理は大変そう。grafana dashboard管理みたいな気配
- あと精度の調整も大変そう。ユースケース複雑になると不意に動かなくなったりするんだろうなあ
postgresqlの中身
postgres=# \c dify
You are now connected to database "dify" as user "postgres".
dify=# \d
List of relations
Schema | Name | Type | Owner
--------+-----------------------------------+----------+----------
public | account_integrates | table | postgres
public | accounts | table | postgres
public | alembic_version | table | postgres
public | api_based_extensions | table | postgres
public | api_requests | table | postgres
public | api_tokens | table | postgres
public | app_annotation_hit_histories | table | postgres
public | app_annotation_settings | table | postgres
public | app_dataset_joins | table | postgres
public | app_model_configs | table | postgres
public | apps | table | postgres
public | celery_taskmeta | table | postgres
public | celery_tasksetmeta | table | postgres
public | conversations | table | postgres
public | data_source_api_key_auth_bindings | table | postgres
public | data_source_oauth_bindings | table | postgres
public | dataset_collection_bindings | table | postgres
public | dataset_keyword_tables | table | postgres
public | dataset_permissions | table | postgres
public | dataset_process_rules | table | postgres
public | dataset_queries | table | postgres
public | dataset_retriever_resources | table | postgres
public | datasets | table | postgres
public | dify_setups | table | postgres
public | document_segments | table | postgres
public | documents | table | postgres
public | embeddings | table | postgres
public | end_users | table | postgres
public | external_knowledge_apis | table | postgres
public | external_knowledge_bindings | table | postgres
public | installed_apps | table | postgres
public | invitation_codes | table | postgres
public | invitation_codes_id_seq | sequence | postgres
public | load_balancing_model_configs | table | postgres
public | message_agent_thoughts | table | postgres
public | message_annotations | table | postgres
public | message_chains | table | postgres
public | message_feedbacks | table | postgres
public | message_files | table | postgres
public | messages | table | postgres
public | operation_logs | table | postgres
public | pinned_conversations | table | postgres
public | provider_model_settings | table | postgres
public | provider_models | table | postgres
public | provider_orders | table | postgres
public | providers | table | postgres
public | recommended_apps | table | postgres
public | saved_messages | table | postgres
public | sites | table | postgres
public | tag_bindings | table | postgres
public | tags | table | postgres
public | task_id_sequence | sequence | postgres
public | taskset_id_sequence | sequence | postgres
public | tenant_account_joins | table | postgres
public | tenant_default_models | table | postgres
public | tenant_preferred_model_providers | table | postgres
public | tenants | table | postgres
public | tool_api_providers | table | postgres
public | tool_builtin_providers | table | postgres
public | tool_conversation_variables | table | postgres
public | tool_files | table | postgres
public | tool_label_bindings | table | postgres
public | tool_model_invokes | table | postgres
public | tool_providers | table | postgres
public | tool_published_apps | table | postgres
public | tool_workflow_providers | table | postgres
public | trace_app_config | table | postgres
public | upload_files | table | postgres
public | workflow_app_logs | table | postgres
public | workflow_conversation_variables | table | postgres
public | workflow_node_executions | table | postgres
public | workflow_runs | table | postgres
public | workflows | table | postgres
(73 rows)
dify=#
weaviateのschema
/ # curl -H "Authorization: Bearer $WEAVIATE_API_KEY" http://localhost:8080/v1/schema | jq
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 3855 0 3855 0 0 118k 0 --:--:-- --:--:-- --:--:-- 121k
{
"classes": [
{
"class": "Vector_index_51f07ef0_e19f_41d5_9a3e_0b6443bd7172_Node",
"invertedIndexConfig": {
"bm25": {
"b": 0.75,
"k1": 1.2
},
"cleanupIntervalSeconds": 60,
"stopwords": {
"additions": null,
"preset": "en",
"removals": null
}
},
"properties": [
{
"dataType": [
"text"
],
"indexFilterable": true,
"indexSearchable": true,
"name": "text",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "document_id",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "dataset_id",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "doc_id",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "doc_hash",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "source",
"tokenization": "word"
},
{
"dataType": [
"number"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:24:24 2025",
"indexFilterable": true,
"indexSearchable": false,
"name": "page"
}
],
"replicationConfig": {
"factor": 1
},
"shardingConfig": {
"virtualPerPhysical": 128,
"desiredCount": 1,
"actualCount": 1,
"desiredVirtualCount": 128,
"actualVirtualCount": 128,
"key": "_id",
"strategy": "hash",
"function": "murmur3"
},
"vectorIndexConfig": {
"skip": false,
"cleanupIntervalSeconds": 300,
"maxConnections": 64,
"efConstruction": 128,
"ef": -1,
"dynamicEfMin": 100,
"dynamicEfMax": 500,
"dynamicEfFactor": 8,
"vectorCacheMaxObjects": 1000000000000,
"flatSearchCutoff": 40000,
"distance": "cosine",
"pq": {
"enabled": false,
"bitCompression": false,
"segments": 0,
"centroids": 256,
"encoder": {
"type": "kmeans",
"distribution": "log-normal"
}
}
},
"vectorIndexType": "hnsw",
"vectorizer": "none"
},
{
"class": "Vector_index_4de96e9f_d1b9_4206_813a_bdd21de04cb7_Node",
"invertedIndexConfig": {
"bm25": {
"b": 0.75,
"k1": 1.2
},
"cleanupIntervalSeconds": 60,
"stopwords": {
"additions": null,
"preset": "en",
"removals": null
}
},
"properties": [
{
"dataType": [
"text"
],
"indexFilterable": true,
"indexSearchable": true,
"name": "text",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:31:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "annotation_id",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:31:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "app_id",
"tokenization": "word"
},
{
"dataType": [
"text"
],
"description": "This property was generated by Weaviate's auto-schema feature on Sun Feb 2 05:31:24 2025",
"indexFilterable": true,
"indexSearchable": true,
"name": "doc_id",
"tokenization": "word"
}
],
"replicationConfig": {
"factor": 1
},
"shardingConfig": {
"virtualPerPhysical": 128,
"desiredCount": 1,
"actualCount": 1,
"desiredVirtualCount": 128,
"actualVirtualCount": 128,
"key": "_id",
"strategy": "hash",
"function": "murmur3"
},
"vectorIndexConfig": {
"skip": false,
"cleanupIntervalSeconds": 300,
"maxConnections": 64,
"efConstruction": 128,
"ef": -1,
"dynamicEfMin": 100,
"dynamicEfMax": 500,
"dynamicEfFactor": 8,
"vectorCacheMaxObjects": 1000000000000,
"flatSearchCutoff": 40000,
"distance": "cosine",
"pq": {
"enabled": false,
"bitCompression": false,
"segments": 0,
"centroids": 256,
"encoder": {
"type": "kmeans",
"distribution": "log-normal"
}
}
},
"vectorIndexType": "hnsw",
"vectorizer": "none"
}
]
}
/ #
WeaviateのClaude解説
このデータはWeaviateのスキーマ定義で、2つのベクトルインデックスクラスが定義されています:
Vector_index_51f07ef0_e19f_41d5_9a3e_0b6443bd7172_Node
- ドキュメント関連のデータを格納するクラス
- プロパティ:
- text: テキストデータ
- document_id: ドキュメントID
- dataset_id: データセットID
- doc_id: ドキュメントID
- doc_hash: ドキュメントのハッシュ値
- source: ソース情報
- page: ページ番号
Vector_index_4de96e9f_d1b9_4206_813a_bdd21de04cb7_Node
- アノテーション関連のデータを格納するクラス
- プロパティ:
- text: テキストデータ
- annotation_id: アノテーションID
- app_id: アプリケーションID
- doc_id: ドキュメントID
両クラスとも以下の特徴があります:
- HNSWベクトルインデックスを使用
- コサイン距離でベクトル類似度を計算
- BM25による検索機能を実装
- 英語のストップワードを使用
- ワードベースのトークン化を採用
- 全てのテキストフィールドは検索可能かつフィルタリング可能
これは典型的なドキュメント管理システムのスキーマで、ドキュメントとそれに関連するアノテーションを管理するための構造を持っています。
最後はCFn削除して完了。
お疲れ様でした。
このスクラップは2025/02/02にクローズされました