Notes on running rinna and open-calm on EC2
Introduction
Setup steps for running LLMs such as rinna and open-calm on AWS EC2.
Environment
- AWS EC2 p3.8xlarge
- Deep Learning AMI GPU PyTorch 2.0.0 (Amazon Linux 2) 20230406
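Before starting, it is worth a quick sanity check that the AMI's bundled PyTorch actually sees the GPUs (a p3.8xlarge should expose 4x V100 with 16 GB each; this snippet is not part of the original memo):

import torch

print(torch.__version__)            # 2.0.x on this AMI
print(torch.cuda.is_available())    # should be True
print(torch.cuda.device_count())    # 4 on p3.8xlarge
for i in range(torch.cuda.device_count()):
    print(torch.cuda.get_device_name(i))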
Setup
1. yum update & add to PATH
setup.sh
export PATH=~/.local/bin:$PATH
source ~/.bash_profile
sudo yum update -y
sudo yum install -y amazon-efs-utils # if mounting EFS
sudo yum remove -y openssl-devel # may not be needed
sudo yum install -y openssl11 openssl11-devel # may not be needed
sudo yum install git-all -y
sudo amazon-linux-extras install epel -y
sudo yum-config-manager --enable epel
sudo yum install git-lfs -y
exec $SHELL -l # restart the shell
2. Install Python libraries
require.txt
#------------- for rwkv ------------
ninja
pytorch-lightning==1.8.3
rwkv
#--------------------------------
deepspeed==0.8.3
transformers
peft
datasets
sentencepiece # for the rinna tokenizer
After activating the pytorch conda environment:
source activate pytorch
Check the Python version, just in case:
python -V #Python 3.10.11
pip -V #pip 23.1.2 from /opt/conda/envs/pytorch/lib/python3.10/site-packages/pip (python 3.10)
Install:
pip install -r require.txt
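Optionally, confirm that the main libraries import cleanly inside the pytorch env (a quick check, not in the original memo):

import transformers, peft, datasets, deepspeed, sentencepiece

print("transformers", transformers.__version__)
print("deepspeed", deepspeed.__version__)  # require.txt pins 0.8.3
print("peft", peft.__version__)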
Playing with rinna
rinna_chat.py
import os

# Change where model weights are cached (default is under ~/.cache/huggingface/).
# Set this before importing transformers so it takes effect.
os.environ['TRANSFORMERS_CACHE'] = '/mnt/efs/llm/cache'

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    "rinna/japanese-gpt-neox-3.6b-instruction-sft", use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
    "rinna/japanese-gpt-neox-3.6b-instruction-sft")

if torch.cuda.is_available():
    model = model.to("cuda")

prompt_log = []  # conversation history; cleared with the +reset command

while True:
    input_line = str(input("Input: "))
    if input_line == "+reset":
        prompt_log = []
    elif input_line == "+exit":
        break
    else:
        # Speaker labels stay Japanese: the model's prompt format expects ユーザー/システム
        prompt_log.append({
            "speaker": "ユーザー",
            "text": input_line
        })
        # print(prompt_log)
        prompt = [
            f"{uttr['speaker']}: {uttr['text']}"
            for uttr in prompt_log
        ]
        # print(prompt)
        prompt = "<NL>".join(prompt)
        prompt = (
            prompt
            + "<NL>"
            + "システム: "
        )
        # print(prompt)
        token_ids = tokenizer.encode(
            prompt, add_special_tokens=False, return_tensors="pt")
        with torch.no_grad():
            output_ids = model.generate(
                token_ids.to(model.device),
                do_sample=True,
                max_new_tokens=128,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
                bos_token_id=tokenizer.bos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
        # Decode only the newly generated tokens (skip the prompt)
        output = tokenizer.decode(output_ids.tolist()[0][token_ids.size(1):])
        output = output.replace("<NL>", "\n")
        prompt_log.append({
            "speaker": "システム",
            "text": output
        })
        print("Output: " + output)
Playing with open-calm
calm-chat.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# device_map="auto" spreads the fp16 weights across the available GPUs
model = AutoModelForCausalLM.from_pretrained(
    "cyberagent/open-calm-7b", device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("cyberagent/open-calm-7b")

while True:
    input_line = str(input("Input: "))
    if input_line == "+exit":
        break
    else:
        inputs = tokenizer(input_line, return_tensors="pt").to(model.device)
        with torch.no_grad():
            tokens = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
            )
        output = tokenizer.decode(tokens[0], skip_special_tokens=True)
        print("Output: " + output)