
Notes on running rinna and open-calm on EC2

Published 2023/07/03

Introduction

Setup steps for running LLMs such as rinna and open-calm on AWS EC2.

Environment

  • AWS EC2 p3.8xlarge
  • Deep Learning AMI GPU PyTorch 2.0.0 (Amazon Linux 2) 20230406

Setup

1. yum update & add to PATH

setup.sh
export PATH=~/.local/bin:$PATH
source ~/.bash_profile
sudo yum update -y
sudo yum install -y amazon-efs-utils # only needed if you use EFS
sudo yum remove -y openssl-devel # may not be necessary
sudo yum install -y openssl11 openssl11-devel # may not be necessary

sudo yum install git-all -y
sudo amazon-linux-extras install epel -y
sudo yum-config-manager --enable epel
sudo yum install git-lfs -y
exec $SHELL -l # restart the shell

2. Install Python libraries

require.txt
#------------- for rwkv ------------
ninja
pytorch-lightning==1.8.3
rwkv
#--------------------------------
deepspeed==0.8.3
transformers
peft
datasets
sentencepiece # for the rinna tokenizer

Activate the pytorch conda environment:

source activate pytorch

Check the Python version, just in case:

python -V #Python 3.10.11
pip -V #pip 23.1.2 from /opt/conda/envs/pytorch/lib/python3.10/site-packages/pip (python 3.10)

Install:

pip install -r require.txt
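
Before pulling down any model weights, it may be worth a quick sanity check that this environment actually sees the GPUs (a p3.8xlarge exposes 4 V100s). A minimal check, with the filename gpu_check.py chosen here just for illustration:

gpu_check.py
import torch

# Confirm CUDA is available and list the visible GPUs
print(torch.cuda.is_available())   # expect True on this AMI
print(torch.cuda.device_count())   # should be 4 on a p3.8xlarge
for i in range(torch.cuda.device_count()):
    print(torch.cuda.get_device_name(i))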

Playing with rinna

rinna_chat.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
# Change where model weights are cached
# (the default is under ~/.cache/huggingface/)
os.environ['TRANSFORMERS_CACHE'] = '/mnt/efs/llm/cache'

tokenizer = AutoTokenizer.from_pretrained(
   "rinna/japanese-gpt-neox-3.6b-instruction-sft", use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
   "rinna/japanese-gpt-neox-3.6b-instruction-sft")

prompt_log = []

if torch.cuda.is_available():
   model = model.to("cuda")


while True:
   input_line = str(input("入力:"))

   # "+reset" clears the conversation history
   if input_line == "+reset":
       prompt_log = []

   elif input_line == "+exit":
       break

   else:
       prompt_log.append({
           "speaker": "ユーザー",
           "text": input_line
       })
       # print(prompt_log)
       prompt = [
           f"{uttr['speaker']}: {uttr['text']}"
           for uttr in prompt_log
       ]
       # print(prompt)
       prompt = "<NL>".join(prompt)
       prompt = (
           prompt
           + "<NL>"
           + "システム: "
       )
       # print(prompt)
       token_ids = tokenizer.encode(
           prompt, add_special_tokens=False, return_tensors="pt")

       with torch.no_grad():
           output_ids = model.generate(
               token_ids.to(model.device),
               do_sample=True,
               max_new_tokens=128,
               temperature=0.7,
               pad_token_id=tokenizer.pad_token_id,
               bos_token_id=tokenizer.bos_token_id,
               eos_token_id=tokenizer.eos_token_id
           )

       output = tokenizer.decode(output_ids.tolist()[0][token_ids.size(1):])
       output = output.replace("<NL>", "\n")
       prompt_log.append({
           "speaker": "システム",
           "text": output
       })
       print("出力:" + output)

Playing with open-calm

calm-chat.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
   "cyberagent/open-calm-7b", device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("cyberagent/open-calm-7b")


while True:
   input_line = str(input("入力:"))

   if input_line == "+exit":
       break

   else:

       inputs = tokenizer(input_line, return_tensors="pt").to(model.device)
       with torch.no_grad():
           tokens = model.generate(
               **inputs,
               max_new_tokens=256,
               do_sample=True,
               temperature=0.7,
               pad_token_id=tokenizer.pad_token_id,
           )

       output = tokenizer.decode(tokens[0], skip_special_tokens=True)
       print("出力:" + output)
