Open12
Stable-Diffusion を適当に変換試行する
git clone https://github.com/CompVis/stable-diffusion.git
cd stable-diffusion
git checkout 69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc
xhost +local: && \
docker run --gpus all -it --rm \
-v `pwd`:/home/user/workdir \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
ghcr.io/pinto0309/openvino2tensorflow:latest
pip install \
transformers==4.19.2 \
diffusers==0.2.4 \
invisible-watermark==0.1.5 \
omegaconf==2.2.3 \
einops==0.4.1 \
pytorch_lightning==1.7.4 \
taming-transformers-rom1504==0.0.6 \
clip==0.2.0 \
kornia==0.6.7
pip install -e .
docker ps -a
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
97ea63046add ghcr.io/pinto0309/openvino2tensorflow:latest "bash" 2 minutes ago Up 2 minutes charming_chaum
docker commit charming_chaum pinto0309/stablediffusion_export
docker push pinto0309/stablediffusion_export:latest
mkdir -p models/ldm/stable-diffusion-v1/
cd models/ldm/stable-diffusion-v1/
- stable-diffusion のチェックポイント(以下の URL からこのディレクトリにダウンロードする。以降の手順では sd-v1-4.ckpt を使用)
https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt
https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4-full-ema.ckpt
mv sd-v1-4.ckpt model.ckpt
cd ../../..
python scripts/txt2img.py --prompt "a photograph of an cat into a bottle" --plms
RuntimeError: CUDA out of memory. Tried to allocate 1.50 GiB (GPU 0; 7.77 GiB total capacity; 5.62 GiB already allocated; 418.62 MiB free; 5.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
exit
xhost +local: && \
docker run -it --rm \
-v `pwd`:/home/user/workdir \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
pinto0309/stablediffusion_export:latest
scripts/txt2img.py (変更前: model.cuda())
def load_model_from_config(config, ckpt, verbose=False):
    """Build the model described by ``config.model`` and load ``ckpt`` into it.

    Args:
        config: Configuration object; its ``model`` attribute is handed to
            ``instantiate_from_config``.
        ckpt: Path of the checkpoint file passed to ``torch.load``.
        verbose: When true, print the missing / unexpected state-dict keys
            reported by ``load_state_dict``.

    Returns:
        The instantiated model, moved to CUDA and switched to eval mode.
    """
    print(f"Loading model from {ckpt}")
    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    # The checkpoint is deserialized onto the CPU first.
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    # strict=False: key mismatches are returned (m = missing, u = unexpected)
    # rather than raised.
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)
    # Original behavior: place the model on the GPU.
    model.cuda()
    model.eval()
    return model
scripts/txt2img.py (変更後: model.cuda() を model.cpu() に変更)
def load_model_from_config(config, ckpt, verbose=False):
    """Build the model described by ``config.model`` and load ``ckpt`` into it.

    Args:
        config: Configuration object; its ``model`` attribute is handed to
            ``instantiate_from_config``.
        ckpt: Path of the checkpoint file passed to ``torch.load``.
        verbose: When true, print the missing / unexpected state-dict keys
            reported by ``load_state_dict``.

    Returns:
        The instantiated model, kept on the CPU and switched to eval mode.
    """
    print(f"Loading model from {ckpt}")
    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    # The checkpoint is deserialized onto the CPU first.
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    # strict=False: key mismatches are returned (m = missing, u = unexpected)
    # rather than raised.
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)
    # Modified behavior: keep the model on the CPU instead of calling .cuda().
    model.cpu()
    model.eval()
    return model
ldm/modules/encoders/modules.py (変更前: device="cuda")
class FrozenCLIPEmbedder(AbstractEncoder):
    """Uses the CLIP transformer encoder for text (from Hugging Face)"""

    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77):
        """Load the CLIP tokenizer and text model, then freeze the encoder.

        Args:
            version: Identifier passed to both ``from_pretrained`` calls.
            device: Stored as ``self.device``; this is the original default
                of ``"cuda"``.
            max_length: Stored as ``self.max_length``.
        """
        super().__init__()
        self.tokenizer = CLIPTokenizer.from_pretrained(version)
        self.transformer = CLIPTextModel.from_pretrained(version)
        self.device = device
        self.max_length = max_length
        # freeze() is not part of this excerpt; presumably defined further
        # down in this class -- confirm against the full file.
        self.freeze()
ldm/modules/encoders/modules.py (変更後: device のデフォルトを "cpu" に変更)
class FrozenCLIPEmbedder(AbstractEncoder):
    """Uses the CLIP transformer encoder for text (from Hugging Face)"""

    def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77):
        """Load the CLIP tokenizer and text model, then freeze the encoder.

        Args:
            version: Identifier passed to both ``from_pretrained`` calls.
            device: Stored as ``self.device``; the default is ``"cpu"`` in
                this modified version.
            max_length: Stored as ``self.max_length``.
        """
        super().__init__()
        self.tokenizer = CLIPTokenizer.from_pretrained(version)
        self.transformer = CLIPTextModel.from_pretrained(version)
        self.device = device
        self.max_length = max_length
        # freeze() is not part of this excerpt; presumably defined further
        # down in this class -- confirm against the full file.
        self.freeze()
ldm/models/diffusion/plms.py (変更前: register_buffer でテンソルを CUDA へ転送)
class PLMSSampler(object):
    """Sampler wrapper around a diffusion model (excerpt of the class)."""

    def __init__(self, model, schedule="linear", **kwargs):
        """Store the model, its timestep count, and the schedule name.

        Args:
            model: Object exposing ``num_timesteps``; kept as ``self.model``.
            schedule: Schedule name, stored as ``self.schedule``.
            **kwargs: Accepted but not used in this excerpt.
        """
        super().__init__()
        self.model = model
        self.ddpm_num_timesteps = model.num_timesteps
        self.schedule = schedule

    def register_buffer(self, name, attr):
        """Attach ``attr`` to the sampler as attribute ``name``.

        Values whose type is exactly ``torch.Tensor`` are first moved to the
        CUDA device (original behavior); anything else is stored unchanged.
        """
        if type(attr) == torch.Tensor:
            if attr.device != torch.device("cuda"):
                attr = attr.to(torch.device("cuda"))
        setattr(self, name, attr)
ldm/models/diffusion/plms.py (変更後: CUDA への転送を無効化)
class PLMSSampler(object):
    """Sampler wrapper around a diffusion model (excerpt of the class)."""

    def __init__(self, model, schedule="linear", **kwargs):
        """Store the model, its timestep count, and the schedule name.

        Args:
            model: Object exposing ``num_timesteps``; kept as ``self.model``.
            schedule: Schedule name, stored as ``self.schedule``.
            **kwargs: Accepted but not used in this excerpt.
        """
        super().__init__()
        self.model = model
        self.ddpm_num_timesteps = model.num_timesteps
        self.schedule = schedule

    def register_buffer(self, name, attr):
        """Attach ``attr`` to the sampler as attribute ``name``, unchanged.

        The upstream version moved tensors to CUDA here. That transfer is
        dropped so sampling can run on the CPU; the leftover device check
        had no effect and is removed.
        """
        setattr(self, name, attr)
- 推論テスト
python scripts/txt2img.py --prompt "a photograph of an cat into a bottle" --plms