add control net support (#156)

Jackwensen · web-flow · commit e8159df11f6f · 2023-03-27T15:15:03.000+08:00
还有一些问题需要讨论和确定：

1. 为支持Control Net需要的升级：
   - huggingface官方的 [diffusers 0.14.0](https://github.com/huggingface/diffusers/releases/tag/v0.14.0) 支持了Control Net pipeline。所以我们要支持的话也需要安装0.14.0版本的diffusers `python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.14.0"`
   - canny example 中为了检测Canny edge用到了 cv2，需要安装 `pip install opencv-contrib-python`
2. 可能还需要测试升级diffusers版本后对其他pipeline的影响
3. 第一次提PR不知还有什么步骤需要完善

### Reference

- https://huggingface.co/docs/diffusers/v0.14.0/en/api/pipelines/stable_diffusion/controlnet
- huggingface/diffusers#2407
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ Please refer to this [wiki](https://github.com/Oneflow-Inc/diffusers/wiki/How-to
 ## Quick demo
 
 ```
-python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.12.1"
+python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.14.0"
 python3 -m pip uninstall accelerate -y
 python3 -m pip install -U onediff
 python3 -m onediff.demo
@@ -28,7 +28,7 @@ OneFlow's main [repo](https://github.com/Oneflow-Inc/oneflow)
 ```
 git clone https://github.com/Oneflow-Inc/diffusers.git onediff
 cd onediff
-python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.12.1"
+python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.14.0"
 python3 -m pip uninstall accelerate -y
 python3 -m pip install -e .
 ```
@@ -45,7 +45,7 @@ python3 -m pip uninstall diffusers -y
 2. install transformers and diffusers
 
 ```
-python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.12.1"
+python3 -m pip install "transformers>=4.26" "diffusers[torch]==0.14.0"
 python3 -m pip uninstall accelerate -y
 ```
 
diff --git a/examples/control_net_canny.py b/examples/control_net_canny.py
@@ -0,0 +1,45 @@
+import cv2
+from PIL import Image
+import numpy as np
+
+from onediff import OneFlowStableDiffusionControlNetPipeline
+
+import oneflow as flow
+flow.mock_torch.enable()
+
+from diffusers.utils import load_image
+from diffusers import ControlNetModel
+
+image = load_image(
+    "http://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
+)
+
+image = np.array(image)
+
+low_threshold = 100
+high_threshold = 200
+
+image = cv2.Canny(image, low_threshold, high_threshold)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+canny_image = Image.fromarray(image)
+
+controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=flow.float16)
+    
+pipe = OneFlowStableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=flow.float16
+)
+
+pipe.to("cuda")
+
+generator = flow.manual_seed(0)
+
+prompt = "disco dancer with colorful lights, best quality, extremely detailed"
+negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"
+
+
+out_images = pipe(
+    prompt = prompt, negative_prompt=negative_prompt, num_inference_steps=20, generator=generator, image=canny_image
+).images
+for i, image in enumerate(out_images):
+    image.save(f"{prompt}-of-{i}.png")
diff --git a/examples/image_to_image.py b/examples/image_to_image.py
@@ -1,7 +1,8 @@
-import oneflow as flow
 from PIL import Image
-flow.mock_torch.enable()
+
 from onediff import OneFlowStableDiffusionImg2ImgPipeline
+import oneflow as flow
+flow.mock_torch.enable()
 
 pipe = OneFlowStableDiffusionImg2ImgPipeline.from_pretrained(
     "stabilityai/stable-diffusion-2",
@@ -22,7 +23,6 @@
         image=img,
         guidance_scale=10,
         num_inference_steps=100,
-        compile_unet=False,
         output_type="np",
     ).images
     for i, image in enumerate(images):
diff --git a/examples/text_to_image.py b/examples/text_to_image.py
@@ -1,7 +1,7 @@
-import oneflow as flow
+from onediff import OneFlowStableDiffusionPipeline
 
+import oneflow as flow
 flow.mock_torch.enable()
-from onediff import OneFlowStableDiffusionPipeline
 
 pipe = OneFlowStableDiffusionPipeline.from_pretrained(
     "CompVis/stable-diffusion-v1-4",
diff --git a/examples/text_to_image_alt.py b/examples/text_to_image_alt.py
@@ -1,8 +1,7 @@
-import oneflow as flow
-
-flow.mock_torch.enable()
 from onediff import OneFlowAltDiffusionPipeline
 
+import oneflow as flow
+flow.mock_torch.enable()
 
 pipe = OneFlowAltDiffusionPipeline.from_pretrained("BAAI/AltDiffusion-m9", torch_dtype=flow.float16)
 pipe = pipe.to("cuda")
diff --git a/examples/text_to_image_dpmsolver.py b/examples/text_to_image_dpmsolver.py
@@ -1,8 +1,9 @@
-import oneflow as flow
+from onediff import OneFlowStableDiffusionPipeline
 
+import oneflow as flow
 flow.mock_torch.enable()
+
 from diffusers import DPMSolverMultistepScheduler
-from onediff import OneFlowStableDiffusionPipeline
 
 model_id = "CompVis/stable-diffusion-v1-4"
 
diff --git a/examples/text_to_image_inpaint.py b/examples/text_to_image_inpaint.py
@@ -1,22 +1,14 @@
-import oneflow as flow
-flow.mock_torch.enable()
-
-import PIL
-import requests
-from io import BytesIO
 from onediff import OneFlowStableDiffusionInpaintPipeline
 
+from diffusers.utils import load_image
 
-def download_image(url):
-    response = requests.get(url)
-    return PIL.Image.open(BytesIO(response.content)).convert("RGB")
-
+import oneflow as flow
+flow.mock_torch.enable()
 
 img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
 mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
-init_image = download_image(img_url).resize((512, 512))
-mask_image = download_image(mask_url).resize((512, 512))
-
+init_image = load_image(img_url).resize((512, 512))
+mask_image = load_image(mask_url).resize((512, 512))
 pipe = OneFlowStableDiffusionInpaintPipeline.from_pretrained(
     "runwayml/stable-diffusion-inpainting",
     torch_dtype=flow.float16,
diff --git a/examples/text_to_image_sd2.py b/examples/text_to_image_sd2.py
@@ -1,8 +1,9 @@
+from onediff import OneFlowStableDiffusionPipeline
+
 import oneflow as flow
 flow.mock_torch.enable()
 
 from diffusers import EulerDiscreteScheduler
-from onediff import OneFlowStableDiffusionPipeline
 
 model_id = "stabilityai/stable-diffusion-2"
 # Use the Euler scheduler here instead
diff --git a/src/onediff/__init__.py b/src/onediff/__init__.py
@@ -31,4 +31,5 @@ def dummy_randn(*args, **kwargs):
 from .pipeline_stable_diffusion_inpaint_oneflow import (
     OneFlowStableDiffusionInpaintPipeline,
 )
+from .pipeline_stable_diffusion_controlnet_oneflow import OneFlowStableDiffusionControlNetPipeline
 
diff --git a/src/onediff/pipeline_stable_diffusion_controlnet_oneflow.py b/src/onediff/pipeline_stable_diffusion_controlnet_oneflow.py
diff --git a/src/onediff/pipeline_stable_diffusion_img2img_oneflow.py b/src/onediff/pipeline_stable_diffusion_img2img_oneflow.py

Original file line number	Diff line number	Diff line change
`@@ -31,4 +31,5 @@ def dummy_randn(*args, **kwargs):`
`31`	`31`	`from .pipeline_stable_diffusion_inpaint_oneflow import (`
`32`	`32`	`OneFlowStableDiffusionInpaintPipeline,`
`33`	`33`	`)`
	`34`	`+from .pipeline_stable_diffusion_controlnet_oneflow import OneFlowStableDiffusionControlNetPipeline`
`34`	`35`