# Add UNet 1d for RL model for planning + colab #105
Changes from 54 commits.
#### `.gitignore`

```diff
@@ -163,4 +163,6 @@ tags
 *.lock

 # DS_Store (MacOS)
 .DS_Store
+# RL pipelines may produce mp4 outputs
+*.mp4
```
#### `examples/rl/README.md` (new file)

# Overview

These examples show how to run [Diffuser](https://arxiv.org/abs/2205.09991) in Diffusers.
There are two scripts:
1. `run_diffuser_locomotion.py` to sample actions and run them in the environment, and
2. `run_diffuser_gen_trajectories.py` to just sample actions from the pre-trained diffusion model.

You will need some RL-specific requirements to run the examples:

```
pip install -f https://download.pytorch.org/whl/torch_stable.html \
    free-mujoco-py \
    einops \
    gym \
    protobuf==3.20.1 \
    git+https://github.com/rail-berkeley/d4rl.git \
    mediapy \
    Pillow==9.0.0
```
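A quick way to verify the installation is a short import check. This is a sketch, not part of the PR; it assumes MuJoCo is set up for `free-mujoco-py` and reuses the environment name from the example scripts below:

```python
# Hypothetical smoke test for the RL requirements above -- not part of this PR.
import d4rl  # noqa: F401  (importing d4rl registers the offline RL envs with gym)
import gym

env = gym.make("hopper-medium-v2")  # environment used by the example scripts
print(env.observation_space, env.action_space)
```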
#### `examples/rl/run_diffuser_gen_trajectories.py` (new file)

```python
import d4rl  # noqa
import gym
import tqdm

from diffusers import ValueGuidedRLPipeline


# Sampling hyperparameters, kept here for reference; note that `config` is not
# passed to the pipeline call below. `n_guide_steps=0` means no value guidance.
config = dict(
    n_samples=64,
    horizon=32,
    num_inference_steps=20,
    n_guide_steps=0,
    scale_grad_by_std=True,
    scale=0.1,
    eta=0.0,
    t_grad_cutoff=2,
    device="cpu",
)


if __name__ == "__main__":
    env_name = "hopper-medium-v2"
    env = gym.make(env_name)

    pipeline = ValueGuidedRLPipeline.from_pretrained(
        "bglick13/hopper-medium-v2-value-function-hor32",
        env=env,
    )

    env.seed(0)
    obs = env.reset()
    total_reward = 0
    total_score = 0
    T = 1000
    rollout = [obs.copy()]
    try:
        for t in tqdm.tqdm(range(T)):
            # call the policy
            denorm_actions = pipeline(obs, planning_horizon=32)

            # execute action in environment
            next_observation, reward, terminal, _ = env.step(denorm_actions)
            score = env.get_normalized_score(total_reward)
            # update return
            total_reward += reward
            total_score += score
            print(
                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
                f" {total_score}"
            )
            # save observations for rendering
            rollout.append(next_observation.copy())

            obs = next_observation
    except KeyboardInterrupt:
        pass

    print(f"Total reward: {total_reward}")
```
#### `examples/rl/run_diffuser_locomotion.py` (new file)

```python
import d4rl  # noqa
import gym
import tqdm

from diffusers import ValueGuidedRLPipeline


# Sampling hyperparameters, kept here for reference; note that `config` is not
# passed to the pipeline call below. Unlike the trajectory-generation script,
# this one sets `n_guide_steps=2`, i.e. value-guided sampling.
config = dict(
    n_samples=64,
    horizon=32,
    num_inference_steps=20,
    n_guide_steps=2,
    scale_grad_by_std=True,
    scale=0.1,
    eta=0.0,
    t_grad_cutoff=2,
    device="cpu",
)


if __name__ == "__main__":
    env_name = "hopper-medium-v2"
    env = gym.make(env_name)

    pipeline = ValueGuidedRLPipeline.from_pretrained(
        "bglick13/hopper-medium-v2-value-function-hor32",
        env=env,
    )

    env.seed(0)
    obs = env.reset()
    total_reward = 0
    total_score = 0
    T = 1000
    rollout = [obs.copy()]
    try:
        for t in tqdm.tqdm(range(T)):
            # call the policy
            denorm_actions = pipeline(obs, planning_horizon=32)

            # execute action in environment
            next_observation, reward, terminal, _ = env.step(denorm_actions)
            score = env.get_normalized_score(total_reward)
            # update return
            total_reward += reward
            total_score += score
            print(
                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
                f" {total_score}"
            )
            # save observations for rendering
            rollout.append(next_observation.copy())

            obs = next_observation
    except KeyboardInterrupt:
        pass

    print(f"Total reward: {total_reward}")
```
#### `scripts/convert_models_diffuser_to_diffusers.py` (new file)

```python
import json
import os

import torch

from diffusers import UNet1DModel


os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True)
os.makedirs("hub/hopper-medium-v2/unet/hor128", exist_ok=True)

os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True)


def unet(hor):
    if hor == 128:
        down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D")
        block_out_channels = (32, 128, 256)
        up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D")
    elif hor == 32:
        down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D")
        block_out_channels = (32, 64, 128, 256)
        up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D")
    else:
        raise ValueError(f"Unsupported horizon: {hor}")

    # Load the original Diffuser checkpoint (local path on the author's machine).
    model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-mediumv2-hor{hor}.torch")
    state_dict = model.state_dict()
    config = dict(
        down_block_types=down_block_types,
        block_out_channels=block_out_channels,
        up_block_types=up_block_types,
        layers_per_block=1,
    )
    hf_value_function = UNet1DModel(**config)
    print(f"length of state dict: {len(state_dict.keys())}")
    print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}")

    # Rename the original keys to the diffusers naming scheme; this relies on
    # both state dicts listing their parameters in the same order.
    mapping = dict(zip(model.state_dict().keys(), hf_value_function.state_dict().keys()))
    for k, v in mapping.items():
        state_dict[v] = state_dict.pop(k)
    hf_value_function.load_state_dict(state_dict)

    torch.save(hf_value_function.state_dict(), f"hub/hopper-medium-v2/unet/hor{hor}/diffusion_pytorch_model.bin")
    with open(f"hub/hopper-medium-v2/unet/hor{hor}/config.json", "w") as f:
        json.dump(config, f)


def value_function():
    config = dict(
        in_channels=14,
        down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"),
        up_block_types=(),
        out_block_type="ValueFunction",
        mid_block_type="ValueFunctionMidBlock1D",
        block_out_channels=(32, 64, 128, 256),
        layers_per_block=1,
        always_downsample=True,
    )

    # Unlike the UNet checkpoint, the value-function checkpoint is saved as a
    # plain state dict, so no .state_dict() call is needed.
    model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch")
    state_dict = model
    hf_value_function = UNet1DModel(**config)
    print(f"length of state dict: {len(state_dict.keys())}")
    print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}")

    mapping = dict(zip(state_dict.keys(), hf_value_function.state_dict().keys()))
    for k, v in mapping.items():
        state_dict[v] = state_dict.pop(k)

    hf_value_function.load_state_dict(state_dict)

    torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/value_function/diffusion_pytorch_model.bin")
    with open("hub/hopper-medium-v2/value_function/config.json", "w") as f:
        json.dump(config, f)


if __name__ == "__main__":
    unet(32)
    # unet(128)
    value_function()
```
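Since the script writes a `config.json` next to each `diffusion_pytorch_model.bin`, the converted weights should be reloadable through the standard `from_pretrained` path. A sketch of such a sanity check (not part of the script):

```python
# Hypothetical sanity check, not part of this PR: reload the converted
# UNet from the local hub directory written above.
from diffusers import UNet1DModel

unet_hor32 = UNet1DModel.from_pretrained("hub/hopper-medium-v2/unet/hor32")
print(sum(p.numel() for p in unet_hor32.parameters()), "parameters")
```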
#### `src/diffusers/__init__.py`

```diff
@@ -17,6 +17,7 @@

 if is_torch_available():
+    from .experimental import ValueGuidedRLPipeline
     from .modeling_utils import ModelMixin
     from .models import AutoencoderKL, Transformer2DModel, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel
     from .optimization import (
```

> **Review comment:** Good idea to add. But we should not import `from diffusers.experimental import ...`.

> **Review comment:** I like this convention.
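As the review comments note, users should import from the top-level package rather than from `diffusers.experimental`; the re-export added in the diff above makes the following one-liner work (assuming torch is installed):

```python
# The pipeline is implemented under diffusers.experimental but re-exported
# at the top level, so user code imports it directly from `diffusers`:
from diffusers import ValueGuidedRLPipeline
```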
#### `src/diffusers/experimental/README.md` (new file)

# 🧨 Diffusers Experimental

We are adding experimental code to support novel applications and usages of the Diffusers library.
Currently, the following experiments are supported:
* Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model.
#### `src/diffusers/experimental/__init__.py` (new file)

```python
from .rl import ValueGuidedRLPipeline
```
#### `src/diffusers/experimental/rl/__init__.py` (new file)

```python
from .value_guided_sampling import ValueGuidedRLPipeline
```
> **Review comment:** Very cool!