Description
Describe the bug
When using `AnimateDiffPipeline` with a motion adapter (e.g., `guoyww/animatediff-motion-adapter-v1-5-2`, which has `motion_max_seq_length=32`) and passing a `num_frames` value to the pipeline call that is greater than the adapter's `motion_max_seq_length` (e.g., `num_frames=64`), the call fails with `RuntimeError: The size of tensor a (64) must match the size of tensor b (32) at non-singleton dimension 1`. This suggests the pipeline is not correctly chunking the frames for the motion adapter.
Reproduction
import torch
import diffusers
from diffusers import AnimateDiffPipeline, MotionAdapter, DDIMScheduler, AutoencoderKL
from diffusers.utils import export_to_video
import os
# Script Information
# Report the installed Diffusers version and pick the device the pipeline
# will run on: CUDA when available, otherwise CPU.
print(f"Using Diffusers version: {diffusers.__version__}")
target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Target device: {target_device}")
# Configuration
# Text prompts passed to the pipeline call.
prompt = "A cute baby elephant walking through a lush green forest, cinematic lighting, high detail"
negative_prompt = "blurry, distorted, low quality, worst quality, ugly, deformed"
output_dir = r"animatediff_bug_report" # Temporary output dir
num_total_frames = 64 # Requesting more frames than adapter's capacity
# Only used by the (commented-out) video export step.
fps = 8
# Model identifiers passed to the respective from_pretrained() calls.
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-2"
base_model_id = "runwayml/stable-diffusion-v1-5"
vae_model_id = "stabilityai/sd-vae-ft-mse"
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Pre-bind every name that the except/finally clauses read, so those clauses
# cannot raise NameError when a failure happens early inside the try body.
pipe = None
adapter = None
vae = None
adapter_native_frames = 0  # 0 means "could not be determined"

try:
    print(f"Loading motion adapter: {motion_adapter_id}...")
    adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=torch.float32)

    # Verify adapter's expected frame length
    if hasattr(adapter, 'config') and 'motion_max_seq_length' in adapter.config:
        adapter_native_frames = adapter.config.motion_max_seq_length
    print(f"Motion adapter's reported 'motion_max_seq_length': {adapter_native_frames if adapter_native_frames > 0 else 'Could not determine'}")
    if adapter_native_frames == 0:
        print("WARNING: Could not determine motion_max_seq_length from adapter config. This might be part of the issue if config loading failed.")

    print(f"Loading VAE: {vae_model_id}...")
    vae = AutoencoderKL.from_pretrained(vae_model_id, torch_dtype=torch.float32)

    print(f"Loading AnimateDiffPipeline with base model: {base_model_id}...")
    pipe = AnimateDiffPipeline.from_pretrained(
        base_model_id,
        motion_adapter=adapter,
        vae=vae,
        torch_dtype=torch.float32
    )
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    print(f"Moving pipeline to {target_device}...")
    pipe.to(target_device)
    if target_device.type == 'cuda':
        print("Enabling model CPU offload...")
        pipe.enable_model_cpu_offload()

    print(f"Attempting to generate {num_total_frames} frames (adapter native: {adapter_native_frames})...")
    video_frames = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=25, # Kept reasonably low for faster reproduction
        guidance_scale=7.5,
        num_frames=num_total_frames, # Requesting more frames than adapter capacity
    ).frames[0]
    print(f"Successfully generated {len(video_frames)} frames.") # This line won't be reached if error occurs

    # Save video (optional for bug report, but good for completeness if it ever worked)
    # video_path = os.path.join(output_dir, f"output_video_{num_total_frames}frames.mp4")
    # export_to_video(video_frames, video_path, fps=fps)
    # print(f"Video saved to {video_path}")
except RuntimeError as e:
    # Distinguish the tensor-size mismatch this report is about from any
    # other RuntimeError by matching on the error message text.
    if "size of tensor a" in str(e) and "must match the size of tensor b" in str(e):
        print(f"\n--- ERROR: Tensor Mismatch ---")
        print(f"{e}")
        print(f"Diffusers version: {diffusers.__version__}")
        print(f"Requested total frames: {num_total_frames}")
        print(f"Adapter's native motion_max_seq_length: {adapter_native_frames if adapter_native_frames > 0 else 'Undetermined'}")
        print(f"This indicates a potential issue with the pipeline's frame chunking logic.")
    else:
        print(f"\n--- Runtime Error ---")
        print(f"{e}")
    # NOTE(review): the original indentation was lost; the traceback is
    # assumed to be printed for both branches above -- confirm.
    import traceback
    traceback.print_exc()
except Exception as e:
    print(f"\n--- Unexpected Error ---")
    print(f"{e}")
    import traceback
    traceback.print_exc()
finally:
    print("\nCleaning up (simulated for bug report)...")
    # Safe even after an early failure: all three names were bound to None
    # before the try block.
    del pipe, adapter, vae
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # NOTE(review): assumed to belong to the finally clause; the original
    # indentation was lost -- confirm.
    print("Done.")
Logs
System Info
Diffusers version: 0.34.0.dev0
PyTorch version: 2.7.0+cu128
Python version: 3.12.4150
Platform: Windows 11
CUDA version: 12.9
GPU model: RTX 5080 windforce
Who can help?
No response