Skip to content

AnimateDiffPipeline: Tensor Mismatch with num_frames > motion_max_seq_length (chunking failure) #11615

Open
@bpaviral

Description

@bpaviral

Describe the bug

When using AnimateDiffPipeline with a motion adapter (e.g., guoyww/animatediff-motion-adapter-v1-5-2, which has motion_max_seq_length=32) and passing a num_frames value to the pipeline call that is greater than the adapter's motion_max_seq_length (e.g., num_frames=64), the call fails with "RuntimeError: The size of tensor a (64) must match the size of tensor b (32) at non-singleton dimension 1". This suggests the pipeline is not correctly chunking the frames for the motion adapter.

Reproduction

import torch
import diffusers
from diffusers import AnimateDiffPipeline, MotionAdapter, DDIMScheduler, AutoencoderKL
from diffusers.utils import export_to_video
import os

# Script Information

# Report the installed library version and the device the reproduction runs on.
# The package exposes its version string as `__version__` (the dunder
# underscores were lost when the issue body was rendered as markdown).
print(f"Using Diffusers version: {diffusers.__version__}")
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Target device: {target_device}")

# Configuration

# Model checkpoints used by the reproduction.
base_model_id = "runwayml/stable-diffusion-v1-5"
vae_model_id = "stabilityai/sd-vae-ft-mse"
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-2"

# Text conditioning passed to the pipeline call.
prompt = (
    "A cute baby elephant walking through a lush green forest, "
    "cinematic lighting, high detail"
)
negative_prompt = (
    "blurry, distorted, low quality, "
    "worst quality, ugly, deformed"
)

# Generation settings: the frame count is deliberately larger than the
# adapter's capacity, which is the condition this report is about.
num_total_frames = 64
fps = 8

# Temporary output directory for this bug report.
output_dir = "animatediff_bug_report"

# Ensure output directory exists

# Bind the three handles to None up front so the cleanup step at the end of
# the script can always reference these names.
pipe = adapter = vae = None

# Create the output directory; an already-existing directory is not an error.
os.makedirs(output_dir, exist_ok=True)

# Reproduction driver: load the motion adapter, VAE and pipeline, request
# more frames than the adapter's motion_max_seq_length, and report the
# resulting error. Cleanup runs whether or not generation succeeds.
import traceback

# 0 means "unknown". Bound before the try block so the error report below can
# always read this name, even if an exception is raised before the adapter
# config has been inspected.
adapter_native_frames = 0

try:
    print(f"Loading motion adapter: {motion_adapter_id}...")
    adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=torch.float32)

    # Verify adapter's expected frame length
    if hasattr(adapter, "config") and "motion_max_seq_length" in adapter.config:
        adapter_native_frames = adapter.config.motion_max_seq_length
    print(f"Motion adapter's reported 'motion_max_seq_length': {adapter_native_frames if adapter_native_frames > 0 else 'Could not determine'}")
    if adapter_native_frames == 0:
        print("WARNING: Could not determine motion_max_seq_length from adapter config. This might be part of the issue if config loading failed.")

    print(f"Loading VAE: {vae_model_id}...")
    vae = AutoencoderKL.from_pretrained(vae_model_id, torch_dtype=torch.float32)

    print(f"Loading AnimateDiffPipeline with base model: {base_model_id}...")
    pipe = AnimateDiffPipeline.from_pretrained(
        base_model_id,
        motion_adapter=adapter,
        vae=vae,
        torch_dtype=torch.float32,
    )
    # Swap the pipeline's default scheduler for DDIM, reusing its config.
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    print(f"Moving pipeline to {target_device}...")
    pipe.to(target_device)

    # NOTE(review): offload is enabled after the pipeline has already been
    # moved to the device, exactly as in the original report; the order is
    # kept so the reproduction steps stay unchanged.
    if target_device.type == "cuda":
        print("Enabling model CPU offload...")
        pipe.enable_model_cpu_offload()

    print(f"Attempting to generate {num_total_frames} frames (adapter native: {adapter_native_frames})...")
    video_frames = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=25,  # Kept reasonably low for faster reproduction
        guidance_scale=7.5,
        num_frames=num_total_frames,  # Requesting more frames than adapter capacity
    ).frames[0]

    print(f"Successfully generated {len(video_frames)} frames.")  # This line won't be reached if error occurs

    # Save video (optional for bug report, but good for completeness if it ever worked)
    # video_path = os.path.join(output_dir, f"output_video_{num_total_frames}frames.mp4")
    # export_to_video(video_frames, video_path, fps=fps)
    # print(f"Video saved to {video_path}")

except RuntimeError as e:
    message = str(e)
    if "size of tensor a" in message and "must match the size of tensor b" in message:
        # The specific failure this report is about: print the context
        # needed to diagnose it.
        print("\n--- ERROR: Tensor Mismatch ---")
        print(message)
        print(f"Diffusers version: {diffusers.__version__}")
        print(f"Requested total frames: {num_total_frames}")
        print(f"Adapter's native motion_max_seq_length: {adapter_native_frames if adapter_native_frames > 0 else 'Undetermined'}")
        print("This indicates a potential issue with the pipeline's frame chunking logic.")
    else:
        # Any other runtime error: show the message and the full traceback.
        print("\n--- Runtime Error ---")
        print(message)
        traceback.print_exc()
except Exception as e:
    # Top-level boundary of the script: report and fall through to cleanup.
    print("\n--- Unexpected Error ---")
    print(f"{e}")
    traceback.print_exc()
finally:
    print("\nCleaning up (simulated for bug report)...")
    # These names are bound (to None) before the try block, so `del` is
    # valid even when loading failed part-way through.
    del pipe, adapter, vae
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Done.")

Logs

System Info

Diffusers version: 0.34.0.dev0
PyTorch version: 2.7.0+cu128
Python version: 3.12.4150
Platform: Windows 11
CUDA version: 12.9
GPU model: RTX 5080 windforce

Who can help?

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions