From 98223cf899b207c13c7e1a74120b7dbda8484b7f Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Thu, 13 Jun 2024 18:28:20 +0800 Subject: [PATCH] added camera motion to gradio (#135) --- gradio/app.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/gradio/app.py b/gradio/app.py index d70d682..844e076 100644 --- a/gradio/app.py +++ b/gradio/app.py @@ -197,7 +197,7 @@ device = torch.device("cuda") vae, text_encoder, stdit, scheduler = build_models(args.model_type, config, enable_optimization=args.enable_optimization) -def run_inference(mode, prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, use_timestep_transform, reference_image, seed, sampling_steps, cfg_scale): +def run_inference(mode, prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, camera_motion, reference_image, seed, sampling_steps, cfg_scale): torch.manual_seed(seed) with torch.inference_mode(): # ====================== @@ -267,6 +267,11 @@ def run_inference(mode, prompt_text, resolution, aspect_ratio, length, motion_st # process scores use_motion_strength = use_motion_strength and mode != "Text2Image" + if camera_motion != "none": + batch_prompts = [ + f"{prompt} camera motion: {camera_motion}." + for prompt in batch_prompts + ] batch_prompts = append_score_to_prompts( batch_prompts, aes=aesthetic_score if use_aesthetic_score else None, @@ -302,7 +307,6 @@ def run_inference(mode, prompt_text, resolution, aspect_ratio, length, motion_st scheduler_kwargs.pop('type') scheduler_kwargs['num_sampling_steps'] = sampling_steps scheduler_kwargs['cfg_scale'] = cfg_scale - scheduler_kwargs['use_timestep_transform'] = use_timestep_transform scheduler.__init__( **scheduler_kwargs @@ -344,7 +348,7 @@ def run_image_inference( aesthetic_score, use_motion_strength, use_aesthetic_score, - use_timestep_transform, + camera_motion, reference_image, seed, sampling_steps, @@ -359,7 +363,7 @@ def run_image_inference( aesthetic_score, use_motion_strength, use_aesthetic_score, - use_timestep_transform, + camera_motion, reference_image, seed, sampling_steps, @@ -375,7 +379,7 @@ def run_video_inference( aesthetic_score, use_motion_strength, use_aesthetic_score, - use_timestep_transform, + camera_motion, reference_image, seed, sampling_steps, @@ -394,7 +398,7 @@ def run_video_inference( aesthetic_score, use_motion_strength, use_aesthetic_score, - use_timestep_transform, + camera_motion, reference_image, seed, sampling_steps, @@ -498,7 +502,21 @@ def main(): ) use_aesthetic_score = gr.Checkbox(value=True, label="Enable") - use_timestep_transform = gr.Checkbox(value=True, label="Use Time Transform") + camera_motion = gr.Radio( + value="none", + label="Camera Motion", + choices=[ + "none", + "pan right", + "pan left", + "tilt up", + "tilt down", + "zoom in", + "zoom out", + "static" + ], + interactive=True + ) reference_image = gr.Image( @@ -519,12 +537,12 @@ def main(): image_gen_button.click( fn=run_image_inference, - inputs=[prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, use_timestep_transform, reference_image, seed, sampling_steps, cfg_scale], + inputs=[prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, camera_motion, reference_image, seed, sampling_steps, cfg_scale], outputs=reference_image ) video_gen_button.click( fn=run_video_inference, - inputs=[prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, use_timestep_transform, reference_image, seed, sampling_steps, cfg_scale], + inputs=[prompt_text, resolution, aspect_ratio, length, motion_strength, aesthetic_score, use_motion_strength, use_aesthetic_score, camera_motion, reference_image, seed, sampling_steps, cfg_scale], outputs=output_video )