diff --git a/configs/dit/inference/16x256x256.py b/configs/dit/inference/16x256x256.py index 50c3fa5..9ed4d84 100644 --- a/configs/dit/inference/16x256x256.py +++ b/configs/dit/inference/16x256x256.py @@ -7,7 +7,7 @@ image_size = (256, 256) model = dict( type="DiT-XL/2", condition="text", - from_pretrained="YOUR_MODEL_PATH", + from_pretrained="PRETRAINED_MODEL", ) vae = dict( type="VideoAutoencoderKL", diff --git a/configs/dit/inference/1x256x256-class.py b/configs/dit/inference/1x256x256-class.py index 7e81ad4..24d1c8a 100644 --- a/configs/dit/inference/1x256x256-class.py +++ b/configs/dit/inference/1x256x256-class.py @@ -1,9 +1,8 @@ -# sample size num_frames = 1 fps = 1 image_size = (256, 256) -# model config +# Define model model = dict( type="DiT-XL/2", no_temporal_pos_emb=True, @@ -25,7 +24,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/imagenet_id.txt" diff --git a/configs/dit/inference/1x256x256.py b/configs/dit/inference/1x256x256.py index d99d7a7..31a5b9f 100644 --- a/configs/dit/inference/1x256x256.py +++ b/configs/dit/inference/1x256x256.py @@ -1,14 +1,13 @@ -# sample size num_frames = 1 fps = 1 image_size = (256, 256) -# model config +# Define model model = dict( type="DiT-XL/2", no_temporal_pos_emb=True, condition="text", - from_pretrained="YOUR_MODEL_PATH", + from_pretrained="PRETRAINED_MODEL", ) vae = dict( type="VideoAutoencoderKL", @@ -26,7 +25,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/imagenet_labels.txt" diff --git a/configs/dit/train/16x256x256.py b/configs/dit/train/16x256x256.py index df809e5..67d3736 100644 --- a/configs/dit/train/16x256x256.py +++ b/configs/dit/train/16x256x256.py @@ -1,21 +1,20 @@ -# sample size num_frames = 16 frame_interval = 3 image_size = (256, 256) -# dataset +# Define dataset root = None -data_path = "/mnt/hdd/data/csv/ucf101_videos.csv" +data_path = "CSV_PATH" use_image_transform = False num_workers = 4 -# acceleration -dtype = "fp16" +# Define acceleration +dtype = "bf16" grad_checkpoint = True plugin = "zero2" sp_size = 1 -# model config +# Define model model = dict( type="DiT-XL/2", from_pretrained="DiT-XL-2-256x256.pt", @@ -36,7 +35,7 @@ scheduler = dict( timestep_respacing="", ) -# runtime +# Others seed = 42 outputs = "outputs" wandb = False diff --git a/configs/dit/train/1x256x256.py b/configs/dit/train/1x256x256.py index 8cb8f9a..f8bd8d3 100644 --- a/configs/dit/train/1x256x256.py +++ b/configs/dit/train/1x256x256.py @@ -1,21 +1,20 @@ -# sample size num_frames = 1 frame_interval = 1 image_size = (256, 256) -# dataset +# Define dataset root = None -data_path = "/mnt/hdd/data/csv/imagenet_train.csv" +data_path = "CSV_PATH" use_image_transform = True num_workers = 4 -# acceleration -dtype = "fp16" +# Define acceleration +dtype = "bf16" grad_checkpoint = True plugin = "zero2" sp_size = 1 -# model config +# Define model model = dict( type="DiT-XL/2", no_temporal_pos_emb=True, @@ -36,7 +35,7 @@ scheduler = dict( timestep_respacing="", ) -# runtime +# Others seed = 42 outputs = "outputs" wandb = False diff --git a/configs/latte/inference/16x256x256-class.py b/configs/latte/inference/16x256x256-class.py index e08acb8..c46f4bc 100644 --- a/configs/latte/inference/16x256x256-class.py +++ b/configs/latte/inference/16x256x256-class.py @@ -1,9 +1,8 @@ -# sample size num_frames = 16 fps = 8 image_size = (256, 256) -# model config +# Define model model = dict( type="Latte-XL/2", condition="label_101", @@ -24,7 +23,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/ucf101_id.txt" diff --git a/configs/latte/inference/16x256x256.py b/configs/latte/inference/16x256x256.py index d74c995..cb50237 100644 --- a/configs/latte/inference/16x256x256.py +++ b/configs/latte/inference/16x256x256.py @@ -1,13 +1,12 @@ -# sample size num_frames = 16 fps = 8 image_size = (256, 256) -# model config +# Define model model = dict( type="Latte-XL/2", condition="text", - from_pretrained="YOUR_MODEL_PATH", + from_pretrained="PRETRAINED_MODEL", ) vae = dict( type="VideoAutoencoderKL", @@ -25,7 +24,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/ucf101_labels.txt" diff --git a/configs/latte/train/16x256x256.py b/configs/latte/train/16x256x256.py index 685996d..0bf6bd4 100644 --- a/configs/latte/train/16x256x256.py +++ b/configs/latte/train/16x256x256.py @@ -1,21 +1,20 @@ -# sample size num_frames = 16 frame_interval = 3 image_size = (256, 256) -# dataset +# Define dataset root = None -data_path = "/mnt/hdd/data/csv/ucf101_videos.csv" +data_path = "CSV_PATH" use_image_transform = False num_workers = 4 -# acceleration -dtype = "fp16" +# Define acceleration +dtype = "bf16" grad_checkpoint = True plugin = "zero2" sp_size = 1 -# model config +# Define model model = dict( type="Latte-XL/2", enable_flashattn=True, @@ -35,7 +34,7 @@ scheduler = dict( timestep_respacing="", ) -# runtime +# Others seed = 42 outputs = "outputs" wandb = False diff --git a/configs/pixart/inference/16x256x256.py b/configs/pixart/inference/16x256x256.py index 11ce918..6fc8ee6 100644 --- a/configs/pixart/inference/16x256x256.py +++ b/configs/pixart/inference/16x256x256.py @@ -1,9 +1,8 @@ -# sample size num_frames = 16 fps = 8 image_size = (256, 256) -# model config +# Define model model = dict( type="PixArt-XL/2", space_scale=0.5, @@ -26,7 +25,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/t2v_samples.txt" diff --git a/configs/pixart/inference/1x1024MS.py b/configs/pixart/inference/1x1024MS.py index 5e864b8..41cc97a 100644 --- a/configs/pixart/inference/1x1024MS.py +++ b/configs/pixart/inference/1x1024MS.py @@ -1,10 +1,9 @@ -# sample size num_frames = 1 fps = 1 image_size = (1920, 512) multi_resolution = True -# model config +# Define model model = dict( type="PixArtMS-XL/2", space_scale=2.0, @@ -28,7 +27,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/t2i_samples.txt" diff --git a/configs/pixart/inference/1x256x256.py b/configs/pixart/inference/1x256x256.py index 6dc614d..6dcbbba 100644 --- a/configs/pixart/inference/1x256x256.py +++ b/configs/pixart/inference/1x256x256.py @@ -1,9 +1,8 @@ -# sample size num_frames = 1 fps = 1 image_size = (256, 256) -# model config +# Define model model = dict( type="PixArt-XL/2", space_scale=1.0, @@ -29,7 +28,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/t2i_samples.txt" diff --git a/configs/pixart/inference/1x512x512.py b/configs/pixart/inference/1x512x512.py index 8c8c3fe..5674259 100644 --- a/configs/pixart/inference/1x512x512.py +++ b/configs/pixart/inference/1x512x512.py @@ -1,9 +1,8 @@ -# sample size num_frames = 1 fps = 1 image_size = (512, 512) -# model config +# Define model model = dict( type="PixArt-XL/2", space_scale=1.0, @@ -27,7 +26,7 @@ scheduler = dict( ) dtype = "fp16" -# prompts +# Others batch_size = 2 seed = 42 prompt_path = "./assets/texts/t2i_samples.txt" diff --git a/configs/pixart/train/16x256x256.py b/configs/pixart/train/16x256x256.py index 4db54ff..6819573 100644 --- a/configs/pixart/train/16x256x256.py +++ b/configs/pixart/train/16x256x256.py @@ -1,22 +1,20 @@ -# sample size num_frames = 16 frame_interval = 3 image_size = (256, 256) -# dataset +# Define dataset root = None -# data_path = "/mnt/hdd/data/csv/bak_00/pexels_inter4k_fmin_48_rp.csv" -data_path = "/mnt/hdd/data/csv/ucf101_videos.csv" +data_path = "CSV_PATH" use_image_transform = False num_workers = 4 -# acceleration -dtype = "fp16" +# Define acceleration +dtype = "bf16" grad_checkpoint = True plugin = "zero2" sp_size = 1 -# model config +# Define model model = dict( type="PixArt-XL/2", space_scale=0.5, @@ -40,7 +38,7 @@ scheduler = dict( timestep_respacing="", ) -# runtime +# Others seed = 42 outputs = "outputs" wandb = False diff --git a/configs/pixart/train/1x512x512.py b/configs/pixart/train/1x512x512.py index 56dc00a..619c9aa 100644 --- a/configs/pixart/train/1x512x512.py +++ b/configs/pixart/train/1x512x512.py @@ -1,21 +1,20 @@ -# sample size num_frames = 1 frame_interval = 1 image_size = (512, 512) -# dataset +# Define dataset root = None -data_path = "/mnt/hdd/data/csv/imagenet_train.csv" +data_path = "CSV_PATH" use_image_transform = True num_workers = 4 -# acceleration -dtype = "fp16" +# Define acceleration +dtype = "bf16" grad_checkpoint = True plugin = "zero2" sp_size = 1 -# model config +# Define model model = dict( type="PixArt-XL/2", space_scale=1.0, @@ -40,7 +39,7 @@ scheduler = dict( timestep_respacing="", ) -# runtime +# Others seed = 42 outputs = "outputs" wandb = False