update config

2026-04-11 13:14:44 +02:00 · 2024-03-16 17:26:29 +08:00 · 2024-03-16 17:26:29 +08:00 · 2af0f79f6d
commit 2af0f79f6d
parent 5880d01ee3
14 changed files with 49 additions and 63 deletions
--- a/configs/dit/inference/16x256x256.py
+++ b/configs/dit/inference/16x256x256.py
@@ -7,7 +7,7 @@ image_size = (256, 256)
 model = dict(
    type="DiT-XL/2",
    condition="text",
-    from_pretrained="YOUR_MODEL_PATH",
+    from_pretrained="PRETRAINED_MODEL",
 )
 vae = dict(
    type="VideoAutoencoderKL",
--- a/configs/dit/inference/1x256x256-class.py
+++ b/configs/dit/inference/1x256x256-class.py
@@ -1,9 +1,8 @@
-# sample size
 num_frames = 1
 fps = 1
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="DiT-XL/2",
    no_temporal_pos_emb=True,
@@ -25,7 +24,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/imagenet_id.txt"
--- a/configs/dit/inference/1x256x256.py
+++ b/configs/dit/inference/1x256x256.py
@@ -1,14 +1,13 @@
-# sample size
 num_frames = 1
 fps = 1
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="DiT-XL/2",
    no_temporal_pos_emb=True,
    condition="text",
-    from_pretrained="YOUR_MODEL_PATH",
+    from_pretrained="PRETRAINED_MODEL",
 )
 vae = dict(
    type="VideoAutoencoderKL",
@@ -26,7 +25,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/imagenet_labels.txt"
--- a/configs/dit/train/16x256x256.py
+++ b/configs/dit/train/16x256x256.py
@@ -1,21 +1,20 @@
-# sample size
 num_frames = 16
 frame_interval = 3
 image_size = (256, 256)

-# dataset
+# Define dataset
 root = None
-data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
+data_path = "CSV_PATH"
 use_image_transform = False
 num_workers = 4

-# acceleration
-dtype = "fp16"
+# Define acceleration
+dtype = "bf16"
 grad_checkpoint = True
 plugin = "zero2"
 sp_size = 1

-# model config
+# Define model
 model = dict(
    type="DiT-XL/2",
    from_pretrained="DiT-XL-2-256x256.pt",
@@ -36,7 +35,7 @@ scheduler = dict(
    timestep_respacing="",
 )

-# runtime
+# Others
 seed = 42
 outputs = "outputs"
 wandb = False
--- a/configs/dit/train/1x256x256.py
+++ b/configs/dit/train/1x256x256.py
@@ -1,21 +1,20 @@
-# sample size
 num_frames = 1
 frame_interval = 1
 image_size = (256, 256)

-# dataset
+# Define dataset
 root = None
-data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
+data_path = "CSV_PATH"
 use_image_transform = True
 num_workers = 4

-# acceleration
-dtype = "fp16"
+# Define acceleration
+dtype = "bf16"
 grad_checkpoint = True
 plugin = "zero2"
 sp_size = 1

-# model config
+# Define model
 model = dict(
    type="DiT-XL/2",
    no_temporal_pos_emb=True,
@@ -36,7 +35,7 @@ scheduler = dict(
    timestep_respacing="",
 )

-# runtime
+# Others
 seed = 42
 outputs = "outputs"
 wandb = False
--- a/configs/latte/inference/16x256x256-class.py
+++ b/configs/latte/inference/16x256x256-class.py
@@ -1,9 +1,8 @@
-# sample size
 num_frames = 16
 fps = 8
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="Latte-XL/2",
    condition="label_101",
@@ -24,7 +23,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/ucf101_id.txt"
--- a/configs/latte/inference/16x256x256.py
+++ b/configs/latte/inference/16x256x256.py
@@ -1,13 +1,12 @@
-# sample size
 num_frames = 16
 fps = 8
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="Latte-XL/2",
    condition="text",
-    from_pretrained="YOUR_MODEL_PATH",
+    from_pretrained="PRETRAINED_MODEL",
 )
 vae = dict(
    type="VideoAutoencoderKL",
@@ -25,7 +24,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/ucf101_labels.txt"
--- a/configs/latte/train/16x256x256.py
+++ b/configs/latte/train/16x256x256.py
@@ -1,21 +1,20 @@
-# sample size
 num_frames = 16
 frame_interval = 3
 image_size = (256, 256)

-# dataset
+# Define dataset
 root = None
-data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
+data_path = "CSV_PATH"
 use_image_transform = False
 num_workers = 4

-# acceleration
-dtype = "fp16"
+# Define acceleration
+dtype = "bf16"
 grad_checkpoint = True
 plugin = "zero2"
 sp_size = 1

-# model config
+# Define model
 model = dict(
    type="Latte-XL/2",
    enable_flashattn=True,
@@ -35,7 +34,7 @@ scheduler = dict(
    timestep_respacing="",
 )

-# runtime
+# Others
 seed = 42
 outputs = "outputs"
 wandb = False
--- a/configs/pixart/inference/16x256x256.py
+++ b/configs/pixart/inference/16x256x256.py
@@ -1,9 +1,8 @@
-# sample size
 num_frames = 16
 fps = 8
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="PixArt-XL/2",
    space_scale=0.5,
@@ -26,7 +25,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/t2v_samples.txt"
--- a/configs/pixart/inference/1x1024MS.py
+++ b/configs/pixart/inference/1x1024MS.py
@@ -1,10 +1,9 @@
-# sample size
 num_frames = 1
 fps = 1
 image_size = (1920, 512)
 multi_resolution = True

-# model config
+# Define model
 model = dict(
    type="PixArtMS-XL/2",
    space_scale=2.0,
@@ -28,7 +27,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/t2i_samples.txt"
--- a/configs/pixart/inference/1x256x256.py
+++ b/configs/pixart/inference/1x256x256.py
@@ -1,9 +1,8 @@
-# sample size
 num_frames = 1
 fps = 1
 image_size = (256, 256)

-# model config
+# Define model
 model = dict(
    type="PixArt-XL/2",
    space_scale=1.0,
@@ -29,7 +28,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/t2i_samples.txt"
--- a/configs/pixart/inference/1x512x512.py
+++ b/configs/pixart/inference/1x512x512.py
@@ -1,9 +1,8 @@
-# sample size
 num_frames = 1
 fps = 1
 image_size = (512, 512)

-# model config
+# Define model
 model = dict(
    type="PixArt-XL/2",
    space_scale=1.0,
@@ -27,7 +26,7 @@ scheduler = dict(
 )
 dtype = "fp16"

-# prompts
+# Others
 batch_size = 2
 seed = 42
 prompt_path = "./assets/texts/t2i_samples.txt"
--- a/configs/pixart/train/16x256x256.py
+++ b/configs/pixart/train/16x256x256.py
@@ -1,22 +1,20 @@
-# sample size
 num_frames = 16
 frame_interval = 3
 image_size = (256, 256)

-# dataset
+# Define dataset
 root = None
-# data_path = "/mnt/hdd/data/csv/bak_00/pexels_inter4k_fmin_48_rp.csv"
-data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
+data_path = "CSV_PATH"
 use_image_transform = False
 num_workers = 4

-# acceleration
-dtype = "fp16"
+# Define acceleration
+dtype = "bf16"
 grad_checkpoint = True
 plugin = "zero2"
 sp_size = 1

-# model config
+# Define model
 model = dict(
    type="PixArt-XL/2",
    space_scale=0.5,
@@ -40,7 +38,7 @@ scheduler = dict(
    timestep_respacing="",
 )

-# runtime
+# Others
 seed = 42
 outputs = "outputs"
 wandb = False
--- a/configs/pixart/train/1x512x512.py
+++ b/configs/pixart/train/1x512x512.py
@@ -1,21 +1,20 @@
-# sample size
 num_frames = 1
 frame_interval = 1
 image_size = (512, 512)

-# dataset
+# Define dataset
 root = None
-data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
+data_path = "CSV_PATH"
 use_image_transform = True
 num_workers = 4

-# acceleration
-dtype = "fp16"
+# Define acceleration
+dtype = "bf16"
 grad_checkpoint = True
 plugin = "zero2"
 sp_size = 1

-# model config
+# Define model
 model = dict(
    type="PixArt-XL/2",
    space_scale=1.0,
@@ -40,7 +39,7 @@ scheduler = dict(
    timestep_respacing="",
 )

-# runtime
+# Others
 seed = 42
 outputs = "outputs"
 wandb = False