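Update configs: placeholder paths (CSV_PATH, PRETRAINED_MODEL), bf16 dtype in dataset configs, reworded section comments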

This commit is contained in:
pxy 2024-03-16 17:26:29 +08:00
parent 5880d01ee3
commit 2af0f79f6d
14 changed files with 49 additions and 63 deletions

View file

@@ -7,7 +7,7 @@ image_size = (256, 256)
model = dict(
type="DiT-XL/2",
condition="text",
from_pretrained="YOUR_MODEL_PATH",
from_pretrained="PRETRAINED_MODEL",
)
vae = dict(
type="VideoAutoencoderKL",

View file

@@ -1,9 +1,8 @@
# sample size
num_frames = 1
fps = 1
image_size = (256, 256)
# model config
# Define model
model = dict(
type="DiT-XL/2",
no_temporal_pos_emb=True,
@@ -25,7 +24,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/imagenet_id.txt"

View file

@@ -1,14 +1,13 @@
# sample size
num_frames = 1
fps = 1
image_size = (256, 256)
# model config
# Define model
model = dict(
type="DiT-XL/2",
no_temporal_pos_emb=True,
condition="text",
from_pretrained="YOUR_MODEL_PATH",
from_pretrained="PRETRAINED_MODEL",
)
vae = dict(
type="VideoAutoencoderKL",
@@ -26,7 +25,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/imagenet_labels.txt"

View file

@@ -1,21 +1,20 @@
# sample size
num_frames = 16
frame_interval = 3
image_size = (256, 256)
# dataset
# Define dataset
root = None
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
data_path = "CSV_PATH"
use_image_transform = False
num_workers = 4
# acceleration
dtype = "fp16"
# Define acceleration
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1
# model config
# Define model
model = dict(
type="DiT-XL/2",
from_pretrained="DiT-XL-2-256x256.pt",
@@ -36,7 +35,7 @@ scheduler = dict(
timestep_respacing="",
)
# runtime
# Others
seed = 42
outputs = "outputs"
wandb = False

View file

@@ -1,21 +1,20 @@
# sample size
num_frames = 1
frame_interval = 1
image_size = (256, 256)
# dataset
# Define dataset
root = None
data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
data_path = "CSV_PATH"
use_image_transform = True
num_workers = 4
# acceleration
dtype = "fp16"
# Define acceleration
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1
# model config
# Define model
model = dict(
type="DiT-XL/2",
no_temporal_pos_emb=True,
@@ -36,7 +35,7 @@ scheduler = dict(
timestep_respacing="",
)
# runtime
# Others
seed = 42
outputs = "outputs"
wandb = False

View file

@@ -1,9 +1,8 @@
# sample size
num_frames = 16
fps = 8
image_size = (256, 256)
# model config
# Define model
model = dict(
type="Latte-XL/2",
condition="label_101",
@@ -24,7 +23,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/ucf101_id.txt"

View file

@@ -1,13 +1,12 @@
# sample size
num_frames = 16
fps = 8
image_size = (256, 256)
# model config
# Define model
model = dict(
type="Latte-XL/2",
condition="text",
from_pretrained="YOUR_MODEL_PATH",
from_pretrained="PRETRAINED_MODEL",
)
vae = dict(
type="VideoAutoencoderKL",
@@ -25,7 +24,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/ucf101_labels.txt"

View file

@@ -1,21 +1,20 @@
# sample size
num_frames = 16
frame_interval = 3
image_size = (256, 256)
# dataset
# Define dataset
root = None
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
data_path = "CSV_PATH"
use_image_transform = False
num_workers = 4
# acceleration
dtype = "fp16"
# Define acceleration
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1
# model config
# Define model
model = dict(
type="Latte-XL/2",
enable_flashattn=True,
@@ -35,7 +34,7 @@ scheduler = dict(
timestep_respacing="",
)
# runtime
# Others
seed = 42
outputs = "outputs"
wandb = False

View file

@@ -1,9 +1,8 @@
# sample size
num_frames = 16
fps = 8
image_size = (256, 256)
# model config
# Define model
model = dict(
type="PixArt-XL/2",
space_scale=0.5,
@@ -26,7 +25,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/t2v_samples.txt"

View file

@@ -1,10 +1,9 @@
# sample size
num_frames = 1
fps = 1
image_size = (1920, 512)
multi_resolution = True
# model config
# Define model
model = dict(
type="PixArtMS-XL/2",
space_scale=2.0,
@@ -28,7 +27,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/t2i_samples.txt"

View file

@@ -1,9 +1,8 @@
# sample size
num_frames = 1
fps = 1
image_size = (256, 256)
# model config
# Define model
model = dict(
type="PixArt-XL/2",
space_scale=1.0,
@@ -29,7 +28,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/t2i_samples.txt"

View file

@@ -1,9 +1,8 @@
# sample size
num_frames = 1
fps = 1
image_size = (512, 512)
# model config
# Define model
model = dict(
type="PixArt-XL/2",
space_scale=1.0,
@@ -27,7 +26,7 @@ scheduler = dict(
)
dtype = "fp16"
# prompts
# Others
batch_size = 2
seed = 42
prompt_path = "./assets/texts/t2i_samples.txt"

View file

@@ -1,22 +1,20 @@
# sample size
num_frames = 16
frame_interval = 3
image_size = (256, 256)
# dataset
# Define dataset
root = None
# data_path = "/mnt/hdd/data/csv/bak_00/pexels_inter4k_fmin_48_rp.csv"
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
data_path = "CSV_PATH"
use_image_transform = False
num_workers = 4
# acceleration
dtype = "fp16"
# Define acceleration
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1
# model config
# Define model
model = dict(
type="PixArt-XL/2",
space_scale=0.5,
@@ -40,7 +38,7 @@ scheduler = dict(
timestep_respacing="",
)
# runtime
# Others
seed = 42
outputs = "outputs"
wandb = False

View file

@@ -1,21 +1,20 @@
# sample size
num_frames = 1
frame_interval = 1
image_size = (512, 512)
# dataset
# Define dataset
root = None
data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
data_path = "CSV_PATH"
use_image_transform = True
num_workers = 4
# acceleration
dtype = "fp16"
# Define acceleration
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1
# model config
# Define model
model = dict(
type="PixArt-XL/2",
space_scale=1.0,
@@ -40,7 +39,7 @@ scheduler = dict(
timestep_respacing="",
)
# runtime
# Others
seed = 42
outputs = "outputs"
wandb = False