mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-04-11 13:14:44 +02:00
update config
This commit is contained in:
parent
5880d01ee3
commit
2af0f79f6d
|
|
@ -7,7 +7,7 @@ image_size = (256, 256)
|
|||
model = dict(
|
||||
type="DiT-XL/2",
|
||||
condition="text",
|
||||
from_pretrained="YOUR_MODEL_PATH",
|
||||
from_pretrained="PRETRAINED_MODEL",
|
||||
)
|
||||
vae = dict(
|
||||
type="VideoAutoencoderKL",
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
fps = 1
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="DiT-XL/2",
|
||||
no_temporal_pos_emb=True,
|
||||
|
|
@ -25,7 +24,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/imagenet_id.txt"
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
fps = 1
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="DiT-XL/2",
|
||||
no_temporal_pos_emb=True,
|
||||
condition="text",
|
||||
from_pretrained="YOUR_MODEL_PATH",
|
||||
from_pretrained="PRETRAINED_MODEL",
|
||||
)
|
||||
vae = dict(
|
||||
type="VideoAutoencoderKL",
|
||||
|
|
@ -26,7 +25,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/imagenet_labels.txt"
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
frame_interval = 3
|
||||
image_size = (256, 256)
|
||||
|
||||
# dataset
|
||||
# Define dataset
|
||||
root = None
|
||||
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
|
||||
data_path = "CSV_PATH"
|
||||
use_image_transform = False
|
||||
num_workers = 4
|
||||
|
||||
# acceleration
|
||||
dtype = "fp16"
|
||||
# Define acceleration
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
sp_size = 1
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="DiT-XL/2",
|
||||
from_pretrained="DiT-XL-2-256x256.pt",
|
||||
|
|
@ -36,7 +35,7 @@ scheduler = dict(
|
|||
timestep_respacing="",
|
||||
)
|
||||
|
||||
# runtime
|
||||
# Others
|
||||
seed = 42
|
||||
outputs = "outputs"
|
||||
wandb = False
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
frame_interval = 1
|
||||
image_size = (256, 256)
|
||||
|
||||
# dataset
|
||||
# Define dataset
|
||||
root = None
|
||||
data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
|
||||
data_path = "CSV_PATH"
|
||||
use_image_transform = True
|
||||
num_workers = 4
|
||||
|
||||
# acceleration
|
||||
dtype = "fp16"
|
||||
# Define acceleration
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
sp_size = 1
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="DiT-XL/2",
|
||||
no_temporal_pos_emb=True,
|
||||
|
|
@ -36,7 +35,7 @@ scheduler = dict(
|
|||
timestep_respacing="",
|
||||
)
|
||||
|
||||
# runtime
|
||||
# Others
|
||||
seed = 42
|
||||
outputs = "outputs"
|
||||
wandb = False
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
fps = 8
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="Latte-XL/2",
|
||||
condition="label_101",
|
||||
|
|
@ -24,7 +23,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/ucf101_id.txt"
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
fps = 8
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="Latte-XL/2",
|
||||
condition="text",
|
||||
from_pretrained="YOUR_MODEL_PATH",
|
||||
from_pretrained="PRETRAINED_MODEL",
|
||||
)
|
||||
vae = dict(
|
||||
type="VideoAutoencoderKL",
|
||||
|
|
@ -25,7 +24,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/ucf101_labels.txt"
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
frame_interval = 3
|
||||
image_size = (256, 256)
|
||||
|
||||
# dataset
|
||||
# Define dataset
|
||||
root = None
|
||||
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
|
||||
data_path = "CSV_PATH"
|
||||
use_image_transform = False
|
||||
num_workers = 4
|
||||
|
||||
# acceleration
|
||||
dtype = "fp16"
|
||||
# Define acceleration
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
sp_size = 1
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="Latte-XL/2",
|
||||
enable_flashattn=True,
|
||||
|
|
@ -35,7 +34,7 @@ scheduler = dict(
|
|||
timestep_respacing="",
|
||||
)
|
||||
|
||||
# runtime
|
||||
# Others
|
||||
seed = 42
|
||||
outputs = "outputs"
|
||||
wandb = False
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
fps = 8
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArt-XL/2",
|
||||
space_scale=0.5,
|
||||
|
|
@ -26,7 +25,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/t2v_samples.txt"
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
fps = 1
|
||||
image_size = (1920, 512)
|
||||
multi_resolution = True
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArtMS-XL/2",
|
||||
space_scale=2.0,
|
||||
|
|
@ -28,7 +27,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/t2i_samples.txt"
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
fps = 1
|
||||
image_size = (256, 256)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArt-XL/2",
|
||||
space_scale=1.0,
|
||||
|
|
@ -29,7 +28,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/t2i_samples.txt"
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
fps = 1
|
||||
image_size = (512, 512)
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArt-XL/2",
|
||||
space_scale=1.0,
|
||||
|
|
@ -27,7 +26,7 @@ scheduler = dict(
|
|||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# prompts
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
prompt_path = "./assets/texts/t2i_samples.txt"
|
||||
|
|
|
|||
|
|
@ -1,22 +1,20 @@
|
|||
# sample size
|
||||
num_frames = 16
|
||||
frame_interval = 3
|
||||
image_size = (256, 256)
|
||||
|
||||
# dataset
|
||||
# Define dataset
|
||||
root = None
|
||||
# data_path = "/mnt/hdd/data/csv/bak_00/pexels_inter4k_fmin_48_rp.csv"
|
||||
data_path = "/mnt/hdd/data/csv/ucf101_videos.csv"
|
||||
data_path = "CSV_PATH"
|
||||
use_image_transform = False
|
||||
num_workers = 4
|
||||
|
||||
# acceleration
|
||||
dtype = "fp16"
|
||||
# Define acceleration
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
sp_size = 1
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArt-XL/2",
|
||||
space_scale=0.5,
|
||||
|
|
@ -40,7 +38,7 @@ scheduler = dict(
|
|||
timestep_respacing="",
|
||||
)
|
||||
|
||||
# runtime
|
||||
# Others
|
||||
seed = 42
|
||||
outputs = "outputs"
|
||||
wandb = False
|
||||
|
|
|
|||
|
|
@ -1,21 +1,20 @@
|
|||
# sample size
|
||||
num_frames = 1
|
||||
frame_interval = 1
|
||||
image_size = (512, 512)
|
||||
|
||||
# dataset
|
||||
# Define dataset
|
||||
root = None
|
||||
data_path = "/mnt/hdd/data/csv/imagenet_train.csv"
|
||||
data_path = "CSV_PATH"
|
||||
use_image_transform = True
|
||||
num_workers = 4
|
||||
|
||||
# acceleration
|
||||
dtype = "fp16"
|
||||
# Define acceleration
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
sp_size = 1
|
||||
|
||||
# model config
|
||||
# Define model
|
||||
model = dict(
|
||||
type="PixArt-XL/2",
|
||||
space_scale=1.0,
|
||||
|
|
@ -40,7 +39,7 @@ scheduler = dict(
|
|||
timestep_respacing="",
|
||||
)
|
||||
|
||||
# runtime
|
||||
# Others
|
||||
seed = 42
|
||||
outputs = "outputs"
|
||||
wandb = False
|
||||
|
|
|
|||
Loading…
Reference in a new issue