[exp] update config

This commit is contained in:
Zangwei Zheng 2024-05-07 14:17:10 +08:00
parent 10657621ef
commit 36486a4bee
5 changed files with 72 additions and 17 deletions

View file

@ -0,0 +1,56 @@
# Flat configuration module: every top-level name below is a plain setting.
# NOTE(review): presumably loaded by an image-conditioned sampling script --
# confirm against the caller.

# ---- Output / run settings ----
image_size = (240, 426)
num_frames = 34
fps = 24
frame_interval = 1
prompt_path = None  # no prompt file; the inline `prompt` list below is defined instead
save_dir = "./samples/samples/"
seed = 42
batch_size = 1
multi_resolution = "STDiT2"
dtype = "bf16"

# ---- Condition ----
# The single prompt is a text description immediately followed (no separator)
# by a JSON-style object carrying "reference_path" and "mask_strategy".
# The pieces below are adjacent string literals, so they are joined into one
# string at compile time.
prompt = [
    "Drone view of waves crashing against the rugged cliffs along Big Sur's garay point beach. "
    "The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. "
    "A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff's edge. "
    "The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. "
    "This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway."
    '{"reference_path": "assets/images/condition/cliff.png", "mask_strategy": "0,0,0,0;0,0,0,1;0,0,0,2;0,0,0,3;0,0,0,4"}',
]
loop = 1
condition_frame_length = 4

# ---- Model definition ----
model = {
    "type": "STDiT3-XL/2",
    "from_pretrained": None,
    "qk_norm": True,
    "enable_flashattn": True,
    "enable_layernorm_kernel": True,
}

# ---- VAE: a pipeline entry with nested 2D and temporal sub-configs ----
vae = {
    "type": "VideoAutoencoderPipeline",
    "from_pretrained": "pretrained_models/vae-v2",
    "micro_frame_size": 17,
    "vae_2d": {
        "type": "VideoAutoencoderKL",
        "from_pretrained": "PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers",
        "subfolder": "vae",
        "micro_batch_size": 4,
        "local_files_only": True,
    },
    "vae_temporal": {
        "type": "VAE_Temporal_SD",
        "from_pretrained": None,
    },
}

# ---- Text encoder ----
text_encoder = {
    "type": "t5",
    "from_pretrained": "DeepFloyd/t5-v1_1-xxl",
    "model_max_length": 300,
    "local_files_only": True,
}

# ---- Scheduler ----
scheduler = {
    "type": "rflow",
    "use_discrete_timesteps": False,
    "use_timestep_transform": True,
    "num_sampling_steps": 30,
    "cfg_scale": 4.5,
}

View file

@ -1,6 +1,6 @@
image_size = (240, 426)
num_frames = 34
fps = 30
fps = 24
frame_interval = 1
prompt_path = "./assets/texts/t2v_samples.txt"

View file

@ -19,7 +19,7 @@ bucket_config = { # 20s/it
"1024": {1: (0.1, 20), 51: (0.3, 2), 102: (0.3, 1), 204: (0.0, None)},
# ---
"1080p": {1: (0.1, 10)},
# # ---
# ---
"2048": {1: (0.1, 5)},
}
grad_checkpoint = True
@ -41,8 +41,8 @@ model = dict(
)
vae = dict(
type="VideoAutoencoderPipeline",
from_pretrained="pretrained_models/vae-v2",
micro_frame_size=16,
from_pretrained="pretrained_models/vae-v3",
micro_frame_size=17,
vae_2d=dict(
type="VideoAutoencoderKL",
from_pretrained="PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers",
@ -71,16 +71,16 @@ scheduler = dict(
# Mask settings
mask_ratios = {
"mask_random": 0.4,
"mask_random": 0.2,
"mask_intepolate": 0.01,
"mask_quarter_random": 0.01,
"mask_quarter_head": 0.01,
"mask_quarter_tail": 0.01,
"mask_quarter_head_tail": 0.01,
"mask_image_random": 0.01,
"mask_image_head": 0.01,
"mask_image_tail": 0.01,
"mask_image_head_tail": 0.01,
"mask_image_random": 0.05,
"mask_image_head": 0.1,
"mask_image_tail": 0.05,
"mask_image_head_tail": 0.05,
}
# Log settings

View file

@ -36,11 +36,10 @@ model = dict(
qk_norm=True,
enable_flashattn=True,
enable_layernorm_kernel=True,
only_train_temporal=True,
)
vae = dict(
type="VideoAutoencoderPipeline",
from_pretrained="pretrained_models/vae-v2",
from_pretrained="pretrained_models/vae-v3",
micro_frame_size=17,
vae_2d=dict(
type="VideoAutoencoderKL",
@ -70,16 +69,16 @@ scheduler = dict(
# Mask settings
mask_ratios = {
"mask_random": 0.4,
"mask_random": 0.2,
"mask_intepolate": 0.01,
"mask_quarter_random": 0.01,
"mask_quarter_head": 0.01,
"mask_quarter_tail": 0.01,
"mask_quarter_head_tail": 0.01,
"mask_image_random": 0.01,
"mask_image_head": 0.01,
"mask_image_tail": 0.01,
"mask_image_head_tail": 0.01,
"mask_image_random": 0.05,
"mask_image_head": 0.1,
"mask_image_tail": 0.05,
"mask_image_head_tail": 0.05,
}
# Log settings

View file

@ -291,7 +291,7 @@ def main():
additional_args=model_args,
mask=masks, # scheduler must support mask
)
samples = vae.decode(samples.to(dtype))
samples = vae.decode(samples.to(dtype), num_frames=cfg.num_frames)
video_clips.append(samples)
# 4.7. save video