mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-05-20 17:35:58 +02:00
complete masked demo
This commit is contained in:
parent
dcbacb03a6
commit
7d478f5094
|
|
@ -35,13 +35,13 @@ dtype = "fp16"
|
|||
prompt_path = None
|
||||
prompt = [
|
||||
"Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.",
|
||||
"A stylish woman walks down a Tokyo street filled with warm glowing neon and animated city signage. She wears a black leather jacket, a long red dress, and black boots, and carries a black purse. She wears sunglasses and red lipstick. She walks confidently and casually. The street is damp and reflective, creating a mirror effect of the colorful lights. Many pedestrians walk about.",
|
||||
"Pirate ship in a cosmic maelstrom nebula.",
|
||||
]
|
||||
|
||||
loop = 1
|
||||
# condition_frame_length = 4
|
||||
reference_path = ["assets/images/condition/cliff.png"]
|
||||
mask_strategy = ["0,0,0,1,0"] # valid when reference_path is not None
|
||||
reference_path = ["assets/images/condition/cliff.png", "assets/images/condition/ship.png"]
|
||||
mask_strategy = ["0,0,0,1,0", "0,0,0,1,0"] # valid when reference_path is not None
|
||||
# (loop id, ref id, ref start, length, target start)
|
||||
|
||||
# Others
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
# scripts/inference_long.py
|
||||
num_frames = 16
|
||||
fps = 24 // 3
|
||||
image_size = (512, 512)
|
||||
|
||||
# Define model
|
||||
model = dict(
|
||||
type="STDiT-XL/2",
|
||||
space_scale=1.0,
|
||||
time_scale=1.0,
|
||||
use_x_mask=True,
|
||||
enable_flashattn=True,
|
||||
enable_layernorm_kernel=True,
|
||||
from_pretrained=None,
|
||||
)
|
||||
vae = dict(
|
||||
type="VideoAutoencoderKL",
|
||||
from_pretrained="stabilityai/sd-vae-ft-ema",
|
||||
micro_batch_size=4,
|
||||
)
|
||||
text_encoder = dict(
|
||||
type="t5",
|
||||
from_pretrained="DeepFloyd/t5-v1_1-xxl",
|
||||
model_max_length=120,
|
||||
)
|
||||
scheduler = dict(
|
||||
type="iddpm",
|
||||
# type="dpm-solver",
|
||||
num_sampling_steps=100,
|
||||
cfg_scale=7.0,
|
||||
)
|
||||
dtype = "fp16"
|
||||
|
||||
# Condition
|
||||
prompt_path = None
|
||||
prompt = [
|
||||
"Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.",
|
||||
"A sad small cactus with in the Sahara desert becomes happy.",
|
||||
]
|
||||
|
||||
loop = 1
|
||||
condition_frame_length = 4
|
||||
reference_path = [
|
||||
"assets/images/condition/cliff.png",
|
||||
"assets/images/condition/cactus-sad.png;assets/images/condition/cactus-happy.png",
|
||||
]
|
||||
mask_strategy = ["0,0,0,1,0;0,0,0,1,-1", "0,0,0,1,0;0,1,0,1,-1"] # valid when reference_path is not None
|
||||
# (loop id, ref id, ref start, length, target start)
|
||||
|
||||
# Others
|
||||
batch_size = 2
|
||||
seed = 42
|
||||
save_dir = "./outputs/samples/"
|
||||
|
|
@ -34,13 +34,14 @@ dtype = "fp16"
|
|||
# Condition
|
||||
prompt_path = None
|
||||
prompt = [
|
||||
"Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.",
|
||||
"In an ornate, historical hall, a massive tidal wave peaks and begins to crash. Two surfers, seizing the moment, skillfully navigate the face of the wave."
|
||||
]
|
||||
|
||||
loop = 5
|
||||
loop = 10
|
||||
condition_frame_length = 4
|
||||
reference_path = ["assets/images/condition/wave.png"]
|
||||
mask_strategy = ["0,0,0,1,0"] # valid when reference_path is not None
|
||||
reference_path = ["assets/images/condition/cliff.png", "assets/images/condition/wave.png"]
|
||||
mask_strategy = ["0,0,0,1,0", "0,0,0,1,0"] # valid when reference_path is not None
|
||||
# (loop id, ref id, ref start, length, target start)
|
||||
|
||||
# Others
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ vae = dict(
|
|||
)
|
||||
text_encoder = dict(
|
||||
type="t5",
|
||||
from_pretrained="./pretrained_models/t5_ckpts",
|
||||
from_pretrained="DeepFloyd/t5-v1_1-xxl",
|
||||
model_max_length=120,
|
||||
)
|
||||
scheduler = dict(
|
||||
|
|
@ -28,7 +28,9 @@ dtype = "fp16"
|
|||
|
||||
# prompt_path = "./assets/texts/t2i_samples.txt"
|
||||
prompt = [
|
||||
"A stylish woman walks down a Tokyo street filled with warm glowing neon and animated city signage. She wears a black leather jacket, a long red dress, and black boots, and carries a black purse. She wears sunglasses and red lipstick. She walks confidently and casually. The street is damp and reflective, creating a mirror effect of the colorful lights. Many pedestrians walk about.",
|
||||
"Pirate ship trapped in a cosmic maelstrom nebula.",
|
||||
"A small cactus with a happy face in the Sahara desert.",
|
||||
"A small cactus with a sad face in the Sahara desert.",
|
||||
]
|
||||
|
||||
# Others
|
||||
|
|
|
|||
|
|
@ -132,8 +132,10 @@ class PixArt(nn.Module):
|
|||
time_scale=1.0,
|
||||
enable_flashattn=False,
|
||||
enable_layernorm_kernel=False,
|
||||
enable_sequence_parallelism=False,
|
||||
):
|
||||
super().__init__()
|
||||
assert enable_sequence_parallelism is False, "Sequence parallelism is not supported in this version."
|
||||
self.pred_sigma = pred_sigma
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = in_channels * 2 if pred_sigma else in_channels
|
||||
|
|
|
|||
Loading…
Reference in a new issue