[feat] support for random sample

This commit is contained in:
zhengzangw 2024-06-14 08:50:40 +00:00
parent c93e846017
commit c5e0475749
2 changed files with 33 additions and 12 deletions

View file

@ -102,7 +102,7 @@ In this stage, we collect 2M video clips with 5k hours from all kinds of sources
While MiraData and Vript have captions from GPT, we use [PLLaVA](https://github.com/magic-research/PLLaVA) to caption the remaining videos. We use 4 frames to caption each video and choose the 13B version. PLLaVA is more efficient and better fine-tuned for video than LLaVA. The accelerated PLLaVA is released in our tools.
We mainly train on 480p and 720p in this stage. The training config locates in [stage3.py](/configs/opensora-v1-2/train/stage3.py). We train the model for 15k steps, which is approximately 2 epochs.
We mainly train on 480p and 720p in this stage, with a mask ratio of 25%. The training config is located in [stage3.py](/configs/opensora-v1-2/train/stage3.py). We train the model for 15k steps, which is approximately 2 epochs.
## Easy and effective model conditioning

View file

@ -201,14 +201,21 @@ def dframe_to_frame(num):
# Module-level state for the OpenAI helpers; each starts as None and is
# assigned by the function that uses it.
OPENAI_CLIENT = None

# Cached system prompt for refining a user-supplied prompt (built on first use).
REFINE_PROMPTS = None
# File holding the example prompts that are spliced into both templates below.
REFINE_PROMPTS_PATH = "assets/texts/t2v_pllava.txt"
# The single "{}" placeholder receives the newline-joined example prompts.
REFINE_PROMPTS_TEMPLATE = """
You need to refine user's input prompt. The user's input prompt is used for video generation task. You need to refine the user's prompt to make it more suitable for the task. Here are some examples of refined prompts:
{}
The refined prompt should pay attention to all objects in the video. The description should be useful for AI to re-generate the video. The description should be no more than six sentences. The refined prompt should be in English.
"""

# Cached system prompt for generating a prompt from scratch (built on first use).
RANDOM_PROMPTS = None
# Same layout as REFINE_PROMPTS_TEMPLATE: "{}" receives the example prompts.
RANDOM_PROMPTS_TEMPLATE = """
You need to generate one input prompt for video generation task. The prompt should be suitable for the task. Here are some examples of refined prompts:
{}
The prompt should pay attention to all objects in the video. The description should be useful for AI to re-generate the video. The description should be no more than six sentences. The prompt should be in English.
"""

# Backward-compatible aliases for the pre-rename names (including the original
# misspelling) so code that still references them keeps resolving.
SYS_PROMPTS = None
SYS_PROMPTS_PATH = REFINE_PROMPTS_PATH
SYS_RPOMPTS_TEMPLATE = REFINE_PROMPTS_TEMPLATE
def get_openai_response(sys_prompt, usr_prompt, model="gpt-4o"):
@ -235,13 +242,23 @@ def get_openai_response(sys_prompt, usr_prompt, model="gpt-4o"):
return completion.choices[0].message.content
def refine_prompt_by_openai(prompt):
global SYS_PROMPTS
if SYS_PROMPTS is None:
examples = load_prompts(SYS_PROMPTS_PATH)
SYS_PROMPTS = SYS_RPOMPTS_TEMPLATE.format("\n".join(examples))
def get_random_prompt_by_openai():
    """Ask the OpenAI model to produce one video-generation prompt from scratch.

    The system prompt is built on first use by filling ``RANDOM_PROMPTS_TEMPLATE``
    with the example prompts loaded from ``REFINE_PROMPTS_PATH``; it is then kept
    in the module-level ``RANDOM_PROMPTS`` so later calls reuse it.

    Returns:
        The value returned by ``get_openai_response`` for this request.
    """
    global RANDOM_PROMPTS
    if RANDOM_PROMPTS is None:
        examples = load_prompts(REFINE_PROMPTS_PATH)
        RANDOM_PROMPTS = RANDOM_PROMPTS_TEMPLATE.format("\n".join(examples))
    # This function takes no user prompt, so a fixed instruction is sent as the
    # user message. Exactly one request is made per call.
    return get_openai_response(RANDOM_PROMPTS, "Generate one example.")
def refine_prompt_by_openai(prompt):
    """Send ``prompt`` to the OpenAI model for refinement and return the reply.

    On the first call the system prompt is assembled from
    ``REFINE_PROMPTS_TEMPLATE`` and the examples loaded from
    ``REFINE_PROMPTS_PATH``, then stored in the module-level ``REFINE_PROMPTS``
    so it is not rebuilt on later calls.

    Args:
        prompt: The user's prompt text, passed through as the user message.

    Returns:
        The value returned by ``get_openai_response``.
    """
    global REFINE_PROMPTS
    if REFINE_PROMPTS is None:
        # One example per line inside the template's placeholder.
        example_block = "\n".join(load_prompts(REFINE_PROMPTS_PATH))
        REFINE_PROMPTS = REFINE_PROMPTS_TEMPLATE.format(example_block)
    return get_openai_response(REFINE_PROMPTS, prompt)
@ -249,8 +266,12 @@ def refine_prompts_by_openai(prompts):
new_prompts = []
for prompt in prompts:
try:
new_prompt = refine_prompt_by_openai(prompt)
print(f"[Info] Refine prompt: {prompt} -> {new_prompt}")
if prompt.strip() == "":
new_prompt = get_random_prompt_by_openai()
print(f"[Info] Empty prompt detected, generate random prompt: {new_prompt}")
else:
new_prompt = refine_prompt_by_openai(prompt)
print(f"[Info] Refine prompt: {prompt} -> {new_prompt}")
new_prompts.append(new_prompt)
except Exception as e:
print(f"[Warning] Failed to refine prompt: {prompt} due to {e}")