From 30e276166cbb3567b04db89ef0a30a297c7050df Mon Sep 17 00:00:00 2001 From: zhengzangw Date: Mon, 17 Jun 2024 13:42:27 +0000 Subject: [PATCH 1/2] update --- README.md | 11 ++++++++++- opensora/utils/config_utils.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1bbedfc..0d26ea0 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,7 @@ docker run -ti --gpus all -v {MOUNT_DIR}:/data opensora | Model | Model Size | Data | #iterations | Batch Size | URL | | --------- | ---------- | ---- | ----------- | ---------- | ------------------------------------------------------------- | | Diffusion | 1.1B | 30M | 70k | Dynamic | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v3) | -| VAE | 384M | 3M | 1.18M | 8 | [:link:](https://huggingface.co/hpcai-tech/OpenSora-VAE-v1.2) | +| VAE | 384M | 3M | 1.18M | 8 | [:link:](https://huggingface.co/hpcai-tech/OpenSora-VAE-v1.2) | See our **[report 1.2](docs/report_03.md)** for more infomation. @@ -327,6 +327,15 @@ python scripts/inference.py configs/opensora-v1-2/inference/sample.py \ For image to video generation and other functionalities, the API is compatible with Open-Sora 1.1. See [here](docs/commands.md) for more instructions. +If your installation does not contain `apex` and `flash-attn`, you need to disable them in the config file, or via the following command. + +```bash +python scripts/inference.py configs/opensora-v1-2/inference/sample.py \ + --num-frames 4s --resolution 720p \ + --layernorm-kernel False --flash-attn False \ + --prompt "a beautiful waterfall" +``` + ### GPT-4o Prompt Refinement We find that GPT-4o can refine the prompt and improve the quality of the generated video. With this feature, you can also use other language (e.g., Chinese) as the prompt. 
To enable this feature, you need prepare your openai api key in the environment: diff --git a/opensora/utils/config_utils.py b/opensora/utils/config_utils.py index f854124..f20138b 100644 --- a/opensora/utils/config_utils.py +++ b/opensora/utils/config_utils.py @@ -28,6 +28,7 @@ def parse_args(training=False): parser.add_argument("--layernorm-kernel", default=None, type=str2bool, help="enable layernorm kernel") parser.add_argument("--resolution", default=None, type=str, help="multi resolution") parser.add_argument("--data-path", default=None, type=str, help="path to data csv") + parser.add_argument("--dtype", default=None, type=str, help="data type") # ====================================================== # Inference From 9a5ba168fb9cea4f11f458c74146eae07eee820b Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Mon, 17 Jun 2024 21:45:58 +0800 Subject: [PATCH 2/2] [inference] renamed duplicated variables (#153) --- scripts/inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/inference.py b/scripts/inference.py index 5e02356..2a0f4a0 100644 --- a/scripts/inference.py +++ b/scripts/inference.py @@ -221,11 +221,11 @@ def main(): # recover the prompt list batched_prompt_segment_list = [] - start_idx = 0 + segment_start_idx = 0 all_prompts = broadcast_obj_list[0] for num_segment in prompt_segment_length: - batched_prompt_segment_list.append(all_prompts[start_idx : start_idx + num_segment]) - start_idx += num_segment + batched_prompt_segment_list.append(all_prompts[segment_start_idx : segment_start_idx + num_segment]) + segment_start_idx += num_segment # 2. append score for idx, prompt_segment_list in enumerate(batched_prompt_segment_list):