From 03198a12a7c9a4d9c21beb559398f2bb49cd9ad5 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 17 Jun 2024 03:13:32 +0000 Subject: [PATCH 1/8] format --- eval/sample.sh | 33 +++++++++++++++++++++++++++++---- eval/vbench/launch.sh | 21 ++++++++++++++++++++- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/eval/sample.sh b/eval/sample.sh index 0123309..1c52506 100644 --- a/eval/sample.sh +++ b/eval/sample.sh @@ -9,6 +9,10 @@ VBENCH_END_INDEX=$6 VBENCH_RES=$7 VBENCH_ASP_RATIO=$8 +NUM_SAMPLING_STEPS=$9 +FLOW=${10} +LLM_REFINE=${11} + echo "NUM_FRAMES=${NUM_FRAMES}" if [ -z "${NUM_FRAMES}" ]; then @@ -238,10 +242,31 @@ function run_vbench() { --image-size $VBENCH_H $VBENCH_W \ --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 else - eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ - --prompt-path assets/texts/VBench/all_dimension.txt \ - --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ - --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + if [ -z ${NUM_SAMPLING_STEPS} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + if [ -z ${FLOW} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + if [ -z ${LLM_REFINE} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + fi + fi + fi fi } diff --git a/eval/vbench/launch.sh b/eval/vbench/launch.sh index eedd9b3..c37526d 100644 --- a/eval/vbench/launch.sh +++ b/eval/vbench/launch.sh @@ -6,6 +6,10 @@ MODEL_NAME=$3 RES=$4 ASP_RATIO=$5 +NUM_SAMPLING_STEPS=$6 +FLOW=$7 +LLM_REFINE=$8 + if [[ $CKPT == *"ema"* ]]; then parentdir=$(dirname $CKPT) CKPT_BASE=$(basename $parentdir)_ema @@ -25,6 +29,21 @@ for i in "${!GPUS[@]}"; do then CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & else - CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + if [ -z ${NUM_SAMPLING_STEPS} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + if [ -z ${FLOW} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + if [ -z ${LLM_REFINE} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS} ${FLOW}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS} ${FLOW} ${LLM_REFINE}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + fi + fi + fi fi done From 1573dbbc01756fe9c05d389ef2859cd574405062 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 17 Jun 2024 03:50:04 +0000 Subject: [PATCH 2/8] format --- eval/sample.sh | 15 ++++++--- eval/vbench/calc_vbench.py | 65 ++++++++++++++++++++++---------------- eval/vbench/launch_calc.sh | 5 ++- 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/eval/sample.sh b/eval/sample.sh index 1c52506..eff1368 100644 --- a/eval/sample.sh +++ b/eval/sample.sh @@ -260,10 +260,17 @@ function run_vbench() { --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} \ --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 else - eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ - --prompt-path assets/texts/VBench/all_dimension.txt \ - --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \ - --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + if [ "${FLOW}" = "None" ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --llm-refine ${LLM_REFINE} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + fi fi fi fi diff --git a/eval/vbench/calc_vbench.py b/eval/vbench/calc_vbench.py index b2505bf..e5570a1 100644 --- a/eval/vbench/calc_vbench.py +++ b/eval/vbench/calc_vbench.py @@ -1,37 +1,46 @@ import argparse import os -from vbench import VBench -import torch import time +import torch + +from vbench import VBench + full_info_path = "eval/vbench/VBench_full_info.json" dimensions = [ - # Quality Score - "subject_consistency", - "background_consistency", - "motion_smoothness", - "dynamic_degree", - "aesthetic_quality", - "imaging_quality", - "temporal_flickering", - # Semantic Score - "object_class", - "multiple_objects", - "color", - "spatial_relationship", - "scene", - "temporal_style", - "overall_consistency", - "human_action", - "appearance_style", + # a: 10min + "subject_consistency", # 4min + "imaging_quality", # 6min + # b: 12min + "background_consistency", # 2min + "motion_smoothness", # 5min + "overall_consistency", # 2min + "human_action", # 3min + # c: 14min + "multiple_objects", # 14min + # d: 14min + "spatial_relationship", # 14min + # e: 12min + "object_class", # 12min + # f: 12min + "color", # 12min + # g: 10.5min + "aesthetic_quality", # 2.5min + "appearance_style", # 6min + "temporal_flickering", # 2min + # h: 9min + "scene", # 3min + "temporal_style", # 2min + "dynamic_degree", # 4min ] + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("video_folder", type=str) # samples/samples..._vbench/eval parser.add_argument("model_ckpt", type=str) - parser.add_argument("--start", type=int, default=0) # start index of dimension to be evaluated - parser.add_argument("--end", type=int, default=-1) # start index of dimension to be evaluated + parser.add_argument("--start", type=int, default=0) # start index of dimension to be evaluated + parser.add_argument("--end", type=int, default=-1) # start index of dimension to be evaluated args = parser.parse_args() return args @@ -44,23 +53,23 @@ if __name__ == "__main__": video_path = args.video_folder kwargs = {} - kwargs['imaging_quality_preprocessing_mode'] = 'longer' # use VBench/evaluate.py default + kwargs["imaging_quality_preprocessing_mode"] = "longer" # use VBench/evaluate.py default start_time = time.time() # NOTE: important to use torch.device("cuda"), else will have issue with object_class third_party module my_VBench = VBench(torch.device("cuda"), full_info_path, output_dir) - if args.end == -1: # adjust end accordingly + if args.end == -1: # adjust end accordingly args.end = len(dimensions) - for dim in dimensions[args.start:args.end]: + for dim in dimensions[args.start : args.end]: my_VBench.evaluate( videos_path=video_path, name=dim, local=False, read_frame=False, dimension_list=[dim], - mode='vbench_standard', - **kwargs + mode="vbench_standard", + **kwargs, ) - print("Runtime: %s seconds " % (time.time() - start_time)) \ No newline at end of file + print("Runtime: %s seconds " % (time.time() - start_time)) diff --git a/eval/vbench/launch_calc.sh b/eval/vbench/launch_calc.sh index 53114b9..9f14ce5 100644 --- a/eval/vbench/launch_calc.sh +++ b/eval/vbench/launch_calc.sh @@ -7,11 +7,10 @@ mkdir -p $LOG_BASE echo "Logging to $LOG_BASE" GPUS=(0 1 2 3 4 5 6 7) -START_INDEX_LIST=(0 2 4 6 8 10 12 14) -END_INDEX_LIST=(2 4 6 8 10 12 14 16) +START_INDEX_LIST=(0 2 6 7 8 9 10 13) +END_INDEX_LIST=(2 6 7 8 9 10 13 16) TASK_ID_LIST=(calc_vbench_a calc_vbench_b calc_vbench_c calc_vbench_d calc_vbench_e calc_vbench_f calc_vbench_g calc_vbench_h) # for log records only - for i in "${!GPUS[@]}"; do CUDA_VISIBLE_DEVICES=${GPUS[i]} python eval/vbench/calc_vbench.py $VIDEO_DIR $CKPT_DIR --start ${START_INDEX_LIST[i]} --end ${END_INDEX_LIST[i]} > ${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & done From d4db36f7820b456b0e5a5e77c44799695b4abf0a Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 17 Jun 2024 06:35:59 +0000 Subject: [PATCH 3/8] save update --- eval/vbench/launch.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/eval/vbench/launch.sh b/eval/vbench/launch.sh index c37526d..f3a5b62 100644 --- a/eval/vbench/launch.sh +++ b/eval/vbench/launch.sh @@ -24,6 +24,14 @@ TASK_ID_LIST=(4a 4b 4c 4d 4e 4f 4g 4h) # for log records only START_INDEX_LIST=(0 120 240 360 480 600 720 840) END_INDEX_LIST=(120 240 360 480 600 720 840 2000) +# START_INDEX_LIST=(60 180 300 420 540 660 780 900) +# END_INDEX_LIST=(120 240 360 480 600 720 840 2000) +# LOG_BASE=$(dirname $CKPT)/eval/last_60 +# mkdir -p ${LOG_BASE} +# echo "Logging to $LOG_BASE" + + + for i in "${!GPUS[@]}"; do if [ -z ${RES} ] || [ -z ${ASP_RATIO} ] ; then From 8fe96e4b886c53024a3edcfb46052ec0321903df Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Thu, 20 Jun 2024 04:03:13 +0000 Subject: [PATCH 4/8] more options for vbench --- eval/README.md | 6 ++++++ eval/vbench/launch.sh | 2 ++ 2 files changed, 8 insertions(+) diff --git a/eval/README.md b/eval/README.md index 7ae6e32..652d3a3 100644 --- a/eval/README.md +++ b/eval/README.md @@ -48,8 +48,14 @@ First, generate the relevant videos with the following commands: ```bash # vbench task, if evaluation all set start_index to 0, end_index to 2000 bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -4 start_index end_index + # Alternatively, launch 8 jobs at once (you must read the script to understand the details) bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name + +# in addition, you can specify resolution, aspect ratio, sampling steps, flow, and llm-refine +bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name res_value aspect_ratio_value steps_value flow_value llm_refine_value +# for example +# bash eval/vbench/launch.sh /mnt/jfs-hdd/sora/checkpoints/outputs/042-STDiT3-XL-2/epoch1-global_step16200_llm_refine/ema.pt 51 042-STDiT3-XL-2 240p 9:16 30 2 True ``` After generation, install the VBench package following our [installation](../docs/installation.md)'s sections of "Evaluation Dependencies". Then, run the following commands to evaluate the generated samples. diff --git a/eval/vbench/launch.sh b/eval/vbench/launch.sh index f3a5b62..e7c1165 100644 --- a/eval/vbench/launch.sh +++ b/eval/vbench/launch.sh @@ -24,6 +24,8 @@ TASK_ID_LIST=(4a 4b 4c 4d 4e 4f 4g 4h) # for log records only START_INDEX_LIST=(0 120 240 360 480 600 720 840) END_INDEX_LIST=(120 240 360 480 600 720 840 2000) +## Modify the following to run on multiple machines for faster results +## 720p will take quite long on a single machine # START_INDEX_LIST=(60 180 300 420 540 660 780 900) # END_INDEX_LIST=(120 240 360 480 600 720 840 2000) # LOG_BASE=$(dirname $CKPT)/eval/last_60 From 98958b8e56b64ab7450d6bbafd62b2074a25bd1f Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Fri, 21 Jun 2024 09:18:09 +0000 Subject: [PATCH 5/8] format --- eval/README.md | 9 +++++++++ eval/sample.sh | 41 +++++++++++++++++++++++++++++++-------- eval/vbench_i2v/launch.sh | 21 +++++++++++++++++++- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/eval/README.md b/eval/README.md index 652d3a3..261c21b 100644 --- a/eval/README.md +++ b/eval/README.md @@ -95,6 +95,15 @@ python eval/vbench_i2v/tabulate_vbench_i2v_scores.py path/to/videos/folder path/ ``` +Similarly as VBench, you can specify resolution, aspect ratio, sampling steps, flow, and llm-refine + +```bash +bash eval/vbench_i2v/launch.sh /path/to/ckpt num_frames model_name_for_log res_value aspect_ratio_value steps_value flow_value llm_refine_value +# for example +# bash eval/vbench_i2v/launch.sh /mnt/jfs-hdd/sora/checkpoints/outputs/042-STDiT3-XL-2/epoch1-global_step16200_llm_refine/ema.pt 51 042-STDiT3-XL-2 240p 9:16 30 2 True +# if no flow control, use "None" instead +``` + ## VAE Install the dependencies package following our [installation](../docs/installation.md)'s s sections of "Evaluation Dependencies". Then, run the following evaluation command: diff --git a/eval/sample.sh b/eval/sample.sh index eff1368..0f28550 100644 --- a/eval/sample.sh +++ b/eval/sample.sh @@ -287,16 +287,41 @@ function run_vbench_i2v() { eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ --prompt-path assets/texts/VBench/all_i2v.txt \ --image-size $VBENCH_I2V_H $VBENCH_I2V_W \ - --start-index $1 --end-index $2 \ - --num-frames $NUM_FRAMES --batch-size $VBENCH_BS + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 else - eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ - --prompt-path assets/texts/VBench/all_i2v.txt \ - --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ - --start-index $1 --end-index $2 \ - --num-frames $NUM_FRAMES --batch-size $VBENCH_BS + if [ -z ${NUM_SAMPLING_STEPS} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + if [ -z ${FLOW} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + if [ -z ${LLM_REFINE} ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + if [ "${FLOW}" = "None" ]; then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --llm-refine ${LLM_REFINE} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + fi + fi + fi + fi fi - } ### Main diff --git a/eval/vbench_i2v/launch.sh b/eval/vbench_i2v/launch.sh index d8eea1d..b972acc 100644 --- a/eval/vbench_i2v/launch.sh +++ b/eval/vbench_i2v/launch.sh @@ -6,6 +6,10 @@ MODEL_NAME=$3 RES=$4 ASP_RATIO=$5 +NUM_SAMPLING_STEPS=$6 +FLOW=$7 +LLM_REFINE=$8 + if [[ $CKPT == *"ema"* ]]; then parentdir=$(dirname $CKPT) CKPT_BASE=$(basename $parentdir)_ema @@ -25,6 +29,21 @@ for i in "${!GPUS[@]}"; do then CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & else - CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + if [ -z ${NUM_SAMPLING_STEPS} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + if [ -z ${FLOW} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + if [ -z ${LLM_REFINE} ]; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS} ${FLOW}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -5 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO} ${NUM_SAMPLING_STEPS} ${FLOW} ${LLM_REFINE}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + fi + fi + fi fi done From e581319f37e8c851cc2f3676928dc2f5d60cdaf1 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Fri, 21 Jun 2024 09:44:38 +0000 Subject: [PATCH 6/8] add instruction for force huggingface --- docs/commands.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/commands.md b/docs/commands.md index 2c948de..d982db8 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -1,5 +1,6 @@ # Commands +- [Config](#Config) - [Inference](#inference) - [Inference with Open-Sora 1.2](#inference-with-open-sora-12) - [Inference with Open-Sora 1.1](#inference-with-open-sora-11) @@ -12,6 +13,35 @@ - [Training Hyperparameters](#training-hyperparameters) - [Search batch size for buckets](#search-batch-size-for-buckets) +## Config +Note that currently our model loading for vae and diffusion model supports two types: + +* load from local file path +* load from huggingface + +Our config supports loading from huggingface by default. +If you wish to load from a local path, you need to set `force_huggingface=True`, for instance: + +```python +# for vae +vae = dict( + type="OpenSoraVAE_V1_2", + from_pretrained="/root/commonData/OpenSora-VAE-v1.2", + micro_frame_size=17, + micro_batch_size=4, + force_huggingface=True, # NOTE: set here +) +# for diffusion model +model = dict( + type="STDiT3-XL/2", + from_pretrained="/root/commonData/OpenSora-STDiT-v3", + qk_norm=True, + enable_flash_attn=True, + enable_layernorm_kernel=True, + force_huggingface=True, # NOTE: set here +) +``` + ## Inference You can modify corresponding config files to change the inference settings. See more details [here](/docs/structure.md#inference-config-demos). From 70226e81e2e047cf7d31d83ea0b3a8355a3aa046 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 24 Jun 2024 06:39:57 +0000 Subject: [PATCH 7/8] update --- eval/vbench_i2v/launch.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/eval/vbench_i2v/launch.sh b/eval/vbench_i2v/launch.sh index b972acc..30ee9c3 100644 --- a/eval/vbench_i2v/launch.sh +++ b/eval/vbench_i2v/launch.sh @@ -19,10 +19,16 @@ fi LOG_BASE=$(dirname $CKPT)/eval echo "Logging to $LOG_BASE" -GPUS=(0 1 2 3 4 5 6 7) -TASK_ID_LIST=(5a 5b 5c 5d 5e 5f 5g 5h) # for log records only -START_INDEX_LIST=(0 140 280 420 560 700 840 980) -END_INDEX_LIST=(140 280 420 560 700 840 980 2000) +# GPUS=(0 1 2 3 4 5 6 7) +# TASK_ID_LIST=(5a 5b 5c 5d 5e 5f 5g 5h) # for log records only +# START_INDEX_LIST=(0 140 280 420 560 700 840 980) +# END_INDEX_LIST=(140 280 420 560 700 840 980 2000) + +GPUS=(4 5 6 7) +TASK_ID_LIST=(5a 5b 5c 5d) # for log records only +START_INDEX_LIST=(0 140 280 420) +END_INDEX_LIST=(140 280 420 560) + for i in "${!GPUS[@]}"; do if [ -z ${RES} ] || [ -z ${ASP_RATIO} ] ; From 1e6c44238b0c42d0010df5fd5fb63c6654f7d4d8 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 24 Jun 2024 06:48:08 +0000 Subject: [PATCH 8/8] vbench i2v passed test --- eval/vbench_i2v/launch.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/eval/vbench_i2v/launch.sh b/eval/vbench_i2v/launch.sh index 30ee9c3..2b03309 100644 --- a/eval/vbench_i2v/launch.sh +++ b/eval/vbench_i2v/launch.sh @@ -19,15 +19,10 @@ fi LOG_BASE=$(dirname $CKPT)/eval echo "Logging to $LOG_BASE" -# GPUS=(0 1 2 3 4 5 6 7) -# TASK_ID_LIST=(5a 5b 5c 5d 5e 5f 5g 5h) # for log records only -# START_INDEX_LIST=(0 140 280 420 560 700 840 980) -# END_INDEX_LIST=(140 280 420 560 700 840 980 2000) - -GPUS=(4 5 6 7) -TASK_ID_LIST=(5a 5b 5c 5d) # for log records only -START_INDEX_LIST=(0 140 280 420) -END_INDEX_LIST=(140 280 420 560) +GPUS=(0 1 2 3 4 5 6 7) +TASK_ID_LIST=(5a 5b 5c 5d 5e 5f 5g 5h) # for log records only +START_INDEX_LIST=(0 140 280 420 560 700 840 980) +END_INDEX_LIST=(140 280 420 560 700 840 980 2000) for i in "${!GPUS[@]}"; do