Merge pull request #158 from hpcaitech/vbench_trials

Vbench trials
This commit is contained in:
Zheng Zangwei (Alex Zheng) 2024-06-24 15:02:12 +08:00 committed by GitHub
commit 54970678bb
7 changed files with 191 additions and 35 deletions

View file

@ -1,5 +1,6 @@
# Commands
- [Config](#Config)
- [Inference](#inference)
- [Inference with Open-Sora 1.2](#inference-with-open-sora-12)
- [Inference with Open-Sora 1.1](#inference-with-open-sora-11)
@ -12,6 +13,35 @@
- [Training Hyperparameters](#training-hyperparameters)
- [Search batch size for buckets](#search-batch-size-for-buckets)
## Config
Note that model loading for the VAE and the diffusion model currently supports two sources:
* load from local file path
* load from huggingface
Our config supports loading from huggingface by default.
If you wish to load from a local path, you need to set `force_huggingface=True`, for instance:
```python
# for vae
vae = dict(
type="OpenSoraVAE_V1_2",
from_pretrained="/root/commonData/OpenSora-VAE-v1.2",
micro_frame_size=17,
micro_batch_size=4,
force_huggingface=True, # NOTE: set here
)
# for diffusion model
model = dict(
type="STDiT3-XL/2",
from_pretrained="/root/commonData/OpenSora-STDiT-v3",
qk_norm=True,
enable_flash_attn=True,
enable_layernorm_kernel=True,
force_huggingface=True, # NOTE: set here
)
```
## Inference
You can modify corresponding config files to change the inference settings. See more details [here](/docs/structure.md#inference-config-demos).

View file

@ -48,8 +48,14 @@ First, generate the relevant videos with the following commands:
```bash
# vbench task; to evaluate all prompts, set start_index to 0 and end_index to 2000
bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -4 start_index end_index
# Alternatively, launch 8 jobs at once (you must read the script to understand the details)
bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name
# in addition, you can specify resolution, aspect ratio, sampling steps, flow, and llm-refine
bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name res_value aspect_ratio_value steps_value flow_value llm_refine_value
# for example
# bash eval/vbench/launch.sh /mnt/jfs-hdd/sora/checkpoints/outputs/042-STDiT3-XL-2/epoch1-global_step16200_llm_refine/ema.pt 51 042-STDiT3-XL-2 240p 9:16 30 2 True
```
After generation, install the VBench package following our [installation](../docs/installation.md)'s sections of "Evaluation Dependencies". Then, run the following commands to evaluate the generated samples.
@ -89,6 +95,15 @@ python eval/vbench_i2v/tabulate_vbench_i2v_scores.py path/to/videos/folder path/
```
Similarly to VBench, you can specify resolution, aspect ratio, sampling steps, flow, and llm-refine:
```bash
bash eval/vbench_i2v/launch.sh /path/to/ckpt num_frames model_name_for_log res_value aspect_ratio_value steps_value flow_value llm_refine_value
# for example
# bash eval/vbench_i2v/launch.sh /mnt/jfs-hdd/sora/checkpoints/outputs/042-STDiT3-XL-2/epoch1-global_step16200_llm_refine/ema.pt 51 042-STDiT3-XL-2 240p 9:16 30 2 True
# if you do not want flow control, pass "None" as flow_value
```
## VAE
Install the dependencies package following our [installation](../docs/installation.md)'s sections of "Evaluation Dependencies". Then, run the following evaluation command:

View file

@ -9,6 +9,10 @@ VBENCH_END_INDEX=$6
VBENCH_RES=$7
VBENCH_ASP_RATIO=$8
NUM_SAMPLING_STEPS=$9
FLOW=${10}
LLM_REFINE=${11}
echo "NUM_FRAMES=${NUM_FRAMES}"
if [ -z "${NUM_FRAMES}" ]; then
@ -238,10 +242,38 @@ function run_vbench() {
--image-size $VBENCH_H $VBENCH_W \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
if [ -z ${NUM_SAMPLING_STEPS} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ -z ${FLOW} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ -z ${LLM_REFINE} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ "${FLOW}" = "None" ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --llm-refine ${LLM_REFINE} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_dimension.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
fi
fi
fi
fi
fi
}
@ -255,16 +287,41 @@ function run_vbench_i2v() {
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--image-size $VBENCH_I2V_H $VBENCH_I2V_W \
--start-index $1 --end-index $2 \
--num-frames $NUM_FRAMES --batch-size $VBENCH_BS
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
--start-index $1 --end-index $2 \
--num-frames $NUM_FRAMES --batch-size $VBENCH_BS
if [ -z ${NUM_SAMPLING_STEPS} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ -z ${FLOW} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ -z ${LLM_REFINE} ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
if [ "${FLOW}" = "None" ]; then
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --llm-refine ${LLM_REFINE} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
else
eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
--prompt-path assets/texts/VBench/all_i2v.txt \
--resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO --num-sampling-steps ${NUM_SAMPLING_STEPS} --flow ${FLOW} --llm-refine ${LLM_REFINE} \
--batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
fi
fi
fi
fi
fi
}
### Main

View file

@ -8,24 +8,30 @@ from vbench import VBench
full_info_path = "eval/vbench/VBench_full_info.json"
# VBench dimensions to evaluate, listed exactly once each and grouped into
# batches a-h. The trailing "# Nmin" notes are the runtimes recorded alongside
# each entry; the batch header gives the sum for that batch.
# NOTE(review): the batch sizes (2, 4, 1, 1, 1, 1, 3, 3) appear to correspond
# to the --start/--end index pairs used by the launch script
# (0, 2, 6, 7, 8, 9, 10, 13, 16) -- keep the two in sync when reordering.
dimensions = [
    # a: 10min
    "subject_consistency",  # 4min
    "imaging_quality",  # 6min
    # b: 12min
    "background_consistency",  # 2min
    "motion_smoothness",  # 5min
    "overall_consistency",  # 2min
    "human_action",  # 3min
    # c: 14min
    "multiple_objects",  # 14min
    # d: 14min
    "spatial_relationship",  # 14min
    # e: 12min
    "object_class",  # 12min
    # f: 12min
    "color",  # 12min
    # g: 10.5min
    "aesthetic_quality",  # 2.5min
    "appearance_style",  # 6min
    "temporal_flickering",  # 2min
    # h: 9min
    "scene",  # 3min
    "temporal_style",  # 2min
    "dynamic_degree",  # 4min
]

View file

@ -6,6 +6,10 @@ MODEL_NAME=$3
RES=$4
ASP_RATIO=$5
NUM_SAMPLING_STEPS=$6
FLOW=$7
LLM_REFINE=$8
if [[ $CKPT == *"ema"* ]]; then
parentdir=$(dirname $CKPT)
CKPT_BASE=$(basename $parentdir)_ema
@ -20,11 +24,36 @@ TASK_ID_LIST=(4a 4b 4c 4d 4e 4f 4g 4h) # for log records only
START_INDEX_LIST=(0 120 240 360 480 600 720 840)
END_INDEX_LIST=(120 240 360 480 600 720 840 2000)
## Modify the following to run on multiple machines for faster results
## 720p will take quite long on a single machine
# START_INDEX_LIST=(60 180 300 420 540 660 780 900)
# END_INDEX_LIST=(120 240 360 480 600 720 840 2000)
# LOG_BASE=$(dirname $CKPT)/eval/last_60
# mkdir -p ${LOG_BASE}
# echo "Logging to $LOG_BASE"
# Launch one background VBench sampling job per GPU. Each job gets its own
# start/end prompt index pair and writes stdout+stderr to
# ${LOG_BASE}/<task id>.log.
for i in "${!GPUS[@]}"; do
  # The optional settings are passed positionally, so each one is forwarded
  # only when every setting before it is also present. RES and ASP_RATIO are
  # only meaningful as a pair.
  optional_args=()
  if [ -n "${RES}" ] && [ -n "${ASP_RATIO}" ]; then
    optional_args+=("${RES}" "${ASP_RATIO}")
    if [ -n "${NUM_SAMPLING_STEPS}" ]; then
      optional_args+=("${NUM_SAMPLING_STEPS}")
      if [ -n "${FLOW}" ]; then
        optional_args+=("${FLOW}")
        if [ -n "${LLM_REFINE}" ]; then
          optional_args+=("${LLM_REFINE}")
        fi
      fi
    fi
  fi

  # Exactly one job per GPU: starting a second job here would contend for the
  # same device and clobber the same log file.
  CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh "$CKPT" "${NUM_FRAMES}" "${MODEL_NAME}" -4 \
    "${START_INDEX_LIST[i]}" "${END_INDEX_LIST[i]}" "${optional_args[@]}" \
    >"${LOG_BASE}/${TASK_ID_LIST[i]}.log" 2>&1 &
done

View file

@ -7,11 +7,10 @@ mkdir -p $LOG_BASE
echo "Logging to $LOG_BASE"
GPUS=(0 1 2 3 4 5 6 7)
START_INDEX_LIST=(0 2 4 6 8 10 12 14)
END_INDEX_LIST=(2 4 6 8 10 12 14 16)
START_INDEX_LIST=(0 2 6 7 8 9 10 13)
END_INDEX_LIST=(2 6 7 8 9 10 13 16)
TASK_ID_LIST=(calc_vbench_a calc_vbench_b calc_vbench_c calc_vbench_d calc_vbench_e calc_vbench_f calc_vbench_g calc_vbench_h) # for log records only
# Start one calc_vbench.py process per GPU in the background. Each process is
# given its own --start/--end index pair and logs stdout+stderr to
# ${LOG_BASE}/<task id>.log.
for i in "${!GPUS[@]}"; do
  # Paths are quoted so directories containing spaces or glob characters are
  # passed through as single arguments.
  CUDA_VISIBLE_DEVICES=${GPUS[i]} python eval/vbench/calc_vbench.py "$VIDEO_DIR" "$CKPT_DIR" \
    --start "${START_INDEX_LIST[i]}" --end "${END_INDEX_LIST[i]}" \
    >"${LOG_BASE}/${TASK_ID_LIST[i]}.log" 2>&1 &
done

View file

@ -6,6 +6,10 @@ MODEL_NAME=$3
RES=$4
ASP_RATIO=$5
NUM_SAMPLING_STEPS=$6
FLOW=$7
LLM_REFINE=$8
if [[ $CKPT == *"ema"* ]]; then
parentdir=$(dirname $CKPT)
CKPT_BASE=$(basename $parentdir)_ema
@ -20,11 +24,27 @@ TASK_ID_LIST=(5a 5b 5c 5d 5e 5f 5g 5h) # for log records only
START_INDEX_LIST=(0 140 280 420 560 700 840 980)
END_INDEX_LIST=(140 280 420 560 700 840 980 2000)
# Launch one background VBench-i2v sampling job per GPU. Each job gets its own
# start/end prompt index pair and writes stdout+stderr to
# ${LOG_BASE}/<task id>.log.
for i in "${!GPUS[@]}"; do
  # The optional settings are passed positionally, so each one is forwarded
  # only when every setting before it is also present. RES and ASP_RATIO are
  # only meaningful as a pair.
  optional_args=()
  if [ -n "${RES}" ] && [ -n "${ASP_RATIO}" ]; then
    optional_args+=("${RES}" "${ASP_RATIO}")
    if [ -n "${NUM_SAMPLING_STEPS}" ]; then
      optional_args+=("${NUM_SAMPLING_STEPS}")
      if [ -n "${FLOW}" ]; then
        optional_args+=("${FLOW}")
        if [ -n "${LLM_REFINE}" ]; then
          optional_args+=("${LLM_REFINE}")
        fi
      fi
    fi
  fi

  # Exactly one job per GPU: starting a second job here would contend for the
  # same device and clobber the same log file.
  CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh "$CKPT" "${NUM_FRAMES}" "${MODEL_NAME}" -5 \
    "${START_INDEX_LIST[i]}" "${END_INDEX_LIST[i]}" "${optional_args[@]}" \
    >"${LOG_BASE}/${TASK_ID_LIST[i]}.log" 2>&1 &
done