diff --git a/eval/loss/eval_loss.py b/eval/loss/eval_loss.py index 8a97f0e..f37f895 100644 --- a/eval/loss/eval_loss.py +++ b/eval/loss/eval_loss.py @@ -100,7 +100,13 @@ def main(): return dataloader, num_steps_per_epoch, num_batch evaluation_losses = {} - for res, t_bucket in bucket_config.items(): + start = cfg.start_index if "start_index" in cfg else 0 + end = cfg.end_index if "end_index" in cfg else len(bucket_config) + for i, res in enumerate(bucket_config): + if i < start or i >= end: # skip task + continue + + t_bucket = bucket_config[res] for num_frames, (_, batch_size) in t_bucket.items(): if batch_size is None: continue diff --git a/eval/loss/launch.sh b/eval/loss/launch.sh index 67d230a..a7a4b84 100644 --- a/eval/loss/launch.sh +++ b/eval/loss/launch.sh @@ -17,10 +17,13 @@ mkdir -p $LOG_BASE echo "Logging to $LOG_BASE" -GPUS=(1 2 3 4 5) +GPUS=(3 4 5 6 7) RESOLUTION=(144p 240p 360p 480p 720p) -CUDA_VISIBLE_DEVICES=0 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img.log 2>&1 & +CUDA_VISIBLE_DEVICES=0 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_0.log --start-index 0 --end-index 3 2>&1 & +CUDA_VISIBLE_DEVICES=1 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_1.log --start-index 3 --end-index 5 2>&1 & +CUDA_VISIBLE_DEVICES=2 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_2.log --start-index 5 2>&1 & + for i in "${!GPUS[@]}"; do CUDA_VISIBLE_DEVICES=${GPUS[i]} $CMD --data-path $VID_PATH --ckpt-path $CKPT_PATH --resolution ${RESOLUTION[i]} >${LOG_BASE}/${RESOLUTION[i]}_vid.log 2>&1 & diff --git a/eval/loss/tabulate_rl_loss.py b/eval/loss/tabulate_rl_loss.py index fddd295..4c623c0 100644 --- a/eval/loss/tabulate_rl_loss.py +++ b/eval/loss/tabulate_rl_loss.py @@ -23,8 +23,16 @@ if __name__ == "__main__": args = parse_args() files = os.listdir(args.log_dir) - # files = ["img.log", "video_144p.log", "video_240p.log", "video_360p.log", "video_480p.log", "video_720p.log"] - files = ["img.log", "144p_vid.log", "240p_vid.log", "360p_vid.log", "480p_vid.log", "720p_vid.log"] + files = [ + "img_0.log", + "img_1.log", + "img_2.log", + "144p_vid.log", + "240p_vid.log", + "360p_vid.log", + "480p_vid.log", + "720p_vid.log", + ] loss_info = {} diff --git a/eval/sample.sh b/eval/sample.sh index 8efeb23..cd765ee 100644 --- a/eval/sample.sh +++ b/eval/sample.sh @@ -6,6 +6,8 @@ MODEL_NAME=$3 VBENCH_START_INDEX=$5 VBENCH_END_INDEX=$6 +VBENCH_RES=$7 +VBENCH_ASP_RATIO=$8 echo "NUM_FRAMES=${NUM_FRAMES}" @@ -137,6 +139,29 @@ function run_video_g() { # 15min eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 1358 600 --sample-name 720p_1_2 # 2:1 eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 600 1358 --sample-name 720p_2_1 + + # add motion score + eval $CMD --ckpt-path $CKPT --save-dir $OUTPUT --num-frames $NUM_FRAMES --resolution 720p --sample-name motion --prompt \ + \"A stylish woman walking in the street of Tokyo.\"\ + \"A stylish woman walking in the street of Tokyo. motion score: 0.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 2.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 4.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 6.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 10.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 20.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 30.0\" \ + \"A stylish woman walking in the street of Tokyo. motion score: 40.0\" + + # add aes score + eval $CMD --ckpt-path $CKPT --save-dir $OUTPUT --num-frames $NUM_FRAMES --resolution 720p --sample-name aes --prompt \ + \"A stylish woman walking in the street of Tokyo.\"\ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 4.0\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 4.5\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 5.0\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 5.5\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 6.0\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 6.5\" \ + \"A stylish woman walking in the street of Tokyo. aesthetic score: 7.0\" } function run_video_h() { # 61min @@ -171,9 +196,18 @@ VBENCH_H=240 VBENCH_W=426 function run_vbench() { - eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ - --prompt-path assets/texts/VBench/all_dimension.txt \ - --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --image-size $VBENCH_H $VBENCH_W --start-index $1 --end-index $2 + if [ -z ${VBENCH_RES} ] || [ -z ${VBENCH_ASP_RATIO} ] ; + then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --image-size $VBENCH_H $VBENCH_W \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + else + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_dimension.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ + --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2 + fi } # vbench-i2v has 1120 samples @@ -181,11 +215,22 @@ function run_vbench() { VBENCH_I2V_H=256 VBENCH_I2V_W=256 -function run_vbenck_i2v() { - eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ - --prompt-path assets/texts/VBench/all_i2v.txt \ - --start-index $1 --end-index $2 \ - --num-frames $NUM_FRAMES --image-size $VBENCH_I2V_H $VBENCH_I2V_W --batch-size $VBENCH_BS +function run_vbench_i2v() { + if [ -z ${VBENCH_RES} ] || [ -z ${VBENCH_ASP_RATIO} ] ; + then + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --image-size $VBENCH_I2V_H $VBENCH_I2V_W \ + --start-index $1 --end-index $2 \ + --num-frames $NUM_FRAMES --batch-size $VBENCH_BS + else + eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \ + --prompt-path assets/texts/VBench/all_i2v.txt \ + --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \ + --start-index $1 --end-index $2 \ + --num-frames $NUM_FRAMES --batch-size $VBENCH_BS + fi + } ### Main @@ -245,7 +290,7 @@ for arg in "$@"; do then echo "need to set start_index and end_index" else - run_vbenck_i2v $VBENCH_START_INDEX $VBENCH_END_INDEX + run_vbench_i2v $VBENCH_START_INDEX $VBENCH_END_INDEX fi fi done diff --git a/eval/vbench/launch.sh b/eval/vbench/launch.sh index 0cb342e..eedd9b3 100644 --- a/eval/vbench/launch.sh +++ b/eval/vbench/launch.sh @@ -3,7 +3,8 @@ CKPT=$1 NUM_FRAMES=$2 MODEL_NAME=$3 - +RES=$4 +ASP_RATIO=$5 if [[ $CKPT == *"ema"* ]]; then parentdir=$(dirname $CKPT) @@ -20,5 +21,10 @@ START_INDEX_LIST=(0 120 240 360 480 600 720 840) END_INDEX_LIST=(120 240 360 480 600 720 840 2000) for i in "${!GPUS[@]}"; do - CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + if [ -z ${RES} ] || [ -z ${ASP_RATIO} ] ; + then + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + else + CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 & + fi done diff --git a/opensora/utils/config_utils.py b/opensora/utils/config_utils.py index 503e06b..7a77125 100644 --- a/opensora/utils/config_utils.py +++ b/opensora/utils/config_utils.py @@ -51,7 +51,7 @@ def parse_args(training=False): parser.add_argument("--fps", default=None, type=int, help="fps") parser.add_argument("--image-size", default=None, type=int, nargs=2, help="image size") parser.add_argument("--frame-interval", default=None, type=int, help="frame interval") - parser.add_argument("--aspect-ratio", default=None, type=float, help="aspect ratio") + parser.add_argument("--aspect-ratio", default=None, type=str, help="aspect ratio") # hyperparameters parser.add_argument("--num-sampling-steps", default=None, type=int, help="sampling steps")