format

2026-04-10 21:01:26 +02:00 · 2024-06-10 06:02:36 +00:00 · 2024-06-10 06:02:36 +00:00 · bd182d93dd
commit bd182d93dd
parent 0936b61de2
6 changed files with 85 additions and 17 deletions
--- a/eval/loss/eval_loss.py
+++ b/eval/loss/eval_loss.py
@@ -100,7 +100,13 @@ def main():
        return dataloader, num_steps_per_epoch, num_batch

    evaluation_losses = {}
-    for res, t_bucket in bucket_config.items():
+    start = cfg.start_index if "start_index" in cfg else 0
+    end = cfg.end_index if "end_index" in cfg else len(bucket_config)
+    for i, res in enumerate(bucket_config):
+        if i < start or i >= end:  # skip task
+            continue
+
+        t_bucket = bucket_config[res]
        for num_frames, (_, batch_size) in t_bucket.items():
            if batch_size is None:
                continue
--- a/eval/loss/launch.sh
+++ b/eval/loss/launch.sh
@@ -17,10 +17,13 @@ mkdir -p $LOG_BASE
 echo "Logging to $LOG_BASE"


-GPUS=(1 2 3 4 5)
+GPUS=(3 4 5 6 7)
 RESOLUTION=(144p 240p 360p 480p 720p)

-CUDA_VISIBLE_DEVICES=0 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img.log 2>&1 &
+CUDA_VISIBLE_DEVICES=0 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_0.log --start-index 0 --end-index 3 2>&1 &
+CUDA_VISIBLE_DEVICES=1 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_1.log --start-index 3 --end-index 5 2>&1 &
+CUDA_VISIBLE_DEVICES=2 $CMD --data-path $IMG_PATH --ckpt-path $CKPT_PATH >${LOG_BASE}/img_2.log --start-index 5 2>&1 &
+

 for i in "${!GPUS[@]}"; do
    CUDA_VISIBLE_DEVICES=${GPUS[i]} $CMD --data-path $VID_PATH --ckpt-path $CKPT_PATH --resolution ${RESOLUTION[i]} >${LOG_BASE}/${RESOLUTION[i]}_vid.log 2>&1 &
--- a/eval/loss/tabulate_rl_loss.py
+++ b/eval/loss/tabulate_rl_loss.py
@@ -23,8 +23,16 @@ if __name__ == "__main__":
    args = parse_args()

    files = os.listdir(args.log_dir)
-    # files = ["img.log", "video_144p.log", "video_240p.log", "video_360p.log", "video_480p.log", "video_720p.log"]
-    files = ["img.log", "144p_vid.log", "240p_vid.log", "360p_vid.log", "480p_vid.log", "720p_vid.log"]
+    files = [
+        "img_0.log",
+        "img_1.log",
+        "img_2.log",
+        "144p_vid.log",
+        "240p_vid.log",
+        "360p_vid.log",
+        "480p_vid.log",
+        "720p_vid.log",
+    ]

    loss_info = {}

--- a/eval/sample.sh
+++ b/eval/sample.sh
@@ -6,6 +6,8 @@ MODEL_NAME=$3

 VBENCH_START_INDEX=$5
 VBENCH_END_INDEX=$6
+VBENCH_RES=$7
+VBENCH_ASP_RATIO=$8

 echo "NUM_FRAMES=${NUM_FRAMES}"

@@ -137,6 +139,29 @@ function run_video_g() { # 15min
  eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 1358 600 --sample-name 720p_1_2
  # 2:1
  eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 600 1358 --sample-name 720p_2_1
+
+  # add motion score
+  eval $CMD --ckpt-path $CKPT --save-dir $OUTPUT --num-frames $NUM_FRAMES --resolution 720p --sample-name motion --prompt \
+    \"A stylish woman walking in the street of Tokyo.\"\
+    \"A stylish woman walking in the street of Tokyo. motion score: 0.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 2.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 4.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 6.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 10.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 20.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 30.0\" \
+    \"A stylish woman walking in the street of Tokyo. motion score: 40.0\"
+
+  # add aes score
+  eval $CMD --ckpt-path $CKPT --save-dir $OUTPUT --num-frames $NUM_FRAMES --resolution 720p --sample-name aes --prompt \
+    \"A stylish woman walking in the street of Tokyo.\"\
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 4.0\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 4.5\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 5.0\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 5.5\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 6.0\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 6.5\" \
+    \"A stylish woman walking in the street of Tokyo. aesthetic score: 7.0\"
 }

 function run_video_h() { # 61min
@@ -171,9 +196,18 @@ VBENCH_H=240
 VBENCH_W=426

 function run_vbench() {
-  eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
-  --prompt-path assets/texts/VBench/all_dimension.txt \
-  --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --image-size $VBENCH_H $VBENCH_W --start-index $1 --end-index $2
+  if [ -z ${VBENCH_RES} ] || [ -z ${VBENCH_ASP_RATIO} ]  ;
+      then
+        eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
+        --prompt-path assets/texts/VBench/all_dimension.txt \
+        --image-size $VBENCH_H $VBENCH_W \
+        --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
+      else
+        eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench --prompt-as-path --num-sample 5 \
+        --prompt-path assets/texts/VBench/all_dimension.txt \
+        --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
+        --batch-size $VBENCH_BS --num-frames $NUM_FRAMES --start-index $1 --end-index $2
+  fi
 }

 # vbench-i2v has 1120 samples
@@ -181,11 +215,22 @@ function run_vbench() {
 VBENCH_I2V_H=256
 VBENCH_I2V_W=256

-function run_vbenck_i2v() {
-  eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
-    --prompt-path assets/texts/VBench/all_i2v.txt \
-    --start-index $1 --end-index $2 \
-    --num-frames $NUM_FRAMES --image-size $VBENCH_I2V_H $VBENCH_I2V_W --batch-size $VBENCH_BS
+function run_vbench_i2v() {
+    if [ -z ${VBENCH_RES} ] || [ -z ${VBENCH_ASP_RATIO} ]  ;
+      then
+        eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
+        --prompt-path assets/texts/VBench/all_i2v.txt \
+        --image-size $VBENCH_I2V_H $VBENCH_I2V_W \
+        --start-index $1 --end-index $2 \
+        --num-frames $NUM_FRAMES  --batch-size $VBENCH_BS
+      else
+        eval $CMD --ckpt-path $CKPT --save-dir ${OUTPUT}_vbench_i2v --prompt-as-path --num-sample 5 \
+        --prompt-path assets/texts/VBench/all_i2v.txt \
+        --resolution $VBENCH_RES --aspect-ratio $VBENCH_ASP_RATIO \
+        --start-index $1 --end-index $2 \
+        --num-frames $NUM_FRAMES --batch-size $VBENCH_BS
+  fi
+
 }

 ### Main
@@ -245,7 +290,7 @@ for arg in "$@"; do
      then
        echo "need to set start_index and end_index"
      else
-          run_vbenck_i2v $VBENCH_START_INDEX $VBENCH_END_INDEX
+          run_vbench_i2v $VBENCH_START_INDEX $VBENCH_END_INDEX
    fi
  fi
 done
--- a/eval/vbench/launch.sh
+++ b/eval/vbench/launch.sh
@@ -3,7 +3,8 @@
 CKPT=$1
 NUM_FRAMES=$2
 MODEL_NAME=$3
-
+RES=$4
+ASP_RATIO=$5

 if [[ $CKPT == *"ema"* ]]; then
    parentdir=$(dirname $CKPT)
@@ -20,5 +21,10 @@ START_INDEX_LIST=(0 120 240 360 480 600 720 840)
 END_INDEX_LIST=(120 240 360 480 600 720 840 2000)

 for i in "${!GPUS[@]}"; do
-    CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 &
+    if [ -z ${RES} ] || [ -z ${ASP_RATIO} ]  ;
+        then
+            CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 &
+        else
+            CUDA_VISIBLE_DEVICES=${GPUS[i]} bash eval/sample.sh $CKPT ${NUM_FRAMES} ${MODEL_NAME} -4 ${START_INDEX_LIST[i]} ${END_INDEX_LIST[i]} ${RES} ${ASP_RATIO}>${LOG_BASE}/${TASK_ID_LIST[i]}.log 2>&1 &
+    fi
 done
--- a/opensora/utils/config_utils.py
+++ b/opensora/utils/config_utils.py
@@ -51,7 +51,7 @@ def parse_args(training=False):
        parser.add_argument("--fps", default=None, type=int, help="fps")
        parser.add_argument("--image-size", default=None, type=int, nargs=2, help="image size")
        parser.add_argument("--frame-interval", default=None, type=int, help="frame interval")
-        parser.add_argument("--aspect-ratio", default=None, type=float, help="aspect ratio")
+        parser.add_argument("--aspect-ratio", default=None, type=str, help="aspect ratio")

        # hyperparameters
        parser.add_argument("--num-sampling-steps", default=None, type=int, help="sampling steps")