mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-05-09 17:36:36 +02:00
automatically calculate scaled scores
This commit is contained in:
parent
b3e62fe989
commit
b8a93d5aba
|
|
@ -42,9 +42,9 @@ First, generate the relevant videos with the following commands:
|
|||
|
||||
```bash
|
||||
# vbench tasks (4a 4b 4c ...)
|
||||
bash eval/sample.sh /path/to/ckpt -4a
|
||||
bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -4a
|
||||
# launch 8 jobs at once (you must read the script to understand the details)
|
||||
bash eval/vbench/launch.sh /path/to/ckpt
|
||||
bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name
|
||||
```
|
||||
|
||||
After generation, install the VBench package by following the "Evaluation Dependencies" section of our [installation guide](../docs/installation.md). Then, run the following commands to evaluate the generated samples.
|
||||
|
|
@ -53,11 +53,31 @@ After generation, install the VBench package following our [installation](../doc
|
|||
bash eval/vbench/vbench.sh /path/to/video_folder
|
||||
```
|
||||
|
||||
Finally, we obtain the scaled scores for the model by:
|
||||
```bash
|
||||
python eval/vbench/tabulate_vbench_scores.py --score_dir path/to/evaluation_results/dir
|
||||
```
|
||||
|
||||
## VBench-i2v
|
||||
|
||||
[VBench-i2v](https://github.com/Vchitect/VBench/tree/master/vbench2_beta_i2v) is a benchmark for short image-to-video generation (beta version).
|
||||
Similarly, install the VBench package by following the "Evaluation Dependencies" section of our [installation guide](../docs/installation.md). Then, run the following commands to evaluate the generated samples.
|
||||
|
||||
TBD
|
||||
```bash
|
||||
# Step 1: generate the relevant videos
|
||||
# vbench i2v tasks (5a 5b 5c ...)
|
||||
bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -5a
|
||||
# launch 8 jobs at once
|
||||
bash eval/vbench_i2v/launch.sh /path/to/ckpt num_frames model_name
|
||||
|
||||
# Step 2: run vbench to evaluate the generated samples
|
||||
python eval/vbench_i2v/vbench_i2v.py
|
||||
python eval/vbench_i2v/vbench_video_quality.py
|
||||
|
||||
# Step 3: obtain the scaled scores
|
||||
python eval/vbench_i2v/tabulate_vbench_i2v_scores.py --score_dir path/to/evaluation_results/dir
|
||||
|
||||
```
|
||||
|
||||
## VAE
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ set -e
|
|||
|
||||
CKPT=$1
|
||||
MODEL_NAME=$2
|
||||
NUM_FRAMES=51
|
||||
NUM_FRAMES=$3
|
||||
|
||||
if [[ $CKPT == *"ema"* ]]; then
|
||||
parentdir=$(dirname $CKPT)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,68 @@ import json
|
|||
import os
|
||||
from ast import literal_eval
|
||||
|
||||
# Aggregate-group weights: quality counts 4x as much as semantics in the total.
SEMANTIC_WEIGHT = 1
QUALITY_WEIGHT = 4

# Dimensions contributing to the "quality score" aggregate.
QUALITY_LIST = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# Dimensions contributing to the "semantic score" aggregate.
SEMANTIC_LIST = [
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# Per-dimension empirical min/max used to min-max normalize raw scores.
NORMALIZE_DIC = {
    "subject consistency": {"Min": 0.1462, "Max": 1.0},
    "background consistency": {"Min": 0.2615, "Max": 1.0},
    "temporal flickering": {"Min": 0.6293, "Max": 1.0},
    "motion smoothness": {"Min": 0.706, "Max": 0.9975},
    "dynamic degree": {"Min": 0.0, "Max": 1.0},
    "aesthetic quality": {"Min": 0.0, "Max": 1.0},
    "imaging quality": {"Min": 0.0, "Max": 1.0},
    "object class": {"Min": 0.0, "Max": 1.0},
    "multiple objects": {"Min": 0.0, "Max": 1.0},
    "human action": {"Min": 0.0, "Max": 1.0},
    "color": {"Min": 0.0, "Max": 1.0},
    "spatial relationship": {"Min": 0.0, "Max": 1.0},
    "scene": {"Min": 0.0, "Max": 0.8222},
    "appearance style": {"Min": 0.0009, "Max": 0.2855},
    "temporal style": {"Min": 0.0, "Max": 0.364},
    "overall consistency": {"Min": 0.0, "Max": 0.364},
}

# Per-dimension weight applied after normalization; "dynamic degree" counts half.
DIM_WEIGHT = {
    "subject consistency": 1,
    "background consistency": 1,
    "temporal flickering": 1,
    "motion smoothness": 1,
    "aesthetic quality": 1,
    "imaging quality": 1,
    "dynamic degree": 0.5,
    "object class": 1,
    "multiple objects": 1,
    "human action": 1,
    "color": 1,
    "spatial relationship": 1,
    "scene": 1,
    "appearance style": 1,
    "temporal style": 1,
    "overall consistency": 1,
}
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--score_dir", type=str) # evaluation_results/samples_...
|
||||
|
|
@ -19,7 +81,6 @@ if __name__ == "__main__":
|
|||
assert len(res_files) == len(info_files), f"got {len(res_files)} res files, but {len(info_files)} info files"
|
||||
|
||||
full_results = {}
|
||||
|
||||
for res_file in res_files:
|
||||
# first check if results is normal
|
||||
info_file = res_file.split(res_postfix)[0] + info_postfix
|
||||
|
|
@ -30,12 +91,37 @@ if __name__ == "__main__":
|
|||
with open(os.path.join(args.score_dir, res_file), "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
for key, val in data.items():
|
||||
full_results[key] = format(val[0]*100, ".2f")
|
||||
full_results[key] = format(val[0], ".4f")
|
||||
|
||||
scaled_results = {}
|
||||
dims = set()
|
||||
for key, val in full_results.items():
|
||||
dim = key.replace("_", " ") if "_" in key else key
|
||||
scaled_score = (float(val) - NORMALIZE_DIC[dim]["Min"]) / (NORMALIZE_DIC[dim]["Max"] - NORMALIZE_DIC[dim]["Min"])
|
||||
scaled_score *= DIM_WEIGHT[dim]
|
||||
scaled_results[dim] = scaled_score
|
||||
dims.add(dim)
|
||||
|
||||
assert len(dims) == len(NORMALIZE_DIC), f"{set(NORMALIZE_DIC.keys())-dims} not calculated yet"
|
||||
|
||||
quality_score = sum([scaled_results[i] for i in QUALITY_LIST]) / sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
|
||||
semantic_score = sum([scaled_results[i] for i in SEMANTIC_LIST]) / sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST])
|
||||
scaled_results["quality score"] = quality_score
|
||||
scaled_results["semantic score"] = semantic_score
|
||||
scaled_results["total score"] = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
|
||||
|
||||
formated_scaled_results = {}
|
||||
for key,val in scaled_results.items():
|
||||
formated_scaled_results[key] = format(val*100, ".2f")+"%"
|
||||
|
||||
output_file_path = os.path.join(args.score_dir, "all_results.json")
|
||||
with open(output_file_path, "w") as outfile:
|
||||
json.dump(full_results, outfile, indent=4, sort_keys=True)
|
||||
print(f"results saved to: {output_file_path}")
|
||||
|
||||
scaled_file_path = os.path.join(args.score_dir, "scaled_results.json")
|
||||
with open(scaled_file_path, "w") as outfile:
|
||||
json.dump(formated_scaled_results, outfile, indent=4, sort_keys=True)
|
||||
print(f"results saved to: {scaled_file_path}")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ set -x
|
|||
set -e
|
||||
|
||||
CKPT=$1
|
||||
NUM_FRAMES=51
|
||||
MODEL_NAME=$2
|
||||
NUM_FRAMES=$2
|
||||
MODEL_NAME=$3
|
||||
|
||||
if [[ $CKPT == *"ema"* ]]; then
|
||||
parentdir=$(dirname $CKPT)
|
||||
|
|
@ -13,7 +13,6 @@ if [[ $CKPT == *"ema"* ]]; then
|
|||
else
|
||||
CKPT_BASE=$(basename $CKPT)
|
||||
fi
|
||||
# LOG_BASE=logs/sample/${MODEL_NAME}_${CKPT_BASE}
|
||||
LOG_BASE=$(dirname $CKPT)/eval
|
||||
echo "Logging to $LOG_BASE"
|
||||
|
||||
|
|
|
|||
110
eval/vbench_i2v/tabulate_vbench_i2v_scores.py
Normal file
110
eval/vbench_i2v/tabulate_vbench_i2v_scores.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import argparse
|
||||
import json
|
||||
import os
|
||||
from ast import literal_eval
|
||||
|
||||
# Weights of the two aggregate groups when forming the total score.
I2V_WEIGHT = 1.0
I2V_QUALITY_WEIGHT = 1.0

# Dimensions measuring faithfulness to the conditioning image.
I2V_LIST = [
    "Video-Image Subject Consistency",
    "Video-Image Background Consistency",
]

# Dimensions measuring general video quality.
I2V_QUALITY_LIST = [
    "Subject Consistency",
    "Background Consistency",
    "Motion Smoothness",
    "Dynamic Degree",
    "Aesthetic Quality",
    "Imaging Quality",
    "Temporal Flickering",
]

# Per-dimension weight applied after normalization.
DIM_WEIGHT_I2V = {
    "Video-Text Camera Motion": 0.1,
    "Video-Image Subject Consistency": 1,
    "Video-Image Background Consistency": 1,
    "Subject Consistency": 1,
    "Background Consistency": 1,
    "Motion Smoothness": 1,
    "Dynamic Degree": 0.5,
    "Aesthetic Quality": 1,
    "Imaging Quality": 1,
    "Temporal Flickering": 1,
}

# Per-dimension empirical min/max used to min-max normalize raw scores.
NORMALIZE_DIC_I2V = {
    "Video-Text Camera Motion": {"Min": 0.0, "Max": 1.0},
    "Video-Image Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Video-Image Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Motion Smoothness": {"Min": 0.7060, "Max": 0.9975},
    "Dynamic Degree": {"Min": 0.0, "Max": 1.0},
    "Aesthetic Quality": {"Min": 0.0, "Max": 1.0},
    "Imaging Quality": {"Min": 0.0, "Max": 1.0},
    "Temporal Flickering": {"Min": 0.6293, "Max": 1.0},
}
|
||||
|
||||
|
||||
def parse_args():
    """Parse command-line arguments for the VBench-i2v score tabulator.

    Returns:
        argparse.Namespace with ``score_dir``: directory containing the
        ``*_eval_results.json`` / ``*_full_info.json`` files to tabulate,
        e.g. ``evaluation_results/samples_...``.
    """
    parser = argparse.ArgumentParser()
    # required=True gives a clear CLI error instead of a confusing
    # TypeError from os.listdir(None) later in the script.
    parser.add_argument(
        "--score_dir",
        type=str,
        required=True,
        help="directory with VBench-i2v evaluation result JSON files",
    )  # evaluation_results/samples_...
    args = parser.parse_args()
    return args
|
||||
|
||||
def _collect_full_results(score_dir):
    """Gather raw per-dimension scores from *score_dir*.

    The directory is expected to hold pairs of files produced by VBench:
    ``*_eval_results.json`` with the scores and ``*_full_info.json``
    describing the evaluated videos.

    Returns:
        dict mapping dimension name -> raw score formatted as a
        4-decimal string.
    """
    res_postfix = "_eval_results.json"
    info_postfix = "_full_info.json"
    files = os.listdir(score_dir)
    res_files = [x for x in files if res_postfix in x]
    info_files = [x for x in files if info_postfix in x]
    # Every result file must have a matching info file; a mismatch means
    # the evaluation run is incomplete or the directory is wrong.
    assert len(res_files) == len(info_files), f"got {len(res_files)} res files, but {len(info_files)} info files"

    full_results = {}
    for res_file in res_files:
        # first check if results is normal: the info file must list at
        # least one evaluated video, otherwise the score is meaningless
        info_file = res_file.split(res_postfix)[0] + info_postfix
        with open(os.path.join(score_dir, info_file), "r", encoding="utf-8") as f:
            info = json.load(f)
            assert len(info[0]["video_list"]) > 0, f"Error: {info_file} has 0 video list"
        # read results; val[0] holds the aggregate score for the dimension
        with open(os.path.join(score_dir, res_file), "r", encoding="utf-8") as f:
            data = json.load(f)
            for key, val in data.items():
                full_results[key] = format(val[0], ".4f")
    return full_results


def _scale_results(full_results):
    """Min-max normalize raw scores and aggregate them into final scores.

    Each dimension is scaled with its ``NORMALIZE_DIC_I2V`` range and
    multiplied by its ``DIM_WEIGHT_I2V`` weight; the weighted averages of
    the quality group, the i2v group, and the overall total are then added
    under ``"quality score"``, ``"i2v score"`` and ``"total score"``.
    """
    scaled_results = {}
    dims = set()
    for key, val in full_results.items():
        dim = key
        scaled_score = (float(val) - NORMALIZE_DIC_I2V[dim]["Min"]) / (NORMALIZE_DIC_I2V[dim]["Max"] - NORMALIZE_DIC_I2V[dim]["Min"])
        scaled_score *= DIM_WEIGHT_I2V[dim]
        scaled_results[dim] = scaled_score
        dims.add(dim)
    # All known dimensions must be present before aggregating.
    assert len(dims) == len(NORMALIZE_DIC_I2V), f"{set(NORMALIZE_DIC_I2V.keys())-dims} not calculated yet"

    quality_score = sum([scaled_results[i] for i in I2V_QUALITY_LIST]) / sum([DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST])
    i2v_score = sum([scaled_results[i] for i in I2V_LIST]) / sum([DIM_WEIGHT_I2V[i] for i in I2V_LIST])

    scaled_results["quality score"] = quality_score
    scaled_results["i2v score"] = i2v_score
    scaled_results["total score"] = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
    return scaled_results


if __name__ == "__main__":
    args = parse_args()
    full_results = _collect_full_results(args.score_dir)
    scaled_results = _scale_results(full_results)

    # Report percentages with two decimals, e.g. "87.65%".
    formated_scaled_results = {key: format(val * 100, ".2f") + "%" for key, val in scaled_results.items()}

    output_file_path = os.path.join(args.score_dir, "all_results.json")
    with open(output_file_path, "w") as outfile:
        json.dump(full_results, outfile, indent=4, sort_keys=True)
    print(f"results saved to: {output_file_path}")

    scaled_file_path = os.path.join(args.score_dir, "scaled_results.json")
    with open(scaled_file_path, "w") as outfile:
        json.dump(formated_scaled_results, outfile, indent=4, sort_keys=True)
    print(f"results saved to: {scaled_file_path}")
|
||||
|
|
@ -2,7 +2,7 @@ from vbench import VBench
|
|||
|
||||
|
||||
VIDEO_PATH = ""
|
||||
DIMENSIONS = ["subject consistency", "background_consistency", "motion_smoothness", "dynamic_degree", "aesthetic_quality", "imaging_quality"]
|
||||
DIMENSIONS = ["subject_consistency", "background_consistency", "motion_smoothness", "dynamic_degree", "aesthetic_quality", "imaging_quality", "temporal_flickering"]
|
||||
|
||||
my_VBench = VBench("cuda", "vbench2_beta_i2v/vbench2_i2v_full_info.json", "evaluation_results")
|
||||
my_VBench.evaluate(
|
||||
|
|
|
|||
Loading…
Reference in a new issue