automatically calculate scaled scores

This commit is contained in:
Shen-Chenhui 2024-06-05 06:37:23 +00:00
parent b3e62fe989
commit b8a93d5aba
6 changed files with 225 additions and 10 deletions

View file

@ -42,9 +42,9 @@ First, generate the relevant videos with the following commands:
```bash
# vbench tasks (4a 4b 4c ...)
bash eval/sample.sh /path/to/ckpt -4a
bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -4a
# launch 8 jobs at once (read the script for details)
bash eval/vbench/launch.sh /path/to/ckpt
bash eval/vbench/launch.sh /path/to/ckpt num_frames model_name
```
After generation, install the VBench package by following the "Evaluation Dependencies" section of our [installation guide](../docs/installation.md). Then run the following commands to evaluate the generated samples.
@ -53,11 +53,31 @@ After generation, install the VBench package following our [installation](../doc
bash eval/vbench/vbench.sh /path/to/video_folder
```
Finally, we obtain the scaled scores for the model by:
```bash
python eval/vbench/tabulate_vbench_scores.py --score_dir path/to/evaluation_results/dir
```
## VBench-i2v
[VBench-i2v](https://github.com/Vchitect/VBench/tree/master/vbench2_beta_i2v) is a benchmark for short image-to-video generation (beta version).
Similarly, install the VBench package by following the "Evaluation Dependencies" section of our [installation guide](../docs/installation.md). Then run the following commands to evaluate the generated samples.
TBD
```bash
# Step 1: generate the relevant videos
# vbench i2v tasks (5a 5b 5c ...)
bash eval/sample.sh /path/to/ckpt num_frames model_name_for_log -5a
# launch 8 jobs at once
bash eval/vbench_i2v/launch.sh /path/to/ckpt num_frames model_name
# Step 2: run vbench to evaluate the generated samples
python eval/vbench_i2v/vbench_i2v.py
python eval/vbench_i2v/vbench_video_quality.py
# Step 3: obtain the scaled scores
python eval/vbench_i2v/tabulate_vbench_i2v_scores.py --score_dir path/to/evaluation_results/dir
```
## VAE

View file

@ -5,7 +5,7 @@ set -e
CKPT=$1
MODEL_NAME=$2
NUM_FRAMES=51
NUM_FRAMES=$3
if [[ $CKPT == *"ema"* ]]; then
parentdir=$(dirname $CKPT)

View file

@ -3,6 +3,68 @@ import json
import os
from ast import literal_eval
# Relative weights for combining the two aggregate scores into the final
# total score: quality counts 4x as much as semantics.
SEMANTIC_WEIGHT = 1
QUALITY_WEIGHT = 4
# Dimensions aggregated into the "quality score".
QUALITY_LIST = [
"subject consistency",
"background consistency",
"temporal flickering",
"motion smoothness",
"aesthetic quality",
"imaging quality",
"dynamic degree",]
# Dimensions aggregated into the "semantic score".
SEMANTIC_LIST = [
"object class",
"multiple objects",
"human action",
"color",
"spatial relationship",
"scene",
"appearance style",
"temporal style",
"overall consistency"
]
# Per-dimension Min/Max bounds used to min-max normalize raw scores into
# [0, 1] before weighting.
# NOTE(review): these constants appear to mirror the official VBench
# leaderboard normalization table — confirm against upstream VBench docs.
NORMALIZE_DIC = {
"subject consistency": {"Min": 0.1462, "Max": 1.0},
"background consistency": {"Min": 0.2615, "Max": 1.0},
"temporal flickering": {"Min": 0.6293, "Max": 1.0},
"motion smoothness": {"Min": 0.706, "Max": 0.9975},
"dynamic degree": {"Min": 0.0, "Max": 1.0},
"aesthetic quality": {"Min": 0.0, "Max": 1.0},
"imaging quality": {"Min": 0.0, "Max": 1.0},
"object class": {"Min": 0.0, "Max": 1.0},
"multiple objects": {"Min": 0.0, "Max": 1.0},
"human action": {"Min": 0.0, "Max": 1.0},
"color": {"Min": 0.0, "Max": 1.0},
"spatial relationship": {"Min": 0.0, "Max": 1.0},
"scene": {"Min": 0.0, "Max": 0.8222},
"appearance style": {"Min": 0.0009, "Max": 0.2855},
"temporal style": {"Min": 0.0, "Max": 0.364},
"overall consistency": {"Min": 0.0, "Max": 0.364}
}
# Per-dimension weight applied after normalization; "dynamic degree" is
# down-weighted to 0.5, all other dimensions count fully.
DIM_WEIGHT = {
"subject consistency":1,
"background consistency":1,
"temporal flickering":1,
"motion smoothness":1,
"aesthetic quality":1,
"imaging quality":1,
"dynamic degree":0.5,
"object class":1,
"multiple objects":1,
"human action":1,
"color":1,
"spatial relationship":1,
"scene":1,
"appearance style":1,
"temporal style":1,
"overall consistency":1
}
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--score_dir", type=str) # evaluation_results/samples_...
@ -19,7 +81,6 @@ if __name__ == "__main__":
assert len(res_files) == len(info_files), f"got {len(res_files)} res files, but {len(info_files)} info files"
full_results = {}
for res_file in res_files:
# first check if results is normal
info_file = res_file.split(res_postfix)[0] + info_postfix
@ -30,12 +91,37 @@ if __name__ == "__main__":
with open(os.path.join(args.score_dir, res_file), "r", encoding="utf-8") as f:
data = json.load(f)
for key, val in data.items():
full_results[key] = format(val[0]*100, ".2f")
full_results[key] = format(val[0], ".4f")
scaled_results = {}
dims = set()
for key, val in full_results.items():
dim = key.replace("_", " ") if "_" in key else key
scaled_score = (float(val) - NORMALIZE_DIC[dim]["Min"]) / (NORMALIZE_DIC[dim]["Max"] - NORMALIZE_DIC[dim]["Min"])
scaled_score *= DIM_WEIGHT[dim]
scaled_results[dim] = scaled_score
dims.add(dim)
assert len(dims) == len(NORMALIZE_DIC), f"{set(NORMALIZE_DIC.keys())-dims} not calculated yet"
quality_score = sum([scaled_results[i] for i in QUALITY_LIST]) / sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
semantic_score = sum([scaled_results[i] for i in SEMANTIC_LIST]) / sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST])
scaled_results["quality score"] = quality_score
scaled_results["semantic score"] = semantic_score
scaled_results["total score"] = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
formated_scaled_results = {}
for key,val in scaled_results.items():
formated_scaled_results[key] = format(val*100, ".2f")+"%"
output_file_path = os.path.join(args.score_dir, "all_results.json")
with open(output_file_path, "w") as outfile:
json.dump(full_results, outfile, indent=4, sort_keys=True)
print(f"results saved to: {output_file_path}")
scaled_file_path = os.path.join(args.score_dir, "scaled_results.json")
with open(scaled_file_path, "w") as outfile:
json.dump(formated_scaled_results, outfile, indent=4, sort_keys=True)
print(f"results saved to: {scaled_file_path}")

View file

@ -4,8 +4,8 @@ set -x
set -e
CKPT=$1
NUM_FRAMES=51
MODEL_NAME=$2
NUM_FRAMES=$2
MODEL_NAME=$3
if [[ $CKPT == *"ema"* ]]; then
parentdir=$(dirname $CKPT)
@ -13,7 +13,6 @@ if [[ $CKPT == *"ema"* ]]; then
else
CKPT_BASE=$(basename $CKPT)
fi
# LOG_BASE=logs/sample/${MODEL_NAME}_${CKPT_BASE}
LOG_BASE=$(dirname $CKPT)/eval
echo "Logging to $LOG_BASE"

View file

@ -0,0 +1,110 @@
import argparse
import json
import os
from ast import literal_eval
# Relative weights for combining the i2v and quality aggregate scores into
# the final total score (equal weighting).
I2V_WEIGHT = 1.0
I2V_QUALITY_WEIGHT = 1.0
# Dimensions aggregated into the "i2v score" (video/image agreement).
I2V_LIST = [
"Video-Image Subject Consistency",
"Video-Image Background Consistency",
]
# Dimensions aggregated into the "quality score".
I2V_QUALITY_LIST = [
"Subject Consistency",
"Background Consistency",
"Motion Smoothness",
"Dynamic Degree",
"Aesthetic Quality",
"Imaging Quality",
"Temporal Flickering"
]
# Per-dimension weight applied after normalization; "Dynamic Degree" is
# down-weighted to 0.5 and "Video-Text Camera Motion" to 0.1.
# NOTE(review): "Video-Text Camera Motion" has weight and normalization
# entries but appears in neither I2V_LIST nor I2V_QUALITY_LIST, so it is
# scaled yet excluded from every aggregate — confirm this is intentional.
DIM_WEIGHT_I2V = {
"Video-Text Camera Motion": 0.1,
"Video-Image Subject Consistency": 1,
"Video-Image Background Consistency": 1,
"Subject Consistency": 1,
"Background Consistency": 1,
"Motion Smoothness": 1,
"Dynamic Degree": 0.5,
"Aesthetic Quality": 1,
"Imaging Quality": 1,
"Temporal Flickering": 1
}
# Per-dimension Min/Max bounds used to min-max normalize raw scores into
# [0, 1] before weighting.
# NOTE(review): values appear to mirror the official VBench-i2v
# normalization table — confirm against upstream VBench docs.
NORMALIZE_DIC_I2V = {
"Video-Text Camera Motion" :{"Min": 0.0, "Max":1.0 },
"Video-Image Subject Consistency":{"Min": 0.1462, "Max": 1.0},
"Video-Image Background Consistency":{"Min": 0.2615, "Max":1.0 },
"Subject Consistency":{"Min": 0.1462, "Max": 1.0},
"Background Consistency":{"Min": 0.2615, "Max": 1.0 },
"Motion Smoothness":{"Min": 0.7060, "Max": 0.9975},
"Dynamic Degree":{"Min": 0.0, "Max": 1.0},
"Aesthetic Quality":{"Min": 0.0, "Max": 1.0},
"Imaging Quality":{"Min": 0.0, "Max": 1.0},
"Temporal Flickering":{"Min":0.6293, "Max": 1.0}
}
def parse_args():
    """Parse command-line options for the score-tabulation script."""
    parser = argparse.ArgumentParser()
    # Directory holding the VBench-i2v evaluation result JSON files,
    # e.g. evaluation_results/samples_...
    parser.add_argument("--score_dir", type=str)
    return parser.parse_args()
if __name__ == "__main__":
    args = parse_args()

    # Each evaluated dimension produces a pair of files:
    #   <dim>_eval_results.json (scores) and <dim>_full_info.json (metadata).
    res_postfix = "_eval_results.json"
    info_postfix = "_full_info.json"
    filenames = os.listdir(args.score_dir)
    res_files = [name for name in filenames if res_postfix in name]
    info_files = [name for name in filenames if info_postfix in name]
    assert len(res_files) == len(info_files), f"got {len(res_files)} res files, but {len(info_files)} info files"

    # Collect the raw score of every dimension, formatted to 4 decimals.
    full_results = {}
    for res_file in res_files:
        # Sanity check: the matching info file must list at least one video.
        info_file = res_file.split(res_postfix)[0] + info_postfix
        with open(os.path.join(args.score_dir, info_file), "r", encoding="utf-8") as f:
            info = json.load(f)
        assert len(info[0]["video_list"]) > 0, f"Error: {info_file} has 0 video list"
        # Read the raw results; val[0] is the dimension's aggregate score.
        with open(os.path.join(args.score_dir, res_file), "r", encoding="utf-8") as f:
            data = json.load(f)
        for dim_name, val in data.items():
            full_results[dim_name] = format(val[0], ".4f")

    # Min-max normalize each dimension and apply its weight.
    scaled_results = {}
    dims = set()
    for dim, raw_score in full_results.items():
        bounds = NORMALIZE_DIC_I2V[dim]
        normalized = (float(raw_score) - bounds["Min"]) / (bounds["Max"] - bounds["Min"])
        scaled_results[dim] = normalized * DIM_WEIGHT_I2V[dim]
        dims.add(dim)
    assert len(dims) == len(NORMALIZE_DIC_I2V), f"{set(NORMALIZE_DIC_I2V.keys())-dims} not calculated yet"

    # Weighted averages over each dimension group, then the combined total.
    quality_score = sum(scaled_results[d] for d in I2V_QUALITY_LIST) / sum(DIM_WEIGHT_I2V[d] for d in I2V_QUALITY_LIST)
    i2v_score = sum(scaled_results[d] for d in I2V_LIST) / sum(DIM_WEIGHT_I2V[d] for d in I2V_LIST)
    scaled_results["quality score"] = quality_score
    scaled_results["i2v score"] = i2v_score
    scaled_results["total score"] = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)

    # Render scaled scores as percentage strings for the report.
    formated_scaled_results = {key: format(val * 100, ".2f") + "%" for key, val in scaled_results.items()}

    # Persist raw and scaled results next to the input score files.
    output_file_path = os.path.join(args.score_dir, "all_results.json")
    with open(output_file_path, "w") as outfile:
        json.dump(full_results, outfile, indent=4, sort_keys=True)
    print(f"results saved to: {output_file_path}")

    scaled_file_path = os.path.join(args.score_dir, "scaled_results.json")
    with open(scaled_file_path, "w") as outfile:
        json.dump(formated_scaled_results, outfile, indent=4, sort_keys=True)
    print(f"results saved to: {scaled_file_path}")

View file

@ -2,7 +2,7 @@ from vbench import VBench
VIDEO_PATH = ""
DIMENSIONS = ["subject consistency", "background_consistency", "motion_smoothness", "dynamic_degree", "aesthetic_quality", "imaging_quality"]
DIMENSIONS = ["subject_consistency", "background_consistency", "motion_smoothness", "dynamic_degree", "aesthetic_quality", "imaging_quality", "temporal_flickering"]
my_VBench = VBench("cuda", "vbench2_beta_i2v/vbench2_i2v_full_info.json", "evaluation_results")
my_VBench.evaluate(