[feat] tools score_to_text

2026-05-21 11:59:01 +02:00 · 2024-06-02 07:49:46 +00:00 · 2024-06-02 07:49:46 +00:00 · 01421b7319
commit 01421b7319
parent 60c265d165
1 changed files with 35 additions and 2 deletions
--- a/tools/datasets/datautil.py
+++ b/tools/datasets/datautil.py
@ -406,6 +406,32 @@ def load_caption(path, ext):
        return ""


+# ======================================================
+# --score-to-text
+# ======================================================
+
+DROP_SCORE_PROB = 0.2
+
+
+def score_to_text(data):
+    """Append randomly kept quality scores to a sample's caption.
+
+    Each score present in ``data`` is kept independently when
+    ``random.random() > DROP_SCORE_PROB``; otherwise it is left out.
+
+    Args:
+        data: record supporting ``in`` and ``[]`` by key. It holds the caption
+            under ``"text"`` and, optionally, numeric scores under ``"aes"``
+            and ``"flow"``.
+
+    Returns:
+        The caption followed by a bracketed, comma-separated list of the kept
+        scores, e.g. ``"<caption> [aesthetic score: 5.1, motion score: 2.3]"``.
+        The caption is returned unchanged when no score is kept.
+    """
+    text = data["text"]
+    kept = []
+    # (key in the record, label used in the caption), in output order.
+    for key, label in (("aes", "aesthetic"), ("flow", "motion")):
+        if key not in data:
+            continue
+        value = data[key]
+        # Draw before validating the value so the number of random draws
+        # depends only on which keys are present, not on the score values.
+        keep = random.random() > DROP_SCORE_PROB
+        # NaN is the only float unequal to itself. Skipping None/NaN keeps a
+        # literal "nan" (or a format TypeError for None) out of the caption.
+        if keep and value is not None and value == value:
+            kept.append(f"{label} score: {value:.1f}")
+    if kept:
+        text = f"{text} [{', '.join(kept)}]"
+    return text
+
+
 # ======================================================
 # read & write
 # ======================================================
@ -542,7 +568,7 @@ def main(args):
        assert "path" in data.columns
        data = data.drop_duplicates(subset=["path"])
    if args.path_subset:
-        data = data[data['path'].str.contains(args.path_subset)]
+        data = data[data["path"].str.contains(args.path_subset)]

    # processing
    if args.relpath is not None:
@ -568,6 +594,8 @@ def main(args):
    if args.count_num_token is not None:
        assert "text" in data.columns
        data["text_len"] = apply(data["text"], lambda x: len(tokenizer(x)["input_ids"]))
+    if args.score_to_text:
+        data["text"] = apply(data, score_to_text, axis=1)

    # sort
    if args.sort is not None:
@ -659,7 +687,9 @@ def parse_args():
    parser.add_argument("--relpath", type=str, default=None, help="modify the path to relative path by root given")
    parser.add_argument("--abspath", type=str, default=None, help="modify the path to absolute path by root given")
    parser.add_argument("--path-to-id", action="store_true", help="add id based on path")
-    parser.add_argument("--path-subset", type=str, default=None, help="extract a subset data containing the given `path-subset` value")
+    parser.add_argument(
+        "--path-subset", type=str, default=None, help="extract a subset data containing the given `path-subset` value"
+    )

    # caption filtering
    parser.add_argument(
@ -682,6 +712,7 @@ def parse_args():
        "--count-num-token", type=str, choices=["t5"], default=None, help="Count the number of tokens in the caption"
    )
    parser.add_argument("--append-text", type=str, default=None, help="append text to the caption")
+    parser.add_argument("--score-to-text", action="store_true", help="convert score to text")

    # score filtering
    parser.add_argument("--fmin", type=int, default=None, help="filter the dataset by minimum number of frames")
@ -755,6 +786,8 @@ def get_output_path(args, input_name):
        name += "_ntoken"
    if args.append_text is not None:
        name += "_appendtext"
+    if args.score_to_text:
+        name += "_score2text"

    # score filtering
    if args.fmin is not None: