mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-04-12 05:46:22 +02:00
parent
c9b81d8fd6
commit
0dfea90a89
26
tools/scene_cut/README.md
Normal file
26
tools/scene_cut/README.md
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
## Scene Detection and Video Splitting
|
||||
|
||||
### Formatting
|
||||
Input meta should be `{prefix}.csv` with column `'videoId'`
|
||||
```
|
||||
python tools/scene_cut/process_meta.py --task append_format --meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6.csv --split popular_6
|
||||
```
|
||||
Output is `{prefix}_format.csv` (with column `path`) and `{prefix}_intact.csv` (with column `intact` and `path`)
|
||||
|
||||
### Scene Detection
|
||||
Input meta should be `{prefix}_format.csv`
|
||||
```
|
||||
python tools/scene_cut/scene_detect.py --meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6_format.csv
|
||||
```
|
||||
Output is `{prefix}_format_timestamp.csv`
|
||||
|
||||
### Video Splitting
|
||||
Input meta should be `{prefix}_format_timestamp.csv` (the output of the Scene Detection step)
|
||||
```
|
||||
python tools/scene_cut/main_cut_pandarallel.py \
|
||||
--meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6_format_timestamp.csv \
|
||||
--out_dir /mnt/hdd/data/pexels_new/scene_cut/data/popular_6
|
||||
```
|
||||
Output is `{out_dir}/{wo_ext}_scene-{sid}.mp4`
|
||||
|
||||
TODO: meta for video clips
|
||||
168
tools/scene_cut/main_cut_multi_thread.py
Normal file
168
tools/scene_cut/main_cut_multi_thread.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import os
|
||||
import argparse
|
||||
import time
|
||||
import subprocess
|
||||
from tqdm import tqdm
|
||||
|
||||
import pandas as pd
|
||||
from scenedetect import FrameTimecode
|
||||
from imageio_ffmpeg import get_ffmpeg_exe
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from mmengine.logging import MMLogger, print_log
|
||||
from utils_video import is_intact_video, iterate_files, clone_folder_structure
|
||||
|
||||
|
||||
def single_process(row, save_dir, logger=None):
    """Split one video (one meta row) into scene clips.

    Args:
        row (pd.Series): must contain 'path' (video file) and 'timestamp'
            (string repr of a list of (start, end) timecode pairs).
        save_dir (str): directory the clips are written to.
        logger: optional logger forwarded to ``split_video``.

    Returns:
        bool: False when the timestamp cell is malformed, True otherwise.
    """
    import ast

    video_path = row['path']

    timestamp = row['timestamp']
    # A valid cell looks like "[('00:00:00.000', '00:00:05.000'), ...]".
    if not (timestamp.startswith('[') and timestamp.endswith(']')):
        return False
    # literal_eval parses literals only; eval() would execute arbitrary
    # code embedded in the CSV.
    raw_scenes = ast.literal_eval(timestamp)
    scene_list = [
        (FrameTimecode(s, fps=1), FrameTimecode(t, fps=1))
        for s, t in raw_scenes
    ]
    split_video(video_path, scene_list, save_dir=save_dir, logger=logger)
    return True
|
||||
|
||||
|
||||
def split_video(
    video_path,
    scene_list,
    save_dir,
    min_seconds=None,
    max_seconds=None,
    target_fps=30,
    shorter_size=512,
    verbose=False,
    logger=None,
):
    """
    Cut ``video_path`` into one mp4 clip per scene with ffmpeg.

    scenes shorter than min_seconds will be ignored;
    scenes longer than max_seconds will be cut to save the beginning max_seconds.
    Currently, the saved file name pattern is f'{fname}_scene-{idx}'.mp4

    Args:
        scene_list (List[Tuple[FrameTimecode, FrameTimecode]]): each element is (s, t): start and end of a scene.
        min_seconds (float | None)
        max_seconds (float | None)
        target_fps (int | None)
        shorter_size (int | None)

    Returns:
        List[str]: paths of the clips that were written.
    """
    FFMPEG_PATH = get_ffmpeg_exe()

    save_path_list = []
    for idx, scene in enumerate(scene_list):
        s, t = scene  # FrameTimecode
        if min_seconds is not None and (t - s).get_seconds() < min_seconds:
            continue

        duration = t - s
        if max_seconds is not None:
            # cap the clip length at max_seconds, expressed in frames
            fps = s.framerate
            max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
            max_duration.frame_num = round(fps * max_seconds)
            duration = min(max_duration, duration)

        # save path; TODO: fname pattern
        fname_wo_ext = os.path.splitext(os.path.basename(video_path))[0]
        save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")

        # ffmpeg cmd; -ss BEFORE -i uses fast input seeking instead of
        # decoding up to the cut point
        cmd = [FFMPEG_PATH]
        cmd += ["-nostdin", "-y", "-ss", str(s.get_seconds()), "-i", video_path, "-t", str(duration.get_seconds())]

        # target fps
        if target_fps is not None:
            cmd += ["-r", f"{target_fps}"]

        # resize so the shorter side equals `shorter_size`, keeping aspect
        # ratio and even dimensions
        if shorter_size is not None:
            cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]

        cmd += ["-map", "0", save_path]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        stdout, _ = proc.communicate()
        if verbose:
            print_log(stdout.decode("utf-8"), logger=logger)

        # BUG FIX: previously appended `video_path`; callers expect the
        # generated clip paths here.
        save_path_list.append(save_path)
        print_log(f"Video clip saved to '{save_path}'", logger=logger)

    return save_path_list
|
||||
|
||||
|
||||
def parse_args():
    """Build and parse this script's command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='F:/Panda-70M/')
    parser.add_argument('--split', default='test')
    parser.add_argument('--num_workers', default=5, type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Cut every video listed in the meta CSV into scene clips (single worker)."""
    root, split = 'F:/pexels_new/', 'popular_2'
    meta_path = os.path.join(root, f'raw/meta/{split}_format_timestamp.csv')
    folder_dst = os.path.join(root, f'scene_cut/data/{split}')
    os.makedirs(folder_dst, exist_ok=True)

    meta = pd.read_csv(meta_path)

    logger = None  # logging disabled for now

    executor = ThreadPoolExecutor(max_workers=1)
    futures = [
        executor.submit(single_process, row, folder_dst, logger)
        for _, row in meta.iterrows()
    ]
    for future in tqdm(as_completed(futures), total=len(meta)):
        future.result()
    executor.shutdown()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
163
tools/scene_cut/main_cut_pandarallel.py
Normal file
163
tools/scene_cut/main_cut_pandarallel.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
import os
|
||||
import argparse
|
||||
import time
|
||||
import subprocess
|
||||
from tqdm import tqdm
|
||||
|
||||
import pandas as pd
|
||||
from scenedetect import FrameTimecode
|
||||
from functools import partial
|
||||
from pandarallel import pandarallel
|
||||
from imageio_ffmpeg import get_ffmpeg_exe
|
||||
|
||||
from mmengine.logging import MMLogger, print_log
|
||||
from utils_video import is_intact_video, iterate_files, clone_folder_structure
|
||||
|
||||
|
||||
def process_single_row(row, save_dir, log_name=None):
    """Split one meta row's video into clips (pandarallel worker).

    Args:
        row (pd.Series): must contain 'path' and 'timestamp'.
        save_dir (str): output directory for the clips.
        log_name (str | None): name of an existing MMLogger to look up —
            loggers can't be pickled, so workers fetch them by name.

    Returns:
        bool | None: False for a malformed timestamp cell, None otherwise.
    """
    import ast

    video_path = row['path']

    logger = None
    if log_name is not None:
        logger = MMLogger.get_instance(log_name)

    timestamp = row['timestamp']
    if not (timestamp.startswith('[') and timestamp.endswith(']')):
        return False
    # literal_eval safely parses the "[('00:00:00.000', ...), ...]" cell;
    # eval() would execute arbitrary CSV content.
    raw_scenes = ast.literal_eval(timestamp)
    scene_list = [
        (FrameTimecode(s, fps=1), FrameTimecode(t, fps=1))
        for s, t in raw_scenes
    ]
    split_video(video_path, scene_list, save_dir=save_dir,
                min_seconds=2, max_seconds=15, shorter_size=720,
                logger=logger)
|
||||
|
||||
|
||||
def split_video(
    video_path,
    scene_list,
    save_dir,
    min_seconds=None,
    max_seconds=None,
    target_fps=30,
    shorter_size=512,
    verbose=False,
    logger=None,
):
    """
    Cut ``video_path`` into one mp4 clip per scene with ffmpeg.

    scenes shorter than min_seconds will be ignored;
    scenes longer than max_seconds will be cut to save the beginning max_seconds.
    Currently, the saved file name pattern is f'{fname}_scene-{idx}'.mp4

    Args:
        scene_list (List[Tuple[FrameTimecode, FrameTimecode]]): each element is (s, t): start and end of a scene.
        min_seconds (float | None)
        max_seconds (float | None)
        target_fps (int | None)
        shorter_size (int | None)

    Returns:
        List[str]: paths of the clips that were written.
    """
    FFMPEG_PATH = get_ffmpeg_exe()

    save_path_list = []
    for idx, scene in enumerate(scene_list):
        s, t = scene  # FrameTimecode
        if min_seconds is not None and (t - s).get_seconds() < min_seconds:
            continue

        duration = t - s
        if max_seconds is not None:
            # cap the clip length at max_seconds, expressed in frames
            fps = s.framerate
            max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
            max_duration.frame_num = round(fps * max_seconds)
            duration = min(max_duration, duration)

        # save path; TODO: fname pattern
        fname_wo_ext = os.path.splitext(os.path.basename(video_path))[0]
        save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")

        # ffmpeg cmd; -ss BEFORE -i is fast input seeking (-ss after -i is
        # very slow because it decodes up to the cut point)
        cmd = [FFMPEG_PATH]
        cmd += ["-nostdin", "-y", "-ss", str(s.get_seconds()), "-i", video_path, "-t", str(duration.get_seconds())]

        # target fps
        if target_fps is not None:
            cmd += ["-r", f"{target_fps}"]

        # resize so the shorter side equals `shorter_size`, keeping aspect
        # ratio and even dimensions
        if shorter_size is not None:
            cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]

        cmd += ["-map", "0", save_path]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        proc.communicate()

        # BUG FIX: previously appended `video_path`; callers expect the
        # generated clip paths here.
        save_path_list.append(save_path)
        if verbose:
            print_log(f"Video clip saved to '{save_path}'", logger=logger)

    return save_path_list
|
||||
|
||||
|
||||
def parse_args():
    """Build and parse this script's command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--meta_path', default='./data/pexels_new/raw/meta/popular_5_format_timestamp.csv')
    parser.add_argument('--out_dir', default='./data/pexels_new/scene_cut/data/popular_5')
    parser.add_argument('--num_workers', default=5, type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Split every video in --meta_path into scene clips under --out_dir (pandarallel)."""
    args = parse_args()
    meta_path = args.meta_path
    out_dir = args.out_dir

    # expected layout: .../scene_cut/data/<split>
    assert os.path.basename(os.path.dirname(out_dir)) == 'data'
    os.makedirs(out_dir, exist_ok=True)

    meta = pd.read_csv(meta_path)

    # one logger per run, written next to the output folder; workers fetch
    # it by name (see process_single_row)
    log_dir = os.path.dirname(out_dir)
    log_name = os.path.basename(out_dir)
    timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))
    log_path = os.path.join(log_dir, f"{log_name}_{timestamp}.log")
    logger = MMLogger.get_instance(log_name, log_file=log_path)

    pandarallel.initialize(progress_bar=True)
    worker = partial(process_single_row, save_dir=out_dir, log_name=log_name)
    meta.parallel_apply(worker, axis=1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
266
tools/scene_cut/process_meta.py
Normal file
266
tools/scene_cut/process_meta.py
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
"""
|
||||
1. format_raw_meta()
|
||||
- only keep intact videos
|
||||
- add 'path' column (abs path)
|
||||
2. create_meta_for_folder()
|
||||
"""
|
||||
|
||||
import os
|
||||
# os.chdir('../..')
|
||||
print(f'Current working directory: {os.getcwd()}')
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import pickle as pkl
|
||||
from pandarallel import pandarallel
|
||||
from functools import partial
|
||||
import numpy as np
|
||||
|
||||
from utils_video import is_intact_video
|
||||
|
||||
|
||||
def has_downloaded_success(json_path):
    """Return True iff `json_path` exists, parses as JSON, and records a
    successful download (a 'success' key whose value is the boolean True)."""
    if not os.path.exists(json_path):
        return False

    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
        # `is True` rejects a missing key, non-bool truthy values, and False
        return data.get('success') is True
    except Exception:
        # unreadable / malformed JSON, or not a mapping
        return False
|
||||
|
||||
|
||||
def split_meta_csv(chunk_size=60000, root='./data/Panda-70M',
                   meta_name='meta/panda70m_training_10m.csv'):
    """
    Split csv into multiple small csv in order.

    Writes `{root}/meta/train_{idx}.csv` files of at most `chunk_size` rows,
    preserving row order.

    Args:
        chunk_size (int): maximum rows per output file.
        root (str): dataset root (new parameter; defaults to the old
            hard-coded path).
        meta_name (str): meta CSV path relative to `root` (new parameter).
    """
    meta_path = os.path.join(root, meta_name)

    df = pd.read_csv(meta_path)
    num_rows = len(df)

    # iloc slicing already yields the final partial chunk, so no extra
    # "remainder" pass is needed — the old one wrote the last chunk twice
    # (once as train_{idx} and again as train_{idx+1}).
    for idx, start in enumerate(range(0, num_rows, chunk_size)):
        df_i = df.iloc[start:start + chunk_size]
        out_path = os.path.join(root, f'meta/train_{idx}.csv')
        df_i.to_csv(out_path, index=False)
|
||||
|
||||
|
||||
def remove_index(in_path='your_file.csv', out_path='your_file_without_index.csv'):
    """Strip the leading (saved) index column from a CSV.

    Args:
        in_path (str): CSV whose first column is a saved index (new
            parameter; defaults to the old hard-coded name).
        out_path (str): where to write the index-free CSV (new parameter).
    """
    df = pd.read_csv(in_path, index_col=0)
    df.to_csv(out_path, index=False)
|
||||
|
||||
|
||||
def append_format(meta_path, mode='.mp4'):
    """
    Append _format to csv file:
        - filter broken videos; only intact videos are kept
        - add column 'path'

    input csv should satisfy:
        - name should be: {split}.csv
        - contain column 'videoID'/'videoId'
    """
    meta_dirname = os.path.dirname(meta_path)
    assert meta_dirname.endswith('raw/meta')
    root_raw = os.path.dirname(meta_dirname)

    split = os.path.splitext(os.path.basename(meta_path))[0]

    meta = pd.read_csv(meta_path)

    kept_rows, kept_paths = [], []
    for _, row in tqdm(meta.iterrows(), total=len(meta)):
        video_id = row['videoId']  # pexels_new
        video_path = os.path.join(root_raw, f'data/{split}/{video_id}.mp4')
        if mode == '.mp4':
            # probe the file itself
            if not is_intact_video(video_path):
                continue
        elif mode == '.json':
            # trust the downloader's sidecar json instead of decoding
            json_path = os.path.join(root_raw, f'data/{split}/{video_id}.json')
            if not has_downloaded_success(json_path):
                continue
        else:
            raise ValueError
        kept_rows.append(row)
        kept_paths.append(video_path)

    new_meta = pd.DataFrame(kept_rows)
    new_meta['path'] = kept_paths

    out_path = os.path.join(root_raw, f'meta/{split}_format.csv')
    new_meta.to_csv(out_path, index=False)
    print(f"New meta (shape={new_meta.shape}) saved to '{out_path}'")
|
||||
|
||||
|
||||
def append_format_pandarallel(meta_path, split, mode='.mp4'):
    """
    Append _format to csv file (parallel version):
        - filter broken videos; only intact videos are kept
        - add column 'path'

    input csv should satisfy:
        - name should be: {split}.csv
        - contain column 'videoID'/'videoId'

    Writes `{wo_ext}_intact.csv` (all rows plus an 'intact' flag) and
    `{wo_ext}_format.csv` (intact rows only, 'intact' column removed).
    """
    meta_dirname = os.path.dirname(meta_path)
    assert meta_dirname.endswith('raw/meta')
    root_raw = os.path.dirname(meta_dirname)

    wo_ext, ext = os.path.splitext(os.path.basename(meta_path))

    meta = pd.read_csv(meta_path)

    def is_intact(row, mode='.json'):
        """Return (intact: bool, abs video path) for one meta row."""
        video_id = row['videoId']  # pexels_new
        video_path = os.path.join(root_raw, f'data/{split}/{video_id}.mp4')
        if mode == '.mp4':
            return is_intact_video(video_path), video_path
        elif mode == '.json':
            json_path = os.path.join(root_raw, f'data/{split}/{video_id}.json')
            return has_downloaded_success(json_path), video_path
        else:
            raise ValueError

    pandarallel.initialize(progress_bar=True)
    is_intact_partial = partial(is_intact, mode=mode)
    ret = meta.parallel_apply(is_intact_partial, axis=1)

    intact, paths = list(zip(*ret))

    meta['intact'] = intact
    meta['path'] = paths
    out_path = os.path.join(root_raw, f'meta/{wo_ext}_intact.csv')
    meta.to_csv(out_path, index=False)
    print(f"New meta (shape={meta.shape}) with intact info saved to '{out_path}'")

    # FIX: the old code called .drop(..., inplace=True) on a boolean-indexed
    # view, which triggers pandas' SettingWithCopyWarning and relies on
    # undefined copy-vs-view behavior; drop on a fresh frame instead.
    meta_format = meta[np.array(intact)].drop('intact', axis=1)
    out_path = os.path.join(root_raw, f'meta/{wo_ext}_format.csv')
    meta_format.to_csv(out_path, index=False)
    print(f"New meta (shape={meta_format.shape}) with format info saved to '{out_path}'")
|
||||
|
||||
|
||||
def create_subset(meta_path, num_rows=100):
    """Save the first `num_rows` rows of a meta CSV as `{stem}_head-{num_rows}{ext}`.

    Args:
        meta_path (str): source CSV.
        num_rows (int): subset size (new parameter; defaults to the old
            fixed 100, so existing callers get the same output file).
    """
    meta = pd.read_csv(meta_path)
    meta_subset = meta.iloc[:num_rows]

    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f'{wo_ext}_head-{num_rows}{ext}'
    meta_subset.to_csv(out_path, index=False)
    print(f"New meta (shape={meta_subset.shape}) saved to '{out_path}'")
|
||||
|
||||
|
||||
def append_cut(root='./data/Panda-70M'):
    """
    Append _cut to csv file: expand each per-video row into one row per clip.

    input csv should satisfy:
        - name should be {split}_intact.csv
        - contain column 'timestamp': list of timestamp
          (plus 'caption' and 'matching_score' lists of the same length)
    """
    import ast

    split = 'test'
    meta_path = os.path.join(root, f'processed/meta/{split}_intact.csv')

    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f'{wo_ext}_cut{ext}'

    meta = pd.read_csv(meta_path)

    rows = []
    for _, row in tqdm(meta.iterrows(), total=len(meta)):
        video_id = row['videoID']
        # literal_eval parses the stringified lists safely; eval() would
        # execute arbitrary CSV content
        timestamps = ast.literal_eval(row['timestamp'])
        captions = ast.literal_eval(row['caption'])
        scores = ast.literal_eval(row['matching_score'])

        for idx_c in range(len(timestamps)):
            clip_path = os.path.join(root, f'processed/{split}/{video_id}_scene-{idx_c}.mp4')
            rows.append([f'{video_id}_scene-{idx_c}', clip_path,
                         timestamps[idx_c], captions[idx_c], scores[idx_c]])

    columns = ['videoID', 'path', 'timestamp', 'text', 'match_official']
    new_meta = pd.DataFrame(rows, columns=columns)

    new_meta.to_csv(out_path, index=False)
    print(f"New meta (shape={new_meta.shape}) saved to '{out_path}'")
|
||||
|
||||
|
||||
def debug_meta_topk():
    """Collect the clips with the highest / lowest flow scores for manual inspection."""
    meta_path = 'F:/Panda-70M/meta/test_intact_cut_flow.csv'
    meta = pd.read_csv(meta_path)

    score_column = 'flow_score'
    largest = meta.nlargest(10, columns=score_column)
    smallest = meta.nsmallest(200, columns=score_column)

    topk_list = [(r['path'], r['caption'], r[score_column]) for _, r in largest.iterrows()]
    topk_s_list = [(r['path'], r['caption'], r[score_column]) for _, r in smallest.iterrows()]

    x = 0  # breakpoint anchor for interactive inspection
|
||||
|
||||
|
||||
def parse_args():
    """Build and parse this script's command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--task', default='append_format')
    parser.add_argument('--meta_path', default='./data/pexels_new/raw/meta/popular_1.csv')
    parser.add_argument('--split', default='popular_5')
    parser.add_argument('--num_workers', default=5, type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # split_meta_csv()

    args = parse_args()

    if args.task == 'append_format':
        # append_format(meta_path=args.meta_path, mode='.mp4')
        append_format_pandarallel(meta_path=args.meta_path, split=args.split, mode='.json')
    elif args.task == 'create_subset':
        create_subset(meta_path=args.meta_path)
    else:
        raise ValueError

    # one-off utilities, run by hand:
    # append_cut(root=root)
    # append_score(root=root)
    # debug_meta_topk()
|
||||
89
tools/scene_cut/scene_detect.py
Normal file
89
tools/scene_cut/scene_detect.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
import argparse
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
from functools import partial
|
||||
from pandarallel import pandarallel
|
||||
from scenedetect import detect, ContentDetector, AdaptiveDetector, FrameTimecode
|
||||
|
||||
|
||||
def process_single_row(row):
    """Detect scenes in one video; return (success, stringified timecode list)."""
    video_path = row['path']

    detector = AdaptiveDetector(
        adaptive_threshold=3.0,
        # luma_only=True,
    )
    # detector = ContentDetector()
    try:
        scene_list = detect(video_path, detector, start_in_scene=True)
        pairs = [(s.get_timecode(), t.get_timecode()) for s, t in scene_list]
        return True, str(pairs)
    except Exception as e:
        # report the failure but keep the parallel run going
        print(f"Video '{video_path}' with error {e}")
        return False, ''
|
||||
|
||||
|
||||
def main():
    """Sequentially detect scenes for every video in a hard-coded meta CSV."""
    meta_path = 'F:/pexels_new/raw/meta/popular_1_format.csv'
    meta = pd.read_csv(meta_path)

    timestamp_list = []
    for _, row in tqdm(meta.iterrows()):
        detector = AdaptiveDetector(
            adaptive_threshold=1.5,
            luma_only=True,
        )
        # detector = ContentDetector()
        scene_list = detect(row['path'], detector, start_in_scene=True)
        timestamp_list.append(
            [(s.get_timecode(), t.get_timecode()) for s, t in scene_list]
        )

    meta['timestamp'] = timestamp_list

    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f"{wo_ext}_timestamp{ext}"
    meta.to_csv(out_path, index=False)
    print(f"New meta with timestamp saved to '{out_path}'.")
|
||||
|
||||
|
||||
def parse_args():
    """Build and parse this script's command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--meta_path', default='F:/pexels_new/raw/meta/popular_1_format.csv')
    parser.add_argument('--num_workers', default=5, type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main_pandarallel():
    """Detect scenes for every video in --meta_path in parallel; save *_timestamp.csv."""
    args = parse_args()
    meta_path = args.meta_path
    meta = pd.read_csv(meta_path)

    pandarallel.initialize(progress_bar=True)
    ret = meta.parallel_apply(process_single_row, axis=1)
    succ, timestamps = list(zip(*ret))

    meta['timestamp'] = timestamps
    # drop the rows whose detection failed
    meta = meta[np.array(succ)]

    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f"{wo_ext}_timestamp{ext}"
    meta.to_csv(out_path, index=False)
    print(f"New meta (shape={meta.shape}) with timestamp saved to '{out_path}'.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main_pandarallel()
|
||||
97
tools/scene_cut/utils_video.py
Normal file
97
tools/scene_cut/utils_video.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import os
|
||||
import cv2
|
||||
from mmengine.logging import print_log
|
||||
from moviepy.editor import VideoFileClip
|
||||
|
||||
|
||||
def iterate_files(folder_path):
    """Yield the path of every file under `folder_path`, recursively.

    os.walk already descends into every subdirectory, so one loop over its
    results covers the whole tree. The old explicit recursion over `dirs`
    created generators that were never consumed — pure dead code — and has
    been removed.
    """
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            yield os.path.join(root, file)
|
||||
|
||||
|
||||
def iterate_folders(folder_path):
    """Yield the path of every directory under `folder_path`, recursively.

    os.walk already recurses; the old manual recursion built generators
    that were never consumed (dead code) and has been removed.
    """
    for root, dirs, _files in os.walk(folder_path):
        for subdir in dirs:
            yield os.path.join(root, subdir)
|
||||
|
||||
|
||||
def clone_folder_structure(root_src, root_dst, verbose=False):
    """Recreate the directory tree of `root_src` under `root_dst` (no files copied)."""
    rel_paths = [os.path.relpath(p, root_src) for p in iterate_folders(root_src)]

    os.makedirs(root_dst, exist_ok=True)
    for rel in rel_paths:
        dst = os.path.join(root_dst, rel)
        os.makedirs(dst, exist_ok=True)
        if verbose:
            print(f"Create folder: '{dst}'")
|
||||
|
||||
|
||||
def is_intact_video(video_path, mode='moviepy', verbose=False, logger=None):
    """Return True when `video_path` exists and can be opened as a video.

    Args:
        video_path (str): file to probe.
        mode (str): 'moviepy' (parse with VideoFileClip) or 'cv2'
            (open a VideoCapture).
        verbose (bool): log the outcome via mmengine's print_log.
        logger: forwarded to print_log.

    Raises:
        ValueError: on an unknown `mode`.
    """
    if not os.path.exists(video_path):
        if verbose:
            print_log(f"Could not find '{video_path}'", logger=logger)
        return False

    if mode == 'moviepy':
        try:
            clip = VideoFileClip(video_path)
            clip.close()  # FIX: release the ffmpeg reader (was leaked)
            if verbose:
                print_log(f"The video file '{video_path}' is intact.", logger=logger)
            return True
        except Exception as e:
            if verbose:
                print_log(f"Error: {e}", logger=logger)
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
    elif mode == 'cv2':
        cap = None
        try:
            cap = cv2.VideoCapture(video_path)
            if cap.isOpened():
                if verbose:
                    print_log(f"The video file '{video_path}' is intact.", logger=logger)
                return True
            # FIX: previously fell off the end here and returned None;
            # report the failure explicitly (still falsy, so callers agree)
            if verbose:
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
        except Exception as e:
            if verbose:
                print_log(f"Error: {e}", logger=logger)
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
        finally:
            if cap is not None:
                cap.release()  # FIX: the capture was never released
    else:
        raise ValueError
|
||||
|
||||
|
||||
def count_frames(video_path, logger=None):
    """Log the total frame count of `video_path`; logs an error and returns None on failure."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print_log(f"Error: Could not open video file '{video_path}'", logger=logger)
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print_log(f"Total frames in the video '{video_path}': {total_frames}", logger=logger)
    cap.release()
|
||||
|
||||
|
||||
def count_files(root, suffix=".mp4"):
    """Count files under `root` (recursively) whose name ends with `suffix`."""
    return sum(1 for path in iterate_files(root) if path.endswith(suffix))
|
||||
|
||||
|
|
@ -8,6 +8,10 @@ import pandas as pd
|
|||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torchvision.datasets.folder import pil_loader
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
IMG_EXTENSIONS = (
|
||||
|
|
@ -24,6 +28,15 @@ IMG_EXTENSIONS = (
|
|||
VID_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")
|
||||
|
||||
|
||||
def is_video(filename):
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return ext in VID_EXTENSIONS
|
||||
|
||||
|
||||
IMG_EXTENSIONS = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp")
|
||||
VID_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")
|
||||
|
||||
|
||||
def is_video(filename):
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return ext in VID_EXTENSIONS
|
||||
|
|
@ -52,13 +65,12 @@ class VideoTextDataset(torch.utils.data.Dataset):
|
|||
|
||||
def __getitem__(self, index):
|
||||
row = self.meta.iloc[index]
|
||||
path = row["path"]
|
||||
path = row['path']
|
||||
|
||||
if is_video(path):
|
||||
img = extract_frames(path, points=[0.5])[0]
|
||||
else:
|
||||
img = pil_loader(path)
|
||||
|
||||
img = self.transform(img)
|
||||
|
||||
text = row["text"]
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
# Scene Detection and Video Split
|
||||
|
||||
Raw videos from the Internet may be too long for training.
|
||||
Thus, we detect scenes in raw videos and split them into short clips based on the scenes.
|
||||
First prepare the video processing packages.
|
||||
|
||||
```bash
|
||||
pip install scenedetect moviepy opencv-python
|
||||
```
|
||||
|
||||
Then run `scene_detect.py`. We provide efficient processing using `multiprocessing`. Don't forget to specify your own dataset path.
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
import os
|
||||
from multiprocessing import Pool
|
||||
|
||||
from mmengine.logging import MMLogger
|
||||
from scenedetect import ContentDetector, detect
|
||||
from tqdm import tqdm
|
||||
|
||||
from opensora.utils.misc import get_timestamp
|
||||
|
||||
from .utils import check_mp4_integrity, clone_folder_structure, iterate_files, split_video
|
||||
|
||||
# config
|
||||
target_fps = 30 # int
|
||||
shorter_size = 512 # int
|
||||
min_seconds = 1 # float
|
||||
max_seconds = 5 # float
|
||||
assert max_seconds > min_seconds
|
||||
cfg = dict(
|
||||
target_fps=target_fps,
|
||||
min_seconds=min_seconds,
|
||||
max_seconds=max_seconds,
|
||||
shorter_size=shorter_size,
|
||||
)
|
||||
|
||||
|
||||
def process_folder(root_src, root_dst):
    """Detect scenes in every mp4 under `root_src` and write the clips under `root_dst`."""
    # per-run logger, saved next to the destination folder
    log_name = os.path.basename(root_dst)
    folder_path_log = os.path.dirname(root_dst)
    log_path = os.path.join(folder_path_log, f"{log_name}_{get_timestamp()}.log")
    logger = MMLogger.get_instance(log_name, log_file=log_path)

    # mirror the source directory tree
    clone_folder_structure(root_src, root_dst)

    # all source videos, in a stable order
    mp4_list = sorted(x for x in iterate_files(root_src) if x.endswith(".mp4"))

    for idx, sample_path in tqdm(enumerate(mp4_list)):
        folder_src = os.path.dirname(sample_path)
        folder_dst = os.path.join(root_dst, os.path.relpath(folder_src, root_src))

        # skip unreadable source videos
        if not check_mp4_integrity(sample_path, logger=logger):
            continue

        # detect scenes, then cut one clip per scene
        scene_list = detect(sample_path, ContentDetector(), start_in_scene=True)
        save_path_list = split_video(sample_path, scene_list, save_dir=folder_dst, **cfg, logger=logger)

        # verify the generated clips
        for x in save_path_list:
            check_mp4_integrity(x, logger=logger)
|
||||
|
||||
|
||||
def scene_detect():
    """Detect & cut scenes using a single process.

    Expected dataset structure:
        data/
            your_dataset/
                raw_videos/
                    xxx.mp4
                    yyy.mp4

    This function results in:
        data/
            your_dataset/
                raw_videos/
                    xxx.mp4
                    yyy.mp4
                clips/
                    xxx_scene-0.mp4
                    yyy_scene-0.mp4
                    yyy_scene-1.mp4
    """
    # TODO: specify your dataset root
    # FIX: plain strings — the originals were f-strings with no placeholders (F541)
    root_src = "./data/your_dataset/raw_videos"
    root_dst = "./data/your_dataset/clips"

    process_folder(root_src, root_dst)
|
||||
|
||||
|
||||
def scene_detect_mp():
    """Detect & cut scenes using multiple processes (one process per split folder).

    Expected dataset structure:
        data/
            your_dataset/
                raw_videos/
                    split_0/
                        xxx.mp4
                        yyy.mp4
                    split_1/
                        xxx.mp4
                        yyy.mp4

    This function results in:
        data/
            your_dataset/
                raw_videos/
                    split_0/
                        xxx.mp4
                        yyy.mp4
                    split_1/
                        xxx.mp4
                        yyy.mp4
                clips/
                    split_0/
                        xxx_scene-0.mp4
                        yyy_scene-0.mp4
                    split_1/
                        xxx_scene-0.mp4
                        yyy_scene-0.mp4
                        yyy_scene-1.mp4
    """
    # TODO: specify your dataset root
    # FIX: plain strings — the originals were f-strings with no placeholders (F541)
    root_src = "./data/your_dataset/raw_videos"
    root_dst = "./data/your_dataset/clips"

    # TODO: specify your splits
    splits = ["split_0", "split_1"]

    # one worker per split, each processing its own sub-folder independently
    root_src_list = [os.path.join(root_src, x) for x in splits]
    root_dst_list = [os.path.join(root_dst, x) for x in splits]

    with Pool(processes=len(splits)) as pool:
        pool.starmap(process_folder, list(zip(root_src_list, root_dst_list)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # TODO: choose single process or multiprocessing
    # scene_detect() handles one flat raw_videos folder;
    # scene_detect_mp() fans out one worker per split sub-folder.
    scene_detect()
    # scene_detect_mp()
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
import cv2
|
||||
from imageio_ffmpeg import get_ffmpeg_exe
|
||||
from mmengine.logging import print_log
|
||||
from moviepy.editor import VideoFileClip
|
||||
from scenedetect import FrameTimecode
|
||||
|
||||
|
||||
def iterate_files(folder_path):
    """Yield the path of every file under ``folder_path``, recursively.

    Args:
        folder_path (str): root folder to walk.

    Yields:
        str: full path of each file found.
    """
    # os.walk already descends into every subdirectory, so a single pass
    # over its results covers the whole tree.
    # FIX: the original additionally called iterate_files(subdir) for each
    # subdirectory and discarded the resulting generator — dead code removed.
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            yield os.path.join(root, file)
|
||||
|
||||
|
||||
def iterate_folders(folder_path):
    """Yield the path of every subdirectory under ``folder_path``, recursively.

    Args:
        folder_path (str): root folder to walk.

    Yields:
        str: full path of each subdirectory found.
    """
    # os.walk is already recursive; the original's extra
    # iterate_folders(subdir) call only built a generator that was never
    # consumed — dead code removed.
    for root, dirs, _files in os.walk(folder_path):
        for subdir in dirs:
            yield os.path.join(root, subdir)
|
||||
|
||||
|
||||
def clone_folder_structure(root_src, root_dst, verbose=False):
    """Replicate the directory tree of ``root_src`` under ``root_dst`` (no files copied).

    Args:
        root_src (str): source tree whose folder layout is mirrored.
        root_dst (str): destination root; created if missing.
        verbose (bool): if True, print each folder as it is created.
    """
    # Collect relative sub-folder paths first, before anything is created
    # under root_dst.
    rel_paths = [os.path.relpath(src, root_src) for src in iterate_folders(root_src)]

    os.makedirs(root_dst, exist_ok=True)
    for rel in rel_paths:
        target = os.path.join(root_dst, rel)
        os.makedirs(target, exist_ok=True)
        if verbose:
            print(f"Create folder: '{target}'")
|
||||
|
||||
|
||||
def count_files(root, suffix=".mp4"):
    """Return how many files under ``root`` (recursive) end with ``suffix``.

    Args:
        root (str): folder to search.
        suffix (str): filename suffix to match (default ``".mp4"``).

    Returns:
        int: number of matching files.
    """
    cnt = len([path for path in iterate_files(root) if path.endswith(suffix)])
    return cnt
|
||||
|
||||
|
||||
def check_mp4_integrity(file_path, verbose=True, logger=None):
    """Return True if ``file_path`` can be opened as a video, False otherwise.

    Args:
        file_path (str): path of the .mp4 file to check.
        verbose (bool): if True, log the outcome via print_log.
        logger: optional MMLogger instance for print_log.

    Returns:
        bool: True if the file opened successfully.
    """
    try:
        clip = VideoFileClip(file_path)
    except Exception as e:
        # Broad catch is deliberate: any failure to open means "not intact".
        if verbose:
            print_log(f"Error: {e}", logger=logger)
            print_log(f"The MP4 file '{file_path}' is not intact.", logger=logger)
        return False
    # FIX: the original never closed the clip, leaking moviepy's ffmpeg
    # reader process and its pipes for every file checked.
    clip.close()
    if verbose:
        print_log(f"The MP4 file '{file_path}' is intact.", logger=logger)
    return True
|
||||
|
||||
|
||||
def count_frames(video_path):
    """Print and return the total frame count of ``video_path``.

    Args:
        video_path (str): path of the video to inspect.

    Returns:
        int | None: frame count, or None if the video cannot be opened.
        (The original returned None unconditionally, so returning the count
        is backward compatible.)
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video file '{video_path}'")
        return None

    # FIX: release the capture even if an exception fires between open and
    # release (the original had no such guard).
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Total frames in the video '{video_path}': {total_frames}")
        return total_frames
    finally:
        cap.release()
|
||||
|
||||
|
||||
def split_video(
    sample_path,
    scene_list,
    save_dir,
    target_fps=30,
    min_seconds=1,
    max_seconds=10,
    shorter_size=512,
    verbose=False,
    logger=None,
):
    """Cut ``sample_path`` into one clip per detected scene and save them under ``save_dir``.

    Args:
        sample_path (str): path of the source video.
        scene_list: iterable of (start, end) FrameTimecode pairs from scenedetect.
        save_dir (str): output folder for the clips.
        target_fps (int): fps of the written clips.
        min_seconds (float): scenes shorter than this are skipped.
        max_seconds (float): scenes are truncated to at most this length.
        shorter_size (int): target size (px) of the shorter video side.
        verbose (bool): if True, print ffmpeg output via print_log.
        logger: optional MMLogger instance for print_log.

    Returns:
        list[str]: paths of the clips that were written.
    """
    FFMPEG_PATH = get_ffmpeg_exe()

    save_path_list = []
    for idx, scene in enumerate(scene_list):
        s, t = scene  # FrameTimecode
        fps = s.framerate
        # cap the clip length at max_seconds (expressed in frames at the source fps)
        max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
        max_duration.frame_num = round(fps * max_seconds)
        duration = min(max_duration, t - s)
        # skip scenes shorter than min_seconds
        if duration.get_frames() < round(min_seconds * fps):
            continue

        # save path
        fname = os.path.basename(sample_path)
        fname_wo_ext = os.path.splitext(fname)[0]
        # TODO: fname pattern
        save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")

        # ffmpeg cmd
        cmd = [FFMPEG_PATH]

        # Only show ffmpeg output for the first call, which will display any
        # errors if it fails, and then break the loop. We only show error messages
        # for the remaining calls.
        # cmd += ['-v', 'error']

        # input path
        cmd += ["-i", sample_path]

        # clip to cut
        cmd += ["-nostdin", "-y", "-ss", str(s.get_seconds()), "-t", str(duration.get_seconds())]

        # target fps
        # cmd += ['-vf', 'select=mod(n\,2)']
        cmd += ["-r", f"{target_fps}"]

        # aspect ratio: scale the shorter side to shorter_size, keep dims even (-2)
        cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]
        # cmd += ['-vf', f"scale='if(gt(iw,ih),{shorter_size},trunc(ow/a/2)*2)':-2"]

        cmd += ["-map", "0", save_path]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # stderr is merged into stdout above, so communicate()'s second value
        # is always None — the original bound it to an unused `stderr` name.
        stdout, _ = proc.communicate()
        if verbose:
            print_log(stdout.decode("utf-8"), logger=logger)

        # BUG FIX: the original appended `sample_path` (the SOURCE video),
        # so callers integrity-checked the wrong file and never saw the
        # generated clip paths. Append the clip path instead.
        save_path_list.append(save_path)
        print_log(f"Video clip saved to '{save_path}'", logger=logger)

    return save_path_list
|
||||
Loading…
Reference in a new issue