update scoring & scene_cut
This commit is contained in:
xyupeng 2024-04-06 16:33:06 +08:00 committed by GitHub
parent c9b81d8fd6
commit 0dfea90a89
12 changed files with 823 additions and 296 deletions

26
tools/scene_cut/README.md Normal file
View file

@ -0,0 +1,26 @@
## Scene Detection and Video Splitting
### Formatting
Input meta should be `{prefix}.csv` with column `'videoId'`
```
python tools/scene_cut/process_meta.py --task append_format --meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6.csv --split popular_6
```
Output is `{prefix}_format.csv` (with column `path`) and `{prefix}_intact.csv` (with column `intact` and `path`)
### Scene Detection
Input meta should be `{prefix}_format.csv`
```
python tools/scene_cut/scene_detect.py --meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6_format.csv
```
Output is `{prefix}_format_timestamp.csv`
### Video Splitting
Input meta should be `{prefix}_format_timestamp.csv` (output of the scene detection step)
```
python tools/scene_cut/main_cut_pandarallel.py \
--meta_path /mnt/hdd/data/pexels_new/raw/meta/popular_6_format_timestamp.csv \
--out_dir /mnt/hdd/data/pexels_new/scene_cut/data/popular_6
```
Output is `{out_dir}/{wo_ext}_scene-{sid}.mp4`
TODO: meta for video clips

View file

@ -0,0 +1,168 @@
import os
import argparse
import time
import subprocess
from tqdm import tqdm
import pandas as pd
from scenedetect import FrameTimecode
from imageio_ffmpeg import get_ffmpeg_exe
from concurrent.futures import ThreadPoolExecutor, as_completed
from mmengine.logging import MMLogger, print_log
from utils_video import is_intact_video, iterate_files, clone_folder_structure
def single_process(row, save_dir, logger=None):
    """Cut one meta row's video into per-scene clips.

    Args:
        row (pd.Series): meta row with columns 'path' and 'timestamp'.
        save_dir (str): output directory for the clips.
        logger (MMLogger | None): optional logger forwarded to split_video.

    Returns:
        bool: False if the timestamp cell is malformed, True otherwise.
    """
    import ast

    video_path = row['path']
    timestamp = row['timestamp']
    # A valid cell looks like "[('00:00:00.000', '00:00:05.000'), ...]".
    if not (timestamp.startswith('[') and timestamp.endswith(']')):
        return False
    # ast.literal_eval is safe on untrusted CSV content, unlike eval().
    scene_list = ast.literal_eval(timestamp)
    scene_list = [
        (FrameTimecode(s, fps=1), FrameTimecode(t, fps=1))
        for s, t in scene_list
    ]
    split_video(video_path, scene_list, save_dir=save_dir, logger=logger)
    return True
def split_video(
    video_path,
    scene_list,
    save_dir,
    min_seconds=None,
    max_seconds=None,
    target_fps=30,
    shorter_size=512,
    verbose=False,
    logger=None,
):
    """Cut a video into one clip per scene with ffmpeg.

    scenes shorter than min_seconds will be ignored;
    scenes longer than max_seconds will be cut to save the beginning max_seconds.
    Currently, the saved file name pattern is f'{fname}_scene-{idx}'.mp4

    Args:
        video_path (str): source video.
        scene_list (List[Tuple[FrameTimecode, FrameTimecode]]): each element is (s, t): start and end of a scene.
        save_dir (str): output directory.
        min_seconds (float | None)
        max_seconds (float | None)
        target_fps (int | None): output frame rate; None keeps source fps.
        shorter_size (int | None): target size of the shorter edge; None keeps
            the source resolution.
        verbose (bool): print ffmpeg output.
        logger (MMLogger | None)

    Returns:
        List[str]: paths of the clips that were written.
    """
    FFMPEG_PATH = get_ffmpeg_exe()
    save_path_list = []
    fname_wo_ext = os.path.splitext(os.path.basename(video_path))[0]
    for idx, scene in enumerate(scene_list):
        s, t = scene  # FrameTimecode
        if min_seconds is not None and (t - s).get_seconds() < min_seconds:
            continue
        duration = t - s
        if max_seconds is not None:
            fps = s.framerate
            max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
            max_duration.frame_num = round(fps * max_seconds)
            duration = min(max_duration, duration)
        # TODO: fname pattern
        save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")
        # -ss before -i enables fast keyframe seeking instead of decoding
        # from the start of the file.
        cmd = [FFMPEG_PATH, "-nostdin", "-y",
               "-ss", str(s.get_seconds()), "-i", video_path,
               "-t", str(duration.get_seconds())]
        if target_fps is not None:
            cmd += ["-r", f"{target_fps}"]
        if shorter_size is not None:
            # Scale the shorter edge to `shorter_size`; -2 keeps the aspect
            # ratio and forces even dimensions as most codecs require.
            cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]
        cmd += ["-map", "0", save_path]
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        stdout, stderr = proc.communicate()
        if verbose:
            print_log(stdout.decode("utf-8"), logger=logger)
        # BUG FIX: record the generated clip path, not the source video path.
        save_path_list.append(save_path)
        print_log(f"Video clip saved to '{save_path}'", logger=logger)
    return save_path_list
def parse_args():
    """Build and parse the CLI options for the threaded scene-cut script."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--root', default='F:/Panda-70M/')
    cli.add_argument('--split', default='test')
    cli.add_argument('--num_workers', type=int, default=5)
    return cli.parse_args()
def main():
    """Cut every video listed in the meta CSV into scene clips (sequential)."""
    # NOTE(review): CLI parsing is bypassed and paths are hard-coded for a
    # local run; restore parse_args() for general use.
    root, split = 'F:/pexels_new/', 'popular_2'
    meta_path = os.path.join(root, f'raw/meta/{split}_format_timestamp.csv')
    folder_dst = os.path.join(root, f'scene_cut/data/{split}')
    os.makedirs(folder_dst, exist_ok=True)

    meta = pd.read_csv(meta_path)
    logger = None  # plug in MMLogger.get_instance(...) here for file logging

    # max_workers=1 keeps ffmpeg invocations sequential; raise it to
    # parallelize at the cost of disk/CPU contention. The context manager
    # guarantees the executor is shut down even on error.
    with ThreadPoolExecutor(max_workers=1) as pool:
        tasks = [pool.submit(single_process, row, folder_dst, logger)
                 for _, row in meta.iterrows()]
        for task in tqdm(as_completed(tasks), total=len(meta)):
            task.result()
# Script entry point: run the sequential scene-cut pipeline.
if __name__ == '__main__':
    main()

View file

@ -0,0 +1,163 @@
import os
import argparse
import time
import subprocess
from tqdm import tqdm
import pandas as pd
from scenedetect import FrameTimecode
from functools import partial
from pandarallel import pandarallel
from imageio_ffmpeg import get_ffmpeg_exe
from mmengine.logging import MMLogger, print_log
from utils_video import is_intact_video, iterate_files, clone_folder_structure
def process_single_row(row, save_dir, log_name=None):
    """Cut one meta row's video into scene clips (pandarallel worker).

    Args:
        row (pd.Series): meta row with columns 'path' and 'timestamp'.
        save_dir (str): output directory for the clips.
        log_name (str | None): name of an existing MMLogger to look up; only
            the name is passed because logger objects do not pickle across
            worker processes.

    Returns:
        bool: False if the timestamp cell is malformed, True otherwise.
    """
    import ast

    video_path = row['path']
    logger = MMLogger.get_instance(log_name) if log_name is not None else None
    timestamp = row['timestamp']
    # Expected cell format: "[('00:00:00.000', '00:00:05.000'), ...]"
    if not (timestamp.startswith('[') and timestamp.endswith(']')):
        return False
    # ast.literal_eval is safe on untrusted CSV content, unlike eval().
    scene_list = ast.literal_eval(timestamp)
    scene_list = [
        (FrameTimecode(s, fps=1), FrameTimecode(t, fps=1))
        for s, t in scene_list
    ]
    split_video(video_path, scene_list, save_dir=save_dir,
                min_seconds=2, max_seconds=15, shorter_size=720,
                logger=logger)
    # Return True on success for symmetry with the False failure path.
    return True
def split_video(
    video_path,
    scene_list,
    save_dir,
    min_seconds=None,
    max_seconds=None,
    target_fps=30,
    shorter_size=512,
    verbose=False,
    logger=None,
):
    """Cut a video into one clip per scene with ffmpeg.

    scenes shorter than min_seconds will be ignored;
    scenes longer than max_seconds will be cut to save the beginning max_seconds.
    Currently, the saved file name pattern is f'{fname}_scene-{idx}'.mp4

    Args:
        video_path (str): source video.
        scene_list (List[Tuple[FrameTimecode, FrameTimecode]]): each element is (s, t): start and end of a scene.
        save_dir (str): output directory.
        min_seconds (float | None)
        max_seconds (float | None)
        target_fps (int | None): output frame rate; None keeps source fps.
        shorter_size (int | None): target size of the shorter edge; None keeps
            the source resolution.
        verbose (bool): log a message per saved clip.
        logger (MMLogger | None)

    Returns:
        List[str]: paths of the clips that were written.
    """
    FFMPEG_PATH = get_ffmpeg_exe()
    save_path_list = []
    fname_wo_ext = os.path.splitext(os.path.basename(video_path))[0]
    for idx, scene in enumerate(scene_list):
        s, t = scene  # FrameTimecode
        if min_seconds is not None and (t - s).get_seconds() < min_seconds:
            continue
        duration = t - s
        if max_seconds is not None:
            fps = s.framerate
            max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
            max_duration.frame_num = round(fps * max_seconds)
            duration = min(max_duration, duration)
        # TODO: fname pattern
        save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")
        # -ss after -i is very slow; put -ss before -i for keyframe seeking.
        cmd = [FFMPEG_PATH, "-nostdin", "-y",
               "-ss", str(s.get_seconds()), "-i", video_path,
               "-t", str(duration.get_seconds())]
        if target_fps is not None:
            cmd += ["-r", f"{target_fps}"]
        if shorter_size is not None:
            # Scale the shorter edge to `shorter_size`; -2 keeps the aspect
            # ratio and forces even dimensions as most codecs require.
            cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]
        cmd += ["-map", "0", save_path]
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        stdout, stderr = proc.communicate()
        # BUG FIX: record the generated clip path, not the source video path.
        save_path_list.append(save_path)
        if verbose:
            print_log(f"Video clip saved to '{save_path}'", logger=logger)
    return save_path_list
def parse_args():
    """Build and parse the CLI options for the pandarallel scene-cut script."""
    ap = argparse.ArgumentParser()
    for flag, kwargs in (
        ('--meta_path', dict(default='./data/pexels_new/raw/meta/popular_5_format_timestamp.csv')),
        ('--out_dir', dict(default='./data/pexels_new/scene_cut/data/popular_5')),
        ('--num_workers', dict(default=5, type=int)),
    ):
        ap.add_argument(flag, **kwargs)
    return ap.parse_args()
def main():
    """Run scene cutting over a meta CSV in parallel with pandarallel."""
    args = parse_args()
    meta_path = args.meta_path
    out_dir = args.out_dir
    # Clips must live under '.../data/<split>' so the log file can be written
    # next to the 'data' folder. Raise explicitly instead of using `assert`,
    # which is stripped under `python -O`.
    if os.path.basename(os.path.dirname(out_dir)) != 'data':
        raise ValueError(f"out_dir must be of the form '.../data/<split>', got '{out_dir}'")
    os.makedirs(out_dir, exist_ok=True)

    meta = pd.read_csv(meta_path)

    # create logger: one log file per run, named after the split + timestamp.
    # Workers re-fetch it by name via MMLogger.get_instance(log_name).
    log_dir = os.path.dirname(out_dir)
    log_name = os.path.basename(out_dir)
    timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime(time.time()))
    log_path = os.path.join(log_dir, f"{log_name}_{timestamp}.log")
    MMLogger.get_instance(log_name, log_file=log_path)

    pandarallel.initialize(progress_bar=True)
    worker = partial(process_single_row, save_dir=out_dir, log_name=log_name)
    meta.parallel_apply(worker, axis=1)
# Script entry point: run the pandarallel scene-cut pipeline.
if __name__ == '__main__':
    main()

View file

@ -0,0 +1,266 @@
"""
1. format_raw_meta()
- only keep intact videos
- add 'path' column (abs path)
2. create_meta_for_folder()
"""
import os
# os.chdir('../..')
print(f'Current working directory: {os.getcwd()}')
import argparse
import json
import subprocess
import pandas as pd
from tqdm import tqdm
import pickle as pkl
from pandarallel import pandarallel
from functools import partial
import numpy as np
from utils_video import is_intact_video
def has_downloaded_success(json_path):
    """Return True iff the sidecar JSON marks a video download as successful.

    The downloader writes '{video_id}.json' next to each video; a download is
    considered successful only when the file exists, parses as JSON, and has
    a boolean 'success' field equal to True.
    """
    if not os.path.exists(json_path):
        return False
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
        # `is True` also rejects truthy non-bool values such as 1 or "yes".
        return data.get('success') is True
    except Exception:
        # Malformed JSON / non-dict payloads count as failed downloads.
        return False
def split_meta_csv(chunk_size=60000, root='./data/Panda-70M',
                   meta_name='meta/panda70m_training_10m.csv'):
    """Split one large meta CSV into ordered chunks 'meta/train_{i}.csv'.

    Args:
        chunk_size (int): maximum number of rows per output file.
        root (str): dataset root containing the 'meta' folder (default keeps
            the previous hard-coded behavior).
        meta_name (str): meta CSV path relative to `root`.
    """
    meta_path = os.path.join(root, meta_name)
    df = pd.read_csv(meta_path)
    # iloc slicing clamps at the end of the frame, so the final iteration
    # already emits the partial remainder chunk. (The old trailing block
    # wrote that remainder a second time as train_{idx + 1}.)
    for idx, start in enumerate(range(0, len(df), chunk_size)):
        out_path = os.path.join(root, f'meta/train_{idx}.csv')
        df.iloc[start:start + chunk_size].to_csv(out_path, index=False)
def remove_index():
    """Re-save 'your_file.csv' without its stored index column."""
    frame = pd.read_csv('your_file.csv', index_col=0)
    frame.to_csv('your_file_without_index.csv', index=False)
def append_format(meta_path, mode='.mp4'):
    """Write '{split}_format.csv' keeping only rows whose video is usable.

    - filter broken videos; only intact videos are kept
    - add column 'path'
    input csv should satisfy:
    - name should be: {split}.csv
    - contain column 'videoID'/'videoId'
    """
    meta_dirname = os.path.dirname(meta_path)
    assert meta_dirname.endswith('raw/meta')
    root_raw = os.path.dirname(meta_dirname)
    split = os.path.splitext(os.path.basename(meta_path))[0]

    meta = pd.read_csv(meta_path)
    kept_rows, kept_paths = [], []
    for _, row in tqdm(meta.iterrows(), total=len(meta)):
        video_id = row['videoId']  # pexels_new ('videoID' for panda)
        video_path = os.path.join(root_raw, f'data/{split}/{video_id}.mp4')
        if mode == '.mp4':
            usable = is_intact_video(video_path)
        elif mode == '.json':
            json_path = os.path.join(root_raw, f'data/{split}/{video_id}.json')
            usable = has_downloaded_success(json_path)
        else:
            raise ValueError
        if not usable:
            continue
        kept_rows.append(row)
        kept_paths.append(video_path)

    new_meta = pd.DataFrame(kept_rows)
    new_meta['path'] = kept_paths
    out_path = os.path.join(root_raw, f'meta/{split}_format.csv')
    new_meta.to_csv(out_path, index=False)
    print(f'New meta (shape={new_meta.shape}) saved to \'{out_path}\'')
def append_format_pandarallel(meta_path, split, mode='.mp4'):
    """Parallel version of append_format: mark intact videos and save metas.

    Writes two files next to the input:
    - '{name}_intact.csv': all rows plus boolean 'intact' and 'path' columns.
    - '{name}_format.csv': only intact rows, with 'path' but without 'intact'.

    Args:
        meta_path (str): input CSV ('.../raw/meta/{split}.csv') containing a
            'videoId' column.
        split (str): data subfolder name under '{root}/data/'.
        mode (str): '.mp4' checks video integrity; '.json' checks the
            downloader's sidecar JSON.
    """
    meta_dirname = os.path.dirname(meta_path)
    assert meta_dirname.endswith('raw/meta')
    root_raw = os.path.dirname(meta_dirname)
    wo_ext = os.path.splitext(os.path.basename(meta_path))[0]
    meta = pd.read_csv(meta_path)

    def is_intact(row, mode='.json'):
        # Runs inside pandarallel worker processes; returns (intact, path).
        video_id = row['videoId']  # pexels_new
        video_path = os.path.join(root_raw, f'data/{split}/{video_id}.mp4')
        if mode == '.mp4':
            return is_intact_video(video_path), video_path
        elif mode == '.json':
            json_path = os.path.join(root_raw, f'data/{split}/{video_id}.json')
            return has_downloaded_success(json_path), video_path
        else:
            raise ValueError

    pandarallel.initialize(progress_bar=True)
    ret = meta.parallel_apply(partial(is_intact, mode=mode), axis=1)
    intact, paths = zip(*ret)
    meta['intact'] = intact
    meta['path'] = paths

    out_path = os.path.join(root_raw, f'meta/{wo_ext}_intact.csv')
    meta.to_csv(out_path, index=False)
    print(f'New meta (shape={meta.shape}) with intact info saved to \'{out_path}\'')

    # .copy() avoids pandas' SettingWithCopyWarning: dropping inplace on a
    # boolean-mask slice operates on a view and may be silently discarded.
    meta_format = meta[np.array(intact)].copy()
    meta_format.drop('intact', axis=1, inplace=True)
    out_path = os.path.join(root_raw, f'meta/{wo_ext}_format.csv')
    meta_format.to_csv(out_path, index=False)
    print(f'New meta (shape={meta_format.shape}) with format info saved to \'{out_path}\'')
def create_subset(meta_path, num_rows=100):
    """Save the first `num_rows` rows of a meta CSV as '{name}_head-{n}{ext}'.

    Args:
        meta_path (str): input CSV path.
        num_rows (int): number of leading rows to keep (default 100, matching
            the previous hard-coded behavior).
    """
    meta = pd.read_csv(meta_path)
    meta_subset = meta.iloc[:num_rows]
    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f'{wo_ext}_head-{num_rows}{ext}'
    meta_subset.to_csv(out_path, index=False)
    print(f'New meta (shape={meta_subset.shape}) saved to \'{out_path}\'')
def append_cut(root='./data/Panda-70M'):
    """Expand per-video meta rows into per-clip rows ('{name}_cut.csv').

    input csv should satisfy:
    - name should be {split}_intact.csv
    - contain columns 'timestamp'/'caption'/'matching_score', each a
      stringified list with one entry per clip
    """
    import ast

    split = 'test'
    meta_path = os.path.join(root, f'processed/meta/{split}_intact.csv')
    wo_ext, ext = os.path.splitext(meta_path)
    out_path = f'{wo_ext}_cut{ext}'
    meta = pd.read_csv(meta_path)
    new_meta = []
    for _, row in tqdm(meta.iterrows(), total=len(meta)):
        video_id = row['videoID']
        # ast.literal_eval is a safe replacement for eval() on CSV cells.
        timestamps = ast.literal_eval(row['timestamp'])
        captions = ast.literal_eval(row['caption'])
        scores = ast.literal_eval(row['matching_score'])
        for idx_c in range(len(timestamps)):
            path_i = os.path.join(root, f'processed/{split}/{video_id}_scene-{idx_c}.mp4')
            new_meta.append([f'{video_id}_scene-{idx_c}', path_i,
                             timestamps[idx_c], captions[idx_c], scores[idx_c]])
    columns = ['videoID', 'path', 'timestamp', 'text', 'match_official']
    new_meta = pd.DataFrame(new_meta, columns=columns)
    new_meta.to_csv(out_path, index=False)
    print(f'New meta (shape={new_meta.shape}) saved to \'{out_path}\'')
def debug_meta_topk():
    """Inspect the highest/lowest-scored clips of a scored meta CSV.

    Debug helper meant to be run under a debugger: the top-k lists are held
    in locals for inspection; nothing is returned or saved.
    """
    meta_path = 'F:/Panda-70M/meta/test_intact_cut_flow.csv'
    meta = pd.read_csv(meta_path)
    score_column = 'flow_score'
    # 10 best and 200 worst rows by optical-flow score.
    topk = meta.nlargest(10, columns=score_column)
    topk_s = meta.nsmallest(200, columns=score_column)
    topk_list = [(row['path'], row['caption'], row[score_column]) for idx, row in topk.iterrows()]
    topk_s_list = [(row['path'], row['caption'], row[score_column]) for idx, row in topk_s.iterrows()]
    x = 0  # breakpoint anchor; inspect topk_list / topk_s_list here
def parse_args():
    """Build and parse the CLI options for the meta-processing script."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--task', default='append_format')
    cli.add_argument('--meta_path', default='./data/pexels_new/raw/meta/popular_1.csv')
    cli.add_argument('--split', default='popular_5')
    cli.add_argument('--num_workers', type=int, default=5)
    return cli.parse_args()
# Script entry point: dispatch on --task. Only 'append_format' and
# 'create_subset' are wired up; the other pipeline steps stay commented out.
if __name__ == '__main__':
    # split_meta_csv()
    args = parse_args()
    meta_path = args.meta_path
    task = args.task
    if task == 'append_format':
        # append_format(meta_path=meta_path, mode='.mp4')
        # '.json' mode trusts the downloader's sidecar JSON instead of
        # decoding each video, which is much faster.
        append_format_pandarallel(meta_path=meta_path, split=args.split, mode='.json')
    elif task == 'create_subset':
        create_subset(meta_path=meta_path)
    else:
        raise ValueError
    # append_cut(root=root)
    # append_score(root=root)
    # debug_meta_topk()

View file

@ -0,0 +1,89 @@
import argparse
import os
import numpy as np
from tqdm import tqdm
import pandas as pd
from functools import partial
from pandarallel import pandarallel
from scenedetect import detect, ContentDetector, AdaptiveDetector, FrameTimecode
def process_single_row(row):
    """Detect scenes for one meta row.

    Returns:
        (bool, str): (success, stringified list of (start, end) timecodes);
        on failure the string is empty.
    """
    video_path = row['path']
    # Adaptive detection compares frame scores to a rolling average, which
    # copes better with gradual lighting changes than a fixed threshold.
    detector = AdaptiveDetector(
        adaptive_threshold=3.0,
        # luma_only=True,
    )
    # detector = ContentDetector()
    try:
        scenes = detect(video_path, detector, start_in_scene=True)
        stamps = [(start.get_timecode(), end.get_timecode()) for start, end in scenes]
    except Exception as e:
        print(f'Video \'{video_path}\' with error {e}')
        return False, ''
    return True, str(stamps)
def main():
    """Sequential scene detection over a hard-coded meta CSV (debug path)."""
    meta_path = 'F:/pexels_new/raw/meta/popular_1_format.csv'
    meta = pd.read_csv(meta_path)
    timestamp_list = []
    for _, row in tqdm(meta.iterrows()):
        # A fresh detector per video: detectors keep per-run state.
        detector = AdaptiveDetector(
            adaptive_threshold=1.5,
            luma_only=True,
        )
        # detector = ContentDetector()
        scenes = detect(row['path'], detector, start_in_scene=True)
        timestamp_list.append(
            [(start.get_timecode(), end.get_timecode()) for start, end in scenes]
        )
    meta['timestamp'] = timestamp_list
    stem, ext = os.path.splitext(meta_path)
    out_path = f"{stem}_timestamp{ext}"
    meta.to_csv(out_path, index=False)
    print(f"New meta with timestamp saved to '{out_path}'.")
def parse_args():
    """Build and parse the CLI options for the scene-detection script."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--meta_path', default='F:/pexels_new/raw/meta/popular_1_format.csv')
    cli.add_argument('--num_workers', type=int, default=5)
    return cli.parse_args()
def main_pandarallel():
    """Detect scenes for every row of the meta CSV using pandarallel workers."""
    args = parse_args()
    meta_path = args.meta_path
    meta = pd.read_csv(meta_path)

    pandarallel.initialize(progress_bar=True)
    results = meta.parallel_apply(process_single_row, axis=1)
    succeeded, timestamps = zip(*results)

    # Attach timestamps first, then drop the rows whose detection failed.
    meta['timestamp'] = timestamps
    meta = meta[np.array(succeeded)]

    stem, ext = os.path.splitext(meta_path)
    out_path = f"{stem}_timestamp{ext}"
    meta.to_csv(out_path, index=False)
    print(f"New meta (shape={meta.shape}) with timestamp saved to '{out_path}'.")
# Script entry point: run the parallel scene-detection pipeline.
if __name__ == '__main__':
    main_pandarallel()

View file

@ -0,0 +1,97 @@
import os
import cv2
from mmengine.logging import print_log
from moviepy.editor import VideoFileClip
def iterate_files(folder_path):
    """Yield the path of every file under `folder_path`, recursively.

    os.walk already descends into subdirectories, so no explicit recursion
    is needed (the old recursive call discarded its generator and was a
    no-op).
    """
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            yield os.path.join(root, file)
def iterate_folders(folder_path):
    """Yield the path of every subdirectory under `folder_path`, recursively.

    os.walk already recurses; the old self-call discarded its generator and
    was a no-op, so it is removed.
    """
    for root, dirs, _files in os.walk(folder_path):
        for subdir in dirs:
            yield os.path.join(root, subdir)
def clone_folder_structure(root_src, root_dst, verbose=False):
    """Recreate the directory tree of `root_src` under `root_dst` (dirs only)."""
    os.makedirs(root_dst, exist_ok=True)
    for src_path in iterate_folders(root_src):
        dst_path = os.path.join(root_dst, os.path.relpath(src_path, root_src))
        os.makedirs(dst_path, exist_ok=True)
        if verbose:
            print(f"Create folder: '{dst_path}'")
def is_intact_video(video_path, mode='moviepy', verbose=False, logger=None):
    """Check whether a video file exists and can be opened.

    Args:
        video_path (str): video to check.
        mode (str): 'moviepy' tries to parse the file with VideoFileClip;
            'cv2' only checks that OpenCV can open it (faster, weaker check).
        verbose (bool): log the outcome via print_log.
        logger (MMLogger | None): optional logger.

    Returns:
        bool: True iff the file exists and opens cleanly.
    """
    if not os.path.exists(video_path):
        if verbose:
            print_log(f"Could not find '{video_path}'", logger=logger)
        return False
    if mode == 'moviepy':
        try:
            VideoFileClip(video_path)
            if verbose:
                print_log(f"The video file '{video_path}' is intact.", logger=logger)
            return True
        except Exception as e:
            if verbose:
                print_log(f"Error: {e}", logger=logger)
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
    elif mode == 'cv2':
        cap = None
        try:
            cap = cv2.VideoCapture(video_path)
            if cap.isOpened():
                if verbose:
                    print_log(f"The video file '{video_path}' is intact.", logger=logger)
                return True
            # BUG FIX: an existing but unopenable file previously fell
            # through and implicitly returned None instead of False.
            if verbose:
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
        except Exception as e:
            if verbose:
                print_log(f"Error: {e}", logger=logger)
                print_log(f"The video file '{video_path}' is not intact.", logger=logger)
            return False
        finally:
            # Release the capture handle in every path.
            if cap is not None:
                cap.release()
    else:
        raise ValueError
def count_frames(video_path, logger=None):
    """Print the total frame count of `video_path` via OpenCV metadata."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print_log(f"Error: Could not open video file '{video_path}'", logger=logger)
        return
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print_log(f"Total frames in the video '{video_path}': {n_frames}", logger=logger)
    cap.release()
def count_files(root, suffix=".mp4"):
    """Count files under `root` whose name ends with `suffix`."""
    return sum(1 for path in iterate_files(root) if path.endswith(suffix))

View file

@ -8,6 +8,10 @@ import pandas as pd
import torch
import torch.nn.functional as F
from torchvision.datasets.folder import pil_loader
from tqdm import tqdm
IMG_EXTENSIONS = (
@ -24,6 +28,15 @@ IMG_EXTENSIONS = (
VID_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")
def is_video(filename):
ext = os.path.splitext(filename)[-1].lower()
return ext in VID_EXTENSIONS
IMG_EXTENSIONS = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp")
VID_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")
def is_video(filename):
ext = os.path.splitext(filename)[-1].lower()
return ext in VID_EXTENSIONS
@ -52,13 +65,12 @@ class VideoTextDataset(torch.utils.data.Dataset):
def __getitem__(self, index):
row = self.meta.iloc[index]
path = row["path"]
path = row['path']
if is_video(path):
img = extract_frames(path, points=[0.5])[0]
else:
img = pil_loader(path)
img = self.transform(img)
text = row["text"]

View file

@ -1,11 +0,0 @@
# Scene Detection and Video Split
Raw videos from the Internet may be too long for training.
Thus, we detect scenes in raw videos and split them into short clips based on the scenes.
First prepare the video processing packages.
```bash
pip install scenedetect moviepy opencv-python
```
Then run `scene_detect.py`. We provide efficient processing using `multiprocessing`. Don't forget to specify your own dataset path.

View file

@ -1,138 +0,0 @@
import os
from multiprocessing import Pool
from mmengine.logging import MMLogger
from scenedetect import ContentDetector, detect
from tqdm import tqdm
from opensora.utils.misc import get_timestamp
from .utils import check_mp4_integrity, clone_folder_structure, iterate_files, split_video
# config
target_fps = 30 # int
shorter_size = 512 # int
min_seconds = 1 # float
max_seconds = 5 # float
assert max_seconds > min_seconds
cfg = dict(
target_fps=target_fps,
min_seconds=min_seconds,
max_seconds=max_seconds,
shorter_size=shorter_size,
)
def process_folder(root_src, root_dst):
# create logger
folder_path_log = os.path.dirname(root_dst)
log_name = os.path.basename(root_dst)
timestamp = get_timestamp()
log_path = os.path.join(folder_path_log, f"{log_name}_{timestamp}.log")
logger = MMLogger.get_instance(log_name, log_file=log_path)
# clone folder structure
clone_folder_structure(root_src, root_dst)
# all source videos
mp4_list = [x for x in iterate_files(root_src) if x.endswith(".mp4")]
mp4_list = sorted(mp4_list)
for idx, sample_path in tqdm(enumerate(mp4_list)):
folder_src = os.path.dirname(sample_path)
folder_dst = os.path.join(root_dst, os.path.relpath(folder_src, root_src))
# check src video integrity
if not check_mp4_integrity(sample_path, logger=logger):
continue
# detect scenes
scene_list = detect(sample_path, ContentDetector(), start_in_scene=True)
# split scenes
save_path_list = split_video(sample_path, scene_list, save_dir=folder_dst, **cfg, logger=logger)
# check integrity of generated clips
for x in save_path_list:
check_mp4_integrity(x, logger=logger)
def scene_detect():
"""detect & cut scenes using a single process
Expected dataset structure:
data/
your_dataset/
raw_videos/
xxx.mp4
yyy.mp4
This function results in:
data/
your_dataset/
raw_videos/
xxx.mp4
yyy.mp4
zzz.mp4
clips/
xxx_scene-0.mp4
yyy_scene-0.mp4
yyy_scene-1.mp4
"""
# TODO: specify your dataset root
root_src = f"./data/your_dataset/raw_videos"
root_dst = f"./data/your_dataset/clips"
process_folder(root_src, root_dst)
def scene_detect_mp():
"""detect & cut scenes using multiple processes
Expected dataset structure:
data/
your_dataset/
raw_videos/
split_0/
xxx.mp4
yyy.mp4
split_1/
xxx.mp4
yyy.mp4
This function results in:
data/
your_dataset/
raw_videos/
split_0/
xxx.mp4
yyy.mp4
split_1/
xxx.mp4
yyy.mp4
clips/
split_0/
xxx_scene-0.mp4
yyy_scene-0.mp4
split_1/
xxx_scene-0.mp4
yyy_scene-0.mp4
yyy_scene-1.mp4
"""
# TODO: specify your dataset root
root_src = f"./data/your_dataset/raw_videos"
root_dst = f"./data/your_dataset/clips"
# TODO: specify your splits
splits = ["split_0", "split_1"]
# process folders
root_src_list = [os.path.join(root_src, x) for x in splits]
root_dst_list = [os.path.join(root_dst, x) for x in splits]
with Pool(processes=len(splits)) as pool:
pool.starmap(process_folder, list(zip(root_src_list, root_dst_list)))
if __name__ == "__main__":
# TODO: choose single process or multiprocessing
scene_detect()
# scene_detect_mp()

View file

@ -1,145 +0,0 @@
import os
import subprocess
import cv2
from imageio_ffmpeg import get_ffmpeg_exe
from mmengine.logging import print_log
from moviepy.editor import VideoFileClip
from scenedetect import FrameTimecode
def iterate_files(folder_path):
for root, dirs, files in os.walk(folder_path):
# root contains the current directory path
# dirs contains the list of subdirectories in the current directory
# files contains the list of files in the current directory
# Process files in the current directory
for file in files:
file_path = os.path.join(root, file)
# print("File:", file_path)
yield file_path
# Process subdirectories and recursively call the function
for subdir in dirs:
subdir_path = os.path.join(root, subdir)
# print("Subdirectory:", subdir_path)
iterate_files(subdir_path)
def iterate_folders(folder_path):
for root, dirs, files in os.walk(folder_path):
for subdir in dirs:
subdir_path = os.path.join(root, subdir)
yield subdir_path
# print("Subdirectory:", subdir_path)
iterate_folders(subdir_path)
def clone_folder_structure(root_src, root_dst, verbose=False):
src_path_list = iterate_folders(root_src)
src_relpath_list = [os.path.relpath(x, root_src) for x in src_path_list]
os.makedirs(root_dst, exist_ok=True)
dst_path_list = [os.path.join(root_dst, x) for x in src_relpath_list]
for folder_path in dst_path_list:
os.makedirs(folder_path, exist_ok=True)
if verbose:
print(f"Create folder: '{folder_path}'")
def count_files(root, suffix=".mp4"):
files_list = iterate_files(root)
cnt = len([x for x in files_list if x.endswith(suffix)])
return cnt
def check_mp4_integrity(file_path, verbose=True, logger=None):
try:
VideoFileClip(file_path)
if verbose:
print_log(f"The MP4 file '{file_path}' is intact.", logger=logger)
return True
except Exception as e:
if verbose:
print_log(f"Error: {e}", logger=logger)
print_log(f"The MP4 file '{file_path}' is not intact.", logger=logger)
return False
def count_frames(video_path):
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Could not open video file '{video_path}'")
return
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"Total frames in the video '{video_path}': {total_frames}")
cap.release()
def split_video(
sample_path,
scene_list,
save_dir,
target_fps=30,
min_seconds=1,
max_seconds=10,
shorter_size=512,
verbose=False,
logger=None,
):
FFMPEG_PATH = get_ffmpeg_exe()
save_path_list = []
for idx, scene in enumerate(scene_list):
s, t = scene # FrameTimecode
fps = s.framerate
max_duration = FrameTimecode(timecode="00:00:00", fps=fps)
max_duration.frame_num = round(fps * max_seconds)
duration = min(max_duration, t - s)
if duration.get_frames() < round(min_seconds * fps):
continue
# save path
fname = os.path.basename(sample_path)
fname_wo_ext = os.path.splitext(fname)[0]
# TODO: fname pattern
save_path = os.path.join(save_dir, f"{fname_wo_ext}_scene-{idx}.mp4")
# ffmpeg cmd
cmd = [FFMPEG_PATH]
# Only show ffmpeg output for the first call, which will display any
# errors if it fails, and then break the loop. We only show error messages
# for the remaining calls.
# cmd += ['-v', 'error']
# input path
cmd += ["-i", sample_path]
# clip to cut
cmd += ["-nostdin", "-y", "-ss", str(s.get_seconds()), "-t", str(duration.get_seconds())]
# target fps
# cmd += ['-vf', 'select=mod(n\,2)']
cmd += ["-r", f"{target_fps}"]
# aspect ratio
cmd += ["-vf", f"scale='if(gt(iw,ih),-2,{shorter_size})':'if(gt(iw,ih),{shorter_size},-2)'"]
# cmd += ['-vf', f"scale='if(gt(iw,ih),{shorter_size},trunc(ow/a/2)*2)':-2"]
cmd += ["-map", "0", save_path]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout, stderr = proc.communicate()
if verbose:
stdout = stdout.decode("utf-8")
print_log(stdout, logger=logger)
save_path_list.append(sample_path)
print_log(f"Video clip saved to '{save_path}'", logger=logger)
return save_path_list