mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-04-11 05:13:31 +02:00
* update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scoring/matching * update scene_cut * update scene_cut * update scene_cut[A * update scene_cut * update scene_cut * update scene_cut * update scene_cut * update scene_cut * update scene_cut * m * m * m * m * m * m * m * m * m * m * m * m * m * m * update readme * update readme * extract frames using opencv everywhere * extract frames using opencv everywhere * extract frames using opencv everywhere * filter panda10m * filter panda10m * m * m * m * m * m * m * m * m * m * m * m * m * m * m * m * m * m * ocr * add ocr * add main.sh * add ocr * add ocr * add ocr * add ocr * add ocr * add ocr * update scene_cut * update remove main.sh * update scoring * update scoring * update scoring * update README * update readme * update scene_cut * update readme * update scoring * update readme * update readme * update filter_panda10m * update readme * update readme * update launch.ipynb * update scene_cut * update scene_cut * update readme * update launch.ipynb
449 lines
14 KiB
Text
449 lines
14 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Data Process Pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Data Process Commands"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"\n",
|
|
"# TODO: change to your own project path!!!\n",
|
|
"OPEN_SORA_HOME = \"/path/to/Open-Sora/\"\n",
|
|
"\n",
|
|
"\n",
|
|
"def convert_dataset_cmd(input_dir, output_file, datatype=\"video\"):\n",
|
|
" commands = []\n",
|
|
" commands.append(f'echo \"Converting {input_dir} to {output_file}\"')\n",
|
|
" output_dir = os.path.dirname(output_file)\n",
|
|
"\n",
|
|
" commands.append(f\"mkdir -p {output_dir}\")\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"python -m tools.datasets.convert {datatype} {input_dir} --output {output_file}\")\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_video_info(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_info{ext}\"\n",
|
|
" output_format = ext[1:]\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting info of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(\n",
|
|
" f\"python -m tools.datasets.datautil {input_file} --output {output_file} --format {output_format} --info --fmin 1\"\n",
|
|
" )\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_video_info_torchvision(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_info{ext}\"\n",
|
|
" output_format = ext[1:]\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting info of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(\n",
|
|
" f\"python -m tools.datasets.datautil {input_file} --output {output_file} --format {output_format} --video-info --fmin 1\"\n",
|
|
" )\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_caption_llava7b_video(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_caption{ext}\"\n",
|
|
" output_format = ext[1:]\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting info of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"conda activate llava2\")\n",
|
|
" commands.append(\n",
|
|
" f\"torchrun --nproc_per_node 8 --standalone -m tools.caption.caption_llava {input_file} --dp-size 8 --tp-size 1 --model-path liuhaotian/llava-v1.6-mistral-7b --prompt video\"\n",
|
|
" )\n",
|
|
" commands.append(f\"conda activate opensora\")\n",
|
|
" commands.append(\n",
|
|
" f\"python -m tools.datasets.datautil {base}_caption_part*{ext} --output {output_file} --format {output_format} --intersection {input_file} --clean-caption --refine-llm-caption --remove-empty-caption\"\n",
|
|
" )\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_caption_load(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_caption{ext}\"\n",
|
|
" output_format = ext[1:]\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting caption of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(\n",
|
|
" f\"python -m tools.datasets.datautil {input_file} --output {output_file} --format {output_format} --load-caption json --remove-empty-caption --clean-caption\"\n",
|
|
" )\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_aesthetic_score(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_aes{ext}\"\n",
|
|
" output_format = ext[1:]\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting aesthetic score of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"torchrun --standalone --nproc_per_node 8 -m tools.scoring.aesthetic.inference {input_file}\")\n",
|
|
" commands.append(\n",
|
|
" f\"python -m tools.datasets.datautil {base}_aes_part*{ext} --output {output_file} --format {output_format} --sort aes\"\n",
|
|
" )\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_flow_score(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_flow{ext}\"\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting flow score of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"torchrun --standalone --nproc_per_node 8 -m tools.scoring.optical_flow.inference {input_file}\")\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_ocr(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_match{ext}\"\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting match score of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"torchrun --standalone --nproc_per_node 8 -m tools.scoring.ocr.inference {input_file}\")\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
" \n",
|
|
"def get_match_score(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_match{ext}\"\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting match score of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"torchrun --standalone --nproc_per_node 8 -m tools.scoring.matching.inference {input_file}\")\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_cmotion_score(input_file):\n",
|
|
" commands = []\n",
|
|
" base, ext = os.path.splitext(input_file)\n",
|
|
" output_file = f\"{base}_cmotion{ext}\"\n",
|
|
"\n",
|
|
" commands.append(f'echo \"Getting cmotion score of {input_file} to {output_file}\"')\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append(f\"python -m tools.caption.camera_motion_detect {input_file}\")\n",
|
|
" return \" && \".join(commands), output_file\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_commands(job_list):\n",
|
|
" commands = []\n",
|
|
" output_file = None\n",
|
|
" for job in job_list:\n",
|
|
" cmd = job.pop(\"cmd\")\n",
|
|
" if output_file is None:\n",
|
|
" command, output_file = cmd(**job)\n",
|
|
" commands.append(command)\n",
|
|
" else:\n",
|
|
" job[\"input_file\"] = output_file\n",
|
|
" command, output_file = cmd(**job)\n",
|
|
" commands.append(command)\n",
|
|
" commands.append(f'echo \"All Done!\"')\n",
|
|
" return \" && \".join(commands), output_file"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Remote Launch via Paramiko\n",
|
|
"\n",
|
|
"First, add hosts to `~/.ssh/config`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import paramiko\n",
|
|
"\n",
|
|
"HOSTS = [\"host-0\", \"host-1\", \"host-2\", \"host-3\", \"host-4\", \"host-5\", \"host-6\", \"host-7\"]\n",
|
|
"\n",
|
|
"# load from ~/.ssh/config\n",
|
|
"ssh_config = paramiko.SSHConfig()\n",
|
|
"user_config_file = os.path.expanduser(\"~/.ssh/config\")\n",
|
|
"if os.path.exists(user_config_file):\n",
|
|
" with open(user_config_file) as f:\n",
|
|
" ssh_config.parse(f)\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_ssh_config(hostname):\n",
|
|
" # get the configuration for the host\n",
|
|
" user_config = ssh_config.lookup(hostname)\n",
|
|
" cfg = {\n",
|
|
" \"hostname\": user_config[\"hostname\"],\n",
|
|
" \"username\": user_config[\"user\"],\n",
|
|
" \"port\": int(user_config[\"port\"]),\n",
|
|
" \"key_filename\": user_config[\"identityfile\"],\n",
|
|
" }\n",
|
|
" return cfg\n",
|
|
"\n",
|
|
"\n",
|
|
"def connect(hostname):\n",
|
|
" cfg = get_ssh_config(hostname)\n",
|
|
" # connect\n",
|
|
" client = paramiko.SSHClient()\n",
|
|
" client.set_missing_host_key_policy(paramiko.AutoAddPolicy())\n",
|
|
" client.connect(**cfg)\n",
|
|
" return client\n",
|
|
"\n",
|
|
"\n",
|
|
"def run_command(command, hostname, nohup=False, log_file=None, sleep=None):\n",
|
|
" client = connect(hostname)\n",
|
|
" print(\"HOST:\", hostname)\n",
|
|
" if sleep:\n",
|
|
" command = f\"sleep {sleep}; {command}\"\n",
|
|
" command = f\"bash -ic '{command}'\"\n",
|
|
" if log_file:\n",
|
|
" command = f\"{command} >> {log_file} 2>&1\"\n",
|
|
" if nohup:\n",
|
|
" command = f\"nohup {command} &\"\n",
|
|
" print(\"COMMAND:\", command)\n",
|
|
" stdin, stdout, stderr = client.exec_command(command, get_pty=False)\n",
|
|
"\n",
|
|
" stdout_str = stdout.read().decode()\n",
|
|
" stderr_str = stderr.read().decode()\n",
|
|
" if stdout_str:\n",
|
|
" print(\"==== STDOUT ====\\n\", stdout_str)\n",
|
|
" if stderr_str:\n",
|
|
" print(\"==== STDERR ====\\n\", stderr_str)\n",
|
|
"\n",
|
|
" client.close()\n",
|
|
"\n",
|
|
"\n",
|
|
"def run_command_all_hosts(command, hosts=HOSTS):\n",
|
|
" for hostname in hosts:\n",
|
|
" run_command(command, hostname)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Here are tools to examine machine's status."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def nvidia_smi(host):\n",
|
|
" if host:\n",
|
|
" run_command(\"nvidia-smi\", host)\n",
|
|
" else:\n",
|
|
" run_command_all_hosts(\"nvidia-smi\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def nvitop(host=None):\n",
|
|
" if host:\n",
|
|
" run_command(f\"/home/user/.local/bin/nvitop -1\", host)\n",
|
|
" else:\n",
|
|
" run_command_all_hosts(\"/home/user/.local/bin/nvitop -1\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def ps(host=None, interest=\"python|sleep|torchrun|colossal\", all=True):\n",
|
|
" cmd = \"ps aux\" if all else \"ps ux\"\n",
|
|
" if host:\n",
|
|
" if interest is None:\n",
|
|
" run_command(f\"{cmd} | cat\", host)\n",
|
|
" else:\n",
|
|
" run_command(f'{cmd} | cat | grep --color=never -E \"{interest}\"', host)\n",
|
|
" else:\n",
|
|
" if interest is None:\n",
|
|
" run_command_all_hosts(f\"{cmd} | cat\")\n",
|
|
" else:\n",
|
|
" run_command_all_hosts(f'{cmd} | cat | grep --color=never -E \"{interest}\"')\n",
|
|
"\n",
|
|
"\n",
|
|
"def kill(pid, host):\n",
|
|
" run_command(f\"kill -KILL {pid}\", host)\n",
|
|
"\n",
|
|
"\n",
|
|
"def pkill(interest, host):\n",
|
|
" run_command(f'pkill -9 -f \"{interest}\"', host)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Example\n",
|
|
"\n",
|
|
"Remote launch via paramiko."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sleep = None\n",
|
|
"run_command(cmd, host, log_file=log_file, nohup=True, sleep=sleep)\n",
|
|
"ps(host)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Using following commands to monitor the status of the jobs."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ps()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"nvitop(host)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"kill(, host)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Training"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def colossal_run(data_path, load_path=None):\n",
|
|
" commands = []\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" command = f\"colossalai run --nproc_per_node 8 --hostfile hostfile scripts/train.py configs/opensora-v1-1/train/video.py --wandb True --data-path {data_path}\"\n",
|
|
" if load_path:\n",
|
|
" command = f\"{command} --load-path {load_path}\"\n",
|
|
" commands.append(command)\n",
|
|
" cmd = \" && \".join(commands)\n",
|
|
" return cmd\n",
|
|
"\n",
|
|
"\n",
|
|
"def kill_all():\n",
|
|
" commands = []\n",
|
|
" commands.append(f\"cd {OPEN_SORA_HOME}\")\n",
|
|
" commands.append('cat hostfile | xargs -I \"{}\" ssh \"{}\" pkill -9 python')\n",
|
|
" cmd = \" && \".join(commands)\n",
|
|
" return cmd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Examples"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"host = \"host-0\"\n",
|
|
"log_file = os.path.join(OPEN_SORA_HOME, \"logs/train.log\")\n",
|
|
"data_path = \"/path/to/meta.csv\"\n",
|
|
"cmd = colossal_run(data_path)\n",
|
|
"print(cmd)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_command(cmd, host, log_file=log_file, nohup=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cmd = kill_all()\n",
|
|
"run_command(cmd, host)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.18"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|