mirror of
https://github.com/hpcaitech/Open-Sora.git
synced 2026-05-21 03:33:55 +02:00
[fix] transform may not fit enough
This commit is contained in:
parent
b44acc022c
commit
987283fa1b
|
|
@ -15,7 +15,7 @@ bucket_config = {
|
|||
}
|
||||
|
||||
# Define acceleration
|
||||
num_workers = 4
|
||||
num_workers = 0
|
||||
dtype = "bf16"
|
||||
grad_checkpoint = True
|
||||
plugin = "zero2"
|
||||
|
|
|
|||
|
|
@ -19,6 +19,25 @@ def get_aspect_ratios_dict(
|
|||
return est
|
||||
|
||||
|
||||
# S = 8294400
|
||||
ASPECT_RATIO_4K = {
|
||||
"0.39": (1798, 4610),
|
||||
"0.42": (1866, 4442),
|
||||
"0.48": (1996, 4158),
|
||||
"0.50": (2036, 4072),
|
||||
"0.52": (2076, 3992),
|
||||
"0.56": (2160, 3840),
|
||||
"0.66": (2340, 3546),
|
||||
"0.75": (2494, 3326),
|
||||
"1.00": (2880, 2880),
|
||||
"1.33": (3322, 2498),
|
||||
"1.52": (3550, 2336),
|
||||
"1.78": (3842, 2158),
|
||||
"1.92": (3990, 2078),
|
||||
"2.00": (4072, 2036),
|
||||
"2.10": (4174, 1988),
|
||||
}
|
||||
|
||||
# S = 2073600
|
||||
ASPECT_RATIO_1080P = {
|
||||
"0.39": (900, 2308),
|
||||
|
|
@ -264,4 +283,5 @@ ASPECT_RATIOS = {
|
|||
"720p": (921600, ASPECT_RATIO_720P),
|
||||
"1024": (1048576, ASPECT_RATIO_1024),
|
||||
"1080p": (2073600, ASPECT_RATIO_1080P),
|
||||
"4k": (8294400, ASPECT_RATIO_4K),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -148,14 +148,15 @@ def resize_crop_to_fill(pil_image, image_size):
|
|||
th, tw = image_size
|
||||
rh, rw = th / h, tw / w
|
||||
if rh > rw:
|
||||
sh, sw = th, int(w * rh)
|
||||
sh, sw = th, round(w * rh)
|
||||
image = pil_image.resize((sw, sh), Image.BICUBIC)
|
||||
i = 0
|
||||
j = int(round((sw - tw) / 2.0))
|
||||
else:
|
||||
sh, sw = int(h * rw), tw
|
||||
sh, sw = round(h * rw), tw
|
||||
image = pil_image.resize((sw, sh), Image.BICUBIC)
|
||||
i = int(round((sh - th) / 2.0))
|
||||
j = 0
|
||||
arr = np.array(image)
|
||||
assert i + th <= arr.shape[0] and j + tw <= arr.shape[1]
|
||||
return Image.fromarray(arr[i : i + th, j : j + tw])
|
||||
|
|
|
|||
|
|
@ -111,15 +111,16 @@ def resize_crop_to_fill(clip, target_size):
|
|||
th, tw = target_size[0], target_size[1]
|
||||
rh, rw = th / h, tw / w
|
||||
if rh > rw:
|
||||
sh, sw = th, int(w * rh)
|
||||
sh, sw = th, round(w * rh)
|
||||
clip = resize(clip, (sh, sw), "bilinear")
|
||||
i = 0
|
||||
j = int(round(sw - tw) / 2.0)
|
||||
else:
|
||||
sh, sw = int(h * rw), tw
|
||||
sh, sw = round(h * rw), tw
|
||||
clip = resize(clip, (sh, sw), "bilinear")
|
||||
i = int(round(sh - th) / 2.0)
|
||||
j = 0
|
||||
assert i + th <= clip.size(-2) and j + tw <= clip.size(-1)
|
||||
return crop(clip, i, j, th, tw)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -33,20 +33,20 @@ The columns are defined as follows:
|
|||
|
||||
## Dataset to CSV
|
||||
|
||||
As a starting point, `convert_dataset.py` is used to convert the dataset to a CSV file. You can do so with the following commands:
|
||||
As a starting point, `convert.py` is used to convert the dataset to a CSV file. You can do so with the following commands:
|
||||
|
||||
```bash
|
||||
python -m tools.datasets.convert_dataset DATASET-TYPE DATA_FOLDER
|
||||
python -m tools.datasets.convert DATASET-TYPE DATA_FOLDER
|
||||
# general video folder
|
||||
python -m tools.datasets.convert_dataset video VIDEO_FOLDER
|
||||
python -m tools.datasets.convert video VIDEO_FOLDER
|
||||
# general image folder
|
||||
python -m tools.datasets.convert_dataset image IMAGE_FOLDER
|
||||
python -m tools.datasets.convert image IMAGE_FOLDER
|
||||
# imagenet
|
||||
python -m tools.datasets.convert_dataset imagenet IMAGENET_FOLDER --split train
|
||||
python -m tools.datasets.convert imagenet IMAGENET_FOLDER --split train
|
||||
# ucf101
|
||||
python -m tools.datasets.convert_dataset ucf101 UCF101_FOLDER --split videos
|
||||
python -m tools.datasets.convert ucf101 UCF101_FOLDER --split videos
|
||||
# vidprom
|
||||
python -m tools.datasets.convert_dataset vidprom VIDPROM_FOLDER --info VidProM_semantic_unique.csv
|
||||
python -m tools.datasets.convert vidprom VIDPROM_FOLDER --info VidProM_semantic_unique.csv
|
||||
```
|
||||
|
||||
## Manage datasets
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ def get_video_info(path):
|
|||
ext = os.path.splitext(path)[1].lower()
|
||||
if ext in IMG_EXTENSIONS:
|
||||
im = cv2.imread(path)
|
||||
if im is None:
|
||||
return 0, 0, 0, np.nan, np.nan
|
||||
height, width = im.shape[:2]
|
||||
num_frames, fps = 1, np.nan
|
||||
else:
|
||||
|
|
|
|||
Loading…
Reference in a new issue