[fix] resize-crop transform may yield an image smaller than the target size

This commit is contained in:
Zangwei Zheng 2024-03-30 17:05:15 +08:00
parent b44acc022c
commit 987283fa1b
7 changed files with 36 additions and 12 deletions

View file

@ -15,7 +15,7 @@ bucket_config = {
}
# Define acceleration
num_workers = 4
num_workers = 0
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"

View file

@ -19,6 +19,25 @@ def get_aspect_ratios_dict(
return est
# Aspect-ratio buckets for the "4k" resolution.
# S = 8294400 (= 3840 * 2160); the product of each pair below is close to S.
# Each key is approximately first / second of its pair, written with two
# decimals (e.g. 2160 / 3840 = 0.5625 -> "0.56").
# NOTE(review): pairs look like (height, width) — confirm against callers.
ASPECT_RATIO_4K = {
"0.39": (1798, 4610),
"0.42": (1866, 4442),
"0.48": (1996, 4158),
"0.50": (2036, 4072),
"0.52": (2076, 3992),
"0.56": (2160, 3840),
"0.66": (2340, 3546),
"0.75": (2494, 3326),
"1.00": (2880, 2880),
"1.33": (3322, 2498),
"1.52": (3550, 2336),
"1.78": (3842, 2158),
"1.92": (3990, 2078),
"2.00": (4072, 2036),
"2.10": (4174, 1988),
}
# S = 2073600
ASPECT_RATIO_1080P = {
"0.39": (900, 2308),
@ -264,4 +283,5 @@ ASPECT_RATIOS = {
"720p": (921600, ASPECT_RATIO_720P),
"1024": (1048576, ASPECT_RATIO_1024),
"1080p": (2073600, ASPECT_RATIO_1080P),
"4k": (8294400, ASPECT_RATIO_4K),
}

View file

@ -148,14 +148,15 @@ def resize_crop_to_fill(pil_image, image_size):
th, tw = image_size
rh, rw = th / h, tw / w
if rh > rw:
sh, sw = th, int(w * rh)
sh, sw = th, round(w * rh)
image = pil_image.resize((sw, sh), Image.BICUBIC)
i = 0
j = int(round((sw - tw) / 2.0))
else:
sh, sw = int(h * rw), tw
sh, sw = round(h * rw), tw
image = pil_image.resize((sw, sh), Image.BICUBIC)
i = int(round((sh - th) / 2.0))
j = 0
arr = np.array(image)
assert i + th <= arr.shape[0] and j + tw <= arr.shape[1]
return Image.fromarray(arr[i : i + th, j : j + tw])

View file

@ -111,15 +111,16 @@ def resize_crop_to_fill(clip, target_size):
th, tw = target_size[0], target_size[1]
rh, rw = th / h, tw / w
if rh > rw:
sh, sw = th, int(w * rh)
sh, sw = th, round(w * rh)
clip = resize(clip, (sh, sw), "bilinear")
i = 0
j = int(round(sw - tw) / 2.0)
else:
sh, sw = int(h * rw), tw
sh, sw = round(h * rw), tw
clip = resize(clip, (sh, sw), "bilinear")
i = int(round(sh - th) / 2.0)
j = 0
assert i + th <= clip.size(-2) and j + tw <= clip.size(-1)
return crop(clip, i, j, th, tw)

View file

@ -33,20 +33,20 @@ The columns are defined as follows:
## Dataset to CSV
As a start point, `convert_dataset.py` is used to convert the dataset to a CSV file. You can use the following commands to convert the dataset to a CSV file:
As a starting point, `convert.py` is used to convert the dataset to a CSV file. You can use the following commands to do so:
```bash
python -m tools.datasets.convert_dataset DATASET-TYPE DATA_FOLDER
python -m tools.datasets.convert DATASET-TYPE DATA_FOLDER
# general video folder
python -m tools.datasets.convert_dataset video VIDEO_FOLDER
python -m tools.datasets.convert video VIDEO_FOLDER
# general image folder
python -m tools.datasets.convert_dataset image IMAGE_FOLDER
python -m tools.datasets.convert image IMAGE_FOLDER
# imagenet
python -m tools.datasets.convert_dataset imagenet IMAGENET_FOLDER --split train
python -m tools.datasets.convert imagenet IMAGENET_FOLDER --split train
# ucf101
python -m tools.datasets.convert_dataset ucf101 UCF101_FOLDER --split videos
python -m tools.datasets.convert ucf101 UCF101_FOLDER --split videos
# vidprom
python -m tools.datasets.convert_dataset vidprom VIDPROM_FOLDER --info VidProM_semantic_unique.csv
python -m tools.datasets.convert vidprom VIDPROM_FOLDER --info VidProM_semantic_unique.csv
```
## Manage datasets

View file

@ -32,6 +32,8 @@ def get_video_info(path):
ext = os.path.splitext(path)[1].lower()
if ext in IMG_EXTENSIONS:
im = cv2.imread(path)
if im is None:
return 0, 0, 0, np.nan, np.nan
height, width = im.shape[:2]
num_frames, fps = 1, np.nan
else: