From d2e710c97776330f4928283787ffe049fedc3ee8 Mon Sep 17 00:00:00 2001 From: Shen-Chenhui Date: Mon, 17 Jun 2024 09:13:12 +0000 Subject: [PATCH] format --- README.md | 32 ++++- configs/vae/train/{image.py => stage1.py} | 9 +- .../vae/train/{video_disc.py => stage2.py} | 24 +--- configs/vae/train/{video.py => stage3.py} | 4 +- docs/installation.md | 135 ++++++++++-------- docs/vae.md | 63 ++++++++ eval/vae/cal_flolpips.py | 6 +- opensora/models/vae/README.md | 79 ---------- opensora/models/vae/lpips.py | 3 +- opensora/models/vae/utils.py | 19 +-- requirements/requirements-vae.txt | 5 + setup.py | 1 + 12 files changed, 193 insertions(+), 187 deletions(-) rename configs/vae/train/{image.py => stage1.py} (82%) rename configs/vae/train/{video_disc.py => stage2.py} (67%) rename configs/vae/train/{video.py => stage3.py} (91%) create mode 100644 docs/vae.md delete mode 100644 opensora/models/vae/README.md create mode 100644 requirements/requirements-vae.txt diff --git a/README.md b/README.md index c010e2c..32a61d7 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ see [here](/assets/texts/t2v_samples.txt) for full prompts. * [Gradio Demo](#gradio-demo) * [Inference](#inference) * [Data Processing](#data-processing) +* [VAE](#vae) * [Training](#training) * [Evaluation](#evaluation) * [Contribution](#contribution) @@ -157,7 +158,7 @@ Other useful documents and links are listed below. ### Install from Source -For CUDA 12.1, you can install the dependencies with the following commands. Otherwise, please refer to [Installation](docs/installation.md) for more instructions on different cuda version, and additional dependency for data preprocessing. +For CUDA 12.1, you can install the dependencies with the following commands. Otherwise, please refer to [Installation](docs/installation.md) for more instructions on different cuda version, and additional dependency for data preprocessing, VAE, and model evaluation. 
```bash # create a virtual env and activate (conda as an example) @@ -372,6 +373,35 @@ Also check out the [datasets](docs/datasets.md) we use. ![Data Processing Pipeline](assets/readme/report_data_pipeline.png) +## VAE +We train a VAE pipeline that consists of a spatial VAE followed by a temporal VAE. +For more details, refer to our [VAE documentation](docs/vae.md). +Before you run the following commands, follow our [Installation Documentation](docs/installation.md) to install the required dependencies for VAE and Evaluation. + +Once you prepare the data in a `csv` file, run the following commands to train the VAE. +Note that you need to adjust the number of trained epochs (`epochs`) in the config file accordingly with respect to your own csv data size. + +```bash +# stage 1 training, 380k steps, 8 GPUs +torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage1.py --data-path YOUR_CSV_PATH +# stage 2 training, 260k steps, 8 GPUs +torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage2.py --data-path YOUR_CSV_PATH +# stage 3 training, 540k steps, 24 GPUs +torchrun --nnodes=3 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage3.py --data-path YOUR_CSV_PATH +``` +To evaluate the VAE performance, you need to run VAE inference first to generate the videos, then calculate scores on the generated videos: + +```bash +# video generation +torchrun --standalone --nnodes=1 --nproc_per_node=1 scripts/inference_vae.py configs/vae/inference/video.py --ckpt-path YOUR_VAE_CKPT_PATH --data-path YOUR_CSV_PATH --save-dir YOUR_VIDEO_DIR +# the original videos will be saved to `YOUR_VIDEO_DIR_ori` +# the reconstructed videos through the pipeline will be saved to `YOUR_VIDEO_DIR_rec` +# the reconstructed videos through the spatial VAE only will be saved to `YOUR_VIDEO_DIR_spatial` + +# score calculation +python eval/vae/eval_common_metric.py --batch_size 2 --real_video_dir YOUR_VIDEO_DIR_ori --generated_video_dir 
YOUR_VIDEO_DIR_rec --device cuda --sample_fps 24 --crop_size 256 --resolution 256 --num_frames 17 --sample_rate 1 --metric ssim psnr lpips flolpips +``` + ## Training ### Open-Sora 1.2 Training diff --git a/configs/vae/train/image.py b/configs/vae/train/stage1.py similarity index 82% rename from configs/vae/train/image.py rename to configs/vae/train/stage1.py index 46621dc..a6899ec 100644 --- a/configs/vae/train/image.py +++ b/configs/vae/train/stage1.py @@ -1,4 +1,4 @@ -num_frames = 1 +num_frames = 17 image_size = (256, 256) # Define dataset @@ -35,10 +35,11 @@ model = dict( ) # loss weights -perceptual_loss_weight = 0.0 # use vgg is not None and more than 0 +perceptual_loss_weight = 0.1 # use vgg is not None and more than 0 kl_loss_weight = 1e-6 -mixed_image_ratio = 0.1 +mixed_strategy = "mixed_video_image" +mixed_image_ratio = 0.2 use_real_rec_loss = False use_z_rec_loss = True use_image_identity_loss = True @@ -48,7 +49,7 @@ seed = 42 outputs = "outputs" wandb = False -epochs = 100 +epochs = 100 # NOTE: adjust accordingly w.r.t dataset size log_every = 1 ckpt_every = 1000 load = None diff --git a/configs/vae/train/video_disc.py b/configs/vae/train/stage2.py similarity index 67% rename from configs/vae/train/video_disc.py rename to configs/vae/train/stage2.py index 7af989d..80748d4 100644 --- a/configs/vae/train/video_disc.py +++ b/configs/vae/train/stage2.py @@ -34,30 +34,14 @@ model = dict( ), ) -discriminator = dict( - type="NLayerDiscriminator", - from_pretrained="/home/shenchenhui/opensoraplan-v1.0.0-discriminator.pt", - input_nc=3, - n_layers=3, - use_actnorm=False, -) - -# discriminator hyper-parames TODO -discriminator_factor = 1 -discriminator_start = -1 -generator_factor = 0.5 -generator_loss_type = "hinge" -discriminator_loss_type = "hinge" -lecam_loss_weight = None -gradient_penalty_loss_weight = None - # loss weights perceptual_loss_weight = 0.1 # use vgg is not None and more than 0 kl_loss_weight = 1e-6 +mixed_strategy = "mixed_video_image" 
mixed_image_ratio = 0.2 -use_real_rec_loss = True -use_z_rec_loss = False +use_real_rec_loss = False +use_z_rec_loss = True use_image_identity_loss = False # Others @@ -65,7 +49,7 @@ seed = 42 outputs = "outputs" wandb = False -epochs = 100 +epochs = 100 # NOTE: adjust accordingly w.r.t dataset size log_every = 1 ckpt_every = 1000 load = None diff --git a/configs/vae/train/video.py b/configs/vae/train/stage3.py similarity index 91% rename from configs/vae/train/video.py rename to configs/vae/train/stage3.py index 8dd96c2..2b6bc12 100644 --- a/configs/vae/train/video.py +++ b/configs/vae/train/stage3.py @@ -38,7 +38,7 @@ model = dict( perceptual_loss_weight = 0.1 # use vgg is not None and more than 0 kl_loss_weight = 1e-6 -mixed_image_ratio = 0.2 +mixed_strategy = "mixed_video_random" use_real_rec_loss = True use_z_rec_loss = False use_image_identity_loss = False @@ -48,7 +48,7 @@ seed = 42 outputs = "outputs" wandb = False -epochs = 100 +epochs = 100 # NOTE: adjust accordingly w.r.t dataset size log_every = 1 ckpt_every = 1000 load = None diff --git a/docs/installation.md b/docs/installation.md index bf7c711..5e6cd29 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -7,7 +7,7 @@ Note that besides these packages, some packages needs to be mannually installed, You need to install `opensora` for training and inference. You can follow the steps below for installation. We also provide guideline for different CUDA versions for compatiblity. -Please note that the default installation is for training and inference only. Other optional dependencies are detailed in the sections [Data Proessing](#data-processing) and [Evaluation](#evaluation) respectively. +Please note that the default installation is for training and inference only. Other optional dependencies are detailed in the sections [Data Processing](#data-processing), [Evaluation](#evaluation), and [VAE](#vae) respectively. 
### Step 1: Install PyTorch and xformers @@ -58,63 +58,6 @@ pip install flash-attn --no-build-isolation pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git ``` -## Evaluation - -### Step 1: Install Requirements - -To conduct evaluation, run the following command to install requirements: - -```bash -pip install -v .[eval] -# For development:`pip install -v -e .[eval]` -``` - -### Step 2: Install VBench - - - -You need to install VBench mannually by: -```bash -# first clone their repo -cd .. # assume you are in the Open-Sora root folder, you may install at other location but make sure the soft link paths later are correct -git clone https://github.com/Vchitect/VBench.git -cd VBench -git checkout v0.1.2 - -# next, fix their hard-coded path isse -vim vbench2_beta_i2v/utils.py -# find `image_root` in the `load_i2v_dimension_info` function, change it to point to your appropriate image folder - -# last, create softlinks -cd ../Open-Sora # or `cd ../Open-Sora-dev` for development -ln -s ../VBench/vbench vbench # you may need to change ../VBench/vbench to your corresponding path -ln -s ../VBench/vbench2_beta_i2v vbench2_beta_i2v # you may need to change ../VBench/vbench_beta_i2v to your corresponding path -# later you need to make sure to run evaluation from your Open-Sora folder, else vbench, vbench2_beta_i2v cannot be found -``` - - -### Step 3: Install `cupy` for Potential VAE Errors - -You need to mannually install [cupy](https://docs.cupy.dev/en/stable/install.html). 
- -- For CUDA v11.2~11.8 (x86_64 / aarch64), `pip install cupy-cuda11x` -- For CUDA v12.x (x86_64 / aarch64), `pip install cupy-cuda12x` - -Note that for VAE evaluation, you may run into error with `ModuleNotFoundError: No module named 'torchvision.transforms.functional_tensor'`, in this case, you need to go to the corresponding file (`.../pytorchvideo/transforms/augmentations.py`) reporting this error, then change as following: - -```python -# find the original line: -import torchvision.transforms.functional_tensor as F_t -# change to: -import torchvision.transforms._functional_tensor as F_t -``` - ## Data Processing ### Step 1: Install Requirements @@ -193,3 +136,79 @@ and change the assert of `mmcv_version < digit_version(mmcv_maximum_version)` to If you are unsure of your path to the mmdet init file, simply run our [OCR command](../tools/scoring/README.md), wait for the mmdeet assertion error on mmcv versions. The error will contain the exact path to the mmdet init file. + + +## Evaluation + +### Step 1: Install Requirements + +To conduct evaluation, run the following command to install requirements: + +```bash +pip install -v .[eval] +# For development:`pip install -v -e .[eval]` +``` + +### Step 2: Install VBench + + + +You need to install VBench manually by: +```bash +# first clone their repo +cd .. 
# assume you are in the Open-Sora root folder, you may install at other location but make sure the soft link paths later are correct +git clone https://github.com/Vchitect/VBench.git +cd VBench +git checkout v0.1.2 + +# next, fix their hard-coded path issue +vim vbench2_beta_i2v/utils.py +# find `image_root` in the `load_i2v_dimension_info` function, change it to point to your appropriate image folder + +# last, create softlinks +cd ../Open-Sora # or `cd ../Open-Sora-dev` for development +ln -s ../VBench/vbench vbench # you may need to change ../VBench/vbench to your corresponding path +ln -s ../VBench/vbench2_beta_i2v vbench2_beta_i2v # you may need to change ../VBench/vbench2_beta_i2v to your corresponding path +# later you need to make sure to run evaluation from your Open-Sora folder, else vbench, vbench2_beta_i2v cannot be found +``` + + +### Step 3: Install `cupy` for Potential VAE Errors + +You need to manually install [cupy](https://docs.cupy.dev/en/stable/install.html). + +- For CUDA v11.2~11.8 (x86_64 / aarch64), `pip install cupy-cuda11x` +- For CUDA v12.x (x86_64 / aarch64), `pip install cupy-cuda12x` + +Note that for VAE evaluation, you may run into error with `ModuleNotFoundError: No module named 'torchvision.transforms.functional_tensor'`, in this case, you need to go to the corresponding file (`.../pytorchvideo/transforms/augmentations.py`) reporting this error, then change it as follows: + +```python +# find the original line: +import torchvision.transforms.functional_tensor as F_t +# change to: +import torchvision.transforms._functional_tensor as F_t +``` + + + + +## VAE + +### Step 1: Install Requirements + +To train and evaluate your own VAE, run the following command to install requirements: + +```bash +pip install -v .[vae] +# For development:`pip install -v -e .[vae]` +``` + +### Step 2: VAE Evaluation (`cupy` and Potential VAE Errors) + +Refer to [Evaluation's VAE section](#step-3-install-cupy-for-potential-vae-errors). 
diff --git a/docs/vae.md b/docs/vae.md new file mode 100644 index 0000000..114a7e5 --- /dev/null +++ b/docs/vae.md @@ -0,0 +1,63 @@ +# VAE Report + +As [Pixart-Sigma](https://arxiv.org/abs/2403.04692) finds that adapting to a new VAE is simple, we develop a temporal VAE for the diffusion model to adapt to. +Specifically, our VAE consists of a pipeline of a [spatial VAE](https://huggingface.co/PixArt-alpha/pixart_sigma_sdxlvae_T5_diffusers) followed by a temporal VAE. +For the temporal VAE, we follow the implementation of [MAGVIT-v2](https://arxiv.org/abs/2310.05737), with the following modifications: +* We remove the architecture specific to the codebook. +* We do not use the discriminator, and use the VAE reconstruction loss, KL loss, and perceptual loss for training. +* In the last linear layer of the encoder, we scale down to a diagonal Gaussian distribution of 4 channels, following our previously trained STDiT that takes in 4-channel input. +* Our decoder is symmetric to the encoder architecture. + +## Training + +We train the model in different stages. + +We first train the temporal VAE only by freezing the spatial VAE for 380k steps on a single machine (8 GPUs). +We use an additional identity loss to make features from the 3D VAE similar to the features from the 2D VAE. +We train the VAE using 20% images and 80% videos with 17 frames. +```bash +torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage1.py --data-path YOUR_CSV_PATH +``` +Next, we remove the identity loss and train the 3D VAE pipeline to reconstruct the 2D-compressed videos for 260k steps. +```bash +torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage2.py --data-path YOUR_CSV_PATH +``` +Finally, we remove the reconstruction loss for the 2D-compressed videos and train the VAE pipeline to reconstruct the 3D videos for 540k steps. +We train our VAE with a random number of frames within 34 to make it more robust to different video lengths. 
+This stage is trained on 24 GPUs. +```bash +torchrun --nnodes=3 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage3.py --data-path YOUR_CSV_PATH +``` + +Note that you need to adjust the `epochs` in the config file accordingly with respect to your own csv data size. + + +## Inference + +To visually check the performance of the VAE, you may run the following inference. +It saves the original video to your specified video directory with `_ori` postfix (i.e. `"YOUR_VIDEO_DIR"_ori`), the reconstructed video from the full pipeline with the `_rec` postfix (i.e. `"YOUR_VIDEO_DIR"_rec`), and the reconstructed video from the 2D compression and decompression with the `_spatial` postfix (i.e. `"YOUR_VIDEO_DIR"_spatial`). +```bash +torchrun --standalone --nnodes=1 --nproc_per_node=1 scripts/inference_vae.py configs/vae/inference/video.py --ckpt-path YOUR_VAE_CKPT_PATH --data-path YOUR_CSV_PATH --save-dir YOUR_VIDEO_DIR +``` + +## Evaluation +We can then score the VAE's reconstructions with the SSIM, PSNR, LPIPS, and FloLPIPS metrics. + +* SSIM: structural similarity index measure, the higher the better +* PSNR: peak signal-to-noise ratio, the higher the better +* LPIPS: learned perceptual image patch similarity, the lower the better +* [FloLPIPS](https://arxiv.org/pdf/2207.08119): a flow-weighted variant of LPIPS designed for video frame interpolation, the lower the better. 
+ +```bash +python eval/vae/eval_common_metric.py --batch_size 2 --real_video_dir YOUR_VIDEO_DIR_ori --generated_video_dir YOUR_VIDEO_DIR_rec --device cuda --sample_fps 24 --crop_size 256 --resolution 256 --num_frames 17 --sample_rate 1 --metric ssim psnr lpips flolpips +``` + + +## Acknowledgement +We are grateful for the following work: +* [MAGVIT-v2](https://arxiv.org/abs/2310.05737): Language Model Beats Diffusion -- Tokenizer is Key to Visual Generation +* [Taming Transformers](https://github.com/CompVis/taming-transformers): Taming Transformers for High-Resolution Image Synthesis +* [3D blur pooling](https://github.com/adobe/antialiased-cnns/pull/39/commits/3d6f02b6943c58b68c19c07bc26fad57492ff3bc) +* [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan) + +Special thanks go to the authors of [Open-Sora-Plan](https://github.com/PKU-YuanGroup/Open-Sora-Plan) for their valuable advice and help. diff --git a/eval/vae/cal_flolpips.py b/eval/vae/cal_flolpips.py index 7d8f08c..a8824da 100644 --- a/eval/vae/cal_flolpips.py +++ b/eval/vae/cal_flolpips.py @@ -6,10 +6,10 @@ from tqdm import tqdm sys.path.append(".") -# ERROR: cannot locate the model file -from flolpips.pwcnet import Network as PWCNet from flolpips.flolpips import FloLPIPS -loss_fn = FloLPIPS(net='alex', version='0.1').eval().requires_grad_(False) +from flolpips.pwcnet import Network as PWCNet + +loss_fn = FloLPIPS(net="alex", version="0.1").eval().requires_grad_(False) flownet = PWCNet().eval().requires_grad_(False) diff --git a/opensora/models/vae/README.md b/opensora/models/vae/README.md deleted file mode 100644 index 0a6e6c6..0000000 --- a/opensora/models/vae/README.md +++ /dev/null @@ -1,79 +0,0 @@ -## Commands - -## 0. References - -* https://github.com/google-research/magvit -* https://github.com/CompVis/taming-transformers -* https://github.com/adobe/antialiased-cnns/pull/39/commits/3d6f02b6943c58b68c19c07bc26fad57492ff3bc -* https://github.com/PKU-YuanGroup/Open-Sora-Plan - - -## 1. 
VAE 3D -### 1.1 Train - -```yaml -# train on pexel dataset -WANDB_API_KEY= CUDA_VISIBLE_DEVICES= torchrun --master_port= --nnodes=1 --nproc_per_node=1 scripts/train-vae.py configs/vae_3d/train/16x256x256.py --data-path /home/shenchenhui/data/pexels/train.csv --wandb True -``` - -### 1.2 Inference - -```yaml -CUDA_VISIBLE_DEVICES=6 torchrun --standalone --nnodes=1 --nproc_per_node=1 scripts/inference-vae.py configs/vae_3d/inference/16x256x256.py --ckpt-path /home/shenchenhui/Open-Sora-dev/outputs/train_pexel_028/epoch3-global_step20000/ --data-path /home/shenchenhui/data/pexels/debug.csv --save-dir outputs/pexel - - -# resume training debug -CUDA_VISIBLE_DEVICES=5 torchrun --master_port=29530 --nnodes=1 --nproc_per_node=1 scripts/train-vae.py configs/vae_3d/train/16x256x256.py --data-path /home/shenchenhui/data/pexels/debug.csv --load /home/shenchenhui/Open-Sora-dev/outputs/006-F16S3-VAE_3D_B/epoch49-global_step50 -``` - -version 2 pipeline -```yaml -# NOTE: first VAE is pretrained 2D, 16x128x128 --> 16x16x16 -# then we train our own temporal VAE, 16x16x16 --> 4x16x16 -# we use a 3 layer discriminator on the intermediate of 16x16x16 -WANDB_API_KEY= CUDA_VISIBLE_DEVICES=7 torchrun --master_port=29580 --nnodes=1 --nproc_per_node=1 scripts/train-vae-v2.py configs/vae_magvit_v2/train/pipeline_16x128x128.py --data-path /home/shenchenhui/data/trial_data/train_short.csv --wandb True -``` - - -## 2. MAGVIT-v2 - -### 2.1 dependencies -``` -'accelerate>=0.24.0', -'beartype', -'einops>=0.7.0', -'ema-pytorch>=0.2.4', -'pytorch-warmup', -'gateloop-transformer>=0.2.2', -'kornia', -'opencv-python', -'pillow', -'pytorch-custom-utils>=0.0.9', -'numpy', -'vector-quantize-pytorch>=1.11.8', -'taylor-series-linear-attention>=0.1.5', -'torch', -'torchvision', -'x-transformers' -``` - -Note: -uses `hotfix/zero` branch of `https://github.com/ver217/ColossalAI.git`. 
-clone the repo, go to the branch, then do `pip install .` - - -### 2.2 Train - -```yaml -CUDA_VISIBLE_DEVICES7 torchrun --master_port=29510 --nnodes=1 --nproc_per_node=1 scripts/train-vae-v2.py configs/vae_magvit_v2/train/17x128x128.py --data-path /home/shenchenhui/data/pexels/train.csv -``` - -### 2.3 Inference - - -### 2.4 Data - -full data combining the follwing: `/home/shenchenhui/data/pixabay+pexels.csv` - -* ~/data/pixabay: `/home/data/sora_data/pixabay/raw/data/split-0` -* pexels: `/home/litianyi/data/pexels/processed/meta/pexels_caption_vinfo_ready_noempty_clean.csv` \ No newline at end of file diff --git a/opensora/models/vae/lpips.py b/opensora/models/vae/lpips.py index 358cfe5..e643cba 100644 --- a/opensora/models/vae/lpips.py +++ b/opensora/models/vae/lpips.py @@ -50,7 +50,7 @@ class LPIPS(nn.Module): super().__init__() self.scaling_layer = ScalingLayer() self.chns = [64, 128, 256, 512, 512] # vg16 features - self.net = vgg16(pretrained=True, requires_grad=False) # NOTE: TODO: need in_channels = 4 to use + self.net = vgg16(pretrained=True, requires_grad=False) self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout) self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout) self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout) @@ -90,7 +90,6 @@ class LPIPS(nn.Module): return val -# SCH: TODO: this channel shift & scale may need to be changed class ScalingLayer(nn.Module): def __init__(self): super(ScalingLayer, self).__init__() diff --git a/opensora/models/vae/utils.py b/opensora/models/vae/utils.py index 6c0c07c..6b0ba6b 100644 --- a/opensora/models/vae/utils.py +++ b/opensora/models/vae/utils.py @@ -1,26 +1,9 @@ import numpy as np import torch -# from taming.modules.losses.lpips import LPIPS # need to pip install https://github.com/CompVis/taming-transformers -# from taming.modules.discriminator.model import NLayerDiscriminator, weights_init - """Stripped version of 
https://github.com/richzhang/PerceptualSimilarity/tree/master/models""" -## NOTE: not used since we only have 'GN' -# def get_norm_layer(norm_type, dtype): -# if norm_type == 'LN': -# # supply a few args with partial function and pass the rest of the args when this norm_fn is called -# norm_fn = functools.partial(nn.LayerNorm, dtype=dtype) -# elif norm_type == 'GN': # -# norm_fn = functools.partial(nn.GroupNorm, dtype=dtype) -# elif norm_type is None: -# norm_fn = lambda: (lambda x: x) -# else: -# raise NotImplementedError(f'norm_type: {norm_type}') -# return norm_fn - - class DiagonalGaussianDistribution(object): def __init__( self, @@ -57,7 +40,7 @@ class DiagonalGaussianDistribution(object): dim=[1, 2, 3, 4], ) - def nll(self, sample, dims=[1, 2, 3, 4]): # TODO: what does this do? + def nll(self, sample, dims=[1, 2, 3, 4]): if self.deterministic: return torch.Tensor([0.0]) logtwopi = np.log(2.0 * np.pi) diff --git a/requirements/requirements-vae.txt b/requirements/requirements-vae.txt new file mode 100644 index 0000000..75530e4 --- /dev/null +++ b/requirements/requirements-vae.txt @@ -0,0 +1,5 @@ +beartype==0.18.5 +einops==0.8.0 +einops-exts==0.0.4 +opencv-python==4.9.0.80 +pillow==10.3.0 diff --git a/setup.py b/setup.py index a90450e..f9b1d42 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ setup( extras_require={ "data": fetch_requirements("requirements/requirements-data.txt"), "eval": fetch_requirements("requirements/requirements-eval.txt"), + "vae": fetch_requirements("requirements/requirements-vae.txt"), "full": fetch_requirements( [ "requirements/requirements-data.txt",