diff --git a/configs/vae_magvit_v2/train/16x128x128.py b/configs/vae_magvit_v2/train/16x128x128.py index 722f5cb..5be8214 100644 --- a/configs/vae_magvit_v2/train/16x128x128.py +++ b/configs/vae_magvit_v2/train/16x128x128.py @@ -30,7 +30,7 @@ model = dict( kl_embed_dim = 64, activation_fn = 'swish', separate_first_frame_encoding = False, - disable_space = True, + disable_space = False, custom_conv_padding = None ) diff --git a/configs/vae_magvit_v2/train/pipeline_16x128x128.py b/configs/vae_magvit_v2/train/pipeline_16x128x128.py index e86c2cc..722f5cb 100644 --- a/configs/vae_magvit_v2/train/pipeline_16x128x128.py +++ b/configs/vae_magvit_v2/train/pipeline_16x128x128.py @@ -30,6 +30,7 @@ model = dict( kl_embed_dim = 64, activation_fn = 'swish', separate_first_frame_encoding = False, + disable_space = True, custom_conv_padding = None ) diff --git a/opensora/models/vae/vae_3d_v2.py b/opensora/models/vae/vae_3d_v2.py index 0ff219b..05ee2a8 100644 --- a/opensora/models/vae/vae_3d_v2.py +++ b/opensora/models/vae/vae_3d_v2.py @@ -617,7 +617,7 @@ class Decoder(nn.Module): self.num_groups = num_groups # self.upsample = upsample - self.s_stride = 1 if self.disable_spatial_upsample else 2 # spatial stride + self.s_stride = 1 if disable_spatial_upsample else 2 # spatial stride self.custom_conv_padding = custom_conv_padding # self.norm_type = self.config.vqvae.norm_type # self.num_remat_block = self.config.vqvae.get('num_dec_remat_blocks', 0)