diff --git a/opensora/datasets/utils.py b/opensora/datasets/utils.py
index 9b0be4c..18ec6e3 100644
--- a/opensora/datasets/utils.py
+++ b/opensora/datasets/utils.py
@@ -145,7 +145,7 @@ def read_from_path(path, image_size, transform_name="center"):
         return read_image_from_path(path, image_size=image_size, transform_name=transform_name)
 
 
-def save_sample(x, fps=8, save_path=None, normalize=True, value_range=(-1, 1), force_video=False):
+def save_sample(x, fps=8, save_path=None, normalize=True, value_range=(-1, 1), force_video=False, verbose=True):
     """
     Args:
         x (Tensor): shape [C, T, H, W]
@@ -165,7 +165,8 @@ def save_sample(x, fps=8, save_path=None, normalize=True, value_range=(-1, 1), f
 
         x = x.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 3, 0).to("cpu", torch.uint8)
         write_video(save_path, x, fps=fps, video_codec="h264")
-    print(f"Saved to {save_path}")
+    if verbose:
+        print(f"Saved to {save_path}")
     return save_path
 
 
diff --git a/opensora/schedulers/dpms/__init__.py b/opensora/schedulers/dpms/__init__.py
index ed74427..df10477 100644
--- a/opensora/schedulers/dpms/__init__.py
+++ b/opensora/schedulers/dpms/__init__.py
@@ -22,6 +22,7 @@ class DPM_SOLVER:
         device,
         additional_args=None,
         mask=None,
+        progress=True,
     ):
         assert mask is None, "mask is not supported in dpm-solver"
         n = len(prompts)
@@ -38,7 +39,14 @@ class DPM_SOLVER:
             cfg_scale=self.cfg_scale,
             model_kwargs=model_args,
         )
-        samples = dpms.sample(z, steps=self.num_sampling_steps, order=2, skip_type="time_uniform", method="multistep")
+        samples = dpms.sample(
+            z,
+            steps=self.num_sampling_steps,
+            order=2,
+            skip_type="time_uniform",
+            method="multistep",
+            progress=progress,
+        )
         return samples
 
 
diff --git a/opensora/schedulers/dpms/dpm_solver.py b/opensora/schedulers/dpms/dpm_solver.py
index 106e59e..d422a0a 100644
--- a/opensora/schedulers/dpms/dpm_solver.py
+++ b/opensora/schedulers/dpms/dpm_solver.py
@@ -1255,6 +1255,7 @@ class DPM_Solver:
         atol=0.0078,
         rtol=0.05,
         return_intermediate=False,
+        progress=True,
     ):
         """
         Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`.
@@ -1414,7 +1415,8 @@ class DPM_Solver:
                     t_prev_list.append(t)
                     model_prev_list.append(self.model_fn(x, t))
                 # Compute the remaining values by `order`-th order multistep DPM-Solver.
-                for step in tqdm(range(order, steps + 1)):
+                progress_fn = tqdm if progress else lambda x: x
+                for step in progress_fn(range(order, steps + 1)):
                     t = timesteps[step]
                     # We only use lower order for steps < 10
                     if lower_order_final and steps < 10:
diff --git a/opensora/schedulers/iddpm/__init__.py b/opensora/schedulers/iddpm/__init__.py
index 26a856d..7b65cad 100644
--- a/opensora/schedulers/iddpm/__init__.py
+++ b/opensora/schedulers/iddpm/__init__.py
@@ -61,6 +61,7 @@ class IDDPM(SpacedDiffusion):
         device,
         additional_args=None,
         mask=None,
+        progress=True,
     ):
         n = len(prompts)
         z = torch.cat([z, z], 0)
@@ -76,7 +77,7 @@ class IDDPM(SpacedDiffusion):
             z,
             clip_denoised=False,
             model_kwargs=model_args,
-            progress=True,
+            progress=progress,
             device=device,
             mask=mask,
         )
diff --git a/opensora/schedulers/rf/__init__.py b/opensora/schedulers/rf/__init__.py
index 3e71d15..e1c1e82 100644
--- a/opensora/schedulers/rf/__init__.py
+++ b/opensora/schedulers/rf/__init__.py
@@ -43,6 +43,7 @@ class RFLOW:
         additional_args=None,
         mask=None,
         guidance_scale=None,
+        progress=True,
     ):
         assert mask is None, "mask is not supported in rectified flow inference yet"
         # if no specific guidance scale is provided, use the default scale when initializing the scheduler
@@ -68,7 +69,8 @@ class RFLOW:
                 for t in timesteps
             ]
 
-        for i, t in tqdm(enumerate(timesteps)):
+        progress_wrap = tqdm if progress else (lambda x: x)
+        for i, t in progress_wrap(enumerate(timesteps)):
             z_in = torch.cat([z, z], 0)
             pred = model(z_in, torch.tensor([t] * z_in.shape[0], device=device), **model_args).chunk(2, dim=1)[0]
             pred_cond, pred_uncond = pred.chunk(2, dim=0)
diff --git a/opensora/utils/config_utils.py b/opensora/utils/config_utils.py
index 9e60a69..2ec2b71 100644
--- a/opensora/utils/config_utils.py
+++ b/opensora/utils/config_utils.py
@@ -37,6 +37,7 @@ def parse_args(training=False):
         parser.add_argument("--end-index", default=None, type=int, help="end index for sample name")
         parser.add_argument("--num-sample", default=None, type=int, help="number of samples to generate for one prompt")
         parser.add_argument("--prompt-as-path", action="store_true", help="use prompt as path to save samples")
+        parser.add_argument("--verbose", default=None, type=int, help="verbose level")
 
         # prompt
         parser.add_argument("--prompt-path", default=None, type=str, help="path to prompt txt file")
diff --git a/scripts/inference-long.py b/scripts/inference-long.py
index eec19cf..81b996d 100644
--- a/scripts/inference-long.py
+++ b/scripts/inference-long.py
@@ -217,7 +217,7 @@ def main():
     # ======================================================
     # 4. inference
     # ======================================================
-    sample_idx = 0
+    sample_idx = cfg.get("start_index", 0)
     if cfg.sample_name is not None:
         sample_name = cfg.sample_name
     elif cfg.prompt_as_path:
diff --git a/scripts/inference.py b/scripts/inference.py
index 7a83ead..5eb114c 100644
--- a/scripts/inference.py
+++ b/scripts/inference.py
@@ -5,6 +5,7 @@ import torch
 import torch.distributed as dist
 from colossalai.cluster import DistCoordinator
 from mmengine.runner import set_random_seed
+from tqdm import tqdm
 
 from opensora.acceleration.parallel_states import set_sequence_parallel_group
 from opensora.datasets import IMG_FPS, save_sample
@@ -19,6 +20,7 @@ def main():
     # 1. cfg and init distributed env
     # ======================================================
     cfg = parse_configs(training=False)
+    verbose = cfg.get("verbose", 2)
     print(cfg)
 
     # init distributed
@@ -99,7 +101,7 @@ def main():
     # ======================================================
     # 4. inference
     # ======================================================
-    sample_idx = 0
+    sample_idx = cfg.get("start_index", 0)
     if cfg.sample_name is not None:
         sample_name = cfg.sample_name
     elif cfg.prompt_as_path:
@@ -110,7 +112,8 @@ def main():
     os.makedirs(save_dir, exist_ok=True)
 
     # 4.1. batch generation
-    for i in range(0, len(prompts), cfg.batch_size):
+    progress_wrap = tqdm if verbose == 1 else (lambda x: x)
+    for i in progress_wrap(range(0, len(prompts), cfg.batch_size)):
         # 4.2 sample in hidden space
         batch_prompts_raw = prompts[i : i + cfg.batch_size]
         batch_prompts = [text_preprocessing(prompt) for prompt in batch_prompts_raw]
@@ -152,13 +155,15 @@ def main():
                 prompts=batch_prompts,
                 device=device,
                 additional_args=model_args,
+                progress=verbose >= 2,
             )
             samples = vae.decode(samples.to(dtype))
 
             # 4.4. save samples
             if not use_dist or coordinator.is_master():
                 for idx, sample in enumerate(samples):
-                    print(f"Prompt: {batch_prompts_raw[idx]}")
+                    if verbose >= 2:
+                        print(f"Prompt: {batch_prompts_raw[idx]}")
                     if cfg.prompt_as_path:
                         sample_name_suffix = batch_prompts_raw[idx]
                     else:
@@ -166,7 +171,12 @@ def main():
                     save_path = os.path.join(save_dir, f"{sample_name}{sample_name_suffix}")
                     if cfg.num_sample != 1:
                         save_path = f"{save_path}-{k}"
-                    save_sample(sample, fps=cfg.fps // cfg.frame_interval, save_path=save_path)
+                    save_sample(
+                        sample,
+                        fps=cfg.fps // cfg.frame_interval,
+                        save_path=save_path,
+                        verbose=verbose >= 2,
+                    )
                     sample_idx += 1
 
 
diff --git a/scripts/misc/generate.sh b/scripts/misc/generate.sh
new file mode 100644
index 0000000..6056d20
--- /dev/null
+++ b/scripts/misc/generate.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Launch one background inference job per GPU. Each job handles its own
+# --start-index/--end-index slice of the prompt file and writes stdout and
+# stderr to its own log file. The jobs are started with `&` and never waited
+# on, so the script returns as soon as the last job has been launched.
+#
+# TEXT_PATH, OUTPUT_PATH, LOG_BASE, NUM_PER_GPU and N_LAUNCH can be overridden
+# from the environment; the defaults are the values this script was written with.
+
+set -x
+set -e
+
+TEXT_PATH=${TEXT_PATH:-/home/data/sora_data/pixart-sigma-generated/text.txt}
+OUTPUT_PATH=${OUTPUT_PATH:-/home/data/sora_data/pixart-sigma-generated/raw}
+# Kept as an array so the command line is expanded without relying on word splitting.
+CMD=(python scripts/inference.py configs/pixart/inference/1x2048MS.py)
+LOG_BASE=${LOG_BASE:-logs/sample/generate}
+NUM_PER_GPU=${NUM_PER_GPU:-10000}
+N_LAUNCH=${N_LAUNCH:-6}
+
+# Arguments for --image-size, one entry per GPU; the array index is the CUDA device id.
+IMAGE_SIZES=(
+    "2048 2048"
+    "1408 2816"
+    "2816 1408"
+    "1664 2304"
+    "2304 1664"
+    "1536 2560"
+    "2560 1536"
+    "2048 2048"
+)
+NUM_GPUS=${#IMAGE_SIZES[@]}
+# Index offset of this launch round: N_LAUNCH rounds of NUM_PER_GPU indices per GPU are skipped.
+NUM_START=$((N_LAUNCH * NUM_PER_GPU * NUM_GPUS))
+
+# Redirecting to a log file fails when its directory is missing, and a failure inside a
+# background job does not stop the script, so create the directory up front.
+mkdir -p "$(dirname "$LOG_BASE")"
+
+for GPU in "${!IMAGE_SIZES[@]}"; do
+    START=$((NUM_START + NUM_PER_GPU * GPU))
+    END=$((START + NUM_PER_GPU))
+    # ${IMAGE_SIZES[$GPU]} is deliberately unquoted: it must split into the two --image-size values.
+    CUDA_VISIBLE_DEVICES=$GPU "${CMD[@]}" --prompt-path "$TEXT_PATH" --save-dir "$OUTPUT_PATH" --start-index "$START" --end-index "$END" --image-size ${IMAGE_SIZES[$GPU]} --verbose 1 --batch-size 2 >"${LOG_BASE}_${N_LAUNCH}_$((GPU + 1)).log" 2>&1 &
+done
diff --git a/tools/architecture/net2net.py b/tools/architecture/net2net.py
index cb23b77..80d346a 100644
--- a/tools/architecture/net2net.py
+++ b/tools/architecture/net2net.py
@@ -7,7 +7,7 @@ Numpy modules for Net2Net
 Written by Kyunghyun Paeng
 
 """
-import numpy as np
+
 
 def net2net(teach_param, stu_param):
     # teach param with shape (a, b)
@@ -24,26 +24,29 @@ def net2net(teach_param, stu_param):
     assert len(teach_param_shape) == len(stu_param_shape), "teach_param and stu_param must have same dimension"
 
     if len(teach_param_shape) == 1:
-        stu_param[:teach_param_shape[0]] = teach_param
-
+        stu_param[: teach_param_shape[0]] = teach_param
     elif len(teach_param_shape) == 2:
-        stu_param[:teach_param_shape[0], :teach_param_shape[1]] = teach_param
+        stu_param[: teach_param_shape[0], : teach_param_shape[1]] = teach_param
+    else:
+        breakpoint()
 
     if stu_param.shape != stu_param_shape:
         stu_param = stu_param.reshape(stu_param_shape)
 
     return stu_param
 
-if __name__ == '__main__':
-    """ Net2Net Class Test """
 
-    from opensora.models.pixart import PixArt_Sigma_XL_2, PixArt_1B_2
+if __name__ == "__main__":
+    """Net2Net Class Test"""
+
     import torch
 
+    from opensora.models.pixart import PixArt_1B_2
+
     model = PixArt_1B_2(no_temporal_pos_emb=True, space_scale=4, enable_flashattn=True, enable_layernorm_kernel=True)
     print("load model done")
 
-    ckpt = torch.load('/home/zhouyukun/projs/opensora/pretrained_models/PixArt-Sigma-XL-2-2K-MS.pth')
+    ckpt = torch.load("/home/zhouyukun/projs/opensora/pretrained_models/PixArt-Sigma-XL-2-2K-MS.pth")
     print("load ckpt done")
 
     ckpt = ckpt["state_dict"]
diff --git a/train_pixart_1B.sh b/train_pixart_1B.sh
deleted file mode 100644
index d3d12ce..0000000
--- a/train_pixart_1B.sh
+++ /dev/null
@@ -1 +0,0 @@
-colossalai run --nproc_per_node 8 scripts/train.py configs/pixart/train/1x2048x2048.py --data-path /home/zhaowangbo/data/csv/image-v1_1_ext_noempty_rcp_clean_info.csv