From 970302fb140be5958e48b90598633355c9d8552b Mon Sep 17 00:00:00 2001 From: Shen Chenhui Date: Fri, 10 May 2024 14:24:53 +0800 Subject: [PATCH 1/3] Tmps/eval v1.2 (#97) * formate * save progress * format * save progress --------- Co-authored-by: Shen-Chenhui --- configs/opensora-v1-2/inference/sample.py | 2 +- eval/launch.sh | 23 ++++---- eval/launch_per_gpu.sh | 40 +++++++++++++ eval/sample.sh | 72 ++++++++++++----------- eval/test.sh | 14 +++++ 5 files changed, 107 insertions(+), 44 deletions(-) create mode 100644 eval/launch_per_gpu.sh create mode 100644 eval/test.sh diff --git a/configs/opensora-v1-2/inference/sample.py b/configs/opensora-v1-2/inference/sample.py index 9b6afac..0f9eb4d 100644 --- a/configs/opensora-v1-2/inference/sample.py +++ b/configs/opensora-v1-2/inference/sample.py @@ -1,5 +1,5 @@ image_size = (240, 426) -num_frames = 34 +num_frames = 51 fps = 24 frame_interval = 1 diff --git a/eval/launch.sh b/eval/launch.sh index 1ceffab..6c55f7b 100644 --- a/eval/launch.sh +++ b/eval/launch.sh @@ -4,26 +4,29 @@ set -x set -e CKPT=$1 +NUM_FRAMES=$2 +MODEL_NAME=$3 + if [[ $CKPT == *"ema"* ]]; then parentdir=$(dirname $CKPT) CKPT_BASE=$(basename $parentdir)_ema else CKPT_BASE=$(basename $CKPT) fi -LOG_BASE=logs/sample/$CKPT_BASE +LOG_BASE=logs/sample/${MODEL_NAME}_${CKPT_BASE} echo "Logging to $LOG_BASE" # == sample & human evaluation == -# CUDA_VISIBLE_DEVICES=0 bash eval/sample.sh $CKPT -1 >${LOG_BASE}_1.log 2>&1 & -# CUDA_VISIBLE_DEVICES=1 bash eval/sample.sh $CKPT -2a >${LOG_BASE}_2a.log 2>&1 & -# CUDA_VISIBLE_DEVICES=2 bash eval/sample.sh $CKPT -2b >${LOG_BASE}_2b.log 2>&1 & -# CUDA_VISIBLE_DEVICES=3 bash eval/sample.sh $CKPT -2c >${LOG_BASE}_2c.log 2>&1 & -# CUDA_VISIBLE_DEVICES=4 bash eval/sample.sh $CKPT -2d >${LOG_BASE}_2d.log 2>&1 & -# CUDA_VISIBLE_DEVICES=5 bash eval/sample.sh $CKPT -2e >${LOG_BASE}_2e.log 2>&1 & -# CUDA_VISIBLE_DEVICES=6 bash eval/sample.sh $CKPT -2f >${LOG_BASE}_2f.log 2>&1 & -# CUDA_VISIBLE_DEVICES=7 bash eval/sample.sh $CKPT -2g >${LOG_BASE}_2g.log 2>&1 & +CUDA_VISIBLE_DEVICES=0 bash eval/sample.sh $CKPT 1 $MODEL_NAME -1 >${LOG_BASE}_1.log 2>&1 & +CUDA_VISIBLE_DEVICES=1 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2a >${LOG_BASE}_2a.log 2>&1 & +CUDA_VISIBLE_DEVICES=2 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2b >${LOG_BASE}_2b.log 2>&1 & +CUDA_VISIBLE_DEVICES=3 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2c >${LOG_BASE}_2c.log 2>&1 & +CUDA_VISIBLE_DEVICES=4 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2d >${LOG_BASE}_2d.log 2>&1 & +CUDA_VISIBLE_DEVICES=5 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2e >${LOG_BASE}_2e.log 2>&1 & +CUDA_VISIBLE_DEVICES=6 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2f >${LOG_BASE}_2f.log 2>&1 & +CUDA_VISIBLE_DEVICES=7 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2g >${LOG_BASE}_2g.log 2>&1 & -# CUDA_VISIBLE_DEVICES=0 bash eval/sample.sh $CKPT -2h >${LOG_BASE}_2h.log 2>&1 & +# CUDA_VISIBLE_DEVICES=7 bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2h >${LOG_BASE}_2h.log 2>&1 & # == vbench == # CUDA_VISIBLE_DEVICES=0 bash eval/sample.sh $CKPT -4a >${LOG_BASE}_4a.log 2>&1 & diff --git a/eval/launch_per_gpu.sh b/eval/launch_per_gpu.sh new file mode 100644 index 0000000..2d65522 --- /dev/null +++ b/eval/launch_per_gpu.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -x +set -e + +CKPT=$1 +CUDA_NUM=$2 +NUM_FRAMES=$3 +MODEL_NAME=$4 + + +if [[ $CKPT == *"ema"* ]]; then + parentdir=$(dirname $CKPT) + CKPT_BASE=$(basename $parentdir)_ema +else + CKPT_BASE=$(basename $CKPT) +fi + +LOG_BASE=logs/sample/${MODEL_NAME}_${CKPT_BASE} +echo "Logging to $LOG_BASE" + +# == sample & human evaluation == +echo "running image task" +CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT 1 $MODEL_NAME -1 >${LOG_BASE}_1.log 2>&1 +echo "running task 2a" +CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2a >${LOG_BASE}_2a.log 2>&1 +# echo "running task 2b" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2b >${LOG_BASE}_2b.log 2>&1 +# echo "running task 2c" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2c >${LOG_BASE}_2c.log 2>&1 +# echo "running task 2d" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2d >${LOG_BASE}_2d.log 2>&1 +# echo "running task 2e" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2e >${LOG_BASE}_2e.log 2>&1 +# echo "running task 2f" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2f >${LOG_BASE}_2f.log 2>&1 +# echo "running task 2g" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2g >${LOG_BASE}_2g.log 2>&1 +# echo "running task 2h" +# CUDA_VISIBLE_DEVICES=$CUDA_NUM bash eval/sample.sh $CKPT $NUM_FRAMES $MODEL_NAME -2h >${LOG_BASE}_2h.log 2>&1 diff --git a/eval/sample.sh b/eval/sample.sh index e32d6d3..dc29e4b 100644 --- a/eval/sample.sh +++ b/eval/sample.sh @@ -4,33 +4,39 @@ set -e CKPT=$1 +NUM_FRAMES=$2 +MODEL_NAME=$3 + +let DOUBLE_FRAMES=$2*2 +let QUAD_FRAMES=$2*4 +let OCT_FRAMES=$2*8 CMD="python scripts/inference.py configs/opensora-v1-2/inference/sample.py" -CMD_REF="python scripts/inference-long.py configs/opensora-v1-1/inference/sample.py" +CMD_REF="python scripts/inference-long.py configs/opensora-v1-2/inference/sample.py" if [[ $CKPT == *"ema"* ]]; then parentdir=$(dirname $CKPT) CKPT_BASE=$(basename $parentdir)_ema else CKPT_BASE=$(basename $CKPT) fi -OUTPUT="./samples/samples_${CKPT_BASE}" +OUTPUT="./samples/samples_${MODEL_NAME}_${CKPT_BASE}" start=$(date +%s) -DEFAULT_BS=8 +DEFAULT_BS=1 ### Functions function run_image() { # 10min - # 1.1 1024x1024 + # # 1.1 1024x1024 eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2i_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 1024 1024 --sample-name 1024x1024 --batch-size $DEFAULT_BS # 1.2 240x426 eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2i_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 240 426 --sample-name 240x426 --end-index 3 --batch-size $DEFAULT_BS # 1.3 512x512 - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2i_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name 512x512 --end-index 3 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name 512x512 --end-index 3 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name 512x512 --end-index 3 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name 512x512 --end-index 3 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2i_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name t2i_512x512 --end-index 3 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name t2v_512x512 --end-index 3 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name short_512x512 --end-index 3 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 1 --image-size 512 512 --sample-name sora_512x512 --end-index 3 --batch-size $DEFAULT_BS # 1.4 720p multi-resolution # 1:1 @@ -52,80 +58,80 @@ function run_image() { # 10min function run_video_a() { # 30min, sample & multi-resolution # sample - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 16 --image-size 144 256 --sample-name sample_16x144x256 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 16 --image-size 240 426 --sample-name sample_16x240x426 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 32 --image-size 240 426 --sample-name sample_32x240x426 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 64 --image-size 240 426 --sample-name sample_64x240x426 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 16 --image-size 480 854 --sample-name sample_16x480x854 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 32 --image-size 480 854 --sample-name sample_32x480x854 --batch-size $DEFAULT_BS - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames 16 --image-size 720 1280 --sample-name sample_16x720x1280 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 144 256 --sample-name sample_${NUM_FRAMES}x144x256 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 240 426 --sample-name sample_${NUM_FRAMES}x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $DOUBLE_FRAMES --image-size 240 426 --sample-name sample_${DOUBLE_FRAMES}x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $QUAD_FRAMES --image-size 240 426 --sample-name sample_${QUAD_FRAMES}x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 480 854 --sample-name sample_${NUM_FRAMES}x480x854 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $DOUBLE_FRAMES --image-size 480 854 --sample-name sample_${DOUBLE_FRAMES}x480x854 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_samples.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 720 1280 --sample-name sample_${NUM_FRAMES}x720x1280 --batch-size $DEFAULT_BS } function run_video_b() { # 30min, short 16x240p & 64x240p # 32x240p, short - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames 32 --image-size 240 426 --sample-name short_32x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames $DOUBLE_FRAMES --image-size 240 426 --sample-name short_${DOUBLE_FRAMES}x240x426 --batch-size $DEFAULT_BS # 64x240p, short - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames 64 --image-size 240 426 --sample-name short_64x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames $QUAD_FRAMES --image-size 240 426 --sample-name short_${QUAD_FRAMES}x240x426 --batch-size $DEFAULT_BS } function run_video_c() { # 30min, sora 16x240p & short 128x240p # 16x240p, sora - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 16 --image-size 426 240 --sample-name sora_16x426x240 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 426 240 --sample-name sora_${NUM_FRAMES}x426x240 --batch-size $DEFAULT_BS # 16x240p, sora - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 16 --image-size 240 426 --sample-name sora_16x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 240 426 --sample-name sora_${NUM_FRAMES}x240x426 --batch-size $DEFAULT_BS # 128x240p, sora - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 128 --image-size 240 426 --sample-name sora_128x240x426 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames $OCT_FRAMES --image-size 240 426 --sample-name sora_${OCT_FRAMES}x240x426 --batch-size $DEFAULT_BS } function run_video_d() { # 30min, sora 32x480p # 32x480p, short - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames 32 --image-size 480 854 --sample-name short_32x480x854 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_short.txt --save-dir $OUTPUT --num-frames $DOUBLE_FRAMES --image-size 480 854 --sample-name short_${DOUBLE_FRAMES}x480x854 --batch-size $DEFAULT_BS } function run_video_e() { # 30min # 64x480p, sora - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 64 --image-size 480 854 --sample-name sora_64x480x854 --batch-size 4 + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames $QUAD_FRAMES --image-size 480 854 --sample-name sora_${QUAD_FRAMES}x480x854 --batch-size $DEFAULT_BS } function run_video_f() { # 30min # 16x720p, sora - eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames 16 --image-size 720 1280 --sample-name sora_16x720x1280 --batch-size $DEFAULT_BS + eval $CMD --ckpt-path $CKPT --prompt-path assets/texts/t2v_sora.txt --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 720 1280 --sample-name sora_${NUM_FRAMES}x720x1280 --batch-size $DEFAULT_BS } function run_video_g() { # 16x720p multi-resolution # 1:1 PROMPT="A soaring drone footage captures the majestic beauty of a coastal cliff, its red and yellow stratified rock faces rich in color and against the vibrant turquoise of the sea. Seabirds can be seen taking flight around the cliff's precipices. As the drone slowly moves from different angles, the changing sunlight casts shifting shadows that highlight the rugged textures of the cliff and the surrounding calm sea. The water gently laps at the rock base and the greenery that clings to the top of the cliff, and the scene gives a sense of peaceful isolation at the fringes of the ocean. The video captures the essence of pristine natural beauty untouched by human structures." - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 960 960 --sample-name 720p_1_1 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 960 960 --sample-name 720p_1_1 # 16:9 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 720 1280 --sample-name 720p_16_9 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 720 1280 --sample-name 720p_16_9 # 9:16 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 1280 720 --sample-name 720p_9_16 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 1280 720 --sample-name 720p_9_16 # 4:3 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 832 1108 --sample-name 720p_4_3 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 832 1108 --sample-name 720p_4_3 # 3:4 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 1108 832 --sample-name 720p_3_4 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 1108 832 --sample-name 720p_3_4 # 1:2 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 1358 600 --sample-name 720p_1_2 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 1358 600 --sample-name 720p_1_2 # 2:1 - eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames 16 --image-size 600 1358 --sample-name 720p_2_1 + eval $CMD --ckpt-path $CKPT --prompt \"$PROMPT\" --save-dir $OUTPUT --num-frames $NUM_FRAMES --image-size 600 1358 --sample-name 720p_2_1 } function run_video_h() { # 23min # 3.1 image-conditioned long video generation eval $CMD_REF --ckpt-path $CKPT --save-dir $OUTPUT --sample-name ref_L10C4_16x240x426 \ --prompt-path assets/texts/t2v_ref.txt --start-index 0 --end-index 3 \ - --num-frames 16 --image-size 240 426 \ + --num-frames $NUM_FRAMES --image-size 240 426 \ --loop 5 --condition-frame-length 4 \ --reference-path assets/images/condition/cliff.png assets/images/condition/wave.png assets/images/condition/ship.png \ --mask-strategy "0" "0" "0" --batch-size $DEFAULT_BS eval $CMD_REF --ckpt-path $CKPT --save-dir $OUTPUT --sample-name ref_L10C4_64x240x426 \ --prompt-path assets/texts/t2v_ref.txt --start-index 0 --end-index 3 \ - --num-frames 64 --image-size 240 426 \ + --num-frames $NUM_FRAMES --image-size 240 426 \ --loop 5 --condition-frame-length 16 \ --reference-path assets/images/condition/cliff.png assets/images/condition/wave.png assets/images/condition/ship.png \ --mask-strategy "0" "0" "0" --batch-size $DEFAULT_BS @@ -133,7 +139,7 @@ function run_video_h() { # 23min # 3.2 eval $CMD_REF --ckpt-path $CKPT --save-dir $OUTPUT --sample-name ref_L1_128x240x426 \ --prompt-path assets/texts/t2v_ref.txt --start-index 3 --end-index 6 \ - --num-frames 128 --image-size 240 426 \ + --num-frames $NUM_FRAMES --image-size 240 426 \ --loop 1 \ --reference-path assets/images/condition/cliff.png "assets/images/condition/cactus-sad.png\;assets/images/condition/cactus-happy.png" https://cdn.openai.com/tmp/s/interp/d0.mp4 \ --mask-strategy "0\;0,0,0,-1,1" "0\;0,1,0,-1,1" "0,0,0,0,64,0.5" --batch-size $DEFAULT_BS diff --git a/eval/test.sh b/eval/test.sh new file mode 100644 index 0000000..77e5623 --- /dev/null +++ b/eval/test.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# set -x +set -e + +CKPT=$1 +NUM_FRAMES=$2 +MODEL_NAME=$3 + +let DOUBLE_FRAMES=$2*2 +let TRIPLE_FRAMES=$2*3 + +echo $DOUBLE_FRAMES +echo $TRIPLE_FRAMES From a6a0325b470ed6fb527693fbd478177bf5c8655d Mon Sep 17 00:00:00 2001 From: "Zheng Zangwei (Alex Zheng)" Date: Fri, 10 May 2024 14:36:31 +0800 Subject: [PATCH 2/3] change flashattn to flash_attn (#387) --- README.md | 2 +- configs/dit/train/16x256x256.py | 2 +- configs/dit/train/1x256x256.py | 2 +- configs/latte/train/16x256x256.py | 2 +- configs/opensora-v1-1/train/benchmark.py | 2 +- configs/opensora-v1-1/train/image.py | 2 +- configs/opensora-v1-1/train/stage1.py | 2 +- configs/opensora-v1-1/train/stage2.py | 2 +- configs/opensora-v1-1/train/stage3.py | 2 +- configs/opensora-v1-1/train/video.py | 2 +- configs/opensora/inference/16x256x256.py | 2 +- configs/opensora/inference/16x512x512.py | 2 +- configs/opensora/inference/64x512x512.py | 2 +- configs/opensora/train/16x256x256-mask.py | 2 +- configs/opensora/train/16x256x256-spee.py | 2 +- configs/opensora/train/16x256x256.py | 2 +- configs/opensora/train/16x512x512.py | 2 +- configs/opensora/train/360x512x512.py | 2 +- configs/opensora/train/64x512x512-sp.py | 2 +- configs/opensora/train/64x512x512.py | 2 +- configs/pixart/train/16x256x256.py | 2 +- configs/pixart/train/1x512x512.py | 2 +- configs/pixart/train/64x512x512.py | 2 +- docs/config.md | 6 +++--- docs/zh_CN/structure.md | 4 ++-- opensora/models/dit/dit.py | 14 +++++++------- opensora/models/pixart/pixart.py | 12 ++++++------ opensora/models/stdit/stdit.py | 14 +++++++------- tests/test_attn.py | 4 ++-- tests/test_seq_parallel_attention.py | 4 ++-- 30 files changed, 52 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 1a16f45..fa97f09 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ conda activate opensora pip install torch torchvision # install flash attention (optional) -# set enable_flashattn=False in config to avoid using flash attention +# set enable_flash_attn=False in config to avoid using flash attention pip install packaging ninja pip install flash-attn --no-build-isolation diff --git a/configs/dit/train/16x256x256.py b/configs/dit/train/16x256x256.py index 42845de..c36e06b 100644 --- a/configs/dit/train/16x256x256.py +++ b/configs/dit/train/16x256x256.py @@ -18,7 +18,7 @@ sp_size = 1 model = dict( type="DiT-XL/2", from_pretrained="DiT-XL-2-256x256.pt", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/dit/train/1x256x256.py b/configs/dit/train/1x256x256.py index c423b24..aa5d478 100644 --- a/configs/dit/train/1x256x256.py +++ b/configs/dit/train/1x256x256.py @@ -19,7 +19,7 @@ sp_size = 1 model = dict( type="DiT-XL/2", no_temporal_pos_emb=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/latte/train/16x256x256.py b/configs/latte/train/16x256x256.py index 41573a0..e087f8a 100644 --- a/configs/latte/train/16x256x256.py +++ b/configs/latte/train/16x256x256.py @@ -17,7 +17,7 @@ sp_size = 1 # Define model model = dict( type="Latte-XL/2", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/benchmark.py b/configs/opensora-v1-1/train/benchmark.py index 5310b43..dff6ef1 100644 --- a/configs/opensora-v1-1/train/benchmark.py +++ b/configs/opensora-v1-1/train/benchmark.py @@ -65,7 +65,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/image.py b/configs/opensora-v1-1/train/image.py index 45748b7..ed25319 100644 --- a/configs/opensora-v1-1/train/image.py +++ b/configs/opensora-v1-1/train/image.py @@ -29,7 +29,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/stage1.py b/configs/opensora-v1-1/train/stage1.py index 944b565..d44429e 100644 --- a/configs/opensora-v1-1/train/stage1.py +++ b/configs/opensora-v1-1/train/stage1.py @@ -41,7 +41,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/stage2.py b/configs/opensora-v1-1/train/stage2.py index fb7e6d5..687a6c7 100644 --- a/configs/opensora-v1-1/train/stage2.py +++ b/configs/opensora-v1-1/train/stage2.py @@ -43,7 +43,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/stage3.py b/configs/opensora-v1-1/train/stage3.py index 8485762..624ea9e 100644 --- a/configs/opensora-v1-1/train/stage3.py +++ b/configs/opensora-v1-1/train/stage3.py @@ -43,7 +43,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora-v1-1/train/video.py b/configs/opensora-v1-1/train/video.py index ef574f2..1471664 100644 --- a/configs/opensora-v1-1/train/video.py +++ b/configs/opensora-v1-1/train/video.py @@ -31,7 +31,7 @@ model = dict( from_pretrained=None, input_sq_size=512, # pretrained model is trained on 512x512 qk_norm=True, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora/inference/16x256x256.py b/configs/opensora/inference/16x256x256.py index 50ead83..4053e12 100644 --- a/configs/opensora/inference/16x256x256.py +++ b/configs/opensora/inference/16x256x256.py @@ -7,7 +7,7 @@ model = dict( type="STDiT-XL/2", space_scale=0.5, time_scale=1.0, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, from_pretrained="PRETRAINED_MODEL", ) diff --git a/configs/opensora/inference/16x512x512.py b/configs/opensora/inference/16x512x512.py index 58d8243..478cb5b 100644 --- a/configs/opensora/inference/16x512x512.py +++ b/configs/opensora/inference/16x512x512.py @@ -7,7 +7,7 @@ model = dict( type="STDiT-XL/2", space_scale=1.0, time_scale=1.0, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, from_pretrained="PRETRAINED_MODEL", ) diff --git a/configs/opensora/inference/64x512x512.py b/configs/opensora/inference/64x512x512.py index dbbe240..03cce23 100644 --- a/configs/opensora/inference/64x512x512.py +++ b/configs/opensora/inference/64x512x512.py @@ -7,7 +7,7 @@ model = dict( type="STDiT-XL/2", space_scale=1.0, time_scale=2 / 3, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, from_pretrained="PRETRAINED_MODEL", ) diff --git a/configs/opensora/train/16x256x256-mask.py b/configs/opensora/train/16x256x256-mask.py index 0e478e5..12b4b65 100644 --- a/configs/opensora/train/16x256x256-mask.py +++ b/configs/opensora/train/16x256x256-mask.py @@ -20,7 +20,7 @@ model = dict( space_scale=0.5, time_scale=1.0, from_pretrained="PixArt-XL-2-512x512.pth", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) mask_ratios = { diff --git a/configs/opensora/train/16x256x256-spee.py b/configs/opensora/train/16x256x256-spee.py index b46c749..3af468b 100644 --- a/configs/opensora/train/16x256x256-spee.py +++ b/configs/opensora/train/16x256x256-spee.py @@ -20,7 +20,7 @@ model = dict( space_scale=0.5, time_scale=1.0, from_pretrained="PixArt-XL-2-512x512.pth", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) mask_ratios = { diff --git a/configs/opensora/train/16x256x256.py b/configs/opensora/train/16x256x256.py index 144adee..21ebb26 100644 --- a/configs/opensora/train/16x256x256.py +++ b/configs/opensora/train/16x256x256.py @@ -20,7 +20,7 @@ model = dict( space_scale=0.5, time_scale=1.0, from_pretrained="PixArt-XL-2-512x512.pth", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora/train/16x512x512.py b/configs/opensora/train/16x512x512.py index a2a87b1..c566fd1 100644 --- a/configs/opensora/train/16x512x512.py +++ b/configs/opensora/train/16x512x512.py @@ -20,7 +20,7 @@ model = dict( space_scale=1.0, time_scale=1.0, from_pretrained=None, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/opensora/train/360x512x512.py b/configs/opensora/train/360x512x512.py index 7cb4f56..62bfd14 100644 --- a/configs/opensora/train/360x512x512.py +++ b/configs/opensora/train/360x512x512.py @@ -26,7 +26,7 @@ model = dict( space_scale=1.0, time_scale=2 / 3, from_pretrained=None, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, enable_sequence_parallelism=True, # enable sq here ) diff --git a/configs/opensora/train/64x512x512-sp.py b/configs/opensora/train/64x512x512-sp.py index 880adb2..fd34a2a 100644 --- a/configs/opensora/train/64x512x512-sp.py +++ b/configs/opensora/train/64x512x512-sp.py @@ -20,7 +20,7 @@ model = dict( space_scale=1.0, time_scale=2 / 3, from_pretrained=None, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, enable_sequence_parallelism=True, # enable sq here ) diff --git a/configs/opensora/train/64x512x512.py b/configs/opensora/train/64x512x512.py index b65a419..e07f8c1 100644 --- a/configs/opensora/train/64x512x512.py +++ b/configs/opensora/train/64x512x512.py @@ -20,7 +20,7 @@ model = dict( space_scale=1.0, time_scale=2 / 3, from_pretrained=None, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/pixart/train/16x256x256.py b/configs/pixart/train/16x256x256.py index 701b9da..66285bf 100644 --- a/configs/pixart/train/16x256x256.py +++ b/configs/pixart/train/16x256x256.py @@ -20,7 +20,7 @@ model = dict( space_scale=0.5, time_scale=1.0, from_pretrained="PixArt-XL-2-512x512.pth", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/pixart/train/1x512x512.py b/configs/pixart/train/1x512x512.py index 8b2b5d4..0e7a1f7 100644 --- a/configs/pixart/train/1x512x512.py +++ b/configs/pixart/train/1x512x512.py @@ -21,7 +21,7 @@ model = dict( time_scale=1.0, no_temporal_pos_emb=True, from_pretrained="PixArt-XL-2-512x512.pth", - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/configs/pixart/train/64x512x512.py b/configs/pixart/train/64x512x512.py index 3448ce7..dd59d11 100644 --- a/configs/pixart/train/64x512x512.py +++ b/configs/pixart/train/64x512x512.py @@ -21,7 +21,7 @@ model = dict( space_scale=1.0, time_scale=2 / 3, from_pretrained=None, - enable_flashattn=True, + enable_flash_attn=True, enable_layernorm_kernel=True, ) vae = dict( diff --git a/docs/config.md b/docs/config.md index 5cecc73..3ecf726 100644 --- a/docs/config.md +++ b/docs/config.md @@ -26,8 +26,8 @@ model = dict( from_pretrained="PRETRAINED_MODEL", # (Optional) Load from pretrained model input_sq_size=512, # Base spatial position embedding size qk_norm=True, # Normalize query and key in attention - enable_flashattn=True, # (Optional) Speed up training and inference with flash attention - # Turn enable_flashattn to False if you skip flashattn installation + enable_flash_attn=True, # (Optional) Speed up training and inference with flash attention + # Turn enable_flash_attn to False if you skip flashattn installation enable_layernorm_kernel=True, # (Optional) Speed up training and inference with fused kernel # Turn enable_layernorm_kernel to False if you skip apex installation ) @@ -177,7 +177,7 @@ model = dict( from_pretrained=None, # Load from pretrained model input_sq_size=512, # Base spatial position embedding size qk_norm=True, # Normalize query and key in attention - enable_flashattn=True, # (Optional) Speed up training and inference with flash attention + enable_flash_attn=True, # (Optional) Speed up training and inference with flash attention enable_layernorm_kernel=True, # (Optional) Speed up training and inference with fused kernel ) vae = dict( diff --git a/docs/zh_CN/structure.md b/docs/zh_CN/structure.md index 6e25d84..1e3460e 100644 --- a/docs/zh_CN/structure.md +++ b/docs/zh_CN/structure.md @@ -89,7 +89,7 @@ model = dict( type="STDiT-XL/2", # Select model type (STDiT-XL/2, DiT-XL/2, etc.) space_scale=1.0, # (Optional) Space positional encoding scale (new height / old height) time_scale=2 / 3, # (Optional) Time positional encoding scale (new frame_interval / old frame_interval) - enable_flashattn=True, # (Optional) Speed up training and inference with flash attention + enable_flash_attn=True, # (Optional) Speed up training and inference with flash attention enable_layernorm_kernel=True, # (Optional) Speed up training and inference with fused kernel from_pretrained="PRETRAINED_MODEL", # (Optional) Load from pretrained model no_temporal_pos_emb=True, # (Optional) Disable temporal positional encoding (for image) @@ -144,7 +144,7 @@ model = dict( space_scale=1.0, time_scale=2 / 3, from_pretrained="YOUR_PRETRAINED_MODEL", - enable_flashattn=True, # Enable flash attention + enable_flash_attn=True, # Enable flash attention enable_layernorm_kernel=True, # Enable layernorm kernel ) vae = dict( diff --git a/opensora/models/dit/dit.py b/opensora/models/dit/dit.py index f264f8e..68a3eb7 100644 --- a/opensora/models/dit/dit.py +++ b/opensora/models/dit/dit.py @@ -44,13 +44,13 @@ class DiTBlock(nn.Module): hidden_size, num_heads, mlp_ratio=4.0, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, ): super().__init__() self.hidden_size = hidden_size self.num_heads = num_heads - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn mlp_hidden_dim = int(hidden_size * mlp_ratio) self.norm1 = get_layernorm(hidden_size, eps=1e-6, affine=False, use_kernel=enable_layernorm_kernel) @@ -58,7 +58,7 @@ class DiTBlock(nn.Module): hidden_size, num_heads=num_heads, qkv_bias=True, - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, ) self.norm2 = get_layernorm(hidden_size, eps=1e-6, affine=False, use_kernel=enable_layernorm_kernel) self.mlp = Mlp(in_features=hidden_size, hidden_features=mlp_hidden_dim, act_layer=approx_gelu, drop=0) @@ -93,7 +93,7 @@ class DiT(nn.Module): caption_channels=512, model_max_length=77, dtype=torch.float32, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, enable_sequence_parallelism=False, ): @@ -111,7 +111,7 @@ class DiT(nn.Module): self.num_heads = num_heads self.dtype = dtype self.use_text_encoder = not condition.startswith("label") - if enable_flashattn: + if enable_flash_attn: assert dtype in [ torch.float16, torch.bfloat16, @@ -143,7 +143,7 @@ class DiT(nn.Module): hidden_size, num_heads, mlp_ratio=mlp_ratio, - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, enable_layernorm_kernel=enable_layernorm_kernel, ) for _ in range(depth) @@ -152,7 +152,7 @@ class DiT(nn.Module): self.final_layer = FinalLayer(hidden_size, np.prod(self.patch_size), self.out_channels) self.initialize_weights() - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn self.enable_layernorm_kernel = enable_layernorm_kernel def get_spatial_pos_embed(self): diff --git a/opensora/models/pixart/pixart.py b/opensora/models/pixart/pixart.py index 421f836..376bd5b 100644 --- a/opensora/models/pixart/pixart.py +++ b/opensora/models/pixart/pixart.py @@ -61,13 +61,13 @@ class PixArtBlock(nn.Module): num_heads, mlp_ratio=4.0, drop_path=0.0, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, enable_sequence_parallelism=False, ): super().__init__() self.hidden_size = hidden_size - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn self._enable_sequence_parallelism = enable_sequence_parallelism if enable_sequence_parallelism: @@ -82,7 +82,7 @@ class PixArtBlock(nn.Module): hidden_size, num_heads=num_heads, qkv_bias=True, - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, ) self.cross_attn = self.mha_cls(hidden_size, num_heads) self.norm2 = get_layernorm(hidden_size, eps=1e-6, affine=False, use_kernel=enable_layernorm_kernel) @@ -130,7 +130,7 @@ class PixArt(nn.Module): freeze=None, space_scale=1.0, time_scale=1.0, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, enable_sequence_parallelism=False, ): @@ -152,7 +152,7 @@ class PixArt(nn.Module): self.no_temporal_pos_emb = no_temporal_pos_emb self.depth = depth self.mlp_ratio = mlp_ratio - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn self.enable_layernorm_kernel = enable_layernorm_kernel self.space_scale = space_scale self.time_scale = time_scale @@ -179,7 +179,7 @@ class PixArt(nn.Module): num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i], - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, enable_layernorm_kernel=enable_layernorm_kernel, ) for i in range(depth) diff --git a/opensora/models/stdit/stdit.py b/opensora/models/stdit/stdit.py index 6e16058..6882572 100644 --- a/opensora/models/stdit/stdit.py +++ b/opensora/models/stdit/stdit.py @@ -37,13 +37,13 @@ class STDiTBlock(nn.Module): d_t=None, mlp_ratio=4.0, drop_path=0.0, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, enable_sequence_parallelism=False, ): super().__init__() self.hidden_size = hidden_size - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn self._enable_sequence_parallelism = enable_sequence_parallelism if enable_sequence_parallelism: @@ -58,7 +58,7 @@ class STDiTBlock(nn.Module): hidden_size, num_heads=num_heads, qkv_bias=True, - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, ) self.cross_attn = self.mha_cls(hidden_size, num_heads) self.norm2 = get_layernorm(hidden_size, eps=1e-6, affine=False, use_kernel=enable_layernorm_kernel) @@ -82,7 +82,7 @@ class STDiTBlock(nn.Module): hidden_size, num_heads=num_heads, qkv_bias=True, - enable_flashattn=self.enable_flashattn, + enable_flash_attn=self.enable_flash_attn, ) def t_mask_select(self, x, masked_x, x_mask): @@ -174,7 +174,7 @@ class STDiT(nn.Module): space_scale=1.0, time_scale=1.0, freeze=None, - enable_flashattn=False, + enable_flash_attn=False, enable_layernorm_kernel=False, enable_sequence_parallelism=False, ): @@ -194,7 +194,7 @@ class STDiT(nn.Module): self.no_temporal_pos_emb = no_temporal_pos_emb self.depth = depth self.mlp_ratio = mlp_ratio - self.enable_flashattn = enable_flashattn + self.enable_flash_attn = enable_flash_attn self.enable_layernorm_kernel = enable_layernorm_kernel self.space_scale = space_scale self.time_scale = time_scale @@ -221,7 +221,7 @@ class STDiT(nn.Module): self.num_heads, mlp_ratio=self.mlp_ratio, drop_path=drop_path[i], - enable_flashattn=self.enable_flashattn, + enable_flash_attn=self.enable_flash_attn, enable_layernorm_kernel=self.enable_layernorm_kernel, enable_sequence_parallelism=enable_sequence_parallelism, d_t=self.num_temporal, diff --git a/tests/test_attn.py b/tests/test_attn.py index 48f8c43..4cf5844 100644 --- a/tests/test_attn.py +++ b/tests/test_attn.py @@ -11,7 +11,7 @@ B, S, H = 128, 32, 1152 N, D = 16, 72 -def run_attn(enable_flashattn: bool): +def run_attn(enable_flash_attn: bool): get_accelerator().reset_peak_memory_stats() rope = RotaryEmbedding(D).to(device=get_current_device(), dtype=torch.bfloat16) attn = Attention( @@ -19,7 +19,7 @@ def run_attn(enable_flashattn: bool): N, qkv_bias=True, rope=rope.rotate_queries_or_keys, - enable_flashattn=enable_flashattn, + enable_flash_attn=enable_flash_attn, ).to(device=get_current_device(), dtype=torch.bfloat16) x = torch.randn(B, S, H, device=get_current_device(), dtype=torch.bfloat16).requires_grad_() y = attn(x) diff --git a/tests/test_seq_parallel_attention.py b/tests/test_seq_parallel_attention.py index 9cce2b2..841115a 100644 --- a/tests/test_seq_parallel_attention.py +++ b/tests/test_seq_parallel_attention.py @@ -18,14 +18,14 @@ def run_attention(rank, world_size): torch.manual_seed(1024) set_sequence_parallel_group(dist.group.WORLD) - seq_parallel_attention = SeqParallelAttention(dim=256, num_heads=4, qkv_bias=True, enable_flashattn=False).cuda() + seq_parallel_attention = SeqParallelAttention(dim=256, num_heads=4, qkv_bias=True, enable_flash_attn=False).cuda() torch.manual_seed(1024) attention = Attention( dim=256, num_heads=4, qkv_bias=True, - enable_flashattn=False, + enable_flash_attn=False, ).cuda() # create inputs From 73128353399f6e1fa2453062db51e1ba7a81568c Mon Sep 17 00:00:00 2001 From: Yuan-Man <68322456+Yuan-ManX@users.noreply.github.com> Date: Fri, 10 May 2024 15:21:46 +0800 Subject: [PATCH 3/3] Update README.md (#385) --- docs/zh_CN/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/zh_CN/README.md b/docs/zh_CN/README.md index 404e8e8..545608b 100644 --- a/docs/zh_CN/README.md +++ b/docs/zh_CN/README.md @@ -23,6 +23,8 @@ ## 📰 资讯 > 由于文档需要进行翻译,最新资讯请看[英文文档](/README.md#-news) +* **[2024.04.25]** 🤗 我们在Hugging Face Spaces上发布了Open-Sora的[Gradio demo](https://huggingface.co/spaces/hpcai-tech/open-sora)。 +* **[2024.04.25]** 🔥 我们发布了支持**2秒至15秒、144p至720p、任意宽高比**的文本到图像、文本到视频、图像到视频、视频到视频、无限时间生成的**Open-Sora 1.1**版本。此外,还发布了一个完整的视频处理流程。 [[checkpoints]]() [[report]](/docs/report_02.md) * **[2024.03.18]** 🔥 我们发布了**Open-Sora 1.0**,这是一个完全开源的视频生成项目。 * Open-Sora 1.0 支持视频数据预处理、加速训练、推理等全套流程。 * 我们提供的[模型权重](#模型权重)只需 3 天的训练就能生成 2 秒的 512x512 视频。