4 Commits

Author SHA1 Message Date
  zhengzangw b88daa1cf0 update readme 9 months ago
  zhengzangw 0a1270f463 update readme 9 months ago
  zhengzangw 5e0a3f02d5 update readme 9 months ago
  Zheng Zangwei (Alex Zheng) febf3ad4b2
Update Open-Sora 2.0 (#807) 9 months ago
100 changed files with 940 additions and 7674 deletions
Split View
  1. +6
    -12
      .gitignore
  2. +0
    -26
      Dockerfile
  3. +98
    -454
      LICENSE
  4. +124
    -525
      README.md
  5. BIN
      assets/images/watermark/watermark.png
  6. +0
    -800
      assets/texts/VBench/all_category.txt
  7. +0
    -946
      assets/texts/VBench/all_dimension.txt
  8. +0
    -1118
      assets/texts/VBench/all_i2v.txt
  9. +0
    -100
      assets/texts/VBench/prompts_per_category/animal.txt
  10. +0
    -100
      assets/texts/VBench/prompts_per_category/architecture.txt
  11. +0
    -100
      assets/texts/VBench/prompts_per_category/food.txt
  12. +0
    -100
      assets/texts/VBench/prompts_per_category/human.txt
  13. +0
    -100
      assets/texts/VBench/prompts_per_category/lifestyle.txt
  14. +0
    -100
      assets/texts/VBench/prompts_per_category/plant.txt
  15. +0
    -100
      assets/texts/VBench/prompts_per_category/scenery.txt
  16. +0
    -100
      assets/texts/VBench/prompts_per_category/vehicles.txt
  17. +0
    -90
      assets/texts/VBench/prompts_per_dimension/appearance_style.txt
  18. +0
    -85
      assets/texts/VBench/prompts_per_dimension/color.txt
  19. +0
    -100
      assets/texts/VBench/prompts_per_dimension/human_action.txt
  20. +0
    -82
      assets/texts/VBench/prompts_per_dimension/multiple_objects.txt
  21. +0
    -79
      assets/texts/VBench/prompts_per_dimension/object_class.txt
  22. +0
    -93
      assets/texts/VBench/prompts_per_dimension/overall_consistency.txt
  23. +0
    -86
      assets/texts/VBench/prompts_per_dimension/scene.txt
  24. +0
    -84
      assets/texts/VBench/prompts_per_dimension/spatial_relationship.txt
  25. +0
    -72
      assets/texts/VBench/prompts_per_dimension/subject_consistency.txt
  26. +0
    -75
      assets/texts/VBench/prompts_per_dimension/temporal_flickering.txt
  27. +0
    -100
      assets/texts/VBench/prompts_per_dimension/temporal_style.txt
  28. +0
    -5
      assets/texts/celebrities_interaction.txt
  29. +9
    -0
      assets/texts/example.csv
  30. +2
    -0
      assets/texts/i2v.csv
  31. BIN
      assets/texts/i2v.png
  32. +0
    -14
      assets/texts/i2v/celebrities_interaction.txt
  33. +0
    -10
      assets/texts/i2v/multi_human.txt
  34. +0
    -20
      assets/texts/i2v/prompts_head.txt
  35. +0
    -10
      assets/texts/i2v/prompts_human_i2v_head.txt
  36. +0
    -20
      assets/texts/i2v/prompts_loop.txt
  37. +0
    -20
      assets/texts/i2v/prompts_ori.txt
  38. +0
    -20
      assets/texts/i2v/prompts_tail.txt
  39. +0
    -8
      assets/texts/imagenet_labels.txt
  40. +0
    -24
      assets/texts/internal_prompts_1.txt
  41. +0
    -8
      assets/texts/internal_prompts_2.txt
  42. +0
    -32
      assets/texts/internal_test.txt
  43. +0
    -10
      assets/texts/multi_human.txt
  44. +0
    -8
      assets/texts/rand_types.txt
  45. +49
    -0
      assets/texts/sora.csv
  46. +0
    -8
      assets/texts/t2i_samples.txt
  47. +0
    -10
      assets/texts/t2i_sigma.txt
  48. +0
    -1
      assets/texts/t2v_car.txt
  49. +0
    -12
      assets/texts/t2v_demo.txt
  50. +0
    -7
      assets/texts/t2v_latte.txt
  51. +0
    -12
      assets/texts/t2v_pllava.txt
  52. +0
    -6
      assets/texts/t2v_ref.txt
  53. +0
    -10
      assets/texts/t2v_samples.txt
  54. +0
    -20
      assets/texts/t2v_short.txt
  55. +0
    -48
      assets/texts/t2v_sora.txt
  56. +0
    -6
      assets/texts/ucf101_id.txt
  57. +0
    -6
      assets/texts/ucf101_labels.txt
  58. +76
    -0
      configs/diffusion/inference/256px.py
  59. +4
    -0
      configs/diffusion/inference/256px_tp.py
  60. +8
    -0
      configs/diffusion/inference/768px.py
  61. +35
    -0
      configs/diffusion/inference/high_compression.py
  62. +20
    -0
      configs/diffusion/inference/plugins/sp.py
  63. +36
    -0
      configs/diffusion/inference/plugins/t2i2v.py
  64. +17
    -0
      configs/diffusion/inference/plugins/tp.py
  65. +4
    -0
      configs/diffusion/inference/t2i2v_256px.py
  66. +4
    -0
      configs/diffusion/inference/t2i2v_768px.py
  67. +12
    -0
      configs/diffusion/train/demo.py
  68. +71
    -0
      configs/diffusion/train/high_compression.py
  69. +114
    -0
      configs/diffusion/train/image.py
  70. +56
    -0
      configs/diffusion/train/stage1.py
  71. +14
    -0
      configs/diffusion/train/stage1_i2v.py
  72. +94
    -0
      configs/diffusion/train/stage2.py
  73. +87
    -0
      configs/diffusion/train/stage2_i2v.py
  74. +0
    -31
      configs/dit/inference/16x256x256.py
  75. +0
    -31
      configs/dit/inference/1x256x256-class.py
  76. +0
    -32
      configs/dit/inference/1x256x256.py
  77. +0
    -50
      configs/dit/train/16x256x256.py
  78. +0
    -51
      configs/dit/train/1x256x256.py
  79. +0
    -30
      configs/latte/inference/16x256x256-class.py
  80. +0
    -31
      configs/latte/inference/16x256x256.py
  81. +0
    -49
      configs/latte/train/16x256x256.py
  82. +0
    -64
      configs/opensora-v1-1/inference/sample-ref.py
  83. +0
    -44
      configs/opensora-v1-1/inference/sample.py
  84. +0
    -102
      configs/opensora-v1-1/train/benchmark.py
  85. +0
    -66
      configs/opensora-v1-1/train/image.py
  86. +0
    -88
      configs/opensora-v1-1/train/image_rflow.py
  87. +0
    -78
      configs/opensora-v1-1/train/stage1.py
  88. +0
    -80
      configs/opensora-v1-1/train/stage2.py
  89. +0
    -80
      configs/opensora-v1-1/train/stage3.py
  90. +0
    -68
      configs/opensora-v1-1/train/video.py
  91. +0
    -42
      configs/opensora-v1-2/inference/sample.py
  92. +0
    -44
      configs/opensora-v1-2/inference/sample_hf.py
  93. +0
    -117
      configs/opensora-v1-2/misc/bs.py
  94. +0
    -49
      configs/opensora-v1-2/misc/eval_loss.py
  95. +0
    -62
      configs/opensora-v1-2/misc/extract.py
  96. +0
    -94
      configs/opensora-v1-2/misc/feat.py
  97. +0
    -83
      configs/opensora-v1-2/train/adapt.py
  98. +0
    -58
      configs/opensora-v1-2/train/demo_360p.py
  99. +0
    -58
      configs/opensora-v1-2/train/demo_480p.py
  100. +0
    -110
      configs/opensora-v1-2/train/stage1.py

+ 6
- 12
.gitignore View File

@@ -1,4 +1,3 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
@@ -182,23 +181,18 @@ cache/
debug/

# Secret files
hostfile
hostfiles/
hostfile*
run.sh
gradio_cached_examples/
wandb/

# vae weights
eval/vae/flolpips/weights/

# npm
node_modules/
package-lock.json
package.json

# PLLaVA
tools/caption/pllava_dir/PLLaVA/

# vbench
vbench
!eval/vbench
vbench2_beta_i2v
exps
ckpts
flash-attention
datasets

+ 0
- 26
Dockerfile View File

@@ -1,26 +0,0 @@
FROM hpcaitech/pytorch-cuda:2.1.0-12.1.0

# metainformation
LABEL org.opencontainers.image.source = "https://github.com/hpcaitech/Open-Sora"
LABEL org.opencontainers.image.licenses = "Apache License 2.0"
LABEL org.opencontainers.image.base.name = "docker.io/library/hpcaitech/pytorch-cuda:2.1.0-12.1.0"

# install library dependencies
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y

# install flash attention
RUN pip install flash-attn --no-build-isolation

# install apex
RUN pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git

# install xformers
RUN pip install xformers --index-url https://download.pytorch.org/whl/cu121

# Set the working directory
WORKDIR /workspace/Open-Sora
# Copy the current directory contents into the container at /workspace/Open-Sora
COPY . .

# install this project
RUN pip install -v .

+ 98
- 454
LICENSE View File

@@ -1,4 +1,3 @@
Copyright 2024 HPC-AI Technology Inc. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
@@ -204,9 +203,9 @@ Copyright 2024 HPC-AI Technology Inc. All rights reserved.
=========================================================================
This project is inspired by the listed projects and is subject to the following licenses:

1. Latte (https://github.com/Vchitect/Latte/blob/main/LICENSE)
10. [T5: Text-To-Text Transfer Transformer](https://github.com/google-research/text-to-text-transfer-transformer)

Copyright 2024 Latte
Copyright 2019 Google

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -220,28 +219,11 @@ Copyright 2024 HPC-AI Technology Inc. All rights reserved.
See the License for the specific language governing permissions and
limitations under the License.

2. PixArt-alpha (https://github.com/PixArt-alpha/PixArt-alpha/blob/master/LICENSE)

Copyright (C) 2024 PixArt-alpha/PixArt-alpha

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.

3. dpm-solver (https://github.com/LuChengTHU/dpm-solver/blob/main/LICENSE)
11. [CLIP](https://github.com/openai/CLIP/tree/main)

MIT License

Copyright (c) 2022 Cheng Lu
Copyright (c) 2021 OpenAI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -253,419 +235,9 @@ Copyright 2024 HPC-AI Technology Inc. All rights reserved.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

4. DiT (https://github.com/facebookresearch/DiT/blob/main/LICENSE.txt)

Attribution-NonCommercial 4.0 International

=======================================================================

Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.

Using Creative Commons Public Licenses

Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.

Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors

Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More_considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees

=======================================================================

Creative Commons Attribution-NonCommercial 4.0 International Public
License

By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-NonCommercial 4.0 International Public License ("Public
License"). To the extent this Public License may be interpreted as a
contract, You are granted the Licensed Rights in consideration of Your
acceptance of these terms and conditions, and the Licensor grants You
such rights in consideration of benefits the Licensor receives from
making the Licensed Material available under these terms and
conditions.

Section 1 -- Definitions.

a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.

b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.

c. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
d. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.

e. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.

f. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.

g. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.

h. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.

i. NonCommercial means not primarily intended for or directed towards
commercial advantage or monetary compensation. For purposes of
this Public License, the exchange of the Licensed Material for
other material subject to Copyright and Similar Rights by digital
file-sharing or similar means is NonCommercial provided there is
no payment of monetary compensation in connection with the
exchange.

j. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.

k. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.

l. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.

Section 2 -- Scope.

a. License grant.

1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:

a. reproduce and Share the Licensed Material, in whole or
in part, for NonCommercial purposes only; and

b. produce, reproduce, and Share Adapted Material for
NonCommercial purposes only.

2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.

3. Term. The term of this Public License is specified in Section
6(a).

4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.

5. Downstream recipients.

a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.

b. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.

6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).

b. Other rights.

1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.

2. Patent and trademark rights are not licensed under this
Public License.

3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties, including when
the Licensed Material is used other than for NonCommercial
purposes.

Section 3 -- License Conditions.

Your exercise of the Licensed Rights is expressly made subject to the
following conditions.

a. Attribution.

1. If You Share the Licensed Material (including in modified
form), You must:

a. retain the following if it is supplied by the Licensor
with the Licensed Material:

i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);

ii. a copyright notice;

iii. a notice that refers to this Public License;

iv. a notice that refers to the disclaimer of
warranties;

v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;

b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and

c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.

2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.

3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.

4. If You Share Adapted Material You produce, the Adapter's
License You apply must not prevent recipients of the Adapted
Material from complying with this Public License.

Section 4 -- Sui Generis Database Rights.

Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:

a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database for NonCommercial purposes
only;

b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material; and

c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.

For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.

Section 5 -- Disclaimer of Warranties and Limitation of Liability.

a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.

b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.

c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
12. [FLUX](https://github.com/black-forest-labs/flux)

Section 6 -- Term and Termination.

a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.

b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:

1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or

2. upon express reinstatement by the Licensor.

For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.

c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.

d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.

Section 7 -- Other Terms and Conditions.

a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.

b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.

Section 8 -- Interpretation.

a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.

b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.

c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.

d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.

=======================================================================

Creative Commons is not a party to its public
licenses. Notwithstanding, Creative Commons may elect to apply one of
its public licenses to material it publishes and in those instances
will be considered the “Licensor.” The text of the Creative Commons
public licenses is dedicated to the public domain under the CC0 Public
Domain Dedication. Except for the limited purpose of indicating that
material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the
public licenses.

Creative Commons may be contacted at creativecommons.org.

5. OpenDiT (https://github.com/NUS-HPC-AI-Lab/OpenDiT/blob/master/LICENSE)

Copyright OpenDiT
Copyright 2024 Black Forest Labs

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -679,28 +251,100 @@ Copyright 2024 HPC-AI Technology Inc. All rights reserved.
See the License for the specific language governing permissions and
limitations under the License.

6. Asymmetric_magvitv2 (https://github.com/bornfly-detachment/asymmetric_magvitv2/blob/main/LICENSE)
13. [EfficientViT](https://github.com/mit-han-lab/efficientvit)

MIT License

Copyright (c) 2024 bornfly-detachment

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Copyright [2023] [Han Cai]

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

14. [HunyuanVideo](https://github.com/Tencent/HunyuanVideo/tree/main)

TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT
Tencent HunyuanVideo Release Date: December 3, 2024
THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent Hunyuan Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.

1. DEFINITIONS.
a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent Hunyuan Works or any portion or element thereof set forth herein.
c. “Documentation” shall mean the specifications, manuals and documentation for Tencent Hunyuan made publicly available by Tencent.
d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent Hunyuan Works for any purpose and in any field of use.
f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent Hunyuan and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
g. “Model Derivatives” shall mean all: (i) modifications to Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; (ii) works based on Tencent Hunyuan or any Model Derivative of Tencent Hunyuan; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent Hunyuan or any Model Derivative of Tencent Hunyuan, to that model in order to cause that model to perform similarly to Tencent Hunyuan or a Model Derivative of Tencent Hunyuan, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent Hunyuan or a Model Derivative of Tencent Hunyuan for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
h. “Output” shall mean the information and/or content output of Tencent Hunyuan or a Model Derivative that results from operating or otherwise using Tencent Hunyuan or a Model Derivative, including via a Hosted Service.
i. “Tencent,” “We” or “Us” shall mean THL A29 Limited.
j. “Tencent Hunyuan” shall mean the large language models, text/image/video/audio/3D generation models, and multimodal large language models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us, including, without limitation to, Tencent HunyuanVideo released at [https://github.com/Tencent/HunyuanVideo].
k. “Tencent Hunyuan Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union, United Kingdom and South Korea.
m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
n. “including” shall mean including but not limited to.
2. GRANT OF RIGHTS.
We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
3. DISTRIBUTION.
You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent Hunyuan Works, exclusively in the Territory, provided that You meet all of the following conditions:
a. You must provide all such Third Party recipients of the Tencent Hunyuan Works or products or services using them a copy of this Agreement;
b. You must cause any modified files to carry prominent notices stating that You changed the files;
c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent Hunyuan Works; and (ii) mark the products or services developed by using the Tencent Hunyuan Works to indicate that the product/service is “Powered by Tencent Hunyuan”; and
d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent Hunyuan is licensed under the Tencent Hunyuan Community License Agreement, Copyright © 2024 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate.”
You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent Hunyuan Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
4. ADDITIONAL COMMERCIAL TERMS.
If, on the Tencent Hunyuan version release date, the monthly active users of all products or services made available by or for Licensee is greater than 100 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
5. RULES OF USE.
a. Your use of the Tencent Hunyuan Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent Hunyuan Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent Hunyuan Works and You must provide notice to subsequent users to whom You distribute that Tencent Hunyuan Works are subject to the use restrictions in these Sections 5(a) and 5(b).
b. You must not use the Tencent Hunyuan Works or any Output or results of the Tencent Hunyuan Works to improve any other AI model (other than Tencent Hunyuan or Model Derivatives thereof).
c. You must not use, reproduce, modify, distribute, or display the Tencent Hunyuan Works, Output or results of the Tencent Hunyuan Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
6. INTELLECTUAL PROPERTY.
a. Subject to Tencent’s ownership of Tencent Hunyuan Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
b. No trademark licenses are granted under this Agreement, and in connection with the Tencent Hunyuan Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent Hunyuan Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent Hunyuan Works.
d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent Hunyuan Works or to grant any license thereto.
b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUAN WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUAN WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
8. SURVIVAL AND TERMINATION.
a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent Hunyuan Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
9. GOVERNING LAW AND JURISDICTION.
a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.

EXHIBIT A
ACCEPTABLE USE POLICY

Tencent reserves the right to update this Acceptable Use Policy from time to time.
Last modified: November 5, 2024

Tencent endeavors to promote safe and fair use of its tools and features, including Tencent Hunyuan. You agree not to use Tencent Hunyuan or Model Derivatives:

1. Outside the Territory;
2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
3. To harm Yourself or others;
4. To repurpose or distribute output from Tencent Hunyuan or any Model Derivatives to harm Yourself or others;
5. To override or circumvent the safety guardrails and safeguards We have put in place;
6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
9. To intentionally defame, disparage or otherwise harass others;
10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
11. To generate or disseminate personal identifiable information with the purpose of harming others;
12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including –through the use of bot generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
13. To impersonate another individual without consent, authorization, or legal right;
14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
19. For military purposes;
20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.

+ 124
- 525
README.md View File

@@ -3,13 +3,16 @@
</p>
<div align="center">
<a href="https://github.com/hpcaitech/Open-Sora/stargazers"><img src="https://img.shields.io/github/stars/hpcaitech/Open-Sora?style=social"></a>
<a href="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/paper/Open_Sora_2_tech_report.pdf"><img src="https://img.shields.io/static/v1?label=Tech Report 2.0&message=Arxiv&color=red"></a>
<a href="https://arxiv.org/abs/2412.20404"><img src="https://img.shields.io/static/v1?label=Tech Report 1.2&message=Arxiv&color=red"></a>
<a href="https://hpcaitech.github.io/Open-Sora/"><img src="https://img.shields.io/badge/Gallery-View-orange?logo=&amp"></a>
</div>

<div align="center">
<a href="https://discord.gg/kZakZzrSUT"><img src="https://img.shields.io/badge/Discord-join-blueviolet?logo=discord&amp"></a>
<a href="https://join.slack.com/t/colossalaiworkspace/shared_invite/zt-247ipg9fk-KRRYmUl~u2ll2637WRURVA"><img src="https://img.shields.io/badge/Slack-ColossalAI-blueviolet?logo=slack&amp"></a>
<a href="https://twitter.com/yangyou1991/status/1769411544083996787?s=61&t=jT0Dsx2d-MS5vS9rNM5e5g"><img src="https://img.shields.io/badge/Twitter-Discuss-blue?logo=twitter&amp"></a>
<a href="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png"><img src="https://img.shields.io/badge/微信-小助手加群-green?logo=wechat&amp"></a>
<a href="https://hpc-ai.com/blog/open-sora-v1.0"><img src="https://img.shields.io/badge/Open_Sora-Blog-blue"></a>
<a href="https://huggingface.co/spaces/hpcai-tech/open-sora"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Gradio Demo-blue"></a>
</div>

## Open-Sora: Democratizing Efficient Video Production for All
@@ -27,19 +30,20 @@ With Open-Sora, our goal is to foster innovation, creativity, and inclusivity wi
</a>
</div>

[[中文文档](/docs/zh_CN/README.md)] [[潞晨云](https://cloud.luchentech.com/)|[OpenSora镜像](https://cloud.luchentech.com/doc/docs/image/open-sora/)|[视频教程](https://www.bilibili.com/video/BV1ow4m1e7PX/?vd_source=c6b752764cd36ff0e535a768e35d98d2)]

<div align="center">
<a href="https://hpc-ai.com/?utm_source=github&utm_medium=social&utm_campaign=promotion-opensora">
<img src="https://github.com/hpcaitech/public_assets/blob/main/colossalai/img/1.gif" width="850" />
</a>
</div>

<!-- [[中文文档](/docs/zh_CN/README.md)] [[潞晨云](https://cloud.luchentech.com/)|[OpenSora镜像](https://cloud.luchentech.com/doc/docs/image/open-sora/)|[视频教程](https://www.bilibili.com/video/BV1ow4m1e7PX/?vd_source=c6b752764cd36ff0e535a768e35d98d2)] -->

## 📰 News

- **[2025.02.20]** 🔥 We released **Open-Sora 1.3**. With the upgraded VAE and Transformer architecture, the quality of our generated videos has been greatly improved 🚀. [[checkpoints]](#open-sora-13-model-weights) [[report]](/docs/report_04.md) [[demo]](https://huggingface.co/spaces/hpcai-tech/open-sora)
- **[2024.12.23]** 🔥 [The development cost of video generation models has saved by 50%! Open-source solutions are now available with H200 GPU vouchers](https://company.hpc-ai.com/blog/the-development-cost-of-video-generation-models-has-saved-by-50-open-source-solutions-are-now-available-with-h200-gpu-vouchers) [[code]](https://github.com/hpcaitech/Open-Sora/blob/main/scripts/train.py) [[vouchers]](https://colossalai.org/zh-Hans/docs/get_started/bonus/)
- **[2024.06.17]** 🔥 We released **Open-Sora 1.2**, which includes **3D-VAE**, **rectified flow**, and **score condition**. The video quality is greatly improved. [[checkpoints]](#open-sora-12-model-weights) [[report]](/docs/report_03.md) [[blog]](https://hpc-ai.com/blog/open-sora-from-hpc-ai-tech-team-continues-open-source-generate-any-16-second-720p-hd-video-with-one-click-model-weights-ready-to-use)
- **[2025.03.12]** 🔥 We released **Open-Sora 2.0** (11B). 🎬 The 11B model achieves [on-par performance](#evaluation) with 14B HunyuanVideo & 30B Step-Video on 📐VBench & 📊Human Preference. 🛠️ Fully open-source: checkpoints and training code, with a training cost of only **$200K**. [[report]](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/paper/Open_Sora_2_tech_report.pdf)
- **[2025.02.20]** 🔥 We released **Open-Sora 1.3** (1B). With the upgraded VAE and Transformer architecture, the quality of our generated videos has been greatly improved 🚀. [[checkpoints]](#open-sora-13-model-weights) [[report]](/docs/report_04.md) [[demo]](https://huggingface.co/spaces/hpcai-tech/open-sora)
- **[2024.12.23]** The development cost of video generation models has been reduced by 50%! Open-source solutions are now available with H200 GPU vouchers. [[blog]](https://company.hpc-ai.com/blog/the-development-cost-of-video-generation-models-has-saved-by-50-open-source-solutions-are-now-available-with-h200-gpu-vouchers) [[code]](https://github.com/hpcaitech/Open-Sora/blob/main/scripts/train.py) [[vouchers]](https://colossalai.org/zh-Hans/docs/get_started/bonus/)
- **[2024.06.17]** We released **Open-Sora 1.2**, which includes **3D-VAE**, **rectified flow**, and **score condition**. The video quality is greatly improved. [[checkpoints]](#open-sora-12-model-weights) [[report]](/docs/report_03.md) [[arxiv]](https://arxiv.org/abs/2412.20404)
- **[2024.04.25]** 🤗 We released the [Gradio demo for Open-Sora](https://huggingface.co/spaces/hpcai-tech/open-sora) on Hugging Face Spaces.
- **[2024.04.25]** We released **Open-Sora 1.1**, which supports **2s~15s, 144p to 720p, any aspect ratio** text-to-image, **text-to-video, image-to-video, video-to-video, infinite time** generation. In addition, a full video processing pipeline is released. [[checkpoints]](#open-sora-11-model-weights) [[report]](/docs/report_02.md)
- **[2024.03.18]** We released **Open-Sora 1.0**, a fully open-source project for video generation.
@@ -51,24 +55,36 @@ With Open-Sora, our goal is to foster innovation, creativity, and inclusivity wi
- **[2024.03.04]** Open-Sora provides training with 46% cost reduction.
[[blog]](https://hpc-ai.com/blog/open-sora)

Since Open-Sora is under active development, we maintain different branches for different versions. The latest version is [main](https://github.com/hpcaitech/Open-Sora). Old versions include: [v1.0](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.0), [v1.1](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.1), [v1.2](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.2).
📍 Since Open-Sora is under active development, we maintain different branches for different versions. The latest version is [main](https://github.com/hpcaitech/Open-Sora). Old versions include: [v1.0](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.0), [v1.1](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.1), [v1.2](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.2), [v1.3](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.3).

## 🎥 Latest Demo

🔥 You can experience Open-Sora on our [🤗 Gradio application on Hugging Face](https://huggingface.co/spaces/hpcai-tech/open-sora). More samples and corresponding prompts are available in our [Gallery](https://hpcaitech.github.io/Open-Sora/).
Demos are presented in compressed GIF format for convenience. For original quality samples and their corresponding prompts, please visit our [Gallery](https://hpcaitech.github.io/Open-Sora/).

| **5s 1024×576** | **5s 576×1024** | **5s 576×1024** |
| -------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/ft_0001_1_1.gif" width="">](https://streamable.com/e/8g9y9h?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/movie_0160.gif" width="">](https://streamable.com/e/k50mnv?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/movie_0017.gif" width="">](https://streamable.com/e/bzrn9n?autoplay=1) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/ft_0012_1_1.gif" width="">](https://streamable.com/e/dsv8da?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/douyin_0005.gif" width="">](https://streamable.com/e/3wif07?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/movie_0037.gif" width="">](https://streamable.com/e/us2w7h?autoplay=1) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/ft_0055_1_1.gif" width="">](https://streamable.com/e/yfwk8i?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/sora_0019.gif" width="">](https://streamable.com/e/jgjil0?autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/movie_0463.gif" width="">](https://streamable.com/e/lsoai1?autoplay=1) |

| **5s 720×1280** | **5s 720×1280** | **5s 720×1280** |
| --------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------- |

<details>
<summary>OpenSora 1.3 Demo</summary>

| **5s 720×1280** | **5s 720×1280** | **5s 720×1280** |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_tomato.gif" width="">](https://streamable.com/e/r0imrp?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_fisherman.gif" width="">](https://streamable.com/e/hfvjkh?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_girl2.gif" width="">](https://streamable.com/e/kutmma?quality=highest&amp;autoplay=1) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_grape.gif" width="">](https://streamable.com/e/osn1la?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_mushroom.gif" width="">](https://streamable.com/e/l1pzws?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_parrot.gif" width="">](https://streamable.com/e/2vqari?quality=highest&amp;autoplay=1) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_trans.gif" width="">](https://streamable.com/e/1in7d6?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_bear.gif" width="">](https://streamable.com/e/e9bi4o?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_futureflower.gif" width="">](https://streamable.com/e/09z7xi?quality=highest&amp;autoplay=1) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_fire.gif" width="">](https://streamable.com/e/16c3hk?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_man.gif" width="">](https://streamable.com/e/wi250w?quality=highest&amp;autoplay=1) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.3/demo_black.gif" width="">](https://streamable.com/e/vw5b64?quality=highest&amp;autoplay=1) |

</details>

<details>
<summary>OpenSora 1.2 Demo</summary>

| **4s 720×1280** | **4s 720×1280** | **4s 720×1280** |
| ---------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- |
| **4s 720×1280** | **4s 720×1280** | **4s 720×1280** |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0013.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/7895aab6-ed23-488c-8486-091480c26327) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_1718.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/20f07c7b-182b-4562-bbee-f1df74c86c9a) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0087.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/3d897e0d-dc21-453a-b911-b3bda838acc2) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0052.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/644bf938-96ce-44aa-b797-b3c0b513d64c) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_1719.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/272d88ac-4b4a-484d-a665-8d07431671d0) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0002.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/ebbac621-c34e-4bb4-9543-1c34f8989764) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0011.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/a1e3a1a3-4abd-45f5-8df2-6cced69da4ca) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0004.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/d6ce9c13-28e1-4dff-9644-cc01f5f11926) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.2/sample_0061.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/561978f8-f1b0-4f4d-ae7b-45bec9001b4a) |
@@ -78,17 +94,17 @@ Since Open-Sora is under active development, we remain different branchs for dif
<details>
<summary>OpenSora 1.1 Demo</summary>

| **2s 240×426** | **2s 240×426** |
| ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **2s 240×426** | **2s 240×426** |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sample_16x240x426_9.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/c31ebc52-de39-4a4e-9b1e-9211d45e05b2) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sora_16x240x426_26.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/c31ebc52-de39-4a4e-9b1e-9211d45e05b2) |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sora_16x240x426_27.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/f7ce4aaa-528f-40a8-be7a-72e61eaacbbd) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sora_16x240x426_40.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/5d58d71e-1fda-4d90-9ad3-5f2f7b75c6a9) |

| **2s 426×240** | **4s 480×854** |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **2s 426×240** | **4s 480×854** |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sora_16x426x240_24.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/34ecb4a0-4eef-4286-ad4c-8e3a87e5a9fd) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sample_32x480x854_9.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/c1619333-25d7-42ba-a91c-18dbc1870b18) |

| **16s 320×320** | **16s 224×448** | **2s 426×240** |
| ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **16s 320×320** | **16s 224×448** | **2s 426×240** |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sample_16s_320x320.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/3cab536e-9b43-4b33-8da8-a0f9cf842ff2) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sample_16s_224x448.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/9fb0b9e0-c6f4-4935-b29e-4cac10b373c4) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.1/sora_16x426x240_3.gif" width="">](https://github.com/hpcaitech/Open-Sora-dev/assets/99191637/3e892ad2-9543-4049-b005-643a4c1bf3bf) |

</details>
@@ -96,600 +112,182 @@ Since Open-Sora is under active development, we remain different branchs for dif
<details>
<summary>OpenSora 1.0 Demo</summary>

| **2s 512×512** | **2s 512×512** | **2s 512×512** |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_0.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/de1963d3-b43b-4e68-a670-bb821ebb6f80) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_1.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/13f8338f-3d42-4b71-8142-d234fbd746cc) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_2.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/fa6a65a6-e32a-4d64-9a9e-eabb0ebb8c16) |
| A serene night scene in a forested area. [...] The video is a time-lapse, capturing the transition from day to night, with the lake and forest serving as a constant backdrop. | A soaring drone footage captures the majestic beauty of a coastal cliff, [...] The water gently laps at the rock base and the greenery that clings to the top of the cliff. | The majestic beauty of a waterfall cascading down a cliff into a serene lake. [...] The camera angle provides a bird's eye view of the waterfall. |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_3.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/64232f84-1b36-4750-a6c0-3e610fa9aa94) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_4.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/983a1965-a374-41a7-a76b-c07941a6c1e9) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_5.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/ec10c879-9767-4c31-865f-2e8d6cf11e65) |
| A bustling city street at night, filled with the glow of car headlights and the ambient light of streetlights. [...] | The vibrant beauty of a sunflower field. The sunflowers are arranged in neat rows, creating a sense of order and symmetry. [...] | A serene underwater scene featuring a sea turtle swimming through a coral reef. The turtle, with its greenish-brown shell [...] |
| **2s 512×512** | **2s 512×512** | **2s 512×512** |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_0.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/de1963d3-b43b-4e68-a670-bb821ebb6f80) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_1.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/13f8338f-3d42-4b71-8142-d234fbd746cc) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_2.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/fa6a65a6-e32a-4d64-9a9e-eabb0ebb8c16) |
| A serene night scene in a forested area. [...] The video is a time-lapse, capturing the transition from day to night, with the lake and forest serving as a constant backdrop. | A soaring drone footage captures the majestic beauty of a coastal cliff, [...] The water gently laps at the rock base and the greenery that clings to the top of the cliff. | The majestic beauty of a waterfall cascading down a cliff into a serene lake. [...] The camera angle provides a bird's eye view of the waterfall. |
| [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_3.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/64232f84-1b36-4750-a6c0-3e610fa9aa94) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_4.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/983a1965-a374-41a7-a76b-c07941a6c1e9) | [<img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v1.0/sample_5.gif" width="">](https://github.com/hpcaitech/Open-Sora/assets/99191637/ec10c879-9767-4c31-865f-2e8d6cf11e65) |
| A bustling city street at night, filled with the glow of car headlights and the ambient light of streetlights. [...] | The vibrant beauty of a sunflower field. The sunflowers are arranged in neat rows, creating a sense of order and symmetry. [...] | A serene underwater scene featuring a sea turtle swimming through a coral reef. The turtle, with its greenish-brown shell [...] |

Videos are downsampled to `.gif` for display. Click for original videos. Prompts are trimmed for display,
see [here](/assets/texts/t2v_samples.txt) for full prompts.

</details>

## 🔆 New Features/Updates
## 🔆 Reports

- 📍 **Open-Sora 1.3** released. Model weights are available [here](#model-weights). See our **[report 1.3](docs/report_04.md)** for more information.
- ✅ Upgraded model architecture, delivering enhanced performance and efficiency.
- ✅ Incorporated more high-quality training data, significantly improving overall results.
- 📍 **Open-Sora 1.2** released. Model weights are available [here](#model-weights). See our **[report 1.2](docs/report_03.md)** for more details.
- 📍 **Open-Sora 1.1** released. Model weights are available [here](#model-weights). It is trained on **0s~15s, 144p to 720p, various aspect ratios** videos. See our **[report 1.1](docs/report_02.md)** for more discussions.
- 🔧 **Data processing pipeline v1.1** is released. An automatic [processing pipeline](#data-processing) from raw videos to (text, video clip) pairs is provided, including scene cutting $\rightarrow$ filtering(aesthetic, optical flow, OCR, etc.) $\rightarrow$ captioning $\rightarrow$ managing. With this tool, you can easily build your video dataset.
- **[Tech Report of Open-Sora 2.0](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/paper/Open_Sora_2_tech_report.pdf)**
- **[Step by step to train or finetune your own model](docs/train.md)**
- **[Step by step to train and evaluate a video autoencoder](docs/ae.md)**
- **[Visit the high compression video autoencoder](docs/hcae.md)**
- Reports of previous versions (best viewed in the corresponding branch):
- [Open-Sora 1.3](docs/report_04.md): shift-window attention, unified spatial-temporal VAE, etc.
- [Open-Sora 1.2](docs/report_03.md), [Tech Report](https://arxiv.org/abs/2412.20404): rectified flow, 3d-VAE, score condition, evaluation, etc.
- [Open-Sora 1.1](docs/report_02.md): multi-resolution/length/aspect-ratio, image/video conditioning/editing, data preprocessing, etc.
- [Open-Sora 1.0](docs/report_01.md): architecture, captioning, etc.

<details>
<summary>View more</summary>

- ✅ Improved ST-DiT architecture includes rope positional encoding, qk norm, longer text length, etc.
- ✅ Support training with any resolution, aspect ratio, and duration (including images).
- ✅ Support image and video conditioning and video editing, and thus support animating images, connecting videos, etc.
- 📍 **Open-Sora 1.0** released. Model weights are available [here](#model-weights). With only 400K video clips and 200 H800
days (compared with 152M samples in Stable Video Diffusion), we are able to generate 2s 512×512 videos. See our **[report 1.0](docs/report_01.md)** for more discussions.
- ✅ Three-stage training from an image diffusion model to a video diffusion model. We provide the weights for each
stage.
- ✅ Support training acceleration including accelerated transformer, faster T5 and VAE, and sequence parallelism.
Open-Sora improves training speed by **55%** when training on 64x512x512 videos. Details can be found
at [acceleration.md](docs/acceleration.md).
- 🔧 **Data preprocessing pipeline v1.0**,
including [downloading](tools/datasets/README.md), [video cutting](tools/scene_cut/README.md),
and [captioning](tools/caption/README.md) tools. Our data collection plan can be found
at [datasets.md](docs/datasets.md).
- ✅ We find VQ-VAE from [VideoGPT](https://wilson1yan.github.io/videogpt/index.html) has a low quality and thus adopt a
better VAE from [Stability-AI](https://huggingface.co/stabilityai/sd-vae-ft-mse-original). We also find patching in
the time dimension deteriorates the quality. See our **[report](docs/report_01.md)** for more discussions.
- ✅ We investigate different architectures including DiT, Latte, and our proposed STDiT. Our **STDiT** achieves a better
trade-off between quality and speed. See our **[report](docs/report_01.md)** for more discussions.
- ✅ Support clip and T5 text conditioning.
- ✅ By viewing images as one-frame videos, our project supports training DiT on both images and videos (e.g., ImageNet &
UCF101). See [commands.md](docs/commands.md) for more instructions.
- ✅ Support inference with official weights
from [DiT](https://github.com/facebookresearch/DiT), [Latte](https://github.com/Vchitect/Latte),
and [PixArt](https://pixart-alpha.github.io/).
- ✅ Refactor the codebase. See [structure.md](docs/structure.md) to learn the project structure and how to use the
config files.
📍 Since Open-Sora is under active development, we maintain different branches for different versions. The latest version is [main](https://github.com/hpcaitech/Open-Sora). Old versions include: [v1.0](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.0), [v1.1](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.1), [v1.2](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.2), [v1.3](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.3).

</details>

Since Open-Sora is under active development, we maintain different branches for different versions. The latest version is [main](https://github.com/hpcaitech/Open-Sora). Old versions include: [v1.0](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.0), [v1.1](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.1), [v1.2](https://github.com/hpcaitech/Open-Sora/tree/opensora/v1.2).

### TODO list sorted by priority
## Quickstart

<details>
<summary>View more</summary>

- [x] Training Video-VAE and adapt our model to new VAE.
- [x] Scaling model parameters and dataset size.
- [x] Incorporate a better scheduler (rectified flow).
- [x] Evaluation pipeline.
- [x] Complete the data processing pipeline (including dense optical flow, aesthetics scores, text-image similarity, etc.). See [the dataset](/docs/datasets.md) for more information
- [x] Support image and video conditioning.
- [x] Support variable aspect ratios, resolutions, durations.

</details>

## Contents

- [Installation](#installation)
- [Model Weights](#model-weights)
- [Gradio Demo](#gradio-demo)
- [Inference](#inference)
- [Data Processing](#data-processing)
- [Training](#training)
- [Evaluation](#evaluation)
- [VAE Training & Evaluation](#vae-training--evaluation)
- [Contribution](#contribution)
- [Citation](#citation)
- [Acknowledgement](#acknowledgement)

Other useful documents and links are listed below.

- Report: each version is trained from an image base separately (not continuously trained), while a newer version will incorporate the techniques from the previous version.
- [report 1.3](docs/report_04.md): shift-window attention, unified spatial-temporal VAE, etc.
- [report 1.2](docs/report_03.md): rectified flow, 3d-VAE, score condition, evaluation, etc.
- [report 1.1](docs/report_02.md): multi-resolution/length/aspect-ratio, image/video conditioning/editing, data preprocessing, etc.
- [report 1.0](docs/report_01.md): architecture, captioning, etc.
- [acceleration.md](docs/acceleration.md)
- Repo structure: [structure.md](docs/structure.md)
- Config file explanation: [config.md](docs/config.md)
- Useful commands: [commands.md](docs/commands.md)
- Data processing pipeline and dataset: [datasets.md](docs/datasets.md)
- Each data processing tool's README: [dataset conventions and management](/tools/datasets/README.md), [scene cutting](/tools/scene_cut/README.md), [scoring](/tools/scoring/README.md), [caption](/tools/caption/README.md)
- Evaluation: [eval/README.md](/eval/README.md)
- Gallery: [gallery](https://hpcaitech.github.io/Open-Sora/)

## Installation

### Install from Source

For CUDA 12.1, you can install the dependencies with the following commands. Otherwise, please refer to [Installation Documentation](docs/installation.md) for more instructions on different CUDA versions, and additional dependencies for data preprocessing, VAE, and model evaluation.
### Installation

```bash
# create a virtual env and activate (conda as an example)
conda create -n opensora python=3.9
conda create -n opensora python=3.10
conda activate opensora

# download the repo
git clone https://github.com/hpcaitech/Open-Sora
cd Open-Sora

# install torch, torchvision and xformers
pip install -r requirements/requirements-cu121.txt

# the default installation is for inference only
# Ensure torch >= 2.4.0
pip install -v . # for development mode, `pip install -v -e .`

# install the latest tensornvme to use async checkpoint saving
pip install git+https://github.com/hpcaitech/TensorNVMe.git

# install the latest colossalai to use the latest features
pip install git+https://github.com/hpcaitech/ColossalAI.git
```

(Optional, recommended for fast speed, especially for training) To enable `layernorm_kernel` and `flash_attn`, you need to install `apex` and `flash-attn` with the following commands.

```bash
# install flash attention
# set enable_flash_attn=False in config to disable flash attention
pip install packaging ninja
pip install xformers==0.0.27.post2 --index-url https://download.pytorch.org/whl/cu121 # install xformers according to your cuda version
pip install flash-attn --no-build-isolation

# install apex
# set enable_layernorm_kernel=False in config to disable apex
pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git
```

### Use Docker

Run the following command to build a docker image from Dockerfile provided.
Optionally, you can install flash attention 3 for faster speed.

```bash
docker build -t opensora .
git clone https://github.com/Dao-AILab/flash-attention # 4f0640d5
cd flash-attention/hopper
python setup.py install
```

Run the following command to start the docker container in interactive mode.

```bash
docker run -ti --gpus all -v .:/workspace/Open-Sora opensora
```

## Model Weights

### Open-Sora 1.3 Model Weights

| Model | Model Size | Data | #iterations | Batch Size | URL |
| -------------- | ---------- | ---- | ----------- | ---------- | ------------------------------------------------------------------ |
| Diffusion | 1.1B | 60M | 70k | Dynamic | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v4) |
| Diffusion-360p | 1.1B | 60M | 70k | Dynamic | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v4-360p) |
| Diffusion-i2v | 1.1B | 60M | 70k | Dynamic | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v4-i2v) |
| VAE | 259M | 3M | 1M | 8 | [:link:](https://huggingface.co/hpcai-tech/OpenSora-VAE-v1.3) |

See our **[report 1.3](docs/report_04.md)** for more information. Weights will be automatically downloaded when you run the inference script.

> For users from mainland China, try `export HF_ENDPOINT=https://hf-mirror.com` to successfully download the weights.

### Open-Sora 1.2 Model Weights

<details>
<summary>View more</summary>

| Model | Model Size | Data | #iterations | Batch Size | URL |
| --------- | ---------- | ---- | ----------- | ---------- | ------------------------------------------------------------- |
| Diffusion | 1.1B | 30M | 70k | Dynamic | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v3) |
| VAE | 384M | 3M | 1M | 8 | [:link:](https://huggingface.co/hpcai-tech/OpenSora-VAE-v1.2) |

See our **[report 1.2](docs/report_03.md)** for more information. Weights will be automatically downloaded when you run the inference script.

> For users from mainland China, try `export HF_ENDPOINT=https://hf-mirror.com` to successfully download the weights.
</details>

### Open-Sora 1.1 Model Weights
### Model Download

<details>
<summary>View more</summary>

| Resolution | Model Size | Data | #iterations | Batch Size | URL |
| ------------------ | ---------- | -------------------------- | ----------- | ------------------------------------------------- | -------------------------------------------------------------------- |
| mainly 144p & 240p | 700M | 10M videos + 2M images | 100k | [dynamic](/configs/opensora-v1-1/train/stage2.py) | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v2-stage2) |
| 144p to 720p | 700M | 500K HQ videos + 1M images | 4k | [dynamic](/configs/opensora-v1-1/train/stage3.py) | [:link:](https://huggingface.co/hpcai-tech/OpenSora-STDiT-v2-stage3) |

See our **[report 1.1](docs/report_02.md)** for more information.

:warning: **LIMITATION**: This version contains known issues which we are going to fix in the next version (as we save computation resource for the next release). In addition, the video generation may fail for long duration, and high resolution will have noisy results due to this problem.

</details>

### Open-Sora 1.0 Model Weights

<details>
<summary>View more</summary>

| Resolution | Model Size | Data | #iterations | Batch Size | GPU days (H800) | URL |
| ---------- | ---------- | ------ | ----------- | ---------- | --------------- | --------------------------------------------------------------------------------------------- |
| 16×512×512 | 700M | 20K HQ | 20k | 2×64 | 35 | [:link:](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-HQ-16x512x512.pth) |
| 16×256×256 | 700M | 20K HQ | 24k | 8×64 | 45 | [:link:](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-HQ-16x256x256.pth) |
| 16×256×256 | 700M | 366K | 80k | 8×64 | 117 | [:link:](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-16x256x256.pth) |

Training orders: 16x256x256 $\rightarrow$ 16x256x256 HQ $\rightarrow$ 16x512x512 HQ.

Our model's weight is partially initialized from [PixArt-α](https://github.com/PixArt-alpha/PixArt-alpha). The number of
parameters is 724M. More information about training can be found in our **[report](/docs/report_01.md)**. More about
the dataset can be found in [datasets.md](/docs/datasets.md). HQ means high quality.
Our 11B model supports 256px and 768px resolution. Both T2V and I2V are supported by one model. 🤗 [Huggingface](https://huggingface.co/hpcai-tech/Open-Sora-v2) 🤖 [ModelScope](https://modelscope.cn/models/luchentech/Open-Sora-v2).

:warning: **LIMITATION**: Our model is trained on a limited budget. The quality and text alignment is relatively poor.
The model performs badly, especially on generating human beings and cannot follow detailed instructions. We are working
on improving the quality and text alignment.

</details>

## Gradio Demo

🔥 You can experience Open-Sora on our [🤗 Gradio application](https://huggingface.co/spaces/hpcai-tech/open-sora) on Hugging Face online.

### Local Deployment

If you want to deploy Gradio locally, we have also provided a [Gradio application](./gradio) in this repository. You can use the following command to start an interactive web application to experience video generation with Open-Sora.
Download from huggingface:

```bash
pip install gradio spaces
python gradio/app.py
pip install "huggingface_hub[cli]"
huggingface-cli download hpcai-tech/Open-Sora-v2 --local-dir ./ckpts
```

This will launch a Gradio application on your localhost. If you want to know more about the Gradio application, you can refer to the [Gradio README](./gradio/README.md).

To enable prompt enhancement and other language input (e.g., 中文输入), you need to set the `OPENAI_API_KEY` in the environment. Check [OpenAI's documentation](https://platform.openai.com/docs/quickstart) to get your API key.
Download from ModelScope:

```bash
export OPENAI_API_KEY=YOUR_API_KEY
pip install modelscope
modelscope download hpcai-tech/Open-Sora-v2 --local_dir ./ckpts
```

### Getting Started

In the Gradio application, the basic options are as follows:

![Gradio Demo](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/readme/gradio_basic.png)

The easiest way to generate a video is to input a text prompt and click the "**Generate video**" button (scroll down if you cannot find). The generated video will be displayed in the right panel. Checking the "**Enhance prompt with GPT4o**" will use GPT-4o to refine the prompt, while "**Random Prompt**" button will generate a random prompt by GPT-4o for you. Due to the OpenAI's API limit, the prompt refinement result has some randomness.

Then, you can choose the **resolution**, **duration**, and **aspect ratio** of the generated video. Different resolution and video length will affect the video generation speed. On a 140G H200 GPU, the generation speed (with `num_sampling_step=30`) and peak memory usage is:
### Text-to-Video Generation

| | Image | 49 Frames | 65 Frames | 81 Frames | 97 Frames | 113 Frames |
| ---- | -------- | --------- | --------- | --------- | --------- | ---------- |
| 360p | 10s, 23G | 16s, 23G | 21s, 24G | 27s, 24G | 33s, 25G | 39s, 25G |
| 720p | 13s, 24G | 61s, 28G | 83s, 29G | 106s, 31G | 130s, 33G | 157s, 35G |

Note that besides text to video, you can also use **image to video generation**. You can upload an image and then click the "**Generate video**" button to generate a video with the image as the first frame. Or you can fill in the text prompt and click the "**Generate image**" button to generate an image with the text prompt, and then click the "**Generate video**" button to generate a video with the image generated with the same model.

![Gradio Demo](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/readme/gradio_option.png)

Then you can specify more options, including "**Motion Strength**", "**Aesthetic**" and "**Camera Motion**". If "Enable" is not checked or the choice is "none", the information is not passed to the model. Otherwise, the model will generate videos with the specified motion strength, aesthetic score, and camera motion.

For the **aesthetic score**, we recommend using the value 'excellent'. For **motion strength**, a smaller value will lead to a smoother but less dynamic video, while a larger value will lead to a more dynamic but likely more blurry video. Thus, you can try without it and then adjust it according to the generated video. For the **camera motion**, sometimes the model cannot follow the instruction well, and we are working on improving it.

You can also adjust the "**Sampling steps**"; this is directly related to the generation speed as it is the number of denoising steps. A number smaller than 30 usually leads to poor generation results, while a number larger than 100 usually yields no significant improvement. The "**Seed**" is used for reproducibility; you can set it to a fixed number to generate the same video. The "**CFG Scale**" controls how much the model follows the text prompt: a smaller value will lead to a more random video, while a larger value will lead to a more text-following video (7 is recommended).

For more advanced usage, you can refer to [Gradio README](./gradio/README.md#advanced-usage).

## Inference

### Open-Sora 1.3 Command Line Inference

The basic command line inference is as follows:
Our model is optimized for image-to-video generation, but it can also be used for text-to-video generation. To generate high quality videos, with the help of flux text-to-image model, we build a text-to-image-to-video pipeline. For 256x256 resolution:

```bash
# text to video
python scripts/inference.py configs/opensora-v1-3/inference/t2v.py \
--num-frames 97 --resolution 720p --aspect-ratio 9:16 \
--prompt "a beautiful waterfall"
```

You can add more options to the command line to customize the generation.
# Generate one given prompt
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --prompt "raining, sea"

```bash
python scripts/inference.py configs/opensora-v1-3/inference/t2v.py \
--num-frames 97 --resolution 720p --aspect-ratio 9:16 --num-sample 1 \
--aes "very good" --flow "fair" \
--prompt "a beautiful waterfall"
# Generation with csv
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --dataset.data-path assets/texts/example.csv
```

For image to video generation and other functionalities, see [here](docs/commands.md) for more instructions.

### Sequence Parallelism Inference

To enable sequence parallelism, you need to use `torchrun` to run the inference script. The following command will run the inference with 2 GPUs.
For 768x768 resolution:

```bash
# text to video
CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 scripts/inference.py configs/opensora-v1-3/inference/t2v.py \
--num-frames 97 --resolution 720p --aspect-ratio 9:16 \
--prompt "a beautiful waterfall"
```

### GPT-4o Prompt Refinement

We find that GPT-4o can refine the prompt and improve the quality of the generated video. With this feature, you can also use other languages (e.g., Chinese) as the prompt. To enable this feature, you need to set your OpenAI API key in the environment:
# One GPU
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_768px.py --save-dir samples --prompt "raining, sea"

```bash
export OPENAI_API_KEY=YOUR_API_KEY
# Multi-GPU with colossalai sp
torchrun --nproc_per_node 8 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_768px.py --save-dir samples --prompt "raining, sea"
```

Then you can run inference with `--llm-refine True` to enable the GPT-4o prompt refinement, or leave the prompt empty to get a random prompt generated by GPT-4o.
You can adjust the generation aspect ratio with `--aspect_ratio` and the generation length with `--num_frames`. Candidate values for aspect_ratio include `16:9`, `9:16`, `1:1`, `2.39:1`. Candidate values for num_frames should be `4k+1` and less than 129.

```bash
python scripts/inference.py configs/opensora-v1-3/inference/t2v.py \
--num-frames 97 --resolution 720p --llm-refine True \
--prompt "a beautiful waterfall"
```

### Open-Sora 1.2 Command Line Inference

<details>
<summary>View more</summary>

The basic command line inference is as follows:

```bash
# text to video
python scripts/inference.py configs/opensora-v1-2/inference/sample.py \
--num-frames 4s --resolution 720p --aspect-ratio 9:16 \
--prompt "a beautiful waterfall"
```

You can add more options to the command line to customize the generation.
You can also run direct text-to-video by:

```bash
python scripts/inference.py configs/opensora-v1-2/inference/sample.py \
--num-frames 4s --resolution 720p --aspect-ratio 9:16 \
--num-sampling-steps 30 --flow 5 --aes 6.5 \
--prompt "a beautiful waterfall"
# One GPU for 256px
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/256px.py --prompt "raining, sea"
# Multi-GPU for 768px
torchrun --nproc_per_node 8 --standalone scripts/diffusion/inference.py configs/diffusion/inference/768px.py --prompt "raining, sea"
```

For image to video generation and other functionalities, the API is compatible with Open-Sora 1.1. See [here](docs/commands.md) for more instructions.
### Image-to-Video Generation

If your installation does not contain `apex` and `flash-attn`, you need to disable them in the config file, or via the following command.
Given a prompt and a reference image, you can generate a video with the following command:

```bash
python scripts/inference.py configs/opensora-v1-2/inference/sample.py \
--num-frames 4s --resolution 720p \
--layernorm-kernel False --flash-attn False \
--prompt "a beautiful waterfall"
```

### Sequence Parallelism Inference
# 256px
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/256px.py --cond_type i2v_head --prompt "A plump pig wallows in a muddy pond on a rustic farm, its pink snout poking out as it snorts contentedly. The camera captures the pig's playful splashes, sending ripples through the water under the midday sun. Wooden fences and a red barn stand in the background, framed by rolling green hills. The pig's muddy coat glistens in the sunlight, showcasing the simple pleasures of its carefree life." --ref assets/texts/i2v.png

To enable sequence parallelism, you need to use `torchrun` to run the inference script. The following command will run the inference with 2 GPUs.
# 256px with csv
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/256px.py --cond_type i2v_head --dataset.data-path assets/texts/i2v.csv

```bash
# text to video
CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 scripts/inference.py configs/opensora-v1-2/inference/sample.py \
--num-frames 4s --resolution 720p --aspect-ratio 9:16 \
--prompt "a beautiful waterfall"
# Multi-GPU 768px
torchrun --nproc_per_node 8 --standalone scripts/diffusion/inference.py configs/diffusion/inference/768px.py --cond_type i2v_head --dataset.data-path assets/texts/i2v.csv
```

:warning: **LIMITATION**: The sequence parallelism is not supported for gradio deployment. For now, the sequence parallelism is only supported when the dimension can be divided by the number of GPUs. Thus, it may fail for some cases. We tested 4 GPUs for 720p and 2 GPUs for 480p.
## Advanced Usage

### GPT-4o Prompt Refinement
### Motion Score

We find that GPT-4o can refine the prompt and improve the quality of the generated video. With this feature, you can also use other languages (e.g., Chinese) as the prompt. To enable this feature, you need to prepare your OpenAI API key in the environment:
During training, we inject a motion score into the text prompt. During inference, you can use the following command to generate videos with a motion score (the default score is 4):

```bash
export OPENAI_API_KEY=YOUR_API_KEY
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --prompt "raining, sea" --motion-score 4
```

Then you can run inference with `--llm-refine True` to enable the GPT-4o prompt refinement, or leave the prompt empty to get a random prompt generated by GPT-4o.
We also provide a dynamic motion score evaluator. After setting your OpenAI API key, you can use the following command to evaluate the motion score of a video:

```bash
python scripts/inference.py configs/opensora-v1-2/inference/sample.py \
--num-frames 4s --resolution 720p --llm-refine True
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --prompt "raining, sea" --motion-score dynamic
```

</details>
### Open-Sora 1.1 Command Line Inference
| Score | 1 | 4 | 7 |
| ----- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
| | <img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/motion_score_1.gif" width=""> | <img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/motion_score_4.gif" width=""> | <img src="https://github.com/hpcaitech/Open-Sora-Demo/blob/main/demo/v2.0/motion_score_7.gif" width=""> |

<details>
<summary>View more</summary>
### Prompt Refine

Since Open-Sora 1.1 supports inference with dynamic input size, you can pass the input size as an argument.
We take advantage of ChatGPT to refine the prompt. You can use the following command to refine the prompt. The function is available for both text-to-video and image-to-video generation.

```bash
# text to video
python scripts/inference.py configs/opensora-v1-1/inference/sample.py --prompt "A beautiful sunset over the city" --num-frames 32 --image-size 480 854
export OPENAI_API_KEY=sk-xxxx
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --prompt "raining, sea" --refine-prompt True
```

If your installation does not contain `apex` and `flash-attn`, you need to disable them in the config file, or via the following command.
### Reproducibility

```bash
python scripts/inference.py configs/opensora-v1-1/inference/sample.py --prompt "A beautiful sunset over the city" --num-frames 32 --image-size 480 854 --layernorm-kernel False --flash-attn False
```

See [here](docs/commands.md#inference-with-open-sora-11) for more instructions including text-to-image, image-to-video, video-to-video, and infinite time generation.

</details>

### Open-Sora 1.0 Command Line Inference

<details>
<summary>View more</summary>

We have also provided an offline inference script. Run the following commands to generate samples; the required model weights will be automatically downloaded. To change sampling prompts, modify the txt file passed to `--prompt-path`. See [here](docs/structure.md#inference-config-demos) to customize the configuration.
To make the results reproducible, you can set the random seed by:

```bash
# Sample 16x512x512 (20s/sample, 100 time steps, 24 GB memory)
torchrun --standalone --nproc_per_node 1 scripts/inference.py configs/opensora/inference/16x512x512.py --ckpt-path OpenSora-v1-HQ-16x512x512.pth --prompt-path ./assets/texts/t2v_samples.txt

# Sample 16x256x256 (5s/sample, 100 time steps, 22 GB memory)
torchrun --standalone --nproc_per_node 1 scripts/inference.py configs/opensora/inference/16x256x256.py --ckpt-path OpenSora-v1-HQ-16x256x256.pth --prompt-path ./assets/texts/t2v_samples.txt

# Sample 64x512x512 (40s/sample, 100 time steps)
torchrun --standalone --nproc_per_node 1 scripts/inference.py configs/opensora/inference/64x512x512.py --ckpt-path ./path/to/your/ckpt.pth --prompt-path ./assets/texts/t2v_samples.txt

# Sample 64x512x512 with sequence parallelism (30s/sample, 100 time steps)
# sequence parallelism is enabled automatically when nproc_per_node is larger than 1
torchrun --standalone --nproc_per_node 2 scripts/inference.py configs/opensora/inference/64x512x512.py --ckpt-path ./path/to/your/ckpt.pth --prompt-path ./assets/texts/t2v_samples.txt
```

The speed is tested on H800 GPUs. For inference with other models, see [here](docs/commands.md) for more instructions.
To lower the memory usage, set a smaller `vae.micro_batch_size` in the config (slightly lower sampling speed).

</details>

## Data Processing

High-quality data is crucial for training good generation models.
To this end, we establish a complete pipeline for data processing, which could seamlessly convert raw videos to high-quality video-text pairs.
The pipeline is shown below. For detailed information, please refer to [data processing](docs/data_processing.md).
Also check out the [datasets](docs/datasets.md) we use.

![Data Processing Pipeline](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/readme/report_data_pipeline.png)

## Training

### Open-Sora 1.3 Training

```bash
# one node
torchrun --standalone --nproc_per_node 8 scripts/train.py \
configs/opensora-v1-3/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
# multiple nodes
colossalai run --nproc_per_node 8 --hostfile hostfile scripts/train.py \
configs/opensora-v1-3/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir samples --prompt "raining, sea" --sampling_option.seed 42 --seed 42
```

### Open-Sora 1.2 Training

<details>
<summary>View more</summary>
Use `--num-sample k` to generate `k` samples for each prompt.

The training process is the same as Open-Sora 1.1.
## Computational Efficiency

```bash
# If you use async checkpoint saving, and you want to validate the integrity of checkpoints, you can use the following command
# Then there will be a `async_file_io.log` in checkpoint directory. If the number of lines of the log file is not equal to the number of checkpoints (.safetensors files), there may be some errors.
export TENSORNVME_DEBUG=1
# one node
torchrun --standalone --nproc_per_node 8 scripts/train.py \
configs/opensora-v1-2/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
# multiple nodes
colossalai run --nproc_per_node 8 --hostfile hostfile scripts/train.py \
configs/opensora-v1-2/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
```
We test the computational efficiency of text-to-video on H100/H800 GPUs. For 256x256, we use colossalai's tensor parallelism. For 768x768, we use colossalai's sequence parallelism. All runs use 50 sampling steps. The results are presented in the format: $\color{blue}{\text{Total time (s)}}/\color{red}{\text{peak GPU memory (GB)}}$

</details>

### Open-Sora 1.1 Training

<details>
<summary>View more</summary>

Once you prepare the data in a `csv` file, run the following commands to launch training on a single node.

```bash
# one node
torchrun --standalone --nproc_per_node 8 scripts/train.py \
configs/opensora-v1-1/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
# multiple nodes
colossalai run --nproc_per_node 8 --hostfile hostfile scripts/train.py \
configs/opensora-v1-1/train/stage1.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
```

</details>

### Open-Sora 1.0 Training

<details>
<summary>View more</summary>

Once you prepare the data in a `csv` file, run the following commands to launch training on a single node.

```bash
# 1 GPU, 16x256x256
torchrun --nnodes=1 --nproc_per_node=1 scripts/train.py configs/opensora/train/16x256x256.py --data-path YOUR_CSV_PATH
# 8 GPUs, 64x512x512
torchrun --nnodes=1 --nproc_per_node=8 scripts/train.py configs/opensora/train/64x512x512.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
```

To launch training on multiple nodes, prepare a hostfile according
to [ColossalAI](https://colossalai.org/docs/basics/launch_colossalai/#launch-with-colossal-ai-cli), and run the
following commands.

```bash
colossalai run --nproc_per_node 8 --hostfile hostfile scripts/train.py configs/opensora/train/64x512x512.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT
```

For training other models and advanced usage, see [here](docs/commands.md) for more instructions.

</details>
| Resolution | 1x GPU | 2x GPUs | 4x GPUs | 8x GPUs |
| ---------- | -------------------------------------- | ------------------------------------- | ------------------------------------- | ------------------------------------- |
| 256x256 | $\color{blue}{60}/\color{red}{52.5}$ | $\color{blue}{40}/\color{red}{44.3}$ | $\color{blue}{34}/\color{red}{44.3}$ | |
| 768x768 | $\color{blue}{1656}/\color{red}{60.3}$ | $\color{blue}{863}/\color{red}{48.3}$ | $\color{blue}{466}/\color{red}{44.3}$ | $\color{blue}{276}/\color{red}{44.3}$ |

## Evaluation

We support evaluation based on:

- Validation loss
- [VBench](https://github.com/Vchitect/VBench/tree/master) score
- VBench-i2v score
- Batch generation for human evaluation

All the evaluation code is released in `eval` folder. Check the [README](/eval/README.md) for more details. Our [report](/docs/report_04.md#evaluation) also provides more information about the evaluation during training. The following table shows Open-Sora 1.3 greatly improves Open-Sora 1.2.

| Model | Total Score | Quality Score | Semantic Score |
| -------------- | ----------- | ------------- | -------------- |
| Open-Sora V1.0 | 75.91% | 78.81% | 64.28% |
| Open-Sora V1.2 | 79.23% | 80.71% | 73.30% |

## VAE Training & Evaluation
On [VBench](https://huggingface.co/spaces/Vchitect/VBench_Leaderboard), Open-Sora 2.0 significantly narrows the gap with OpenAI’s Sora, reducing it from 4.52% → 0.69% compared to Open-Sora 1.2.

### Open-Sora 1.3 VAE Training
![VBench](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/readme/v2_vbench.png)

We implement and train a unified spatial-temporal VAE to better compress videos while preserving details. For more details, refer to [Open-Sora 1.3 VAE Documentation](docs/vae_v1_3.md). Before you run the following commands, follow our [Installation Documentation](docs/installation.md) to install the required dependencies for VAE and Evaluation.
Human preference results show our model is on par with HunyuanVideo 14B and Step-Video 30B.

```bash
## Train VAE with 8 GPUs
colossalai run --hostfile hostfile --nproc_per_node 8 scripts/train_opensoravae_v1_3.py configs/vae_v1_3/train/video_16z.py --data-path YOUR_CSV_PATH --ckpt-path YOUR_PRETRAINED_CKPT --wandb True > logs/train_opensoravae_v1_3.log 2>&1 &
```

### Evaluate VAE performance

You need to run VAE inference first to generate the videos, then calculate scores on the generated videos:

```bash
VID_PATH=YOUR_CSV_PATH CUDA_VISIBLE_DEVICES=0 bash eval/vae/launch.sh pretrained_models/OpenSoraVAE_V1_3/model.pt
```

### Open-Sora 1.2 VAE Training

<details>
<summary>View more</summary>

We train a VAE pipeline that consists of a spatial VAE followed by a temporal VAE.
For more details, refer to [VAE Documentation](docs/vae.md).
Before you run the following commands, follow our [Installation Documentation](docs/installation.md) to install the required dependencies for VAE and Evaluation.

If you want to train your own VAE, you need to prepare data in the csv following the [data processing](#data-processing) pipeline, then run the following commands.
Note that you need to adjust the number of trained epochs (`epochs`) in the config file accordingly with respect to your own csv data size.

```bash
# stage 1 training, 380k steps, 8 GPUs
torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage1.py --data-path YOUR_CSV_PATH
# stage 2 training, 260k steps, 8 GPUs
torchrun --nnodes=1 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage2.py --data-path YOUR_CSV_PATH
# stage 3 training, 540k steps, 24 GPUs
torchrun --nnodes=3 --nproc_per_node=8 scripts/train_vae.py configs/vae/train/stage3.py --data-path YOUR_CSV_PATH
```

To evaluate the VAE performance, you need to run VAE inference first to generate the videos, then calculate scores on the generated videos:

```bash
# video generation
torchrun --standalone --nnodes=1 --nproc_per_node=1 scripts/inference_vae.py configs/vae/inference/video.py --ckpt-path YOUR_VAE_CKPT_PATH --data-path YOUR_CSV_PATH --save-dir YOUR_VIDEO_DIR
# the original videos will be saved to `YOUR_VIDEO_DIR_ori`
# the reconstructed videos through the pipeline will be saved to `YOUR_VIDEO_DIR_rec`
# the reconstructed videos through the spatial VAE only will be saved to `YOUR_VIDEO_DIR_spatial`

# score calculation
python eval/vae/eval_common_metric.py --batch_size 2 --real_video_dir YOUR_VIDEO_DIR_ori --generated_video_dir YOUR_VIDEO_DIR_rec --device cuda --sample_fps 24 --crop_size 256 --resolution 256 --num_frames 17 --sample_rate 1 --metric ssim psnr lpips flolpips
```

</details>
![Win Rate](https://github.com/hpcaitech/Open-Sora-Demo/blob/main/readme/v2_winrate.png)

## Contribution

@@ -711,16 +309,17 @@ Here we only list a few of the projects. For other works and datasets, please re
- [OpenDiT](https://github.com/NUS-HPC-AI-Lab/OpenDiT): An acceleration for DiT training. We adopt valuable acceleration
strategies for training progress from OpenDiT.
- [PixArt](https://github.com/PixArt-alpha/PixArt-alpha): An open-source DiT-based text-to-image model.
- [Flux](https://github.com/black-forest-labs/flux): A powerful text-to-image generation model.
- [Latte](https://github.com/Vchitect/Latte): An attempt to efficiently train DiT for video.
- [HunyuanVideo](https://github.com/Tencent/HunyuanVideo/tree/main?tab=readme-ov-file): Open-Source text-to-video model.
- [StabilityAI VAE](https://huggingface.co/stabilityai/sd-vae-ft-mse-original): A powerful image VAE model.
- [DC-AE](https://github.com/mit-han-lab/efficientvit): Deep Compression AutoEncoder for image compression.
- [CLIP](https://github.com/openai/CLIP): A powerful text-image embedding model.
- [T5](https://github.com/google-research/text-to-text-transfer-transformer): A powerful text encoder.
- [LLaVA](https://github.com/haotian-liu/LLaVA): A powerful image captioning model based on [Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) and [Yi-34B](https://huggingface.co/01-ai/Yi-34B).
- [PLLaVA](https://github.com/magic-research/PLLaVA): A powerful video captioning model.
- [MiraData](https://github.com/mira-space/MiraData): A large-scale video dataset with long durations and structured caption.

We are grateful for their exceptional work and generous contribution to open source. Special thanks go to the authors of [MiraData](https://github.com/mira-space/MiraData) and [Rectified Flow](https://github.com/gnobitab/RectifiedFlow) for their valuable advice and help. We wish to express gratitude towards AK for sharing this project on social media and Hugging Face for providing free GPU resources for our online Gradio demo.

## Citation

```bibtex


BIN
assets/images/watermark/watermark.png View File

Before After
Width: 1448  |  Height: 259  |  Size: 34 KiB

+ 0
- 800
assets/texts/VBench/all_category.txt View File

@@ -1,800 +0,0 @@
a black dog wearing halloween costume
spider making a web
bat eating fruits while hanging
a snake crawling on a wooden flooring
a close up video of a dragonfly
macro shot of ladybug on green leaf plant
chameleon eating ant
a bee feeding on nectars
bird nests on a tree captured with moving camera
a squirrel eating nuts
close up video of snail
top view of a hermit crab crawling on a wooden surface
cat licking another cat
red dragonfly perched on green leaf
close up view of a brown caterpillar crawling on green leaf
ants eating dead spider
an eagle on a tree branch
a frog eating an ant
white rabbit near the fence
a gorilla eating a carrot
close up of wolf
a meerkat looking around
a hyena in a zoo
lemur eating grass leaves
an owl being trained by a man
a lizard on a bamboo
brown chicken hunting for its food
video of parrots perched on bird stand
underwater footage of an octopus in a coral reef
a cute pomeranian dog playing with a soccer ball
white fox on rock
close up footage of a horse figurine
giraffe feeding on a tree in a savannah
curious cat sitting and looking around
hummingbird hawk moth flying near pink flowers
close up of a scorpion on a rock
close up on fish in net
koala eating leaves from a branch
a pod of dolphins swirling in the sea catching forage fish
low angle view of a hawk perched on a tree branch
a lion standing on wild grass
deer grazing in the field
elephant herd in a savanna
close up on lobster under water
hedgehog crossing road in forest
a sheep eating yellow flowers from behind a wire fence
twin sisters and a turtle
a pig wallowing in mud
flock of goose eating on the lake water
cow in a field irritated with flies
a close up shot of a fly
cheetah lying on the grass
close up of a lemur
close up shot of a kangaroo itching in the sand
a tortoise covered with algae
turkey in cage
a great blue heron bird in the lakeside
crab with shell in aquarium
a seagull walking on shore
an american crocodile
a tiger walking inside a cage
alligator in the nature
a raccoon climbing a tree
wild rabbit in a green meadow
group of ring tailed lemurs
a clouded leopard on a tree branch
duck grooming its feathers
an african penguin walking on a beach
a video of a peacock
close up shot of a wild bear
baby rhino plays with mom
porcupine climbs tree branches
close up of a natterjack toad on a rock
a sleeping orangutan
mother whale swimming with babies
a bear wearing red jersey
pink jellyfish swimming underwater in a blue sea
beautiful clown fish swimming
animation of disposable objects shaped as a whale
paper cut out of a pair of hands a whale and a heart
vertical video of camel roaming in the field during daytime
a still video of mosquito biting human
a curious sloth hanging from a tree branch
a plastic flamingo bird stumbles from the wind
a wolf in its natural habitat
a monkey sitting in the stone and scratching his head
bat hanging upside down
a red panda eating leaves
snake on ground
a harbour seal swimming near the shore
shark swimming in the sea
otter on branch while eating
goat standing over a rock
a troop of monkey on top of a mountain
a zebra eating grass on the field
a colorful butterfly perching on a bud
a snail crawling on a leaf
zookeeper showering a baby elephant
a beetle emerging from the sand
a nine banded armadillo searching for food
an apartment building with balcony
asian garden and medieval castle
illuminated tower in berlin
a wooden house overseeing the lake
a crowd of people in a plaza in front of a government building
a church interior
jewish friends posing with hanukkah menorah in a cabin house
a destroyed building after a missile attack in ukraine
abandoned building in the woods
drone video of an abandoned school building in pripyat ukraine
elegant university building
architecture and designs of buildings in central london
a pancake tower with chocolate syrup and strawberries on top
an ancient white building
friends hanging out at a coffee house
house front door with christmas decorations
city night dark building
a bird house hanging on a tree branch
sacred sculpture in a temple
high angle shot of a clock tower
modern wooden house interior
the interior of an abandoned building
opera house overlooking sea
a concrete structure near the green trees
dome like building in scotland
low angle shot of a building
tower on hill
a miniature house
eiffel tower from the seine river
low angle footage of an apartment building
island with pier and antique building
asian historic architecture
drone footage of a beautiful mansion
mosque in the middle east
building a tent and hammock in the forest camping site
top view of a high rise building
house covered in snow
skyscraper at night
house in village
a casino with people outside the building
silhouette of a building
a woman climbing a tree house
drone view of house near lake during golden hour
an under construction concrete house
a watch tower by the sea
exterior view of arabic style building
video of a hotel building
red paper lantern decorations hanging outside a building
house on seashore
aerial footage of the palace of culture and science building in warsaw poland
aerial video of stuttgart tv tower in germany
aerial view of the highway and building in a city
drone shot of a skyscraper san francisco california usa
waterfall and house
view of the sky through a building
drone footage of a house on top of the mountain
abandoned house in the nature
clouds hovering over a mansion
light house on the ocean
buddhist temple at sunrise
people walking by a graveyard near a mosque at sunset
view of lifeguard tower on the beach
scenic view of a house in the mountains
the landscape in front of a government building
aerial footage of a building and its surrounding landscape in winter
time lapse of a cloudy sky behind a transmission tower
blue ocean near the brown castle
fog over temple
house in countryside top view
building under construction
turkish flag waving on old tower
the georgian building
close up shot of a steel structure
the atrium and interior design of a multi floor building
city view reflected on a glass building
aerial view of a luxurious house with pool
an unpaved road leading to the house
drone footage of a lookout tower in mountain landscape
wind turbines on hill behind building
time lapse footage of the sun light in front of a small house porch
a building built with lots of stairways
overcast over house on seashore
the view of the sydney opera house from the other side of the harbor
candle on a jar and a house figurine on a surface
video of a farm and house
a dilapidated building made of bricks
a view of a unique building from a moving vehicle
aerial footage of a tall building in cambodia
push in shot of a huge house
a beach house built over a seawall protected from the sea waves
exotic house surrounded by trees
drone video of a house surrounded by tropical vegetation
drone footage of a building beside a pond
observation tower on hill in forest
a tree house in the woods
a video of vessel structure during daytime
fire in front of illuminated building at night
a footage of a wooden house on a wheat field
tilt shot of a solar panel below a light tower
water tower on the desert
freshly baked finger looking cookies
video of fake blood in wine glass
halloween food art
a person slicing a vegetable
a serving of pumpkin dish in a plate
close up view of green leafy vegetable
a birthday cake in the plate
video of a slice papaya fruit
a muffin with a burning candle and a love sign by a ceramic mug
a jack o lantern designed cookie
baked bread with chocolate
a broccoli soup on wooden table
a freshly brewed coffee on a pink mug
grabbing sourdough neapolitan style pizza slices
person cooking mushrooms in frying pan
rice grains placed on a reusable cloth bag
slices of kiwi fruit
grilling a steak on a pan grill
close up of bread popping out of a toaster
man eating noodle
preparing a cocktail drink
close up pasta with bacon on plate
milk and cinnamon rolls
boy getting a dumpling using chopsticks
a mother preparing food with her kids
man using his phone while eating
fresh salmon salad on a plate
cutting cucumbers into long thin slices as ingredient for sushi roll
a steaming cup of tea by the window
a glass filled with beer
a kid eating popcorn while watching tv
close up shot of fried fish on the plate
a man eating a donut
person making a vegetarian dish
spreading cheese on bagel
close up view of a man drinking red wine
a couple having breakfast in a restaurant
a student eating her sandwich
girl peeling a banana
red rice in a small bowl
pancake with blueberry on the top
green apple fruit on white wooden table
a man eating a taco by the bar
making of a burrito
squeezing lemon into salad
a chef cutting sushi rolls
video of a delicious dessert
deep frying a crab on a wok in high fire
close up video of a orange juice
video of a cooked chicken breast
woman holding a pineapple
a woman eating a bar of chocolate
decorating christmas cookie
squeezing a slice of fruit
tuna sashimi on a plate
a strawberry fruit mixed in an alcoholic drink
preparing hot dogs in a grill
a woman cutting a tomato
an orange fruit cut in half
a coconut fruit with drinking straw
woman holding a dragon fruit
a woman pouring hot beverage on a cup
waffles with whipped cream and fruit
focus shot of an insect at the bottom of a fruit
preparing a healthy broccoli dish
man eating snack at picnic
close up video of a grilled shrimp skewer
a woman mixing a smoothie drinks
close up video of woman having a bite of jelly
businessman drinking whiskey at the bar counter of a hotel lounge
cutting an onion with a knife over a wooden chopping board
fresh lemonade in bottles
grilling a meat on a charcoal grill
people enjoying asian cuisine
close up footage of a hot dish on a clay pot
pork ribs dish
waffle with strawberry and syrup for breakfast
tofu dish with rose garnish
uncooked pork meat
egg yolk being dumped over gourmet dish
tasty brunch dish close up
little boy pretending to eat the watermelon
slicing roasted beef
close up of a chef adding teriyaki sauce to a dish
flat lay mexican dish
a person placing an octopus dish on a marble surface
close up of tea leaves brewing in a glass kettle
adding fresh herbs to soup dish
a scoop of roasted coffee beans
fresh dim sum set up on a bamboo steam tray for cooking
a girl putting ketchup on food at the kitchen
cooking on electric stove
a woman with a slice of a pie
grapes and wine on a wooden board
man taking picture of his food
hamburger and fries on restaurant table
close up video of japanese food
a cracker sandwich with cheese filling for snack
barista preparing matcha tea
close up of onion rings being deep fried
people carving a pumpkin
people sitting on a sofa
a man with a muertos face painting
man walking in the dark
men in front of their computer editing photos
men loading christmas tree on tow truck
woman washing the dishes
woman adding honey to the cinnamon rolls
two women kissing and smiling
three women looking at watercolor paintings
a family wearing paper bag masks
a family posing for the camera
a boy covering a rose flower with a dome glass
boy sitting on grass petting a dog
a girl in her tennis sportswear
a girl coloring the cardboard
silhouette of the couple during sunset
couple dancing with body paint
a child playing with water
a woman with her child sitting on a couch in the living room
a group of friend place doing hand gestures of agreement
friends having a group selfie
friends talking while on the basketball court
group of people protesting
a group of campers with a cute dog
a group of photographers taking pictures at the north western gardens in llandudno north wales
a group of students laughing and talking
a group of martial artist warming up
a person playing golf
a person walking on a wet wooden bridge
person doing a leg exercise
ice hockey athlete on rink
a young athlete training in swimming
chess player dusting a chessboard
baseball player holding his bat
a bearded man putting a vinyl record on a vinyl player
an orchestra finishes a performance
people applauding the performance of the kids
band performance at the recording studio
father and his children playing jenga game
people playing a board game
man playing a video game
a man video recording the movie in theater
man and a woman eating while watching a movie
movie crew talking together
a director explaining the movie scene
man and woman listening to music on car
man playing music
couple dancing slow dance with sun glare
a ballerina practicing in the dance studio
father and son holding hands
father and daughter talking together
a mother and her kids engaged in a video call
mother and daughter reading a book together
a mother teaching her daughter playing a violin
kid in a halloween costume
a happy kid playing the ukulele
a chef slicing a cucumber
chef wearing his gloves properly
brother and sister using hammock
girl applying sunblock to her brother
a girl pushing the chair while her sister is on the chair
colleagues talking in office building
fighter practice kicking
a woman fighter in her cosplay costume
an engineer holding blueprints while talking with her colleague
a young woman looking at vr controllers with her friend
workmates teasing a colleague in the work
a male police officer talking on the radio
teacher holding a marker while talking
teacher writing on her notebook
a young student attending her online classes
a student showing his classmates his wand
a male vendor selling fruits
a shirtless male climber
a sound engineer listening to music
female talking to a psychiatrist in a therapy session
young female activist posing with flag
a man in a hoodie and woman with a red bandana talking to each other and smiling
a medium close up of women wearing kimonos
a male interviewer listening to a person talking
a social worker having a conversation with the foster parents
a farm worker harvesting onions
worker packing street food
worker and client at barber shop
elderly man lifting kettlebell
mom assisting son in riding a bicycle
dad watching her daughter eat
young guy with vr headset
pregnant woman exercising with trainer
a fortune teller talking to a client
wizard doing a ritual on a woman
a footage of an actor on a movie scene
a man holding a best actor trophy
a singer of a music band
a young singer performing on stage
young dancer practicing at home
seller showing room to a couple
cab driver talking to passenger
a policeman talking to the car driver
kids celebrating halloween at home
little boy helping mother in kitchen
video of a indoor green plant
a girl arranges a christmas garland hanging by the kitchen cabinet
candle burning in dark room
couple having fun and goofing around the bedroom
girls jumping up and down in the bedroom
woman and man in pajamas working from home
a muslim family sitting and talking in the living room
family enjoying snack time while sitting in the living room
woman holding an animal puppet and a little girl playing together at the living room
kids playing in the indoor tent
young people celebrating new year at the office
a woman writing on the sticky note in the office
a woman exercising at home over a yoga mat
girls preparing easter decorations at home
dog on floor in room
turning on a fluorescent light inside a room
colleagues talking to each other near the office windows
a woman recording herself while exercising at home
music room
different kind of tools kept in a utility room
sofa beds and other furniture
a girl finding her brother reading a book in the bedroom
an elegant ceramic plant pot and hanging plant on indoor
furniture inside a bedroom
interior design of the bar section
living room with party decoration
firewood burning in dark room
a young woman playing the ukulele at home
woman painting at home
a woman in a locker room
video of a bathroom interior
the interior design of a jewish synagogue
a woman in protective suit disinfecting the kitchen
modern minimalist home interior
modern interior design of a coffee shop
person arranging minimalist furniture
aerial shot of interior of the warehouse
a room of a manufacturing facility
interior of catholic
interior design of a restaurant
a female model in a changing room looking herself in mirror
men walking in the office hallway
people sitting in a conference room
the interior design of a shopping mall
chandeliers in room
lucerne railway station interior
a female fencer posing in a foggy room
a toolbox and a paint roller beside a huge package in a room
bedroom in hotel
a woman lying in the operating room
a chef holding and checking kitchen utensils
a couple singing in the shower room together
a woman cleaning mess in the living room
an empty meeting room with natural light
person dancing in a dark room
close up on blood in hospital room
a couple resting on their home floor
a young female staff at courier office
a man entering the gym locker room
a bored man sitting by the tv at home
woman dancing in indoor garden
rubble in the interior of an abandoned house
indoor farm in a greenhouse
man doing handstand in indoor garden
an abandoned indoor swimming pool
home decorations on top of a cabinet
graffiti art on the interior walls of an abandoned mansion
indoor wall climbing activity
sunlight inside a room
teenage girl roller skating at indoor rink
home deco with lighted
baby in the shower room
men enjoying office christmas party
a bedroom with a brick wall
actors prepping in the dressing room
kids playing at an indoor playground
a person sanitizing an office space using smoke machine
mother and daughter choosing clothes at home
a woman sitting by the indoor fire pit
man standing on the corner of the room while looking around
person assembling furniture
a family stacking cardboard boxes in a room
family having fun in the dining room
person disinfecting a room
a woman washing strawberries in the kitchen sink
modern office waiting room
close up view of a person slicing with a kitchen knife
boiling coffee on a stove in the kitchen
modern equipment used in a home studio
interior of a recording studio
people working in a call center office
band performing at a home concert
a group of people watching a concert in a room
people packing their furniture
young employees in office holding a certificate
a criminal inside a dark room handcuffed in a table
couple browsing and looking for furniture in the store
workspace at home
video of a indoor green plant
close up view of a plant
close up shot of a burning plant
plucking leaves from plant
a plant on gold pot with glass lid
a branch of a tree and a plant
a leafless tree
close up shot of fern leaf
close up video of strawberry plant
plant with blooming flowers
close up video of flower petals
watering yellow plant
beautiful flower decoration
cannabis flower in a jar
a footage of the tree leaves
a red leaf plant
close up view of a white christmas tree
snow pouring on a tree
close up shot of white flowers on the tree
leaves in the trees daytime
a dead tree lying on a grass field
tree branches in a flowing river
purple flowers with leaves
a coconut tree by the house
close up on flower in winter
bamboo leaves backlit by the sun
close up video of a wet flower
a man putting a flower in a box
dropping flower petals on a wooden bowl
a close up shot of gypsophila flower
variety of succulent plants on a garden
variety of trees and plants in a botanical garden
forest of deciduous trees
a stack of dried leaves burning in a forest
tall forest trees on a misty morning
close up view of dewdrops on a leaf
close up view of white petaled flower
removing a pineapple leaf
a dragonfly perched on a leaf
butterfly pollinating flower
person visiting and checking a corn plant
woman picking beans from a plant
woman plucking mint leaves
single tree in the middle of farmland
a plant on a soil
drone footage of a tree on farm field
a tractor harvesting lavender flower
people putting christmas ornaments on a christmas tree
jack o lantern hanging on a tree
tree with halloween decoration
flower field near the waterfall
truck carrying the tree logs
raindrops falling on leaves
shot of a palm tree swaying with the wind
squirrels on a tree branch
person holding a flower
a fallen tree trunk
tree with golden leaves
cherry tree
wind blows through leaves of the tree in autumn
a leaf on a glass
the long trunks of tall trees in the forest
trees in the forest during sunny day
close up video of tree bark
reflection of tree branches
trunks of many trees in the forest
tree leaves providing shades from the sun
leaves swaying in the wind
low angle shot of baobab tree
bare trees in forest
a plant surrounded by fallen leaves
a couple preparing food and pruning a plant
a man cutting a tree bark
oranges on a tree branch
plant connected on the stones
video of a sawmill machine cutting tree log
women drying flower petals
macro view of an agave plant
a video of a person tying a plant on a string
green moss in forest nature
coconut tree near sea under blue sky
the canopy of a coconut tree
a man leaning on a tree at the beach
a full grown plant on a pot
candle wax dripping on flower petals
close up of leaves in autumn
a woman opening a book with a flower inside
a man holding leaves looking at the camera
a shadow of a swaying plant
a tree and concrete structure under a blue and cloudy sky
trimming excess leaves on a potted plant
the changing color of the tree leaves during autumn season
a gooseberry tree swayed by the wind
forest trees and a medieval castle at sunset
woman cut down tree
an old oak tree in a park across the street from a hotel
wild flowers growing in a forest ground
a mossy fountain and green plants in a botanical garden
mansion with beautiful garden
ants on a dragon fruit flower
scenery of desert landscape
landscape agriculture farm tractor
burning slash piles in the forest
graveyard at sunset
view of a jack o lantern with pumpkins in a smoky garden
sun view through a spider web
view of the sea from an abandoned building
close up view of a full moon
close up view of lighted candles
close up view of swaying white flowers and leaves
scenery of a relaxing beach
selective focus video of grass during sunny day
aerial view of brown dry landscape
fireworks display in the sky at night
a bonfire near river
mountain view
waterfalls in between mountain
a picturesque view of nature
exotic view of a riverfront city
tall trees in the forest under the clear sky
snow on branches in forest
stream in the nature
an airplane flying above the sea of clouds
scenic video of sunset
view of houses with bush fence under a blue and cloudy sky
scenic view from wooden pathway
scenic view of a tropical beach
drone footage of waves crashing on beach shore
a scenic view of the golden hour at norway
time lapse video of foggy mountain forest
brown mountain during fall season
video of ocean during daytime
boat sailing in the ocean
top view of yachts
beautiful scenery of flowing waterfalls and river
wild ducks paddling on the lake surface
a relaxing scenery of beach view under cloudy sky
natural rock formations on beach under cloudy sky
a palm tree against blue sky
video of sailboat on a lake during sunset
aerial view of snow piles
time lapse of a sunset sky in the countryside
aerial footage of a statue
time lapse video of a farm during sunset
clouds formation in the sky at sunset
aerial shot of a village
drone shot of a beautiful sunrise at the mountains
time lapse video of foggy morning during sunrise
sun shining between tree leaves at sunrise
video of lake during dawn
vehicles traveling on roadway under cloudy sky
view of golden domed church
a monument under the blue sky
firecrackers in the sky
view of fruit signage in the farm
a dark clouds over shadowing the full moon
view of the amazon river
a big river swamp in a dense forest
a blooming cherry blossom tree under a blue sky with white clouds
a river waterfall cascading down the plunge basin
flooded landscape with palm trees
a blurry waterfall background
waterfall in the mountains
aerial footage of a city at night
pond by small waterfall in forest
aerial view of farmlands at the bay of lake
rice terraces in the countryside
a highway built across an agricultural area in the countryside
gloomy morning in the countryside
drone shot of an abandoned coliseum on a snowy mountain top
boat sailing in the middle of ocean
drone shot of the grass field
natural landscape of mountain and sea with islets developed into a community
aerial view of zaporizhia in ukraine
aerial footage of a herd
an aerial footage of a red sky
grass and plants growing in the remains of an abandoned house
view from hill on city
aerial view on orthodox church
aerial view of bay in croatia
a footage of a frozen river
overlooking view of a city at daylight
view outside the cemetery
clear sky with moon over meadow
clouds over railway
aerial footage of moving vehicles on the road at night
aerial view of town and park
top view of skyscrapers
top view of the empire state building in manhattan
top view of the central park in new york city
sheep running in a grass field
clear sky over factory
smoke and fire in birds eye view
view of a pathway with snow melting on its side
ferry under bridge on river near city in malaysia
mountain slopes covered in green vegetation
panoramic view of a town surrounded by snow covered mountains
aerial view of a palace
top view of vehicles driving on the intersection
a graveyard by a church in a mountain landscape
a modern railway station in malaysia use for public transportation
drone footage of amsterdam metro station
train arriving at a station
red vehicle driving on field
close up view of flashing emergency vehicle lighting
vehicle with fertilizer on field
a highway built across an agricultural area in the countryside
drone footage of motorcycles driving on country road between agricultural fields
a road in the woods under fog
footage of a car driving through a wheat field
vehicle stops for an ambulance passing through city traffic
emergency vehicle parked outside the casino
zombies attacking a woman and a boy inside a car
woman seating inside the car while chewing
video of passengers riding a double decker bus during night
traffic in london street at night
elderly couple checking engine of automobile
a green vintage automobile with an open hood parked in a parking area
close up of a prototype automobile with exposed engine on the back seat of the car
aerial view of road in forest
train departing from station
aerial view of a train passing by a bridge
video of a train tracks
video footage of a subway
video of blinking traffic lights
couple walking out on the subway
time lapse of a subway tunnel
monitor board inside the subway
metro train at night
zoom in video of a tram passing by city
young man using laptop in the tram
man reading a book at bus stop
close up shot of a moving taxi
night travel in london street on a public bus
red bus in a rainy city
flow of traffic in the city
close up shot of a yellow taxi turning left
two women calling for a taxi
drone view of an illuminated bridge across a river
policeman in police car talking on radio
airplane taking off at night
view through window in airplane
an airplane in the sky
helicopter landing on the street
a pilot getting out of a helicopter
a helicopter flying under blue sky
boat sailing in the middle of the ocean
girl playing with a toy boat
silhouette of a boat on sea during golden hour
a boat travelling around the lake
road on mountain ridge
ship sailing on danube river
slow motion video of a ship water trail in the sea
drone footage of a wreck ship on shore
a white yacht traveling on a river and passing under the bridge
female teenagers drinking champagne in the yacht
video of yacht sailing in the ocean
red combine harvester on road on field
a woman sitting on a bicycle while using a mobile phone
a woman sitting on a motorcycle looking around
three teenagers fixing a bicycle
a woman in a halloween costume posing on a motorcycle
a parked motorcycle on a foggy roadside
cable car near sea shore
a truck travelling in the road
footage of the road without any traffic
a road sign
love padlocks on a bridge
camera moving at highway construction site
vehicles driving on highway
a motorbike on highway at timelapse mode
point of view of a car driving through a tunnel
time lapse of heavy traffic on an avenue
ferry boat on city canal
black vintage car in museum
a zigzag road across a forest
people crossing the road
video of a kayak boat in a river
a person paddling a wooden boat in a lake
a car charging in the parking area
cars parked on the road
footage of the street with people and vehicle passing by in the rain
traffic on busy city street
a woman getting out of the car to walk with their dog
yacht sailing through the ocean
people in queue to military ship
man wearing motorcycle helmet looking at the camera
empty seats in the bus
empty boat on the water
cargo train traveling on the mountainside
cruise ship in harbor
counting down at traffic lights
pressing the car ignition
fire truck driving on the road
a footage of a broken bicycle
drone footage of an ambulance on the road
slow motion footage of a racing car
ship sailing on sea against sunset
big cargo ship passing on the shore
back view of man and woman walking on unpaved road

+ 0
- 946
assets/texts/VBench/all_dimension.txt View File

@@ -1,946 +0,0 @@
In a still frame, a stop sign
a toilet, frozen in time
a laptop, frozen in time
A tranquil tableau of alley
A tranquil tableau of bar
A tranquil tableau of barn
A tranquil tableau of bathroom
A tranquil tableau of bedroom
A tranquil tableau of cliff
In a still frame, courtyard
In a still frame, gas station
A tranquil tableau of house
indoor gymnasium, frozen in time
A tranquil tableau of indoor library
A tranquil tableau of kitchen
A tranquil tableau of palace
In a still frame, parking lot
In a still frame, phone booth
A tranquil tableau of restaurant
A tranquil tableau of tower
A tranquil tableau of a bowl
A tranquil tableau of an apple
A tranquil tableau of a bench
A tranquil tableau of a bed
A tranquil tableau of a chair
A tranquil tableau of a cup
A tranquil tableau of a dining table
In a still frame, a pear
A tranquil tableau of a bunch of grapes
A tranquil tableau of a bowl on the kitchen counter
A tranquil tableau of a beautiful, handcrafted ceramic bowl
A tranquil tableau of an antique bowl
A tranquil tableau of an exquisite mahogany dining table
A tranquil tableau of a wooden bench in the park
A tranquil tableau of a beautiful wrought-iron bench surrounded by blooming flowers
In a still frame, a park bench with a view of the lake
A tranquil tableau of a vintage rocking chair was placed on the porch
A tranquil tableau of the jail cell was small and dimly lit, with cold, steel bars
A tranquil tableau of the phone booth was tucked away in a quiet alley
a dilapidated phone booth stood as a relic of a bygone era on the sidewalk, frozen in time
A tranquil tableau of the old red barn stood weathered and iconic against the backdrop of the countryside
A tranquil tableau of a picturesque barn was painted a warm shade of red and nestled in a picturesque meadow
In a still frame, within the desolate desert, an oasis unfolded, characterized by the stoic presence of palm trees and a motionless, glassy pool of water
In a still frame, the Parthenon's majestic Doric columns stand in serene solitude atop the Acropolis, framed by the tranquil Athenian landscape
In a still frame, the Temple of Hephaestus, with its timeless Doric grace, stands stoically against the backdrop of a quiet Athens
In a still frame, the ornate Victorian streetlamp stands solemnly, adorned with intricate ironwork and stained glass panels
A tranquil tableau of the Stonehenge presented itself as an enigmatic puzzle, each colossal stone meticulously placed against the backdrop of tranquility
In a still frame, in the vast desert, an oasis nestled among dunes, featuring tall palm trees and an air of serenity
static view on a desert scene with an oasis, palm trees, and a clear, calm pool of water
A tranquil tableau of an ornate Victorian streetlamp standing on a cobblestone street corner, illuminating the empty night
A tranquil tableau of a tranquil lakeside cabin nestled among tall pines, its reflection mirrored perfectly in the calm water
In a still frame, a vintage gas lantern, adorned with intricate details, gracing a historic cobblestone square
In a still frame, a tranquil Japanese tea ceremony room, with tatami mats, a delicate tea set, and a bonsai tree in the corner
A tranquil tableau of the Parthenon stands resolute in its classical elegance, a timeless symbol of Athens' cultural legacy
A tranquil tableau of in the heart of Plaka, the neoclassical architecture of the old city harmonizes with the ancient ruins
A tranquil tableau of in the desolate beauty of the American Southwest, Chaco Canyon's ancient ruins whispered tales of an enigmatic civilization that once thrived amidst the arid landscapes
A tranquil tableau of at the edge of the Arabian Desert, the ancient city of Petra beckoned with its enigmatic rock-carved façades
In a still frame, amidst the cobblestone streets, an Art Nouveau lamppost stood tall
A tranquil tableau of in the quaint village square, a traditional wrought-iron streetlamp featured delicate filigree patterns and amber-hued glass panels
A tranquil tableau of the lampposts were adorned with Art Deco motifs, their geometric shapes and frosted glass creating a sense of vintage glamour
In a still frame, in the picturesque square, a Gothic-style lamppost adorned with intricate stone carvings added a touch of medieval charm to the setting
In a still frame, in the heart of the old city, a row of ornate lantern-style streetlamps bathed the narrow alleyway in a warm, welcoming light
A tranquil tableau of in the heart of the Utah desert, a massive sandstone arch spanned the horizon
A tranquil tableau of in the Arizona desert, a massive stone bridge arched across a rugged canyon
A tranquil tableau of in the corner of the minimalist tea room, a bonsai tree added a touch of nature's beauty to the otherwise simple and elegant space
In a still frame, amidst the hushed ambiance of the traditional tea room, a meticulously arranged tea set awaited, with porcelain cups, a bamboo whisk
In a still frame, nestled in the Zen garden, a rustic teahouse featured tatami seating and a traditional charcoal brazier
A tranquil tableau of a country estate's library featured elegant wooden shelves
A tranquil tableau of beneath the shade of a solitary oak tree, an old wooden park bench sat patiently
A tranquil tableau of beside a tranquil pond, a weeping willow tree draped its branches gracefully over the water's surface, creating a serene tableau of reflection and calm
A tranquil tableau of in the Zen garden, a perfectly raked gravel path led to a serene rock garden
In a still frame, a tranquil pond was fringed by weeping cherry trees, their blossoms drifting lazily onto the glassy surface
In a still frame, within the historic library's reading room, rows of antique leather chairs and mahogany tables offered a serene haven for literary contemplation
A tranquil tableau of a peaceful orchid garden showcased a variety of delicate blooms
A tranquil tableau of in the serene courtyard, a centuries-old stone well stood as a symbol of a bygone era, its mossy stones bearing witness to the passage of time
a bird and a cat
a cat and a dog
a dog and a horse
a horse and a sheep
a sheep and a cow
a cow and an elephant
an elephant and a bear
a bear and a zebra
a zebra and a giraffe
a giraffe and a bird
a chair and a couch
a couch and a potted plant
a potted plant and a tv
a tv and a laptop
a laptop and a remote
a remote and a keyboard
a keyboard and a cell phone
a cell phone and a book
a book and a clock
a clock and a backpack
a backpack and an umbrella
an umbrella and a handbag
a handbag and a tie
a tie and a suitcase
a suitcase and a vase
a vase and scissors
scissors and a teddy bear
a teddy bear and a frisbee
a frisbee and skis
skis and a snowboard
a snowboard and a sports ball
a sports ball and a kite
a kite and a baseball bat
a baseball bat and a baseball glove
a baseball glove and a skateboard
a skateboard and a surfboard
a surfboard and a tennis racket
a tennis racket and a bottle
a bottle and a chair
an airplane and a train
a train and a boat
a boat and an airplane
a bicycle and a car
a car and a motorcycle
a motorcycle and a bus
a bus and a traffic light
a traffic light and a fire hydrant
a fire hydrant and a stop sign
a stop sign and a parking meter
a parking meter and a truck
a truck and a bicycle
a toilet and a hair drier
a hair drier and a toothbrush
a toothbrush and a sink
a sink and a toilet
a wine glass and a chair
a cup and a couch
a fork and a potted plant
a knife and a tv
a spoon and a laptop
a bowl and a remote
a banana and a keyboard
an apple and a cell phone
a sandwich and a book
an orange and a clock
broccoli and a backpack
a carrot and an umbrella
a hot dog and a handbag
a pizza and a tie
a donut and a suitcase
a cake and a vase
an oven and scissors
a toaster and a teddy bear
a microwave and a frisbee
a refrigerator and skis
a bicycle and an airplane
a car and a train
a motorcycle and a boat
a person and a toilet
a person and a hair drier
a person and a toothbrush
a person and a sink
A person is riding a bike
A person is marching
A person is roller skating
A person is tasting beer
A person is clapping
A person is drawing
A person is petting animal (not cat)
A person is eating watermelon
A person is playing harp
A person is wrestling
A person is riding scooter
A person is sweeping floor
A person is skateboarding
A person is dunking basketball
A person is playing flute
A person is stretching leg
A person is tying tie
A person is skydiving
A person is shooting goal (soccer)
A person is playing piano
A person is finger snapping
A person is canoeing or kayaking
A person is laughing
A person is digging
A person is clay pottery making
A person is shooting basketball
A person is bending back
A person is shaking hands
A person is bandaging
A person is push up
A person is catching or throwing frisbee
A person is playing trumpet
A person is flying kite
A person is filling eyebrows
A person is shuffling cards
A person is folding clothes
A person is smoking
A person is tai chi
A person is squat
A person is playing controller
A person is throwing axe
A person is giving or receiving award
A person is air drumming
A person is taking a shower
A person is planting trees
A person is sharpening knives
A person is robot dancing
A person is rock climbing
A person is hula hooping
A person is writing
A person is bungee jumping
A person is pushing cart
A person is cleaning windows
A person is cutting watermelon
A person is cheerleading
A person is washing hands
A person is ironing
A person is cutting nails
A person is hugging
A person is trimming or shaving beard
A person is jogging
A person is making bed
A person is washing dishes
A person is grooming dog
A person is doing laundry
A person is knitting
A person is reading book
A person is baby waking up
A person is massaging legs
A person is brushing teeth
A person is crawling baby
A person is motorcycling
A person is driving car
A person is sticking tongue out
A person is shaking head
A person is sword fighting
A person is doing aerobics
A person is strumming guitar
A person is riding or walking with horse
A person is archery
A person is catching or throwing baseball
A person is playing chess
A person is rock scissors paper
A person is using computer
A person is arranging flowers
A person is bending metal
A person is ice skating
A person is climbing a rope
A person is crying
A person is dancing ballet
A person is getting a haircut
A person is running on treadmill
A person is kissing
A person is counting money
A person is barbequing
A person is peeling apples
A person is milking cow
A person is shining shoes
A person is making snowman
A person is sailing
a person swimming in ocean
a person giving a presentation to a room full of colleagues
a person washing the dishes
a person eating a burger
a person walking in the snowstorm
a person drinking coffee in a cafe
a person playing guitar
a bicycle leaning against a tree
a bicycle gliding through a snowy field
a bicycle slowing down to stop
a bicycle accelerating to gain speed
a car stuck in traffic during rush hour
a car turning a corner
a car slowing down to stop
a car accelerating to gain speed
a motorcycle cruising along a coastal highway
a motorcycle turning a corner
a motorcycle slowing down to stop
a motorcycle gliding through a snowy field
a motorcycle accelerating to gain speed
an airplane soaring through a clear blue sky
an airplane taking off
an airplane landing smoothly on a runway
an airplane accelerating to gain speed
a bus turning a corner
a bus stuck in traffic during rush hour
a bus accelerating to gain speed
a train speeding down the tracks
a train crossing over a tall bridge
a train accelerating to gain speed
a truck turning a corner
a truck anchored in a tranquil bay
a truck stuck in traffic during rush hour
a truck slowing down to stop
a truck accelerating to gain speed
a boat sailing smoothly on a calm lake
a boat slowing down to stop
a boat accelerating to gain speed
a bird soaring gracefully in the sky
a bird building a nest from twigs and leaves
a bird flying over a snowy forest
a cat grooming itself meticulously with its tongue
a cat playing in park
a cat drinking water
a cat running happily
a dog enjoying a peaceful walk
a dog playing in park
a dog drinking water
a dog running happily
a horse bending down to drink water from a river
a horse galloping across an open field
a horse taking a peaceful walk
a horse running to join a herd of its kind
a sheep bending down to drink water from a river
a sheep taking a peaceful walk
a sheep running to join a herd of its kind
a cow bending down to drink water from a river
a cow chewing cud while resting in a tranquil barn
a cow running to join a herd of its kind
an elephant spraying itself with water using its trunk to cool down
an elephant taking a peaceful walk
an elephant running to join a herd of its kind
a bear catching a salmon in its powerful jaws
a bear sniffing the air for scents of food
a bear climbing a tree
a bear hunting for prey
a zebra bending down to drink water from a river
a zebra running to join a herd of its kind
a zebra taking a peaceful walk
a giraffe bending down to drink water from a river
a giraffe taking a peaceful walk
a giraffe running to join a herd of its kind
a person
a bicycle
a car
a motorcycle
an airplane
a bus
a train
a truck
a boat
a traffic light
a fire hydrant
a stop sign
a parking meter
a bench
a bird
a cat
a dog
a horse
a sheep
a cow
an elephant
a bear
a zebra
a giraffe
a backpack
an umbrella
a handbag
a tie
a suitcase
a frisbee
skis
a snowboard
a sports ball
a kite
a baseball bat
a baseball glove
a skateboard
a surfboard
a tennis racket
a bottle
a wine glass
a cup
a fork
a knife
a spoon
a bowl
a banana
an apple
a sandwich
an orange
broccoli
a carrot
a hot dog
a pizza
a donut
a cake
a chair
a couch
a potted plant
a bed
a dining table
a toilet
a tv
a laptop
a remote
a keyboard
a cell phone
a microwave
an oven
a toaster
a sink
a refrigerator
a book
a clock
a vase
scissors
a teddy bear
a hair drier
a toothbrush
a red bicycle
a green bicycle
a blue bicycle
a yellow bicycle
an orange bicycle
a purple bicycle
a pink bicycle
a black bicycle
a white bicycle
a red car
a green car
a blue car
a yellow car
an orange car
a purple car
a pink car
a black car
a white car
a red bird
a green bird
a blue bird
a yellow bird
an orange bird
a purple bird
a pink bird
a black bird
a white bird
a black cat
a white cat
an orange cat
a yellow cat
a red umbrella
a green umbrella
a blue umbrella
a yellow umbrella
an orange umbrella
a purple umbrella
a pink umbrella
a black umbrella
a white umbrella
a red suitcase
a green suitcase
a blue suitcase
a yellow suitcase
an orange suitcase
a purple suitcase
a pink suitcase
a black suitcase
a white suitcase
a red bowl
a green bowl
a blue bowl
a yellow bowl
an orange bowl
a purple bowl
a pink bowl
a black bowl
a white bowl
a red chair
a green chair
a blue chair
a yellow chair
an orange chair
a purple chair
a pink chair
a black chair
a white chair
a red clock
a green clock
a blue clock
a yellow clock
an orange clock
a purple clock
a pink clock
a black clock
a white clock
a red vase
a green vase
a blue vase
a yellow vase
an orange vase
a purple vase
a pink vase
a black vase
a white vase
A beautiful coastal beach in spring, waves lapping on sand, Van Gogh style
A beautiful coastal beach in spring, waves lapping on sand, oil painting
A beautiful coastal beach in spring, waves lapping on sand by Hokusai, in the style of Ukiyo
A beautiful coastal beach in spring, waves lapping on sand, black and white
A beautiful coastal beach in spring, waves lapping on sand, pixel art
A beautiful coastal beach in spring, waves lapping on sand, in cyberpunk style
A beautiful coastal beach in spring, waves lapping on sand, animated style
A beautiful coastal beach in spring, waves lapping on sand, watercolor painting
A beautiful coastal beach in spring, waves lapping on sand, surrealism style
The bund Shanghai, Van Gogh style
The bund Shanghai, oil painting
The bund Shanghai by Hokusai, in the style of Ukiyo
The bund Shanghai, black and white
The bund Shanghai, pixel art
The bund Shanghai, in cyberpunk style
The bund Shanghai, animated style
The bund Shanghai, watercolor painting
The bund Shanghai, surrealism style
a shark is swimming in the ocean, Van Gogh style
a shark is swimming in the ocean, oil painting
a shark is swimming in the ocean by Hokusai, in the style of Ukiyo
a shark is swimming in the ocean, black and white
a shark is swimming in the ocean, pixel art
a shark is swimming in the ocean, in cyberpunk style
a shark is swimming in the ocean, animated style
a shark is swimming in the ocean, watercolor painting
a shark is swimming in the ocean, surrealism style
A panda drinking coffee in a cafe in Paris, Van Gogh style
A panda drinking coffee in a cafe in Paris, oil painting
A panda drinking coffee in a cafe in Paris by Hokusai, in the style of Ukiyo
A panda drinking coffee in a cafe in Paris, black and white
A panda drinking coffee in a cafe in Paris, pixel art
A panda drinking coffee in a cafe in Paris, in cyberpunk style
A panda drinking coffee in a cafe in Paris, animated style
A panda drinking coffee in a cafe in Paris, watercolor painting
A panda drinking coffee in a cafe in Paris, surrealism style
A cute happy Corgi playing in park, sunset, Van Gogh style
A cute happy Corgi playing in park, sunset, oil painting
A cute happy Corgi playing in park, sunset by Hokusai, in the style of Ukiyo
A cute happy Corgi playing in park, sunset, black and white
A cute happy Corgi playing in park, sunset, pixel art
A cute happy Corgi playing in park, sunset, in cyberpunk style
A cute happy Corgi playing in park, sunset, animated style
A cute happy Corgi playing in park, sunset, watercolor painting
A cute happy Corgi playing in park, sunset, surrealism style
Gwen Stacy reading a book, Van Gogh style
Gwen Stacy reading a book, oil painting
Gwen Stacy reading a book by Hokusai, in the style of Ukiyo
Gwen Stacy reading a book, black and white
Gwen Stacy reading a book, pixel art
Gwen Stacy reading a book, in cyberpunk style
Gwen Stacy reading a book, animated style
Gwen Stacy reading a book, watercolor painting
Gwen Stacy reading a book, surrealism style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, Van Gogh style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, oil painting
A boat sailing leisurely along the Seine River with the Eiffel Tower in background by Hokusai, in the style of Ukiyo
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, black and white
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, in cyberpunk style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, animated style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, watercolor painting
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, surrealism style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, Van Gogh style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, oil painting
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas by Hokusai, in the style of Ukiyo
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, black and white
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pixel art
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, in cyberpunk style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, animated style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, watercolor painting
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, surrealism style
An astronaut flying in space, Van Gogh style
An astronaut flying in space, oil painting
An astronaut flying in space by Hokusai, in the style of Ukiyo
An astronaut flying in space, black and white
An astronaut flying in space, pixel art
An astronaut flying in space, in cyberpunk style
An astronaut flying in space, animated style
An astronaut flying in space, watercolor painting
An astronaut flying in space, surrealism style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, Van Gogh style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, oil painting
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks by Hokusai, in the style of Ukiyo
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, black and white
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pixel art
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, in cyberpunk style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, animated style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, watercolor painting
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, surrealism style
A beautiful coastal beach in spring, waves lapping on sand, in super slow motion
A beautiful coastal beach in spring, waves lapping on sand, zoom in
A beautiful coastal beach in spring, waves lapping on sand, zoom out
A beautiful coastal beach in spring, waves lapping on sand, pan left
A beautiful coastal beach in spring, waves lapping on sand, pan right
A beautiful coastal beach in spring, waves lapping on sand, tilt up
A beautiful coastal beach in spring, waves lapping on sand, tilt down
A beautiful coastal beach in spring, waves lapping on sand, with an intense shaking effect
A beautiful coastal beach in spring, waves lapping on sand, featuring a steady and smooth perspective
A beautiful coastal beach in spring, waves lapping on sand, racking focus
The bund Shanghai, in super slow motion
The bund Shanghai, zoom in
The bund Shanghai, zoom out
The bund Shanghai, pan left
The bund Shanghai, pan right
The bund Shanghai, tilt up
The bund Shanghai, tilt down
The bund Shanghai, with an intense shaking effect
The bund Shanghai, featuring a steady and smooth perspective
The bund Shanghai, racking focus
a shark is swimming in the ocean, in super slow motion
a shark is swimming in the ocean, zoom in
a shark is swimming in the ocean, zoom out
a shark is swimming in the ocean, pan left
a shark is swimming in the ocean, pan right
a shark is swimming in the ocean, tilt up
a shark is swimming in the ocean, tilt down
a shark is swimming in the ocean, with an intense shaking effect
a shark is swimming in the ocean, featuring a steady and smooth perspective
a shark is swimming in the ocean, racking focus
A panda drinking coffee in a cafe in Paris, in super slow motion
A panda drinking coffee in a cafe in Paris, zoom in
A panda drinking coffee in a cafe in Paris, zoom out
A panda drinking coffee in a cafe in Paris, pan left
A panda drinking coffee in a cafe in Paris, pan right
A panda drinking coffee in a cafe in Paris, tilt up
A panda drinking coffee in a cafe in Paris, tilt down
A panda drinking coffee in a cafe in Paris, with an intense shaking effect
A panda drinking coffee in a cafe in Paris, featuring a steady and smooth perspective
A panda drinking coffee in a cafe in Paris, racking focus
A cute happy Corgi playing in park, sunset, in super slow motion
A cute happy Corgi playing in park, sunset, zoom in
A cute happy Corgi playing in park, sunset, zoom out
A cute happy Corgi playing in park, sunset, pan left
A cute happy Corgi playing in park, sunset, pan right
A cute happy Corgi playing in park, sunset, tilt up
A cute happy Corgi playing in park, sunset, tilt down
A cute happy Corgi playing in park, sunset, with an intense shaking effect
A cute happy Corgi playing in park, sunset, featuring a steady and smooth perspective
A cute happy Corgi playing in park, sunset, racking focus
Gwen Stacy reading a book, in super slow motion
Gwen Stacy reading a book, zoom in
Gwen Stacy reading a book, zoom out
Gwen Stacy reading a book, pan left
Gwen Stacy reading a book, pan right
Gwen Stacy reading a book, tilt up
Gwen Stacy reading a book, tilt down
Gwen Stacy reading a book, with an intense shaking effect
Gwen Stacy reading a book, featuring a steady and smooth perspective
Gwen Stacy reading a book, racking focus
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, in super slow motion
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, zoom in
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, zoom out
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pan left
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pan right
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, tilt up
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, tilt down
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, with an intense shaking effect
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, featuring a steady and smooth perspective
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, racking focus
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, in super slow motion
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, zoom in
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, zoom out
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pan left
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pan right
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, tilt up
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, tilt down
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, with an intense shaking effect
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, featuring a steady and smooth perspective
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, racking focus
An astronaut flying in space, in super slow motion
An astronaut flying in space, zoom in
An astronaut flying in space, zoom out
An astronaut flying in space, pan left
An astronaut flying in space, pan right
An astronaut flying in space, tilt up
An astronaut flying in space, tilt down
An astronaut flying in space, with an intense shaking effect
An astronaut flying in space, featuring a steady and smooth perspective
An astronaut flying in space, racking focus
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, in super slow motion
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, zoom in
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, zoom out
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pan left
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pan right
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, tilt up
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, tilt down
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, with an intense shaking effect
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, featuring a steady and smooth perspective
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, racking focus
Close up of grapes on a rotating table.
Turtle swimming in ocean.
A storm trooper vacuuming the beach.
A panda standing on a surfboard in the ocean in sunset.
An astronaut feeding ducks on a sunny afternoon, reflection from the water.
Two pandas discussing an academic paper.
Sunset time lapse at the beach with moving clouds and colors in the sky.
A fat rabbit wearing a purple robe walking through a fantasy landscape.
A koala bear playing piano in the forest.
An astronaut flying in space.
Fireworks.
An animated painting of fluffy white clouds moving in sky.
Flying through fantasy landscapes.
A bigfoot walking in the snowstorm.
A squirrel eating a burger.
A cat wearing sunglasses and working as a lifeguard at a pool.
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks.
Splash of turquoise water in extreme slow motion, alpha channel included.
an ice cream is melting on the table.
a drone flying over a snowy forest.
a shark is swimming in the ocean.
Aerial panoramic video from a drone of a fantasy land.
a teddy bear is swimming in the ocean.
time lapse of sunrise on mars.
golden fish swimming in the ocean.
An artist brush painting on a canvas close up.
A drone view of celebration with Christmas tree and fireworks, starry sky - background.
happy dog wearing a yellow turtleneck, studio, portrait, facing camera, dark background
Origami dancers in white paper, 3D render, on white background, studio shot, dancing modern dance.
Campfire at night in a snowy forest with starry sky in the background.
a fantasy landscape
A 3D model of a 1800s victorian house.
this is how I do makeup in the morning.
A raccoon that looks like a turtle, digital art.
Robot dancing in Times Square.
Busy freeway at night.
Balloon full of water exploding in extreme slow motion.
An astronaut is riding a horse in the space in a photorealistic style.
Macro slo-mo. Slow motion cropped closeup of roasted coffee beans falling into an empty bowl.
Sewing machine, old sewing machine working.
Motion colour drop in water, ink swirling in water, colourful ink in water, abstraction fancy dream cloud of ink.
Few big purple plums rotating on the turntable. water drops appear on the skin during rotation. isolated on the white background. close-up. macro.
Vampire makeup face of beautiful girl, red contact lenses.
Ashtray full of butts on table, smoke flowing on black background, close-up
Pacific coast, carmel by the sea ocean and waves.
A teddy bear is playing drum kit in NYC Times Square.
A corgi is playing drum kit.
An Iron man is playing the electronic guitar, high electronic guitar.
A raccoon is playing the electronic guitar.
A boat sailing leisurely along the Seine River with the Eiffel Tower in background by Vincent van Gogh
A corgi's head depicted as an explosion of a nebula
A fantasy landscape
A future where humans have achieved teleportation technology
A jellyfish floating through the ocean, with bioluminescent tentacles
A Mars rover moving on Mars
A panda drinking coffee in a cafe in Paris
A space shuttle launching into orbit, with flames and smoke billowing out from the engines
A steam train moving on a mountainside
A super cool giant robot in Cyberpunk Beijing
A tropical beach at sunrise, with palm trees and crystal-clear water in the foreground
Cinematic shot of Van Gogh's selfie, Van Gogh style
Gwen Stacy reading a book
Iron Man flying in the sky
The bund Shanghai, oil painting
Yoda playing guitar on the stage
A beautiful coastal beach in spring, waves lapping on sand by Hokusai, in the style of Ukiyo
A beautiful coastal beach in spring, waves lapping on sand by Vincent van Gogh
A boat sailing leisurely along the Seine River with the Eiffel Tower in background
A car moving slowly on an empty street, rainy evening
A cat eating food out of a bowl
A cat wearing sunglasses at a pool
A confused panda in calculus class
A cute fluffy panda eating Chinese food in a restaurant
A cute happy Corgi playing in park, sunset
A cute raccoon playing guitar in a boat on the ocean
A happy fuzzy panda playing guitar nearby a campfire, snow mountain in the background
A lightning striking atop of eiffel tower, dark clouds in the sky
A modern art museum, with colorful paintings
A panda cooking in the kitchen
A panda playing on a swing set
A polar bear is playing guitar
A raccoon dressed in suit playing the trumpet, stage background
A robot DJ is playing the turntable, in heavy raining futuristic tokyo rooftop cyberpunk night, sci-fi, fantasy
A shark swimming in clear Caribbean ocean
A super robot protecting city
A teddy bear washing the dishes
An epic tornado attacking above a glowing city at night, the tornado is made of smoke
An oil painting of a couple in formal evening wear going home get caught in a heavy downpour with umbrellas
Clown fish swimming through the coral reef
Hyper-realistic spaceship landing on Mars
The bund Shanghai, vibrant color
Vincent van Gogh is painting in the room
Yellow flowers swing in the wind
alley
amusement park
aquarium
arch
art gallery
bathroom
bakery shop
ballroom
bar
barn
basement
beach
bedroom
bridge
botanical garden
cafeteria
campsite
campus
carrousel
castle
cemetery
classroom
cliff
crosswalk
construction site
corridor
courtyard
desert
downtown
driveway
farm
food court
football field
forest road
fountain
gas station
glacier
golf course
indoor gymnasium
harbor
highway
hospital
house
iceberg
industrial area
jail cell
junkyard
kitchen
indoor library
lighthouse
laboratory
mansion
marsh
mountain
indoor movie theater
indoor museum
music studio
nursery
ocean
office
palace
parking lot
pharmacy
phone booth
raceway
restaurant
river
science museum
shower
ski slope
sky
skyscraper
baseball stadium
staircase
street
supermarket
indoor swimming pool
tower
outdoor track
train railway
train station platform
underwater coral reef
valley
volcano
waterfall
windmill
a bicycle on the left of a car, front view
a car on the right of a motorcycle, front view
a motorcycle on the left of a bus, front view
a bus on the right of a traffic light, front view
a traffic light on the left of a fire hydrant, front view
a fire hydrant on the right of a stop sign, front view
a stop sign on the left of a parking meter, front view
a parking meter on the right of a bench, front view
a bench on the left of a truck, front view
a truck on the right of a bicycle, front view
a bird on the left of a cat, front view
a cat on the right of a dog, front view
a dog on the left of a horse, front view
a horse on the right of a sheep, front view
a sheep on the left of a cow, front view
a cow on the right of an elephant, front view
an elephant on the left of a bear, front view
a bear on the right of a zebra, front view
a zebra on the left of a giraffe, front view
a giraffe on the right of a bird, front view
a bottle on the left of a wine glass, front view
a wine glass on the right of a cup, front view
a cup on the left of a fork, front view
a fork on the right of a knife, front view
a knife on the left of a spoon, front view
a spoon on the right of a bowl, front view
a bowl on the left of a bottle, front view
a potted plant on the left of a remote, front view
a remote on the right of a clock, front view
a clock on the left of a vase, front view
a vase on the right of scissors, front view
scissors on the left of a teddy bear, front view
a teddy bear on the right of a potted plant, front view
a frisbee on the left of a sports ball, front view
a sports ball on the right of a baseball bat, front view
a baseball bat on the left of a baseball glove, front view
a baseball glove on the right of a tennis racket, front view
a tennis racket on the left of a frisbee, front view
a toilet on the left of a hair drier, front view
a hair drier on the right of a toothbrush, front view
a toothbrush on the left of a sink, front view
a sink on the right of a toilet, front view
a chair on the left of a couch, front view
a couch on the right of a bed, front view
a bed on the left of a tv, front view
a tv on the right of a dining table, front view
a dining table on the left of a chair, front view
an airplane on the left of a train, front view
a train on the right of a boat, front view
a boat on the left of an airplane, front view
an oven on the top of a toaster, front view
an oven on the bottom of a toaster, front view
a toaster on the top of a microwave, front view
a toaster on the bottom of a microwave, front view
a microwave on the top of an oven, front view
a microwave on the bottom of an oven, front view
a banana on the top of an apple, front view
a banana on the bottom of an apple, front view
an apple on the top of a sandwich, front view
an apple on the bottom of a sandwich, front view
a sandwich on the top of an orange, front view
a sandwich on the bottom of an orange, front view
an orange on the top of a carrot, front view
an orange on the bottom of a carrot, front view
a carrot on the top of a hot dog, front view
a carrot on the bottom of a hot dog, front view
a hot dog on the top of a pizza, front view
a hot dog on the bottom of a pizza, front view
a pizza on the top of a donut, front view
a pizza on the bottom of a donut, front view
a donut on the top of broccoli, front view
a donut on the bottom of broccoli, front view
broccoli on the top of a banana, front view
broccoli on the bottom of a banana, front view
skis on the top of a snowboard, front view
skis on the bottom of a snowboard, front view
a snowboard on the top of a kite, front view
a snowboard on the bottom of a kite, front view
a kite on the top of a skateboard, front view
a kite on the bottom of a skateboard, front view
a skateboard on the top of a surfboard, front view
a skateboard on the bottom of a surfboard, front view
a surfboard on the top of skis, front view
a surfboard on the bottom of skis, front view

+ 0
- 1118
assets/texts/VBench/all_i2v.txt View File

@@ -1,1118 +0,0 @@
a close up of a blue and orange liquid{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
a close up of a blue and orange liquid, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a blue and orange liquid.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
A black and white abstract video featuring mesmerizing bubbles, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A black and white abstract video featuring mesmerizing bubbles.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a blue and white smoke is swirly in the dark, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue and white smoke is swirly in the dark.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a close-up view of a sea fan in the water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a sea fan in the water.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a visually captivating abstract video, rich in color, set against a dramatic black background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a visually captivating abstract video, rich in color, set against a dramatic black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a purple and yellow abstract painting with a black background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a purple and yellow abstract painting with a black background.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a dynamic video of a blurry neon light in the dark, radiating captivating colors, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dynamic video of a blurry neon light in the dark, radiating captivating colors.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
a view of a star trail in the night sky, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a star trail in the night sky.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
an aerial view of a small town on the edge of the ocean, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a small town on the edge of the ocean.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
Colorful buildings on the seaside cliffs, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Colorful buildings on the seaside cliffs.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a bunch of houses that are on a hillside, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of houses that are on a hillside.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
a building that is sitting on the side of a pond, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a building that is sitting on the side of a pond.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
an aerial view of a busy city with a bridge in the background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a busy city with a bridge in the background.jpg", "mask_strategy": "0"}
a bridge that is over a body of water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a bridge that is over a body of water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is over a body of water.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a pile of wood sitting next to a log house, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pile of wood sitting next to a log house.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
a view of a snowy mountain side with many buildings, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a snowy mountain side with many buildings.jpg", "mask_strategy": "0"}
san francisco skyline at sunset{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
san francisco skyline at sunset, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/san francisco skyline at sunset.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
a castle on top of a hill covered in snow, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a castle on top of a hill covered in snow.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
an aerial view of big ben and the houses of parliament in london, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of big ben and the houses of parliament in london.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
a beach with a lot of buildings on the side of a cliff, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beach with a lot of buildings on the side of a cliff.jpg", "mask_strategy": "0"}
an alley way in an old european city{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
an alley way in an old european city, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alley way in an old european city.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the golden gate bridge in san franscisco is lit up by the setting sun, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the golden gate bridge in san franscisco is lit up by the setting sun.jpg", "mask_strategy": "0"}
the great wall of china in autumn{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the great wall of china in autumn, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the great wall of china in autumn.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
the town of hallstatt is surrounded by mountains and water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the town of hallstatt is surrounded by mountains and water.jpg", "mask_strategy": "0"}
tokyo skyline at night{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
tokyo skyline at night, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tokyo skyline at night.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
a church sits on top of a hill under a cloudy sky, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a church sits on top of a hill under a cloudy sky.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
the parthenon in acropolis, greece, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the parthenon in acropolis, greece.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
a large crowd of people walking in a shopping mall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large crowd of people walking in a shopping mall.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
the pyramids of giza, egypt, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the pyramids of giza, egypt.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a stage door painted with a star on the side of a brick wall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stage door painted with a star on the side of a brick wall.jpg", "mask_strategy": "0"}
a light house on the edge of the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
a light house on the edge of the water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a light house on the edge of the water.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
an asian city street at night with people and bicycles, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an asian city street at night with people and bicycles.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a couple of wooden benches in the middle of a street, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of wooden benches in the middle of a street.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a pagoda sits on top of a mountain in japan, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pagoda sits on top of a mountain in japan.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a red bus driving down a snowy street at night, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a snow covered street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a snow covered street, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow covered street.jpg", "mask_strategy": "0"}
a house with snow on the ground{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
a house with snow on the ground, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a house with snow on the ground.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
cars parked on the side of the road during a snowstorm, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars parked on the side of the road during a snowstorm.jpg", "mask_strategy": "0"}
a group of statues on the side of a building{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a group of statues on the side of a building, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of statues on the side of a building.jpg", "mask_strategy": "0"}
a city street at night during a snow storm{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
a city street at night during a snow storm, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street at night during a snow storm.jpg", "mask_strategy": "0"}
tower bridge in london{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
tower bridge in london, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tower bridge in london.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
chinese pagoda in the middle of a snowy day, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/chinese pagoda in the middle of a snowy day.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a dark alleyway with a bus driving down it, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dark alleyway with a bus driving down it.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
a monastery sits on top of a cliff in bhutan, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monastery sits on top of a cliff in bhutan.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
the dome of the rock in jerusalem, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the dome of the rock in jerusalem.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
an aerial view of a futuristic building on a cliff overlooking a body of water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a futuristic building on a cliff overlooking a body of water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a reflection of a city with buildings in the water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a reflection of a city with buildings in the water.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a bar with chairs and a television on the wall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bar with chairs and a television on the wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with lots of books on a wall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with lots of books on a wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a living room filled with furniture next to a stone wall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room filled with furniture next to a stone wall.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a table and chairs in a room with sunlight coming through the window, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with sunlight coming through the window.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
a room filled with lots of shelves filled with books, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with lots of shelves filled with books.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
an art gallery with paintings on the walls, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an art gallery with paintings on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a room with a lot of pictures on the walls, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a lot of pictures on the walls.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a painting of a cloudy sky next to an easel, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a painting of a cloudy sky next to an easel.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a living room with a christmas tree and a rocking chair, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a christmas tree and a rocking chair.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a kitchen with a sink and a lot of glasses on the counter, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kitchen with a sink and a lot of glasses on the counter.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a wooden table in front of a brick wall with bottles on the wall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a wooden table in front of a brick wall with bottles on the wall.jpg", "mask_strategy": "0"}
a room filled with paintings and statues{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
a room filled with paintings and statues, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with paintings and statues.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
an outdoor dining area surrounded by plants and a brick walkway, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an outdoor dining area surrounded by plants and a brick walkway.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a room filled with books and teddy bears, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room filled with books and teddy bears.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a table and chairs in a room with a plant in the corner, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table and chairs in a room with a plant in the corner.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a living room with a couch, table, and a window, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with a couch, table, and a window.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a modern living room with wood floors and a tv, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a modern living room with wood floors and a tv.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a room with a desk and a chair in it, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a room with a desk and a chair in it.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a building, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a building.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a chair in a room next to some drawings, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chair in a room next to some drawings.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
a living room with hardwood floors and a white couch, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a living room with hardwood floors and a white couch.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
two people in a canoe on a lake with mountains in the background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people in a canoe on a lake with mountains in the background.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
an aerial view of a snowy road in a forest, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a snowy road in a forest.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a view of a waterfall from a distance, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a view of a waterfall from a distance.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a valley, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a valley.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a group of islands in the middle of a lake, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a group of islands in the middle of a lake.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
an aerial view of a rocky beach in indonesia, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a rocky beach in indonesia.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
fireworks in the night sky over a city, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fireworks in the night sky over a city.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a large wave crashes into a lighthouse on a stormy day, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes into a lighthouse on a stormy day.jpg", "mask_strategy": "0"}
a mountain range with a sky background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a mountain range with a sky background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with a sky background.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a large bonfire is burning in the night sky, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large bonfire is burning in the night sky.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a close-up view of the flames of a fireplace, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of the flames of a fireplace.jpg", "mask_strategy": "0"}
a farm in the middle of the day{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a farm in the middle of the day, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a farm in the middle of the day.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a flock of birds flying over a tree at sunset, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a flock of birds flying over a tree at sunset.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a captivating scene featuring a spiral galaxy shining brilliantly in the night sky, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a captivating scene featuring a spiral galaxy shining brilliantly in the night sky.jpg", "mask_strategy": "0"}
a mountain with snow on it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a mountain with snow on it, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain with snow on it.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a bridge that is in the middle of a river, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bridge that is in the middle of a river.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a group of people standing on top of a green hill, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people standing on top of a green hill.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a sandy beach with a wooden pier in the water, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with a wooden pier in the water.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a lake surrounded by mountains and flowers, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lake surrounded by mountains and flowers.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
a hot-air balloon flying over a desert landscape, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hot-air balloon flying over a desert landscape.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
several hot air balloons flying over a city, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/several hot air balloons flying over a city.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a group of hot air balloons flying over a field, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of hot air balloons flying over a field.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
a large wave crashes over a rocky cliff, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave crashes over a rocky cliff.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
the sun is setting over a lake in the mountains, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is setting over a lake in the mountains.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
a mountain range with snow on the ground, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain range with snow on the ground.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
sun rays shining through clouds over a lake, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/sun rays shining through clouds over a lake.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a boat sits on the shore of a lake with mt fuji in the background, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a boat sits on the shore of a lake with mt fuji in the background.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
a foggy road with trees in the distance, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy road with trees in the distance.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
two swans swimming on a lake in the fog, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two swans swimming on a lake in the fog.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
the sun is shining through the trees near a waterfall, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the sun is shining through the trees near a waterfall.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
a sandy beach with palm trees on the shore, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sandy beach with palm trees on the shore.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
an aerial view of a body of water and a beach, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a body of water and a beach.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy field that has trees in the grass, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy field that has trees in the grass.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a foggy landscape with trees and hills in the distance, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a foggy landscape with trees and hills in the distance.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a large wave in the ocean with a lot of spray coming from it, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large wave in the ocean with a lot of spray coming from it.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a waterfall is shown in the middle of a lush green hillside, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a waterfall is shown in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
an aerial view of a curvy road in the middle of a forest, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an aerial view of a curvy road in the middle of a forest.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a mountain covered in snow with evergreen trees, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a mountain covered in snow with evergreen trees.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a very large waterfall in the middle of the day, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a very large waterfall in the middle of the day.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera pans left{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera pans right{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera tilts up{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera tilts down{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera zooms in{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera zooms out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a large waterfall in the middle of a lush green hillside, camera static{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large waterfall in the middle of a lush green hillside.jpg", "mask_strategy": "0"}
a brown bear in the water with a fish in its mouth{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a brown bear in the water with a fish in its mouth.jpg", "mask_strategy": "0"}
a close-up of a hippopotamus eating grass in a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up of a hippopotamus eating grass in a field.jpg", "mask_strategy": "0"}
a sea turtle swimming in the ocean under the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sea turtle swimming in the ocean under the water.jpg", "mask_strategy": "0"}
two bees are flying over a lavender plant{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two bees are flying over a lavender plant.jpg", "mask_strategy": "0"}
the otter is standing in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the otter is standing in the water.jpg", "mask_strategy": "0"}
a dog carrying a soccer ball in its mouth{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dog carrying a soccer ball in its mouth.jpg", "mask_strategy": "0"}
an eagle is flying over a mountain with trees in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an eagle is flying over a mountain with trees in the background.jpg", "mask_strategy": "0"}
a couple of horses are running in the dirt{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of horses are running in the dirt.jpg", "mask_strategy": "0"}
a highland cow with long horns standing in a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a highland cow with long horns standing in a field.jpg", "mask_strategy": "0"}
a monkey is holding a banana in its mouth{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monkey is holding a banana in its mouth.jpg", "mask_strategy": "0"}
a large rhino grazing in the grass near a bush{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large rhino grazing in the grass near a bush.jpg", "mask_strategy": "0"}
a butterfly sits on top of a purple flower{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a butterfly sits on top of a purple flower.jpg", "mask_strategy": "0"}
an alligator is covered in green plants in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an alligator is covered in green plants in the water.jpg", "mask_strategy": "0"}
a red panda eating bamboo in a zoo{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red panda eating bamboo in a zoo.jpg", "mask_strategy": "0"}
a monochromatic video capturing a cat's gaze into the camera{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a monochromatic video capturing a cat's gaze into the camera.jpg", "mask_strategy": "0"}
a frog sitting on top of water lily leaves{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a frog sitting on top of water lily leaves.jpg", "mask_strategy": "0"}
a lion is roaring in the wild{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lion is roaring in the wild.jpg", "mask_strategy": "0"}
a seagull is flying towards a person's hand{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a seagull is flying towards a person's hand.jpg", "mask_strategy": "0"}
a yellow and white jellyfish is floating in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a yellow and white jellyfish is floating in the ocean.jpg", "mask_strategy": "0"}
a group of jellyfish swimming in an aquarium{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of jellyfish swimming in an aquarium.jpg", "mask_strategy": "0"}
a clown fish hiding in a purple anemone{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a clown fish hiding in a purple anemone.jpg", "mask_strategy": "0"}
a snake sitting on the ground next to a bowl{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snake sitting on the ground next to a bowl.jpg", "mask_strategy": "0"}
a brown and white cow eating hay{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a brown and white cow eating hay.jpg", "mask_strategy": "0"}
a seal swimming in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a seal swimming in the water.jpg", "mask_strategy": "0"}
a panda bear is eating a piece of bamboo{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a panda bear is eating a piece of bamboo.jpg", "mask_strategy": "0"}
a small bird sits on a moss covered branch{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a small bird sits on a moss covered branch.jpg", "mask_strategy": "0"}
a bird with a fish in its beak flying over a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bird with a fish in its beak flying over a field.jpg", "mask_strategy": "0"}
a large flock of birds flying in the sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large flock of birds flying in the sky.jpg", "mask_strategy": "0"}
a bald eagle flying over a tree filled forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bald eagle flying over a tree filled forest.jpg", "mask_strategy": "0"}
a giraffe walking in a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a giraffe walking in a field.jpg", "mask_strategy": "0"}
a lioness yawning in a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a lioness yawning in a field.jpg", "mask_strategy": "0"}
a little crab scurried on the sandy beach{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a little crab scurried on the sandy beach.jpg", "mask_strategy": "0"}
a warthog is walking in the grass{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a warthog is walking in the grass.jpg", "mask_strategy": "0"}
a penguin walking on a beach near the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a penguin walking on a beach near the water.jpg", "mask_strategy": "0"}
a tiger walking through a wooded area{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a tiger walking through a wooded area.jpg", "mask_strategy": "0"}
a tiger walking on a dirt path in the woods{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a tiger walking on a dirt path in the woods.jpg", "mask_strategy": "0"}
a small monkey holding a piece of food in it's mouth{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a small monkey holding a piece of food in it's mouth.jpg", "mask_strategy": "0"}
a squirrel sitting on the ground eating a piece of bread{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a squirrel sitting on the ground eating a piece of bread.jpg", "mask_strategy": "0"}
a group of fish swimming over a coral reef{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of fish swimming over a coral reef.jpg", "mask_strategy": "0"}
a toad is sitting on top of some moss{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a toad is sitting on top of some moss.jpg", "mask_strategy": "0"}
a great white shark swimming in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a great white shark swimming in the ocean.jpg", "mask_strategy": "0"}
a group of camels resting in the desert{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of camels resting in the desert.jpg", "mask_strategy": "0"}
two sheep grazing in the grass next to a wooden bridge{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two sheep grazing in the grass next to a wooden bridge.jpg", "mask_strategy": "0"}
an elephant walking through a forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an elephant walking through a forest.jpg", "mask_strategy": "0"}
a white rooster standing in a grassy field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a white rooster standing in a grassy field.jpg", "mask_strategy": "0"}
a zebra walking across a dirt road near a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a zebra walking across a dirt road near a field.jpg", "mask_strategy": "0"}
cars are driving down a street lined with tall trees{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/cars are driving down a street lined with tall trees.jpg", "mask_strategy": "0"}
the cars on the street are waiting for the traffic lights{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the cars on the street are waiting for the traffic lights.jpg", "mask_strategy": "0"}
a bicycle leaning against a fence in the snow{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bicycle leaning against a fence in the snow.jpg", "mask_strategy": "0"}
a blue fishing boat is navigating in the ocean next to a cruise ship{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue fishing boat is navigating in the ocean next to a cruise ship.jpg", "mask_strategy": "0"}
a blue car driving down a dirt road near train tracks{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue car driving down a dirt road near train tracks.jpg", "mask_strategy": "0"}
a sailboat is drifting on the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sailboat is drifting on the ocean.jpg", "mask_strategy": "0"}
a couple of boats floating on a body of water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a couple of boats floating on a body of water.jpg", "mask_strategy": "0"}
a city street with cars driving in the rain{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city street with cars driving in the rain.jpg", "mask_strategy": "0"}
a red and white tram traveling down a snowy street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red and white tram traveling down a snowy street.jpg", "mask_strategy": "0"}
a city bus driving down a snowy street at night{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a city bus driving down a snowy street at night.jpg", "mask_strategy": "0"}
a green toy car is sitting on the ground{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a green toy car is sitting on the ground.jpg", "mask_strategy": "0"}
a train traveling down tracks through the woods with leaves on the ground{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a train traveling down tracks through the woods with leaves on the ground.jpg", "mask_strategy": "0"}
a man in a small boat fishing in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man in a small boat fishing in the ocean.jpg", "mask_strategy": "0"}
an airplane is flying through the sky at sunset{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an airplane is flying through the sky at sunset.jpg", "mask_strategy": "0"}
an old rusty car sits in the middle of a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an old rusty car sits in the middle of a field.jpg", "mask_strategy": "0"}
a motorcycle driving down a road{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a motorcycle driving down a road.jpg", "mask_strategy": "0"}
a blue train traveling through a lush green area{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a blue train traveling through a lush green area.jpg", "mask_strategy": "0"}
a white car is swiftly driving on a dirt road near a bush, kicking up dust{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a white car is swiftly driving on a dirt road near a bush, kicking up dust.jpg", "mask_strategy": "0"}
a large cargo ship sailing in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large cargo ship sailing in the water.jpg", "mask_strategy": "0"}
the red Alfa sports car is speeding down the road{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/the red Alfa sports car is speeding down the road.jpg", "mask_strategy": "0"}
two cars that have been involved in a violent collision{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two cars that have been involved in a violent collision.jpg", "mask_strategy": "0"}
a red double decker bus driving down a street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a red double decker bus driving down a street.jpg", "mask_strategy": "0"}
A red sports car driving through sand, kicking up a large amount of dust{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A red sports car driving through sand, kicking up a large amount of dust.jpg", "mask_strategy": "0"}
a yellow toy car parked on a rock near the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a yellow toy car parked on a rock near the water.jpg", "mask_strategy": "0"}
a space shuttle taking off into the sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a space shuttle taking off into the sky.jpg", "mask_strategy": "0"}
a steam train traveling through the woods{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a steam train traveling through the woods.jpg", "mask_strategy": "0"}
a group of buses parked at a bus station{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of buses parked at a bus station.jpg", "mask_strategy": "0"}
A bunch of cars are driving on a highway{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A bunch of cars are driving on a highway.jpg", "mask_strategy": "0"}
a white and blue airplane flying in the sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a white and blue airplane flying in the sky.jpg", "mask_strategy": "0"}
A space station orbited above the Earth{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A space station orbited above the Earth.jpg", "mask_strategy": "0"}
A yellow boat is cruising in front of a bridge{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A yellow boat is cruising in front of a bridge.jpg", "mask_strategy": "0"}
tangerines in a metal bowl on a table{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/tangerines in a metal bowl on a table.jpg", "mask_strategy": "0"}
a shadow of a hand reaching for a leaf{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a shadow of a hand reaching for a leaf.jpg", "mask_strategy": "0"}
A teddy bear is climbing over a wooden fence{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A teddy bear is climbing over a wooden fence.jpg", "mask_strategy": "0"}
a book on fire with flames coming out of it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a book on fire with flames coming out of it.jpg", "mask_strategy": "0"}
a close-up of a pink rose with water droplets on it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up of a pink rose with water droplets on it.jpg", "mask_strategy": "0"}
a person is cooking meat on a grill with flames{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person is cooking meat on a grill with flames.jpg", "mask_strategy": "0"}
a snowman wearing a santa hat and scarf{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snowman wearing a santa hat and scarf.jpg", "mask_strategy": "0"}
a person holding a sparkler in their hand{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person holding a sparkler in their hand.jpg", "mask_strategy": "0"}
a teddy bear sitting on a moss covered ground{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a teddy bear sitting on a moss covered ground.jpg", "mask_strategy": "0"}
a statue of a lion is sitting on a pedestal{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a statue of a lion is sitting on a pedestal.jpg", "mask_strategy": "0"}
metal balls are suspended in the air{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/metal balls are suspended in the air.jpg", "mask_strategy": "0"}
a close up of a bunch of green grapes{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a bunch of green grapes.jpg", "mask_strategy": "0"}
a close-up view of a green plant with unfurled fronds{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a green plant with unfurled fronds.jpg", "mask_strategy": "0"}
an orange mushroom sitting on top of a tree stump in the woods{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an orange mushroom sitting on top of a tree stump in the woods.jpg", "mask_strategy": "0"}
a stack of pancakes covered in syrup and fruit{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stack of pancakes covered in syrup and fruit.jpg", "mask_strategy": "0"}
a plate of spaghetti with spinach and tomatoes{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a plate of spaghetti with spinach and tomatoes.jpg", "mask_strategy": "0"}
a pink lotus flower in the middle of a pond{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pink lotus flower in the middle of a pond.jpg", "mask_strategy": "0"}
a person holding a sparkler in front of a sunset{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person holding a sparkler in front of a sunset.jpg", "mask_strategy": "0"}
a pink rose is blooming in a garden{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pink rose is blooming in a garden.jpg", "mask_strategy": "0"}
a snow man holding a lantern in the snow{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snow man holding a lantern in the snow.jpg", "mask_strategy": "0"}
a stack of chocolate cookies with a bite taken out of it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a stack of chocolate cookies with a bite taken out of it.jpg", "mask_strategy": "0"}
a white plate topped with eggs, toast, tomatoes, and a sausage{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a white plate topped with eggs, toast, tomatoes, and a sausage.jpg", "mask_strategy": "0"}
a yellow water lily is floating in a pond{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a yellow water lily is floating in a pond.jpg", "mask_strategy": "0"}
an astronaut floating in space with the earth in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an astronaut floating in space with the earth in the background.jpg", "mask_strategy": "0"}
A little girl, lost in thought, is quietly sitting on the bus{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A little girl, lost in thought, is quietly sitting on the bus.jpg", "mask_strategy": "0"}
a man holding a tray in front of a brick wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man holding a tray in front of a brick wall.jpg", "mask_strategy": "0"}
an older man playing a saxophone on the street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older man playing a saxophone on the street.jpg", "mask_strategy": "0"}
an older man jogging by the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older man jogging by the water.jpg", "mask_strategy": "0"}
a person riding a skateboard on a concrete floor{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding a skateboard on a concrete floor.jpg", "mask_strategy": "0"}
a woman with long black hair is posing for a picture{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman with long black hair is posing for a picture.jpg", "mask_strategy": "0"}
a woman sitting on the ground in front of a guitar{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman sitting on the ground in front of a guitar.jpg", "mask_strategy": "0"}
a little girl wearing a purple helmet riding a blue bike{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a little girl wearing a purple helmet riding a blue bike.jpg", "mask_strategy": "0"}
a young boy is jumping in the mud{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a young boy is jumping in the mud.jpg", "mask_strategy": "0"}
a man sitting in the driver's seat of a car wearing sunglasses{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man sitting in the driver's seat of a car wearing sunglasses.jpg", "mask_strategy": "0"}
a little boy jumping in the air over a puddle of water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a little boy jumping in the air over a puddle of water.jpg", "mask_strategy": "0"}
a woman with afro hair is smiling while wearing earphones{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman with afro hair is smiling while wearing earphones.jpg", "mask_strategy": "0"}
a smiling woman with her hands clasped{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a smiling woman with her hands clasped.jpg", "mask_strategy": "0"}
a young boy standing in a field with horses in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a young boy standing in a field with horses in the background.jpg", "mask_strategy": "0"}
a young man is covered in colored powder{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a young man is covered in colored powder.jpg", "mask_strategy": "0"}
a woman with curly hair is drinking a beer{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman with curly hair is drinking a beer.jpg", "mask_strategy": "0"}
an old man standing in the middle of a field holding a bunch of plants{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an old man standing in the middle of a field holding a bunch of plants.jpg", "mask_strategy": "0"}
a man standing on a boat with a net{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man standing on a boat with a net.jpg", "mask_strategy": "0"}
a woman in a hat is putting salt into a basket{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman in a hat is putting salt into a basket.jpg", "mask_strategy": "0"}
a young girl smelling a pink flower{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a young girl smelling a pink flower.jpg", "mask_strategy": "0"}
a young boy leaning on a wooden pole{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a young boy leaning on a wooden pole.jpg", "mask_strategy": "0"}
a man in a hat sitting in front of a brick oven{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man in a hat sitting in front of a brick oven.jpg", "mask_strategy": "0"}
a man in a mexican outfit holding an acoustic guitar{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man in a mexican outfit holding an acoustic guitar.jpg", "mask_strategy": "0"}
a snowboarder is in the air doing a trick{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a snowboarder is in the air doing a trick.jpg", "mask_strategy": "0"}
a man riding a horse with a spear in his hand{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man riding a horse with a spear in his hand.jpg", "mask_strategy": "0"}
a woman carrying a bundle of plants over their head{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman carrying a bundle of plants over their head.jpg", "mask_strategy": "0"}
a person jumping in the air over a fence{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person jumping in the air over a fence.jpg", "mask_strategy": "0"}
a man on a surfboard riding a wave in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man on a surfboard riding a wave in the ocean.jpg", "mask_strategy": "0"}
a man sitting on steps playing an acoustic guitar{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man sitting on steps playing an acoustic guitar.jpg", "mask_strategy": "0"}
a man swinging a tennis racquet at a tennis ball{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man swinging a tennis racquet at a tennis ball.jpg", "mask_strategy": "0"}
a man riding a mountain bike on top of a rocky hill{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man riding a mountain bike on top of a rocky hill.jpg", "mask_strategy": "0"}
a man riding a bike down a street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man riding a bike down a street.jpg", "mask_strategy": "0"}
a man is running on a dirt road{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man is running on a dirt road.jpg", "mask_strategy": "0"}
A man in a black suit and a sombrero, shouting loudly{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A man in a black suit and a sombrero, shouting loudly.jpg", "mask_strategy": "0"}
a man standing on top of a sand dune in the desert{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man standing on top of a sand dune in the desert.jpg", "mask_strategy": "0"}
a person riding a motorcycle down a road{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding a motorcycle down a road.jpg", "mask_strategy": "0"}
a man standing on top of a mountain with a backpack{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man standing on top of a mountain with a backpack.jpg", "mask_strategy": "0"}
a man with a skull face paint smoking a cigar and holding a guitar{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man with a skull face paint smoking a cigar and holding a guitar.jpg", "mask_strategy": "0"}
a man in sunglasses laying on a wooden bench{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man in sunglasses laying on a wooden bench.jpg", "mask_strategy": "0"}
an older woman sitting in a room with a cigarette in her hand{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older woman sitting in a room with a cigarette in her hand.jpg", "mask_strategy": "0"}
a man sitting on the ground playing a musical instrument{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man sitting on the ground playing a musical instrument.jpg", "mask_strategy": "0"}
a person riding a horse in a polo match{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding a horse in a polo match.jpg", "mask_strategy": "0"}
a woman in a kimono holding an umbrella{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman in a kimono holding an umbrella.jpg", "mask_strategy": "0"}
a person riding a dirt bike{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding a dirt bike.jpg", "mask_strategy": "0"}
a person riding an atv on a dirt track{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding an atv on a dirt track.jpg", "mask_strategy": "0"}
a person riding a wave on a surfboard{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person riding a wave on a surfboard.jpg", "mask_strategy": "0"}
a woman in a wetsuit is swimming in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman in a wetsuit is swimming in the ocean.jpg", "mask_strategy": "0"}
a man snorkling in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man snorkling in the ocean.jpg", "mask_strategy": "0"}
a beautiful woman in a blue sari posing in front of a wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a beautiful woman in a blue sari posing in front of a wall.jpg", "mask_strategy": "0"}
a woman wearing a shawl in front of a mountain{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman wearing a shawl in front of a mountain.jpg", "mask_strategy": "0"}
a woman is making bread in an oven{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman is making bread in an oven.jpg", "mask_strategy": "0"}
a woman smiles while holding a yellow flower{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman smiles while holding a yellow flower.jpg", "mask_strategy": "0"}
A young boy is lifting a bundle of dry grass wrapped in waterproof fabric over his head{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A young boy is lifting a bundle of dry grass wrapped in waterproof fabric over his head.jpg", "mask_strategy": "0"}
two people performing a sword fight in front of a forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people performing a sword fight in front of a forest.jpg", "mask_strategy": "0"}
a woman in a colorful shirt is cooking food{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman in a colorful shirt is cooking food.jpg", "mask_strategy": "0"}
an older woman is drinking a bottle of water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older woman is drinking a bottle of water.jpg", "mask_strategy": "0"}
a smiling woman sitting at a table with food and drinks{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a smiling woman sitting at a table with food and drinks.jpg", "mask_strategy": "0"}
a woman wearing a hijab reading a book on the beach{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman wearing a hijab reading a book on the beach.jpg", "mask_strategy": "0"}
a woman wearing a headscarf is reaching for an olive tree{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman wearing a headscarf is reaching for an olive tree.jpg", "mask_strategy": "0"}
a woman in a white dress jumping in the air in a field of pink flowers{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman in a white dress jumping in the air in a field of pink flowers.jpg", "mask_strategy": "0"}
a woman wearing a conical hat sits on a boat{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman wearing a conical hat sits on a boat.jpg", "mask_strategy": "0"}
an older woman sitting in front of an old building{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older woman sitting in front of an old building.jpg", "mask_strategy": "0"}
a woman is praying in front of a buddhist temple{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman is praying in front of a buddhist temple.jpg", "mask_strategy": "0"}
a woman with green hair smiling for the camera{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman with green hair smiling for the camera.jpg", "mask_strategy": "0"}
A group of people in a yellow raft is rowing through turbulent waters{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A group of people in a yellow raft is rowing through turbulent waters.jpg", "mask_strategy": "0"}
a man carrying a woman on his back in a field{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man carrying a woman on his back in a field.jpg", "mask_strategy": "0"}
an indian police officer talking to an old woman{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an indian police officer talking to an old woman.jpg", "mask_strategy": "0"}
two people scuba diving in the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two people scuba diving in the ocean.jpg", "mask_strategy": "0"}
A man and woman dressed as sugar skulls in a field of flowers, sharing a loving gaze with each other{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A man and woman dressed as sugar skulls in a field of flowers, sharing a loving gaze with each other.jpg", "mask_strategy": "0"}
a group of people watching a cow race{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people watching a cow race.jpg", "mask_strategy": "0"}
a man and a child riding bumper cars in an amusement park{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man and a child riding bumper cars in an amusement park.jpg", "mask_strategy": "0"}
a group of motorcyclists racing on a dirt track{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of motorcyclists racing on a dirt track.jpg", "mask_strategy": "0"}
a man and a woman are boxing in a boxing ring{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man and a woman are boxing in a boxing ring.jpg", "mask_strategy": "0"}
a man holding a baby in his arms{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man holding a baby in his arms.jpg", "mask_strategy": "0"}
a man and a woman sitting on a bench playing instruments{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man and a woman sitting on a bench playing instruments.jpg", "mask_strategy": "0"}
two men are standing next to each other with a bicycle{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two men are standing next to each other with a bicycle.jpg", "mask_strategy": "0"}
a man and a boy sitting on a beach near the ocean{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man and a boy sitting on a beach near the ocean.jpg", "mask_strategy": "0"}
two men in white clothing standing next to each other{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two men in white clothing standing next to each other.jpg", "mask_strategy": "0"}
a group of men riding horses in a dusty arena{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of men riding horses in a dusty arena.jpg", "mask_strategy": "0"}
a soccer player in a yellow and black shirt is chasing a soccer ball{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a soccer player in a yellow and black shirt is chasing a soccer ball.jpg", "mask_strategy": "0"}
a group of women sitting on the steps of a building{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of women sitting on the steps of a building.jpg", "mask_strategy": "0"}
a group of people gathered around a red checkered blanket{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people gathered around a red checkered blanket.jpg", "mask_strategy": "0"}
a group of people in orange jumpsuits running along a river{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people in orange jumpsuits running along a river.jpg", "mask_strategy": "0"}
a woman walking down a sidewalk with a bag{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman walking down a sidewalk with a bag.jpg", "mask_strategy": "0"}
a busy street with cars and people on motorcycles{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a busy street with cars and people on motorcycles.jpg", "mask_strategy": "0"}
a man in a mask is walking through a crowd of people{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man in a mask is walking through a crowd of people.jpg", "mask_strategy": "0"}
a man and a woman walking under an umbrella next to a brick wall{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a man and a woman walking under an umbrella next to a brick wall.jpg", "mask_strategy": "0"}
a group of people riding bikes down a street{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of people riding bikes down a street.jpg", "mask_strategy": "0"}
An old person is holding a cup on the street, and people around are curiously looking at him{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/An old person is holding a cup on the street, and people around are curiously looking at him.jpg", "mask_strategy": "0"}
two young girls playing with leaves in the woods{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two young girls playing with leaves in the woods.jpg", "mask_strategy": "0"}
One person is riding on the back of a horse led by another person{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/One person is riding on the back of a horse led by another person.jpg", "mask_strategy": "0"}
an older woman and a young girl are knitting together{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/an older woman and a young girl are knitting together.jpg", "mask_strategy": "0"}
three geishas walking down the street in traditional clothing{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/three geishas walking down the street in traditional clothing.jpg", "mask_strategy": "0"}
two men riding bikes down a road near a forest{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two men riding bikes down a road near a forest.jpg", "mask_strategy": "0"}
two women carrying bowls on their heads{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two women carrying bowls on their heads.jpg", "mask_strategy": "0"}
two women eating pizza at a restaurant{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two women eating pizza at a restaurant.jpg", "mask_strategy": "0"}
two young women studying in a library{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two young women studying in a library.jpg", "mask_strategy": "0"}
pink water lilies in a pond with leaves{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/pink water lilies in a pond with leaves.jpg", "mask_strategy": "0"}
a group of succulents in a rock garden{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of succulents in a rock garden.jpg", "mask_strategy": "0"}
a close up view of a bunch of snowdrop flowers{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up view of a bunch of snowdrop flowers.jpg", "mask_strategy": "0"}
a close up of leaves with water droplets on them{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of leaves with water droplets on them.jpg", "mask_strategy": "0"}
a close-up of a sea anemone in the water{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up of a sea anemone in the water.jpg", "mask_strategy": "0"}
a plant with water droplets on it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a plant with water droplets on it.jpg", "mask_strategy": "0"}
a group of cactus plants in the desert{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of cactus plants in the desert.jpg", "mask_strategy": "0"}
a close-up view of a plant with spiky leaves{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a plant with spiky leaves.jpg", "mask_strategy": "0"}
A budding and blossoming flower bud seedling{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A budding and blossoming flower bud seedling.jpg", "mask_strategy": "0"}
a field of orange flowers near the ocean'{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a field of orange flowers near the ocean'.jpg", "mask_strategy": "0"}
a close-up view of a bunch of pink flowers{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close-up view of a bunch of pink flowers.jpg", "mask_strategy": "0"}
pink water lilies in a pond{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/pink water lilies in a pond.jpg", "mask_strategy": "0"}
reeds blowing in the wind against a cloudy sky{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/reeds blowing in the wind against a cloudy sky.jpg", "mask_strategy": "0"}
two tall cacti in the middle of the desert{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two tall cacti in the middle of the desert.jpg", "mask_strategy": "0"}
a sea anemone on a coral reef{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a sea anemone on a coral reef.jpg", "mask_strategy": "0"}
a dandelion blowing in the wind{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a dandelion blowing in the wind.jpg", "mask_strategy": "0"}
A boiling pot cooking vegetables{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A boiling pot cooking vegetables.jpg", "mask_strategy": "0"}
a woman stirring food in a pan on the stove{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a woman stirring food in a pan on the stove.jpg", "mask_strategy": "0"}
two eggs are fried in a frying pan on the stove{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/two eggs are fried in a frying pan on the stove.jpg", "mask_strategy": "0"}
fried onion rings in a basket{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/fried onion rings in a basket.jpg", "mask_strategy": "0"}
a pot is sitting on top of a campfire{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pot is sitting on top of a campfire.jpg", "mask_strategy": "0"}
a chef is preparing a dish with mushrooms on a wooden board{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a chef is preparing a dish with mushrooms on a wooden board.jpg", "mask_strategy": "0"}
a hand holding a slice of pizza{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a hand holding a slice of pizza.jpg", "mask_strategy": "0"}
A person is using tongs to pick up meat from a plate{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A person is using tongs to pick up meat from a plate.jpg", "mask_strategy": "0"}
The meat is picked up from the grill with tongs{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/The meat is picked up from the grill with tongs.jpg", "mask_strategy": "0"}
A person is whisking eggs, and the egg whites and yolks are gently streaming out{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A person is whisking eggs, and the egg whites and yolks are gently streaming out.jpg", "mask_strategy": "0"}
a person is putting sauce on a burger{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person is putting sauce on a burger.jpg", "mask_strategy": "0"}
A person is making dumplings{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A person is making dumplings.jpg", "mask_strategy": "0"}
a pan filled with fried food{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a pan filled with fried food.jpg", "mask_strategy": "0"}
Chopsticks are slowly picking up the buns from the plastic container{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Chopsticks are slowly picking up the buns from the plastic container.jpg", "mask_strategy": "0"}
a basket of french fries in a fryer{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a basket of french fries in a fryer.jpg", "mask_strategy": "0"}
a table with lobsters and drinks on it{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a table with lobsters and drinks on it.jpg", "mask_strategy": "0"}
a person pouring coffee into a pot on a stove{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person pouring coffee into a pot on a stove.jpg", "mask_strategy": "0"}
a kettle is sitting on top of a campfire{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a kettle is sitting on top of a campfire.jpg", "mask_strategy": "0"}
Chopsticks are picking up noodles from the bowl{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/Chopsticks are picking up noodles from the bowl.jpg", "mask_strategy": "0"}
a person is cooking eggs on an outdoor grill{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person is cooking eggs on an outdoor grill.jpg", "mask_strategy": "0"}
a person is cooking food in a wok on a stove{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person is cooking food in a wok on a stove.jpg", "mask_strategy": "0"}
a person is holding up a burger with his hands{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person is holding up a burger with his hands.jpg", "mask_strategy": "0"}
A person is pouring water into a teacup{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/A person is pouring water into a teacup.jpg", "mask_strategy": "0"}
a person pouring seasoning into a pot of food{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person pouring seasoning into a pot of food.jpg", "mask_strategy": "0"}
a person holding a taco in their hand{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person holding a taco in their hand.jpg", "mask_strategy": "0"}
a person slicing salmon on a cutting board{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person slicing salmon on a cutting board.jpg", "mask_strategy": "0"}
a bunch of food is cooking on a grill over an open fire{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a bunch of food is cooking on a grill over an open fire.jpg", "mask_strategy": "0"}
a close up of a piece of sushi on chopsticks{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a close up of a piece of sushi on chopsticks.jpg", "mask_strategy": "0"}
a group of pots on a stove with flames in the background{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a group of pots on a stove with flames in the background.jpg", "mask_strategy": "0"}
a person cooking vegetables in a pan on a stove{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person cooking vegetables in a pan on a stove.jpg", "mask_strategy": "0"}
a large pot of soup filled with vegetables and meat{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a large pot of soup filled with vegetables and meat.jpg", "mask_strategy": "0"}
a person holding chopsticks over a bowl of food{"reference_path": "/mnt/jfs-hdd/sora/data/vbench-i2v/crop/1-1/a person holding chopsticks over a bowl of food.jpg", "mask_strategy": "0"}

+ 0
- 100
assets/texts/VBench/prompts_per_category/animal.txt View File

@@ -1,100 +0,0 @@
a black dog wearing halloween costume
spider making a web
bat eating fruits while hanging
a snake crawling on a wooden flooring
a close up video of a dragonfly
macro shot of ladybug on green leaf plant
chameleon eating ant
a bee feeding on nectars
bird nests on a tree captured with moving camera
a squirrel eating nuts
close up video of snail
top view of a hermit crab crawling on a wooden surface
cat licking another cat
red dragonfly perched on green leaf
close up view of a brown caterpillar crawling on green leaf
ants eating dead spider
an eagle on a tree branch
a frog eating an ant
white rabbit near the fence
a gorilla eating a carrot
close up of wolf
a meerkat looking around
a hyena in a zoo
lemur eating grass leaves
an owl being trained by a man
a lizard on a bamboo
brown chicken hunting for its food
video of parrots perched on bird stand
underwater footage of an octopus in a coral reef
a cute pomeranian dog playing with a soccer ball
white fox on rock
close up footage of a horse figurine
giraffe feeding on a tree in a savannah
curious cat sitting and looking around
hummingbird hawk moth flying near pink flowers
close up of a scorpion on a rock
close up on fish in net
koala eating leaves from a branch
a pod of dolphins swirling in the sea catching forage fish
low angle view of a hawk perched on a tree branch
a lion standing on wild grass
deer grazing in the field
elephant herd in a savanna
close up on lobster under water
hedgehog crossing road in forest
a sheep eating yellow flowers from behind a wire fence
twin sisters and a turtle
a pig wallowing in mud
flock of goose eating on the lake water
cow in a field irritated with flies
a close up shot of a fly
cheetah lying on the grass
close up of a lemur
close up shot of a kangaroo itching in the sand
a tortoise covered with algae
turkey in cage
a great blue heron bird in the lakeside
crab with shell in aquarium
a seagull walking on shore
an american crocodile
a tiger walking inside a cage
alligator in the nature
a raccoon climbing a tree
wild rabbit in a green meadow
group of ring tailed lemurs
a clouded leopard on a tree branch
duck grooming its feathers
an african penguin walking on a beach
a video of a peacock
close up shot of a wild bear
baby rhino plays with mom
porcupine climbs tree branches
close up of a natterjack toad on a rock
a sleeping orangutan
mother whale swimming with babies
a bear wearing red jersey
pink jellyfish swimming underwater in a blue sea
beautiful clown fish swimming
animation of disposable objects shaped as a whale
paper cut out of a pair of hands a whale and a heart
vertical video of camel roaming in the field during daytime
a still video of mosquito biting human
a curious sloth hanging from a tree branch
a plastic flamingo bird stumbles from the wind
a wolf in its natural habitat
a monkey sitting in the stone and scratching his head
bat hanging upside down
a red panda eating leaves
snake on ground
a harbour seal swimming near the shore
shark swimming in the sea
otter on branch while eating
goat standing over a rock
a troop of monkey on top of a mountain
a zebra eating grass on the field
a colorful butterfly perching on a bud
a snail crawling on a leaf
zookeeper showering a baby elephant
a beetle emerging from the sand
a nine banded armadillo searching for food

+ 0
- 100
assets/texts/VBench/prompts_per_category/architecture.txt View File

@@ -1,100 +0,0 @@
an apartment building with balcony
asian garden and medieval castle
illuminated tower in berlin
a wooden house overseeing the lake
a crowd of people in a plaza in front of a government building
a church interior
jewish friends posing with hanukkah menorah in a cabin house
a destroyed building after a missile attack in ukraine
abandoned building in the woods
drone video of an abandoned school building in pripyat ukraine
elegant university building
architecture and designs of buildings in central london
a pancake tower with chocolate syrup and strawberries on top
an ancient white building
friends hanging out at a coffee house
house front door with christmas decorations
city night dark building
a bird house hanging on a tree branch
sacred sculpture in a temple
high angle shot of a clock tower
modern wooden house interior
the interior of an abandoned building
opera house overlooking sea
a concrete structure near the green trees
dome like building in scotland
low angle shot of a building
tower on hill
a miniature house
eiffel tower from the seine river
low angle footage of an apartment building
island with pier and antique building
asian historic architecture
drone footage of a beautiful mansion
mosque in the middle east
building a tent and hammock in the forest camping site
top view of a high rise building
house covered in snow
skyscraper at night
house in village
a casino with people outside the building
silhouette of a building
a woman climbing a tree house
drone view of house near lake during golden hour
an under construction concrete house
a watch tower by the sea
exterior view of arabic style building
video of a hotel building
red paper lantern decorations hanging outside a building
house on seashore
aerial footage of the palace of culture and science building in warsaw poland
aerial video of stuttgart tv tower in germany
aerial view of the highway and building in a city
drone shot of a skyscraper san francisco california usa
waterfall and house
view of the sky through a building
drone footage of a house on top of the mountain
abandoned house in the nature
clouds hovering over a mansion
light house on the ocean
buddhist temple at sunrise
people walking by a graveyard near a mosque at sunset
view of lifeguard tower on the beach
scenic view of a house in the mountains
the landscape in front of a government building
aerial footage of a building and its surrounding landscape in winter
time lapse of a cloudy sky behind a transmission tower
blue ocean near the brown castle
fog over temple
house in countryside top view
building under construction
turkish flag waving on old tower
the georgian building
close up shot of a steel structure
the atrium and interior design of a multi floor building
city view reflected on a glass building
aerial view of a luxurious house with pool
an unpaved road leading to the house
drone footage of a lookout tower in mountain landscape
wind turbines on hill behind building
time lapse footage of the sun light in front of a small house porch
a building built with lots of stairways
overcast over house on seashore
the view of the sydney opera house from the other side of the harbor
candle on a jar and a house figurine on a surface
video of a farm and house
a dilapidated building made of bricks
a view of a unique building from a moving vehicle
aerial footage of a tall building in cambodia
push in shot of a huge house
a beach house built over a seawall protected from the sea waves
exotic house surrounded by trees
drone video of a house surrounded by tropical vegetation
drone footage of a building beside a pond
observation tower on hill in forest
a tree house in the woods
a video of vessel structure during daytime
fire in front of illuminated building at night
a footage of a wooden house on a wheat field
tilt shot of a solar panel below a light tower
water tower on the desert

+ 0
- 100
assets/texts/VBench/prompts_per_category/food.txt View File

@@ -1,100 +0,0 @@
freshly baked finger looking cookies
video of fake blood in wine glass
halloween food art
a person slicing a vegetable
a serving of pumpkin dish in a plate
close up view of green leafy vegetable
a birthday cake in the plate
video of a slice papaya fruit
a muffin with a burning candle and a love sign by a ceramic mug
a jack o lantern designed cookie
baked bread with chocolate
a broccoli soup on wooden table
a freshly brewed coffee on a pink mug
grabbing sourdough neapolitan style pizza slices
person cooking mushrooms in frying pan
rice grains placed on a reusable cloth bag
slices of kiwi fruit
grilling a steak on a pan grill
close up of bread popping out of a toaster
man eating noodle
preparing a cocktail drink
close up pasta with bacon on plate
milk and cinnamon rolls
boy getting a dumpling using chopsticks
a mother preparing food with her kids
man using his phone while eating
fresh salmon salad on a plate
cutting cucumbers into long thin slices as ingredient for sushi roll
a steaming cup of tea by the window
a glass filled with beer
a kid eating popcorn while watching tv
close up shot of fried fish on the plate
a man eating a donut
person making a vegetarian dish
spreading cheese on bagel
close up view of a man drinking red wine
a couple having breakfast in a restaurant
a student eating her sandwich
girl peeling a banana
red rice in a small bowl
pancake with blueberry on the top
green apple fruit on white wooden table
a man eating a taco by the bar
making of a burrito
squeezing lemon into salad
a chef cutting sushi rolls
video of a delicious dessert
deep frying a crab on a wok in high fire
close up video of a orange juice
video of a cooked chicken breast
woman holding a pineapple
a woman eating a bar of chocolate
decorating christmas cookie
squeezing a slice of fruit
tuna sashimi on a plate
a strawberry fruit mixed in an alcoholic drink
preparing hot dogs in a grill
a woman cutting a tomato
an orange fruit cut in half
a coconut fruit with drinking straw
woman holding a dragon fruit
a woman pouring hot beverage on a cup
waffles with whipped cream and fruit
focus shot of an insect at the bottom of a fruit
preparing a healthy broccoli dish
man eating snack at picnic
close up video of a grilled shrimp skewer
a woman mixing a smoothie drinks
close up video of woman having a bite of jelly
businessman drinking whiskey at the bar counter of a hotel lounge
cutting an onion with a knife over a wooden chopping board
fresh lemonade in bottles
grilling a meat on a charcoal grill
people enjoying asian cuisine
close up footage of a hot dish on a clay pot
pork ribs dish
waffle with strawberry and syrup for breakfast
tofu dish with rose garnish
uncooked pork meat
egg yolk being dumped over gourmet dish
tasty brunch dish close up
little boy pretending to eat the watermelon
slicing roasted beef
close up of a chef adding teriyaki sauce to a dish
flat lay mexican dish
a person placing an octopus dish on a marble surface
close up of tea leaves brewing in a glass kettle
adding fresh herbs to soup dish
a scoop of roasted coffee beans
fresh dim sum set up on a bamboo steam tray for cooking
a girl putting ketchup on food at the kitchen
cooking on electric stove
a woman with a slice of a pie
grapes and wine on a wooden board
man taking picture of his food
hamburger and fries on restaurant table
close up video of japanese food
a cracker sandwich with cheese filling for snack
barista preparing matcha tea
close up of onion rings being deep fried

+ 0
- 100
assets/texts/VBench/prompts_per_category/human.txt View File

@@ -1,100 +0,0 @@
people carving a pumpkin
people sitting on a sofa
a man with a muertos face painting
man walking in the dark
men in front of their computer editing photos
men loading christmas tree on tow truck
woman washing the dishes
woman adding honey to the cinnamon rolls
two women kissing and smiling
three women looking at watercolor paintings
a family wearing paper bag masks
a family posing for the camera
a boy covering a rose flower with a dome glass
boy sitting on grass petting a dog
a girl in her tennis sportswear
a girl coloring the cardboard
silhouette of the couple during sunset
couple dancing with body paint
a child playing with water
a woman with her child sitting on a couch in the living room
a group of friend place doing hand gestures of agreement
friends having a group selfie
friends talking while on the basketball court
group of people protesting
a group of campers with a cute dog
a group of photographers taking pictures at the north western gardens in llandudno north wales
a group of students laughing and talking
a group of martial artist warming up
a person playing golf
a person walking on a wet wooden bridge
person doing a leg exercise
ice hockey athlete on rink
a young athlete training in swimming
chess player dusting a chessboard
baseball player holding his bat
a bearded man putting a vinyl record on a vinyl player
an orchestra finishes a performance
people applauding the performance of the kids
band performance at the recording studio
father and his children playing jenga game
people playing a board game
man playing a video game
a man video recording the movie in theater
man and a woman eating while watching a movie
movie crew talking together
a director explaining the movie scene
man and woman listening to music on car
man playing music
couple dancing slow dance with sun glare
a ballerina practicing in the dance studio
father and son holding hands
father and daughter talking together
a mother and her kids engaged in a video call
mother and daughter reading a book together
a mother teaching her daughter playing a violin
kid in a halloween costume
a happy kid playing the ukulele
a chef slicing a cucumber
chef wearing his gloves properly
brother and sister using hammock
girl applying sunblock to her brother
a girl pushing the chair while her sister is on the chair
colleagues talking in office building
fighter practice kicking
a woman fighter in her cosplay costume
an engineer holding blueprints while talking with her colleague
a young woman looking at vr controllers with her friend
workmates teasing a colleague in the work
a male police officer talking on the radio
teacher holding a marker while talking
teacher writing on her notebook
a young student attending her online classes
a student showing his classmates his wand
a male vendor selling fruits
a shirtless male climber
a sound engineer listening to music
female talking to a psychiatrist in a therapy session
young female activist posing with flag
a man in a hoodie and woman with a red bandana talking to each other and smiling
a medium close up of women wearing kimonos
a male interviewer listening to a person talking
a social worker having a conversation with the foster parents
a farm worker harvesting onions
worker packing street food
worker and client at barber shop
elderly man lifting kettlebell
mom assisting son in riding a bicycle
dad watching her daughter eat
young guy with vr headset
pregnant woman exercising with trainer
a fortune teller talking to a client
wizard doing a ritual on a woman
a footage of an actor on a movie scene
a man holding a best actor trophy
a singer of a music band
a young singer performing on stage
young dancer practicing at home
seller showing room to a couple
cab driver talking to passenger
a policeman talking to the car driver

+ 0
- 100
assets/texts/VBench/prompts_per_category/lifestyle.txt View File

@@ -1,100 +0,0 @@
kids celebrating halloween at home
little boy helping mother in kitchen
video of a indoor green plant
a girl arranges a christmas garland hanging by the kitchen cabinet
candle burning in dark room
couple having fun and goofing around the bedroom
girls jumping up and down in the bedroom
woman and man in pajamas working from home
a muslim family sitting and talking in the living room
family enjoying snack time while sitting in the living room
woman holding an animal puppet and a little girl playing together at the living room
kids playing in the indoor tent
young people celebrating new year at the office
a woman writing on the sticky note in the office
a woman exercising at home over a yoga mat
girls preparing easter decorations at home
dog on floor in room
turning on a fluorescent light inside a room
colleagues talking to each other near the office windows
a woman recording herself while exercising at home
music room
different kind of tools kept in a utility room
sofa beds and other furniture
a girl finding her brother reading a book in the bedroom
an elegant ceramic plant pot and hanging plant on indoor
furniture inside a bedroom
interior design of the bar section
living room with party decoration
firewood burning in dark room
a young woman playing the ukulele at home
woman painting at home
a woman in a locker room
video of a bathroom interior
the interior design of a jewish synagogue
a woman in protective suit disinfecting the kitchen
modern minimalist home interior
modern interior design of a coffee shop
person arranging minimalist furniture
aerial shot of interior of the warehouse
a room of a manufacturing facility
interior of catholic
interior design of a restaurant
a female model in a changing room looking herself in mirror
men walking in the office hallway
people sitting in a conference room
the interior design of a shopping mall
chandeliers in room
lucerne railway station interior
a female fencer posing in a foggy room
a toolbox and a paint roller beside a huge package in a room
bedroom in hotel
a woman lying in the operating room
a chef holding and checking kitchen utensils
a couple singing in the shower room together
a woman cleaning mess in the living room
an empty meeting room with natural light
person dancing in a dark room
close up on blood in hospital room
a couple resting on their home floor
a young female staff at courier office
a man entering the gym locker room
a bored man sitting by the tv at home
woman dancing in indoor garden
rubble in the interior of an abandoned house
indoor farm in a greenhouse
man doing handstand in indoor garden
an abandoned indoor swimming pool
home decorations on top of a cabinet
graffiti art on the interior walls of an abandoned mansion
indoor wall climbing activity
sunlight inside a room
teenage girl roller skating at indoor rink
home deco with lighted
baby in the shower room
men enjoying office christmas party
a bedroom with a brick wall
actors prepping in the dressing room
kids playing at an indoor playground
a person sanitizing an office space using smoke machine
mother and daughter choosing clothes at home
a woman sitting by the indoor fire pit
man standing on the corner of the room while looking around
person assembling furniture
a family stacking cardboard boxes in a room
family having fun in the dining room
person disinfecting a room
a woman washing strawberries in the kitchen sink
modern office waiting room
close up view of a person slicing with a kitchen knife
boiling coffee on a stove in the kitchen
modern equipment used in a home studio
interior of a recording studio
people working in a call center office
band performing at a home concert
a group of people watching a concert in a room
people packing their furniture
young employees in office holding a certificate
a criminal inside a dark room handcuffed in a table
couple browsing and looking for furniture in the store
workspace at home

+ 0
- 100
assets/texts/VBench/prompts_per_category/plant.txt View File

@@ -1,100 +0,0 @@
video of a indoor green plant
close up view of a plant
close up shot of a burning plant
plucking leaves from plant
a plant on gold pot with glass lid
a branch of a tree and a plant
a leafless tree
close up shot of fern leaf
close up video of strawberry plant
plant with blooming flowers
close up video of flower petals
watering yellow plant
beautiful flower decoration
cannabis flower in a jar
a footage of the tree leaves
a red leaf plant
close up view of a white christmas tree
snow pouring on a tree
close up shot of white flowers on the tree
leaves in the trees daytime
a dead tree lying on a grass field
tree branches in a flowing river
purple flowers with leaves
a coconut tree by the house
close up on flower in winter
bamboo leaves backlit by the sun
close up video of a wet flower
a man putting a flower in a box
dropping flower petals on a wooden bowl
a close up shot of gypsophila flower
variety of succulent plants on a garden
variety of trees and plants in a botanical garden
forest of deciduous trees
a stack of dried leaves burning in a forest
tall forest trees on a misty morning
close up view of dewdrops on a leaf
close up view of white petaled flower
removing a pineapple leaf
a dragonfly perched on a leaf
butterfly pollinating flower
person visiting and checking a corn plant
woman picking beans from a plant
woman plucking mint leaves
single tree in the middle of farmland
a plant on a soil
drone footage of a tree on farm field
a tractor harvesting lavender flower
people putting christmas ornaments on a christmas tree
jack o lantern hanging on a tree
tree with halloween decoration
flower field near the waterfall
truck carrying the tree logs
raindrops falling on leaves
shot of a palm tree swaying with the wind
squirrels on a tree branch
person holding a flower
a fallen tree trunk
tree with golden leaves
cherry tree
wind blows through leaves of the tree in autumn
a leaf on a glass
the long trunks of tall trees in the forest
trees in the forest during sunny day
close up video of tree bark
reflection of tree branches
trunks of many trees in the forest
tree leaves providing shades from the sun
leaves swaying in the wind
low angle shot of baobab tree
bare trees in forest
a plant surrounded by fallen leaves
a couple preparing food and pruning a plant
a man cutting a tree bark
oranges on a tree branch
plant connected on the stones
video of a sawmill machine cutting tree log
women drying flower petals
macro view of an agave plant
a video of a person tying a plant on a string
green moss in forest nature
coconut tree near sea under blue sky
the canopy of a coconut tree
a man leaning on a tree at the beach
a full grown plant on a pot
candle wax dripping on flower petals
close up of leaves in autumn
a woman opening a book with a flower inside
a man holding leaves looking at the camera
a shadow of a swaying plant
a tree and concrete structure under a blue and cloudy sky
trimming excess leaves on a potted plant
the changing color of the tree leaves during autumn season
a gooseberry tree swayed by the wind
forest trees and a medieval castle at sunset
woman cut down tree
an old oak tree in a park across the street from a hotel
wild flowers growing in a forest ground
a mossy fountain and green plants in a botanical garden
mansion with beautiful garden
ants on a dragon fruit flower

+ 0
- 100
assets/texts/VBench/prompts_per_category/scenery.txt View File

@@ -1,100 +0,0 @@
scenery of desert landscape
landscape agriculture farm tractor
burning slash piles in the forest
graveyard at sunset
view of a jack o lantern with pumpkins in a smoky garden
sun view through a spider web
view of the sea from an abandoned building
close up view of a full moon
close up view of lighted candles
close up view of swaying white flowers and leaves
scenery of a relaxing beach
selective focus video of grass during sunny day
aerial view of brown dry landscape
fireworks display in the sky at night
a bonfire near river
mountain view
waterfalls in between mountain
a picturesque view of nature
exotic view of a riverfront city
tall trees in the forest under the clear sky
snow on branches in forest
stream in the nature
an airplane flying above the sea of clouds
scenic video of sunset
view of houses with bush fence under a blue and cloudy sky
scenic view from wooden pathway
scenic view of a tropical beach
drone footage of waves crashing on beach shore
a scenic view of the golden hour at norway
time lapse video of foggy mountain forest
brown mountain during fall season
video of ocean during daytime
boat sailing in the ocean
top view of yachts
beautiful scenery of flowing waterfalls and river
wild ducks paddling on the lake surface
a relaxing scenery of beach view under cloudy sky
natural rock formations on beach under cloudy sky
a palm tree against blue sky
video of sailboat on a lake during sunset
aerial view of snow piles
time lapse of a sunset sky in the countryside
aerial footage of a statue
time lapse video of a farm during sunset
clouds formation in the sky at sunset
aerial shot of a village
drone shot of a beautiful sunrise at the mountains
time lapse video of foggy morning during sunrise
sun shining between tree leaves at sunrise
video of lake during dawn
vehicles traveling on roadway under cloudy sky
view of golden domed church
a monument under the blue sky
firecrackers in the sky
view of fruit signage in the farm
a dark clouds over shadowing the full moon
view of the amazon river
a big river swamp in a dense forest
a blooming cherry blossom tree under a blue sky with white clouds
a river waterfall cascading down the plunge basin
flooded landscape with palm trees
a blurry waterfall background
waterfall in the mountains
aerial footage of a city at night
pond by small waterfall in forest
aerial view of farmlands at the bay of lake
rice terraces in the countryside
a highway built across an agricultural area in the countryside
gloomy morning in the countryside
drone shot of an abandoned coliseum on a snowy mountain top
boat sailing in the middle of ocean
drone shot of the grass field
natural landscape of mountain and sea with islets developed into a community
aerial view of zaporizhia in ukraine
aerial footage of a herd
an aerial footage of a red sky
grass and plants growing in the remains of an abandoned house
view from hill on city
aerial view on orthodox church
aerial view of bay in croatia
a footage of a frozen river
overlooking view of a city at daylight
view outside the cemetery
clear sky with moon over meadow
clouds over railway
aerial footage of moving vehicles on the road at night
aerial view of town and park
top view of skyscrapers
top view of the empire state building in manhattan
top view of the central park in new york city
sheep running in a grass field
clear sky over factory
smoke and fire in birds eye view
view of a pathway with snow melting on its side
ferry under bridge on river near city in malaysia
mountain slopes covered in green vegetation
panoramic view of a town surrounded by snow covered mountains
aerial view of a palace
top view of vehicles driving on the intersection
a graveyard by a church in a mountain landscape

+ 0
- 100
assets/texts/VBench/prompts_per_category/vehicles.txt View File

@@ -1,100 +0,0 @@
a modern railway station in malaysia use for public transportation
drone footage of amsterdam metro station
train arriving at a station
red vehicle driving on field
close up view of flashing emergency vehicle lighting
vehicle with fertilizer on field
a highway built across an agricultural area in the countryside
drone footage of motorcycles driving on country road between agricultural fields
a road in the woods under fog
footage of a car driving through a wheat field
vehicle stops for an ambulance passing through city traffic
emergency vehicle parked outside the casino
zombies attacking a woman and a boy inside a car
woman seating inside the car while chewing
video of passengers riding a double decker bus during night
traffic in london street at night
elderly couple checking engine of automobile
a green vintage automobile with an open hood parked in a parking area
close up of a prototype automobile with exposed engine on the back seat of the car
aerial view of road in forest
train departing from station
aerial view of a train passing by a bridge
video of a train tracks
video footage of a subway
video of blinking traffic lights
couple walking out on the subway
time lapse of a subway tunnel
monitor board inside the subway
metro train at night
zoom in video of a tram passing by city
young man using laptop in the tram
man reading a book at bus stop
close up shot of a moving taxi
night travel in london street on a public bus
red bus in a rainy city
flow of traffic in the city
close up shot of a yellow taxi turning left
two women calling for a taxi
drone view of an illuminated bridge across a river
policeman in police car talking on radio
airplane taking off at night
view through window in airplane
an airplane in the sky
helicopter landing on the street
a pilot getting out of a helicopter
a helicopter flying under blue sky
boat sailing in the middle of the ocean
girl playing with a toy boat
silhouette of a boat on sea during golden hour
a boat travelling around the lake
road on mountain ridge
ship sailing on danube river
slow motion video of a ship water trail in the sea
drone footage of a wreck ship on shore
a white yacht traveling on a river and passing under the bridge
female teenagers drinking champagne in the yacht
video of yacht sailing in the ocean
red combine harvester on road on field
a woman sitting on a bicycle while using a mobile phone
a woman sitting on a motorcycle looking around
three teenagers fixing a bicycle
a woman in a halloween costume posing on a motorcycle
a parked motorcycle on a foggy roadside
cable car near sea shore
a truck travelling in the road
footage of the road without any traffic
a road sign
love padlocks on a bridge
camera moving at highway construction site
vehicles driving on highway
a motorbike on highway at timelapse mode
point of view of a car driving through a tunnel
time lapse of heavy traffic on an avenue
ferry boat on city canal
black vintage car in museum
a zigzag road across a forest
people crossing the road
video of a kayak boat in a river
a person paddling a wooden boat in a lake
a car charging in the parking area
cars parked on the road
footage of the street with people and vehicle passing by in the rain
traffic on busy city street
a woman getting out of the car to walk with their dog
yacht sailing through the ocean
people in queue to military ship
man wearing motorcycle helmet looking at the camera
empty seats in the bus
empty boat on the water
cargo train traveling on the mountainside
cruise ship in harbor
counting down at traffic lights
pressing the car ignition
fire truck driving on the road
a footage of a broken bicycle
drone footage of an ambulance on the road
slow motion footage of a racing car
ship sailing on sea against sunset
big cargo ship passing on the shore
back view of man and woman walking on unpaved road

+ 0
- 90
assets/texts/VBench/prompts_per_dimension/appearance_style.txt View File

@@ -1,90 +0,0 @@
A beautiful coastal beach in spring, waves lapping on sand, Van Gogh style
A beautiful coastal beach in spring, waves lapping on sand, oil painting
A beautiful coastal beach in spring, waves lapping on sand by Hokusai, in the style of Ukiyo
A beautiful coastal beach in spring, waves lapping on sand, black and white
A beautiful coastal beach in spring, waves lapping on sand, pixel art
A beautiful coastal beach in spring, waves lapping on sand, in cyberpunk style
A beautiful coastal beach in spring, waves lapping on sand, animated style
A beautiful coastal beach in spring, waves lapping on sand, watercolor painting
A beautiful coastal beach in spring, waves lapping on sand, surrealism style
The bund Shanghai, Van Gogh style
The bund Shanghai, oil painting
The bund Shanghai by Hokusai, in the style of Ukiyo
The bund Shanghai, black and white
The bund Shanghai, pixel art
The bund Shanghai, in cyberpunk style
The bund Shanghai, animated style
The bund Shanghai, watercolor painting
The bund Shanghai, surrealism style
a shark is swimming in the ocean, Van Gogh style
a shark is swimming in the ocean, oil painting
a shark is swimming in the ocean by Hokusai, in the style of Ukiyo
a shark is swimming in the ocean, black and white
a shark is swimming in the ocean, pixel art
a shark is swimming in the ocean, in cyberpunk style
a shark is swimming in the ocean, animated style
a shark is swimming in the ocean, watercolor painting
a shark is swimming in the ocean, surrealism style
A panda drinking coffee in a cafe in Paris, Van Gogh style
A panda drinking coffee in a cafe in Paris, oil painting
A panda drinking coffee in a cafe in Paris by Hokusai, in the style of Ukiyo
A panda drinking coffee in a cafe in Paris, black and white
A panda drinking coffee in a cafe in Paris, pixel art
A panda drinking coffee in a cafe in Paris, in cyberpunk style
A panda drinking coffee in a cafe in Paris, animated style
A panda drinking coffee in a cafe in Paris, watercolor painting
A panda drinking coffee in a cafe in Paris, surrealism style
A cute happy Corgi playing in park, sunset, Van Gogh style
A cute happy Corgi playing in park, sunset, oil painting
A cute happy Corgi playing in park, sunset by Hokusai, in the style of Ukiyo
A cute happy Corgi playing in park, sunset, black and white
A cute happy Corgi playing in park, sunset, pixel art
A cute happy Corgi playing in park, sunset, in cyberpunk style
A cute happy Corgi playing in park, sunset, animated style
A cute happy Corgi playing in park, sunset, watercolor painting
A cute happy Corgi playing in park, sunset, surrealism style
Gwen Stacy reading a book, Van Gogh style
Gwen Stacy reading a book, oil painting
Gwen Stacy reading a book by Hokusai, in the style of Ukiyo
Gwen Stacy reading a book, black and white
Gwen Stacy reading a book, pixel art
Gwen Stacy reading a book, in cyberpunk style
Gwen Stacy reading a book, animated style
Gwen Stacy reading a book, watercolor painting
Gwen Stacy reading a book, surrealism style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, Van Gogh style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, oil painting
A boat sailing leisurely along the Seine River with the Eiffel Tower in background by Hokusai, in the style of Ukiyo
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, black and white
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, in cyberpunk style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, animated style
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, watercolor painting
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, surrealism style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, Van Gogh style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, oil painting
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas by Hokusai, in the style of Ukiyo
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, black and white
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pixel art
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, in cyberpunk style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, animated style
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, watercolor painting
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, surrealism style
An astronaut flying in space, Van Gogh style
An astronaut flying in space, oil painting
An astronaut flying in space by Hokusai, in the style of Ukiyo
An astronaut flying in space, black and white
An astronaut flying in space, pixel art
An astronaut flying in space, in cyberpunk style
An astronaut flying in space, animated style
An astronaut flying in space, watercolor painting
An astronaut flying in space, surrealism style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, Van Gogh style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, oil painting
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks by Hokusai, in the style of Ukiyo
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, black and white
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pixel art
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, in cyberpunk style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, animated style
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, watercolor painting
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, surrealism style

+ 0
- 85
assets/texts/VBench/prompts_per_dimension/color.txt View File

@@ -1,85 +0,0 @@
a red bicycle
a green bicycle
a blue bicycle
a yellow bicycle
an orange bicycle
a purple bicycle
a pink bicycle
a black bicycle
a white bicycle
a red car
a green car
a blue car
a yellow car
an orange car
a purple car
a pink car
a black car
a white car
a red bird
a green bird
a blue bird
a yellow bird
an orange bird
a purple bird
a pink bird
a black bird
a white bird
a black cat
a white cat
an orange cat
a yellow cat
a red umbrella
a green umbrella
a blue umbrella
a yellow umbrella
an orange umbrella
a purple umbrella
a pink umbrella
a black umbrella
a white umbrella
a red suitcase
a green suitcase
a blue suitcase
a yellow suitcase
an orange suitcase
a purple suitcase
a pink suitcase
a black suitcase
a white suitcase
a red bowl
a green bowl
a blue bowl
a yellow bowl
an orange bowl
a purple bowl
a pink bowl
a black bowl
a white bowl
a red chair
a green chair
a blue chair
a yellow chair
an orange chair
a purple chair
a pink chair
a black chair
a white chair
a red clock
a green clock
a blue clock
a yellow clock
an orange clock
a purple clock
a pink clock
a black clock
a white clock
a red vase
a green vase
a blue vase
a yellow vase
an orange vase
a purple vase
a pink vase
a black vase
a white vase

+ 0
- 100
assets/texts/VBench/prompts_per_dimension/human_action.txt View File

@@ -1,100 +0,0 @@
A person is riding a bike
A person is marching
A person is roller skating
A person is tasting beer
A person is clapping
A person is drawing
A person is petting animal (not cat)
A person is eating watermelon
A person is playing harp
A person is wrestling
A person is riding scooter
A person is sweeping floor
A person is skateboarding
A person is dunking basketball
A person is playing flute
A person is stretching leg
A person is tying tie
A person is skydiving
A person is shooting goal (soccer)
A person is playing piano
A person is finger snapping
A person is canoeing or kayaking
A person is laughing
A person is digging
A person is clay pottery making
A person is shooting basketball
A person is bending back
A person is shaking hands
A person is bandaging
A person is push up
A person is catching or throwing frisbee
A person is playing trumpet
A person is flying kite
A person is filling eyebrows
A person is shuffling cards
A person is folding clothes
A person is smoking
A person is tai chi
A person is squat
A person is playing controller
A person is throwing axe
A person is giving or receiving award
A person is air drumming
A person is taking a shower
A person is planting trees
A person is sharpening knives
A person is robot dancing
A person is rock climbing
A person is hula hooping
A person is writing
A person is bungee jumping
A person is pushing cart
A person is cleaning windows
A person is cutting watermelon
A person is cheerleading
A person is washing hands
A person is ironing
A person is cutting nails
A person is hugging
A person is trimming or shaving beard
A person is jogging
A person is making bed
A person is washing dishes
A person is grooming dog
A person is doing laundry
A person is knitting
A person is reading book
A person is baby waking up
A person is massaging legs
A person is brushing teeth
A person is crawling baby
A person is motorcycling
A person is driving car
A person is sticking tongue out
A person is shaking head
A person is sword fighting
A person is doing aerobics
A person is strumming guitar
A person is riding or walking with horse
A person is archery
A person is catching or throwing baseball
A person is playing chess
A person is rock scissors paper
A person is using computer
A person is arranging flowers
A person is bending metal
A person is ice skating
A person is climbing a rope
A person is crying
A person is dancing ballet
A person is getting a haircut
A person is running on treadmill
A person is kissing
A person is counting money
A person is barbequing
A person is peeling apples
A person is milking cow
A person is shining shoes
A person is making snowman
A person is sailing

+ 0
- 82
assets/texts/VBench/prompts_per_dimension/multiple_objects.txt View File

@@ -1,82 +0,0 @@
a bird and a cat
a cat and a dog
a dog and a horse
a horse and a sheep
a sheep and a cow
a cow and an elephant
an elephant and a bear
a bear and a zebra
a zebra and a giraffe
a giraffe and a bird
a chair and a couch
a couch and a potted plant
a potted plant and a tv
a tv and a laptop
a laptop and a remote
a remote and a keyboard
a keyboard and a cell phone
a cell phone and a book
a book and a clock
a clock and a backpack
a backpack and an umbrella
an umbrella and a handbag
a handbag and a tie
a tie and a suitcase
a suitcase and a vase
a vase and scissors
scissors and a teddy bear
a teddy bear and a frisbee
a frisbee and skis
skis and a snowboard
a snowboard and a sports ball
a sports ball and a kite
a kite and a baseball bat
a baseball bat and a baseball glove
a baseball glove and a skateboard
a skateboard and a surfboard
a surfboard and a tennis racket
a tennis racket and a bottle
a bottle and a chair
an airplane and a train
a train and a boat
a boat and an airplane
a bicycle and a car
a car and a motorcycle
a motorcycle and a bus
a bus and a traffic light
a traffic light and a fire hydrant
a fire hydrant and a stop sign
a stop sign and a parking meter
a parking meter and a truck
a truck and a bicycle
a toilet and a hair drier
a hair drier and a toothbrush
a toothbrush and a sink
a sink and a toilet
a wine glass and a chair
a cup and a couch
a fork and a potted plant
a knife and a tv
a spoon and a laptop
a bowl and a remote
a banana and a keyboard
an apple and a cell phone
a sandwich and a book
an orange and a clock
broccoli and a backpack
a carrot and an umbrella
a hot dog and a handbag
a pizza and a tie
a donut and a suitcase
a cake and a vase
an oven and scissors
a toaster and a teddy bear
a microwave and a frisbee
a refrigerator and skis
a bicycle and an airplane
a car and a train
a motorcycle and a boat
a person and a toilet
a person and a hair drier
a person and a toothbrush
a person and a sink

+ 0
- 79
assets/texts/VBench/prompts_per_dimension/object_class.txt View File

@@ -1,79 +0,0 @@
a person
a bicycle
a car
a motorcycle
an airplane
a bus
a train
a truck
a boat
a traffic light
a fire hydrant
a stop sign
a parking meter
a bench
a bird
a cat
a dog
a horse
a sheep
a cow
an elephant
a bear
a zebra
a giraffe
a backpack
an umbrella
a handbag
a tie
a suitcase
a frisbee
skis
a snowboard
a sports ball
a kite
a baseball bat
a baseball glove
a skateboard
a surfboard
a tennis racket
a bottle
a wine glass
a cup
a fork
a knife
a spoon
a bowl
a banana
an apple
a sandwich
an orange
broccoli
a carrot
a hot dog
a pizza
a donut
a cake
a chair
a couch
a potted plant
a bed
a dining table
a toilet
a tv
a laptop
a remote
a keyboard
a cell phone
a microwave
an oven
a toaster
a sink
a refrigerator
a book
a clock
a vase
scissors
a teddy bear
a hair drier
a toothbrush

+ 0
- 93
assets/texts/VBench/prompts_per_dimension/overall_consistency.txt View File

@@ -1,93 +0,0 @@
Close up of grapes on a rotating table.
Turtle swimming in ocean.
A storm trooper vacuuming the beach.
A panda standing on a surfboard in the ocean in sunset.
An astronaut feeding ducks on a sunny afternoon, reflection from the water.
Two pandas discussing an academic paper.
Sunset time lapse at the beach with moving clouds and colors in the sky.
A fat rabbit wearing a purple robe walking through a fantasy landscape.
A koala bear playing piano in the forest.
An astronaut flying in space.
Fireworks.
An animated painting of fluffy white clouds moving in sky.
Flying through fantasy landscapes.
A bigfoot walking in the snowstorm.
A squirrel eating a burger.
A cat wearing sunglasses and working as a lifeguard at a pool.
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks.
Splash of turquoise water in extreme slow motion, alpha channel included.
an ice cream is melting on the table.
a drone flying over a snowy forest.
a shark is swimming in the ocean.
Aerial panoramic video from a drone of a fantasy land.
a teddy bear is swimming in the ocean.
time lapse of sunrise on mars.
golden fish swimming in the ocean.
An artist brush painting on a canvas close up.
A drone view of celebration with Christmas tree and fireworks, starry sky - background.
happy dog wearing a yellow turtleneck, studio, portrait, facing camera, dark background
Origami dancers in white paper, 3D render, on white background, studio shot, dancing modern dance.
Campfire at night in a snowy forest with starry sky in the background.
a fantasy landscape
A 3D model of a 1800s victorian house.
this is how I do makeup in the morning.
A raccoon that looks like a turtle, digital art.
Robot dancing in Times Square.
Busy freeway at night.
Balloon full of water exploding in extreme slow motion.
An astronaut is riding a horse in the space in a photorealistic style.
Macro slo-mo. Slow motion cropped closeup of roasted coffee beans falling into an empty bowl.
Sewing machine, old sewing machine working.
Motion colour drop in water, ink swirling in water, colourful ink in water, abstraction fancy dream cloud of ink.
Few big purple plums rotating on the turntable. water drops appear on the skin during rotation. isolated on the white background. close-up. macro.
Vampire makeup face of beautiful girl, red contact lenses.
Ashtray full of butts on table, smoke flowing on black background, close-up
Pacific coast, carmel by the sea ocean and waves.
A teddy bear is playing drum kit in NYC Times Square.
A corgi is playing drum kit.
An Iron man is playing the electronic guitar, high electronic guitar.
A raccoon is playing the electronic guitar.
A boat sailing leisurely along the Seine River with the Eiffel Tower in background by Vincent van Gogh
A corgi's head depicted as an explosion of a nebula
A fantasy landscape
A future where humans have achieved teleportation technology
A jellyfish floating through the ocean, with bioluminescent tentacles
A Mars rover moving on Mars
A panda drinking coffee in a cafe in Paris
A space shuttle launching into orbit, with flames and smoke billowing out from the engines
A steam train moving on a mountainside
A super cool giant robot in Cyberpunk Beijing
A tropical beach at sunrise, with palm trees and crystal-clear water in the foreground
Cinematic shot of Van Gogh's selfie, Van Gogh style
Gwen Stacy reading a book
Iron Man flying in the sky
The bund Shanghai, oil painting
Yoda playing guitar on the stage
A beautiful coastal beach in spring, waves lapping on sand by Hokusai, in the style of Ukiyo
A beautiful coastal beach in spring, waves lapping on sand by Vincent van Gogh
A boat sailing leisurely along the Seine River with the Eiffel Tower in background
A car moving slowly on an empty street, rainy evening
A cat eating food out of a bowl
A cat wearing sunglasses at a pool
A confused panda in calculus class
A cute fluffy panda eating Chinese food in a restaurant
A cute happy Corgi playing in park, sunset
A cute raccoon playing guitar in a boat on the ocean
A happy fuzzy panda playing guitar nearby a campfire, snow mountain in the background
A lightning striking atop of eiffel tower, dark clouds in the sky
A modern art museum, with colorful paintings
A panda cooking in the kitchen
A panda playing on a swing set
A polar bear is playing guitar
A raccoon dressed in suit playing the trumpet, stage background
A robot DJ is playing the turntable, in heavy raining futuristic tokyo rooftop cyberpunk night, sci-fi, fantasy
A shark swimming in clear Caribbean ocean
A super robot protecting city
A teddy bear washing the dishes
An epic tornado attacking above a glowing city at night, the tornado is made of smoke
An oil painting of a couple in formal evening wear going home get caught in a heavy downpour with umbrellas
Clown fish swimming through the coral reef
Hyper-realistic spaceship landing on Mars
The bund Shanghai, vibrant color
Vincent van Gogh is painting in the room
Yellow flowers swing in the wind

+ 0
- 86
assets/texts/VBench/prompts_per_dimension/scene.txt View File

@@ -1,86 +0,0 @@
alley
amusement park
aquarium
arch
art gallery
bathroom
bakery shop
ballroom
bar
barn
basement
beach
bedroom
bridge
botanical garden
cafeteria
campsite
campus
carrousel
castle
cemetery
classroom
cliff
crosswalk
construction site
corridor
courtyard
desert
downtown
driveway
farm
food court
football field
forest road
fountain
gas station
glacier
golf course
indoor gymnasium
harbor
highway
hospital
house
iceberg
industrial area
jail cell
junkyard
kitchen
indoor library
lighthouse
laboratory
mansion
marsh
mountain
indoor movie theater
indoor museum
music studio
nursery
ocean
office
palace
parking lot
pharmacy
phone booth
raceway
restaurant
river
science museum
shower
ski slope
sky
skyscraper
baseball stadium
staircase
street
supermarket
indoor swimming pool
tower
outdoor track
train railway
train station platform
underwater coral reef
valley
volcano
waterfall
windmill

+ 0
- 84
assets/texts/VBench/prompts_per_dimension/spatial_relationship.txt View File

@@ -1,84 +0,0 @@
a bicycle on the left of a car, front view
a car on the right of a motorcycle, front view
a motorcycle on the left of a bus, front view
a bus on the right of a traffic light, front view
a traffic light on the left of a fire hydrant, front view
a fire hydrant on the right of a stop sign, front view
a stop sign on the left of a parking meter, front view
a parking meter on the right of a bench, front view
a bench on the left of a truck, front view
a truck on the right of a bicycle, front view
a bird on the left of a cat, front view
a cat on the right of a dog, front view
a dog on the left of a horse, front view
a horse on the right of a sheep, front view
a sheep on the left of a cow, front view
a cow on the right of an elephant, front view
an elephant on the left of a bear, front view
a bear on the right of a zebra, front view
a zebra on the left of a giraffe, front view
a giraffe on the right of a bird, front view
a bottle on the left of a wine glass, front view
a wine glass on the right of a cup, front view
a cup on the left of a fork, front view
a fork on the right of a knife, front view
a knife on the left of a spoon, front view
a spoon on the right of a bowl, front view
a bowl on the left of a bottle, front view
a potted plant on the left of a remote, front view
a remote on the right of a clock, front view
a clock on the left of a vase, front view
a vase on the right of scissors, front view
scissors on the left of a teddy bear, front view
a teddy bear on the right of a potted plant, front view
a frisbee on the left of a sports ball, front view
a sports ball on the right of a baseball bat, front view
a baseball bat on the left of a baseball glove, front view
a baseball glove on the right of a tennis racket, front view
a tennis racket on the left of a frisbee, front view
a toilet on the left of a hair drier, front view
a hair drier on the right of a toothbrush, front view
a toothbrush on the left of a sink, front view
a sink on the right of a toilet, front view
a chair on the left of a couch, front view
a couch on the right of a bed, front view
a bed on the left of a tv, front view
a tv on the right of a dining table, front view
a dining table on the left of a chair, front view
an airplane on the left of a train, front view
a train on the right of a boat, front view
a boat on the left of an airplane, front view
an oven on the top of a toaster, front view
an oven on the bottom of a toaster, front view
a toaster on the top of a microwave, front view
a toaster on the bottom of a microwave, front view
a microwave on the top of an oven, front view
a microwave on the bottom of an oven, front view
a banana on the top of an apple, front view
a banana on the bottom of an apple, front view
an apple on the top of a sandwich, front view
an apple on the bottom of a sandwich, front view
a sandwich on the top of an orange, front view
a sandwich on the bottom of an orange, front view
an orange on the top of a carrot, front view
an orange on the bottom of a carrot, front view
a carrot on the top of a hot dog, front view
a carrot on the bottom of a hot dog, front view
a hot dog on the top of a pizza, front view
a hot dog on the bottom of a pizza, front view
a pizza on the top of a donut, front view
a pizza on the bottom of a donut, front view
a donut on the top of broccoli, front view
a donut on the bottom of broccoli, front view
broccoli on the top of a banana, front view
broccoli on the bottom of a banana, front view
skis on the top of a snowboard, front view
skis on the bottom of a snowboard, front view
a snowboard on the top of a kite, front view
a snowboard on the bottom of a kite, front view
a kite on the top of a skateboard, front view
a kite on the bottom of a skateboard, front view
a skateboard on the top of a surfboard, front view
a skateboard on the bottom of a surfboard, front view
a surfboard on the top of skis, front view
a surfboard on the bottom of skis, front view

+ 0
- 72
assets/texts/VBench/prompts_per_dimension/subject_consistency.txt View File

@@ -1,72 +0,0 @@
a person swimming in ocean
a person giving a presentation to a room full of colleagues
a person washing the dishes
a person eating a burger
a person walking in the snowstorm
a person drinking coffee in a cafe
a person playing guitar
a bicycle leaning against a tree
a bicycle gliding through a snowy field
a bicycle slowing down to stop
a bicycle accelerating to gain speed
a car stuck in traffic during rush hour
a car turning a corner
a car slowing down to stop
a car accelerating to gain speed
a motorcycle cruising along a coastal highway
a motorcycle turning a corner
a motorcycle slowing down to stop
a motorcycle gliding through a snowy field
a motorcycle accelerating to gain speed
an airplane soaring through a clear blue sky
an airplane taking off
an airplane landing smoothly on a runway
an airplane accelerating to gain speed
a bus turning a corner
a bus stuck in traffic during rush hour
a bus accelerating to gain speed
a train speeding down the tracks
a train crossing over a tall bridge
a train accelerating to gain speed
a truck turning a corner
a truck anchored in a tranquil bay
a truck stuck in traffic during rush hour
a truck slowing down to stop
a truck accelerating to gain speed
a boat sailing smoothly on a calm lake
a boat slowing down to stop
a boat accelerating to gain speed
a bird soaring gracefully in the sky
a bird building a nest from twigs and leaves
a bird flying over a snowy forest
a cat grooming itself meticulously with its tongue
a cat playing in park
a cat drinking water
a cat running happily
a dog enjoying a peaceful walk
a dog playing in park
a dog drinking water
a dog running happily
a horse bending down to drink water from a river
a horse galloping across an open field
a horse taking a peaceful walk
a horse running to join a herd of its kind
a sheep bending down to drink water from a river
a sheep taking a peaceful walk
a sheep running to join a herd of its kind
a cow bending down to drink water from a river
a cow chewing cud while resting in a tranquil barn
a cow running to join a herd of its kind
an elephant spraying itself with water using its trunk to cool down
an elephant taking a peaceful walk
an elephant running to join a herd of its kind
a bear catching a salmon in its powerful jaws
a bear sniffing the air for scents of food
a bear climbing a tree
a bear hunting for prey
a zebra bending down to drink water from a river
a zebra running to join a herd of its kind
a zebra taking a peaceful walk
a giraffe bending down to drink water from a river
a giraffe taking a peaceful walk
a giraffe running to join a herd of its kind

+ 0
- 75
assets/texts/VBench/prompts_per_dimension/temporal_flickering.txt View File

@@ -1,75 +0,0 @@
In a still frame, a stop sign
a toilet, frozen in time
a laptop, frozen in time
A tranquil tableau of alley
A tranquil tableau of bar
A tranquil tableau of barn
A tranquil tableau of bathroom
A tranquil tableau of bedroom
A tranquil tableau of cliff
In a still frame, courtyard
In a still frame, gas station
A tranquil tableau of house
indoor gymnasium, frozen in time
A tranquil tableau of indoor library
A tranquil tableau of kitchen
A tranquil tableau of palace
In a still frame, parking lot
In a still frame, phone booth
A tranquil tableau of restaurant
A tranquil tableau of tower
A tranquil tableau of a bowl
A tranquil tableau of an apple
A tranquil tableau of a bench
A tranquil tableau of a bed
A tranquil tableau of a chair
A tranquil tableau of a cup
A tranquil tableau of a dining table
In a still frame, a pear
A tranquil tableau of a bunch of grapes
A tranquil tableau of a bowl on the kitchen counter
A tranquil tableau of a beautiful, handcrafted ceramic bowl
A tranquil tableau of an antique bowl
A tranquil tableau of an exquisite mahogany dining table
A tranquil tableau of a wooden bench in the park
A tranquil tableau of a beautiful wrought-iron bench surrounded by blooming flowers
In a still frame, a park bench with a view of the lake
A tranquil tableau of a vintage rocking chair was placed on the porch
A tranquil tableau of the jail cell was small and dimly lit, with cold, steel bars
A tranquil tableau of the phone booth was tucked away in a quiet alley
a dilapidated phone booth stood as a relic of a bygone era on the sidewalk, frozen in time
A tranquil tableau of the old red barn stood weathered and iconic against the backdrop of the countryside
A tranquil tableau of a picturesque barn was painted a warm shade of red and nestled in a picturesque meadow
In a still frame, within the desolate desert, an oasis unfolded, characterized by the stoic presence of palm trees and a motionless, glassy pool of water
In a still frame, the Parthenon's majestic Doric columns stand in serene solitude atop the Acropolis, framed by the tranquil Athenian landscape
In a still frame, the Temple of Hephaestus, with its timeless Doric grace, stands stoically against the backdrop of a quiet Athens
In a still frame, the ornate Victorian streetlamp stands solemnly, adorned with intricate ironwork and stained glass panels
A tranquil tableau of the Stonehenge presented itself as an enigmatic puzzle, each colossal stone meticulously placed against the backdrop of tranquility
In a still frame, in the vast desert, an oasis nestled among dunes, featuring tall palm trees and an air of serenity
static view on a desert scene with an oasis, palm trees, and a clear, calm pool of water
A tranquil tableau of an ornate Victorian streetlamp standing on a cobblestone street corner, illuminating the empty night
A tranquil tableau of a tranquil lakeside cabin nestled among tall pines, its reflection mirrored perfectly in the calm water
In a still frame, a vintage gas lantern, adorned with intricate details, gracing a historic cobblestone square
In a still frame, a tranquil Japanese tea ceremony room, with tatami mats, a delicate tea set, and a bonsai tree in the corner
A tranquil tableau of the Parthenon stands resolute in its classical elegance, a timeless symbol of Athens' cultural legacy
A tranquil tableau of in the heart of Plaka, the neoclassical architecture of the old city harmonizes with the ancient ruins
A tranquil tableau of in the desolate beauty of the American Southwest, Chaco Canyon's ancient ruins whispered tales of an enigmatic civilization that once thrived amidst the arid landscapes
A tranquil tableau of at the edge of the Arabian Desert, the ancient city of Petra beckoned with its enigmatic rock-carved façades
In a still frame, amidst the cobblestone streets, an Art Nouveau lamppost stood tall
A tranquil tableau of in the quaint village square, a traditional wrought-iron streetlamp featured delicate filigree patterns and amber-hued glass panels
A tranquil tableau of the lampposts were adorned with Art Deco motifs, their geometric shapes and frosted glass creating a sense of vintage glamour
In a still frame, in the picturesque square, a Gothic-style lamppost adorned with intricate stone carvings added a touch of medieval charm to the setting
In a still frame, in the heart of the old city, a row of ornate lantern-style streetlamps bathed the narrow alleyway in a warm, welcoming light
A tranquil tableau of in the heart of the Utah desert, a massive sandstone arch spanned the horizon
A tranquil tableau of in the Arizona desert, a massive stone bridge arched across a rugged canyon
A tranquil tableau of in the corner of the minimalist tea room, a bonsai tree added a touch of nature's beauty to the otherwise simple and elegant space
In a still frame, amidst the hushed ambiance of the traditional tea room, a meticulously arranged tea set awaited, with porcelain cups, a bamboo whisk
In a still frame, nestled in the Zen garden, a rustic teahouse featured tatami seating and a traditional charcoal brazier
A tranquil tableau of a country estate's library featured elegant wooden shelves
A tranquil tableau of beneath the shade of a solitary oak tree, an old wooden park bench sat patiently
A tranquil tableau of beside a tranquil pond, a weeping willow tree draped its branches gracefully over the water's surface, creating a serene tableau of reflection and calm
A tranquil tableau of in the Zen garden, a perfectly raked gravel path led to a serene rock garden
In a still frame, a tranquil pond was fringed by weeping cherry trees, their blossoms drifting lazily onto the glassy surface
In a still frame, within the historic library's reading room, rows of antique leather chairs and mahogany tables offered a serene haven for literary contemplation
A tranquil tableau of a peaceful orchid garden showcased a variety of delicate blooms
A tranquil tableau of in the serene courtyard, a centuries-old stone well stood as a symbol of a bygone era, its mossy stones bearing witness to the passage of time

+ 0
- 100
assets/texts/VBench/prompts_per_dimension/temporal_style.txt View File

@@ -1,100 +0,0 @@
A beautiful coastal beach in spring, waves lapping on sand, in super slow motion
A beautiful coastal beach in spring, waves lapping on sand, zoom in
A beautiful coastal beach in spring, waves lapping on sand, zoom out
A beautiful coastal beach in spring, waves lapping on sand, pan left
A beautiful coastal beach in spring, waves lapping on sand, pan right
A beautiful coastal beach in spring, waves lapping on sand, tilt up
A beautiful coastal beach in spring, waves lapping on sand, tilt down
A beautiful coastal beach in spring, waves lapping on sand, with an intense shaking effect
A beautiful coastal beach in spring, waves lapping on sand, featuring a steady and smooth perspective
A beautiful coastal beach in spring, waves lapping on sand, racking focus
The bund Shanghai, in super slow motion
The bund Shanghai, zoom in
The bund Shanghai, zoom out
The bund Shanghai, pan left
The bund Shanghai, pan right
The bund Shanghai, tilt up
The bund Shanghai, tilt down
The bund Shanghai, with an intense shaking effect
The bund Shanghai, featuring a steady and smooth perspective
The bund Shanghai, racking focus
a shark is swimming in the ocean, in super slow motion
a shark is swimming in the ocean, zoom in
a shark is swimming in the ocean, zoom out
a shark is swimming in the ocean, pan left
a shark is swimming in the ocean, pan right
a shark is swimming in the ocean, tilt up
a shark is swimming in the ocean, tilt down
a shark is swimming in the ocean, with an intense shaking effect
a shark is swimming in the ocean, featuring a steady and smooth perspective
a shark is swimming in the ocean, racking focus
A panda drinking coffee in a cafe in Paris, in super slow motion
A panda drinking coffee in a cafe in Paris, zoom in
A panda drinking coffee in a cafe in Paris, zoom out
A panda drinking coffee in a cafe in Paris, pan left
A panda drinking coffee in a cafe in Paris, pan right
A panda drinking coffee in a cafe in Paris, tilt up
A panda drinking coffee in a cafe in Paris, tilt down
A panda drinking coffee in a cafe in Paris, with an intense shaking effect
A panda drinking coffee in a cafe in Paris, featuring a steady and smooth perspective
A panda drinking coffee in a cafe in Paris, racking focus
A cute happy Corgi playing in park, sunset, in super slow motion
A cute happy Corgi playing in park, sunset, zoom in
A cute happy Corgi playing in park, sunset, zoom out
A cute happy Corgi playing in park, sunset, pan left
A cute happy Corgi playing in park, sunset, pan right
A cute happy Corgi playing in park, sunset, tilt up
A cute happy Corgi playing in park, sunset, tilt down
A cute happy Corgi playing in park, sunset, with an intense shaking effect
A cute happy Corgi playing in park, sunset, featuring a steady and smooth perspective
A cute happy Corgi playing in park, sunset, racking focus
Gwen Stacy reading a book, in super slow motion
Gwen Stacy reading a book, zoom in
Gwen Stacy reading a book, zoom out
Gwen Stacy reading a book, pan left
Gwen Stacy reading a book, pan right
Gwen Stacy reading a book, tilt up
Gwen Stacy reading a book, tilt down
Gwen Stacy reading a book, with an intense shaking effect
Gwen Stacy reading a book, featuring a steady and smooth perspective
Gwen Stacy reading a book, racking focus
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, in super slow motion
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, zoom in
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, zoom out
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pan left
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pan right
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, tilt up
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, tilt down
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, with an intense shaking effect
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, featuring a steady and smooth perspective
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, racking focus
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, in super slow motion
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, zoom in
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, zoom out
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pan left
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, pan right
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, tilt up
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, tilt down
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, with an intense shaking effect
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, featuring a steady and smooth perspective
A couple in formal evening wear going home get caught in a heavy downpour with umbrellas, racking focus
An astronaut flying in space, in super slow motion
An astronaut flying in space, zoom in
An astronaut flying in space, zoom out
An astronaut flying in space, pan left
An astronaut flying in space, pan right
An astronaut flying in space, tilt up
An astronaut flying in space, tilt down
An astronaut flying in space, with an intense shaking effect
An astronaut flying in space, featuring a steady and smooth perspective
An astronaut flying in space, racking focus
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, in super slow motion
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, zoom in
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, zoom out
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pan left
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, pan right
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, tilt up
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, tilt down
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, with an intense shaking effect
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, featuring a steady and smooth perspective
Snow rocky mountains peaks canyon. snow blanketed rocky mountains surround and shadow deep canyons. the canyons twist and bend through the high elevated mountain peaks, racking focus

+ 0
- 5
assets/texts/celebrities_interaction.txt View File

@@ -1,5 +0,0 @@
Donald Trump and Kamala Harris are hugging each other and turning in rounds.
Donald Trump and Kamala Harris are fighting each other.
Kamala Harris slapped Donald Trump.
Donald Trump is kissing a random girl.
Donald Trump and Kamala Harris are kissing.

+ 9
- 0
assets/texts/example.csv View File

@@ -0,0 +1,9 @@
text
"Imagine a cyberpunk close-up shot capturing the upper body of a character with a melancholic demeanor. The subject is gesturing with one hand while shaking the head, showcasing natural body language. The background features a vibrant carnival, complementing the character's pose. The lighting is dim and moody, emphasizing the contours of their face and upper body. The camera subtly pans or zooms, drawing attention to the harmony between expression, posture, and setting."
"A sleek red sports car speeds through a winding mountain road, its engine roaring against the backdrop of towering snow-capped peaks. The sunlight glints off the polished surface, creating dazzling reflections. The camera pans to capture the lush greenery surrounding the road. The atmosphere is exhilarating, with a cinematic style emphasizing speed and adventure. The lighting is golden, suggesting early morning or late afternoon."
"A group of fluffy baby chicks huddle together under a heat lamp in a rustic barn. Their soft peeping fills the air as they nudge each other for warmth. The wooden floor beneath them is strewn with straw, and the gentle light creates a cozy, heartwarming atmosphere. The video captures their tiny, detailed movements in a close-up, realistic style."
"A black-and-white film of a pianist playing in an empty theater. His fingers move deftly across the keys, the music echoing in the large, empty hall. Dust motes float in the air, caught in the faint light streaming through the high windows. The grand piano gleams under the spotlight, contrasting with the decaying seats and peeling walls. The atmosphere is haunting and nostalgic."
"A wave of glowing steam crashes into a stone wall, the vapor hissing and swirling as it dissipates."
"A tomato surfing on a piece of lettuce down a waterfall of ranch dressing, with exaggerated surfing moves and creamy wave effects to highlight the 3D animated fun."
"A cheerful panda on a bustling city street, casually playing a violin while sitting on a bench. People passing by stop to enjoy the impromptu performance, and a group of children dance around, clapping their hands to the upbeat tempo. The panda’s paws move swiftly, creating a lively tune that brings a sense of joy and energy to the urban scene."
"A shimmering, crystalline city built into the side of a massive mountain on a distant planet. Waterfalls of liquid light cascade down the cliffs, with hovering bridges connecting the structures. The entire city glows as it absorbs energy from the planet’s core."

+ 2
- 0
assets/texts/i2v.csv View File

@@ -0,0 +1,2 @@
text,ref
"A plump pig wallows in a muddy pond on a rustic farm, its pink snout poking out as it snorts contentedly. The camera captures the pig's playful splashes, sending ripples through the water under the midday sun. Wooden fences and a red barn stand in the background, framed by rolling green hills. The pig's muddy coat glistens in the sunlight, showcasing the simple pleasures of its carefree life.",assets/texts/i2v.png

BIN
assets/texts/i2v.png View File

Before After
Width: 1024  |  Height: 576  |  Size: 746 KiB

+ 0
- 14
assets/texts/i2v/celebrities_interaction.txt View File

@@ -1,14 +0,0 @@
Harris and Trump shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_1.jpeg"}
The female and male politicians shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_1.jpeg"}
Harris and Trump shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_2.jpeg"}
The female and male politicians shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_2.jpeg"}
Harris and Trump shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_3.jpeg"}
The female and male politicians shake hands.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_3.jpeg"}
As Harris and Trump shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_1.jpeg"}
As the female and male politicians shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_1.jpeg"}
As Harris and Trump shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_2.jpeg"}
As the female and male politicians shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_2.jpeg"}
As Harris and Trump shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_3.jpeg"}
As the female and male politicians shake hands, they break into hugging each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_shake_3.jpeg"}
As the woman speaks, the man from the back steps in front and hugs her.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_speech_1.jpeg"}
As the man on the left and woman on the left speak, they suddenly hug each other.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/harris_trump_speech_2.jpeg"}

+ 0
- 10
assets/texts/i2v/multi_human.txt View File

@@ -1,10 +0,0 @@
In a modern kitchen, two chefs are cutting onions on red plastic cutting boards placed on metallic work benches. Both chefs wear black working attire, black caps, and black gloves, meticulously chopping the onions into small pieces. Between them are large bowls filled with boiled, de-shelled eggs. In the background, neatly stacked raw eggs can be seen. A man, likely the supervisor, stands facing away, looking out an open-glass door while holding papers. The scene captures a moment of focused kitchen activity without transitions.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/chef_cut_onion.png"}
Three guys are standing on a side road in front of a Chinese rural village, engaging in an informal interview. The guy on the left, wearing a white T-shirt and blue jeans, is facing the other two, gesturing with his hands as he talks. The middle guy, dressed in a black polo shirt, is nodding and listening attentively. The rightmost guy, in a blue polo shirt, occasionally glances at the camera. Behind them is a large gray stone stacked on a red square pillar, resembling a landmark, with shrubs further in the background. The video focuses on the interaction among the three characters.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/rural_interview.png"}
The video captures a scene in a spacious, modern indoor sheep farm. A group of people stands in the middle passage, observing a black-and-white lamb. Fences dividing sheep into different areas are visible on the left side. The central figure is holding the lamb, with a man on the right trying to pat it, and another man on the left looking at it. A woman in a red-and-white dress is smiling at the lamb from a sideways angle, while another man's partial figure can be seen in the distance. All the men are wearing white long coats, and the lamb is kicking its hind legs while being held firmly by the central figure.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/sheep_farm.png"}
Two singers are walking slowly and singing on a stage during a performance. The male singer, wearing a beige scarf and white top, holds a microphone and sings attentively while holding the female singer's hand. The female singer, dressed in a white dress with long brown curly hair, smiles glamorously. The stage background features steel frames and is otherwise unclear, with an orange light in the background. The video is shot from the front at a slightly low angle, capturing both singers as they face slightly towards the left. The background is dark, but the spotlight illuminates the singers, making their faces and expressions clearly visible.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/couple_singers.png"}
A distinguished musical performance is in progress at a classical concert. In the center, a man in a formal navy coat, white shirt, and cravat holds a violin and addresses the audience. Behind him, a full orchestra of seated musicians prepares with their string and percussion instruments, some smiling and looking towards the central figure. The setting is elegant, with decorative music stands featuring intricate gold designs, indicating a prestigious event. The atmosphere exudes grandeur and professionalism, characteristic of a high-caliber classical music performance. The scene captures a moment of anticipation and readiness before the music begins.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/pre_ochestra.png"}
A dynamic scene at a musical concert featuring two women performing on stage. The woman on the left, with blonde wavy hair, is passionately singing into a golden microphone. She is dressed in a sparkly, embellished yellow outfit with dark glittering details, her eyes partially closed, and her hand raised expressively. The woman on the right, shown in profile, is wearing a sleeveless, glittering dark dress. Her long, wavy hair flows over her shoulders, and she appears composed, possibly preparing for her part or listening attentively. The background, slightly blurred, reveals musicians or orchestra members, enhancing the live performance atmosphere with focused and intense energy.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/dual_female_singers.png"}
A lively moment on a talk show set with the focus on a guest and the host. On the left, the guest, a woman in an elegant black outfit with sparkling details, is gesturing expressively with both hands as she passionately shares a story. Her hair is tied back, and she wears a necklace, adding to her stylish appearance. On the right, the host, in a suit and tie, sits behind a wooden desk, leaning slightly forward with a smile, appearing attentive and engaged. The backdrop showcases a nighttime city skyline, typical of late-night talk shows. The scene captures a warm interaction, highlighting the positive and relaxed mood of their conversation.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/talk_show.png"}
A vibrant scene featuring a talented girl group performing a synchronized dance routine against a backdrop screen displaying soft clouds and a moonlit sky. The group members are dressed in stylish pastel outfits and knee-high boots, radiating energy and confidence. Their dynamic choreography and intricate formations showcase their skill and dedication. Expressive gestures and movements highlight their commitment to the craft. The combination of music, dance, and visual elements creates an electrifying atmosphere that captivates the audience, retaining focus on their performance without scene transitions.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/girl_band_dance.png"}
A scene on a set of urban stairs where two girls are sitting, both intently focused on their mobile phones. The girl on the left is adjusting her hair with one hand while holding her phone with a protective case in the other, capturing a casual yet attentive demeanor. The girl on the right, dressed in a stylish outfit, is completely absorbed in her device. The background shows people walking by, emphasizing the bustling urban environment. Despite the activity around them, the girls are engaged in their own digital worlds. Their body language reflects a blend of relaxation and concentration as they interact with their screens.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/street_girls.png"}
A young girl in a colorful swimsuit and a cute sun hat is playfully covering a boy with sand on a beach. She is holding a small plastic shovel and has a joyful expression, clearly enjoying the activity. The boy, lying on a beach towel in a blue swimsuit and cap, appears relaxed and enjoying the sun. The sandy beach stretches out behind them, with the ocean visible in the background. The scene captures a moment of fun and carefree play, typical of a day at the beach. There is no scene transition, focusing on this single, joyous moment.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/kids_beach.png"}

+ 0
- 20
assets/texts/i2v/prompts_head.txt View File

@@ -1,20 +0,0 @@
An astronaut transitions from being underwater to floating in space.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Astronaut_1.jpg"}
The Eiffel Tower transitions from a daytime scene to a nighttime scene.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Effiel_1.jpeg"}
A woman is seamlessly moving through different scenes with a magical effect.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/MorphCharacters_1.jpg"}
A small kitten is playing in the snow.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cat1.png"}
A person is giving water to a dog, and the dog tilts its head back to drink.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/dog1.png"}
A car is speeding down a narrow country road in a field.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/car1.jpeg"}
Three daffodils quietly bloom, their delicate petals unfurling in serene harmony.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/flower1.png"}
A butterfly, attracted by the fragrance of the flowers, flew over and landed on the scent.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/butterfly1.png"}
A cyclist rides along the waves as they crash against the shore.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cyclist1.png"}
The earth is rotating.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/earth1.png"}
The bread is baking in the oven, slowly expanding as it rises.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/bread1.png"}
The little boy is celebrating his birthday and blowing out the candles.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/candle1.png"}
The airplane is landing at dusk.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/airplane1.png"}
Sharks swim in a circle.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/sea1.png"}
The man is walking along the path.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/walking1.png"}
The cat slipped on a pair of sunglasses.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/catwglass1.png"}
Attractive confident guy happily showing you gesture and winking on camera isolated.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/man1.png"}
Mother covering sleeping daughter with blanket.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/mother1.png"}
Pink Chocolate bar exploding midair against blue background.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/chocolate1.png"}
A guy is freeskate rollerblading.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/freeskate_1.png"}

+ 0
- 10
assets/texts/i2v/prompts_human_i2v_head.txt View File

@@ -1,10 +0,0 @@
The young child with short hair, dressed in a vibrant green and white t-shirt, sits peacefully in a meditative pose, her hands gently clasped together. As she breathes deeply, a subtle sway in her posture hints at a gentle motion, like a leaf dancing in a light breeze. The serene expression on her face remains undisturbed, embodying a tranquil harmony with the natural world around her.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/girl_pray_1.jpeg"}
The video captures a charged moment at a political rally, with Trump in the center raising his fist triumphantly against a backdrop of a waving American flag. He's flanked by serious-looking men in dark suits and sunglasses, suggesting a high-profile campaign event with security present. The scene exudes an atmosphere of patriotic fervor and political enthusiasm, with Trump's gesture and the prominent flag symbolizing themes of American leadership and national pride.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/trump_1.jpeg"}
In this charming outdoor scene, a jubilant toddler revels in a makeshift bathtub on what appears to be a sunny deck or patio. The little one, adorned with a straw hat, sits gleefully in a large white tub, surrounded by potted plants and garden furniture. With mouth wide open in an expression of pure joy, the child seems to be thoroughly enjoying their unconventional bath time, complete with a blue beach ball as a bath toy. The video captures a moment of carefree childhood bliss, blending the fun of bath time with the pleasures of outdoor play on a beautiful day.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/baby_1.jpeg"}
Under the spotlight, Taylor Swift commands the stage with her signature blend of charm and musical prowess, her golden curls catching the light as she leans into the microphone. Her fingers dance across the strings of her ornate acoustic guitar, coaxing out melodies that have the audience hanging on every note. With a sparkle in her eye and a slight smile playing on her lips, she pours her heart into the performance, her voice soaring through the venue and weaving a spell of nostalgia and raw emotion that leaves the crowd breathless and yearning for more.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/swift_1.jpeg"}
Detective Chen prowled the shadowy streets of the city, his keen eyes scanning for clues behind the shield of his reflective aviators. The leather jacket and woolen scarf spoke of a man prepared for both action and the chill of long stakeouts, his composed demeanor masking the razor-sharp mind piecing together the puzzle of his latest case. As he leaned against the weathered stone wall, blending seamlessly into the urban landscape, Chen's senses remained on high alert, ready to spring into action at the slightest hint of his elusive quarry.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/huge_1.jpeg"}
In this brief black and white video clip, Albert Einstein sits at his desk, his piercing gaze fixed intently on a point off-camera. As the scene unfolds, Einstein makes a funny face.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/einstein_1.jpeg"}
In the video clip, a graceful empress, adorned in a golden robe, gently hands over a bamboo scroll to the young emperor, who is deep in thought. The emperor looks up, meeting her gaze with a soft smile, as they exchange a quiet moment of understanding in the grand, candle-lit royal chamber. Behind them, intricately carved wooden panels display ancient calligraphy, creating an atmosphere of wisdom and elegance as they prepare to discuss matters of state.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/royal_1.jpeg"}
In the video clip, the young emperor sits confidently on an intricately carved throne, his expression calm yet commanding as he reads a royal decree. The golden light from nearby lanterns illuminates the rich patterns on his robe, reflecting his authority and the weight of the decisions he must make. Behind him, a backdrop of ancient calligraphy hints at the wisdom of past dynasties, as he prepares to address his court with a decisive proclamation.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/ch_king_1.jpeg"}
In the video clip, Cheng Long stands confidently in front of an iconic cathedral, his fists raised in a poised stance, ready for action. The camera zooms in as he smiles, exuding both calm and determination, before swiftly transitioning into a series of dynamic, graceful moves. With the towering spires of the cathedral in the background, his martial arts sequence blends seamlessly with the historic architecture, creating a striking contrast between tradition and modern agility.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/chenglong_1.jpeg"}
The camera slowly pans around the figure in traditional attire, revealing intricate details of the red garment and gold embroidery. As the person turns, their black hat casts a dramatic shadow against the dimly lit backdrop, adding a mysterious aura to the scene. The soft sound of traditional music begins to play, transporting viewers into a captivating journey through cultural heritage and historical elegance.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/lizhongshua_1.jpeg"}

+ 0
- 20
assets/texts/i2v/prompts_loop.txt View File

@@ -1,20 +0,0 @@
An astronaut transitions from being underwater to floating in space.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Astronaut_1.jpg;/mnt/jfs-hdd2/sora/data/i2v_test_img/Astronaut_2.jpg"}
The Eiffel Tower transitions from a daytime scene to a nighttime scene.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Effiel_1.jpeg;/mnt/jfs-hdd2/sora/data/i2v_test_img/Effiel_2.jpeg"}
A woman is seamlessly moving through different scenes with a magical effect.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/MorphCharacters_1.jpg;/mnt/jfs-hdd2/sora/data/i2v_test_img/MorphCharacters_2.jpg"}
A small kitten is playing in the snow.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cat1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/cat2.png"}
A person is giving water to a dog, and the dog tilts its head back to drink.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/dog1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/dog2.png"}
A car is speeding down a narrow country road in a field.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/car1.jpeg;/mnt/jfs-hdd2/sora/data/i2v_test_img/car2.jpeg"}
Three daffodils quietly bloom, their delicate petals unfurling in serene harmony.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/flower1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/flower2.png"}
A butterfly, attracted by the fragrance of the flowers, flew over and landed on the scent.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/butterfly1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/butterfly2.png"}
A cyclist rides along the waves as they crash against the shore.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cyclist1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/cyclist2.png"}
The earth is rotating.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/earth1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/earth2.png"}
The bread is baking in the oven, slowly expanding as it rises.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/bread1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/bread2.png"}
The little boy is celebrating his birthday and blowing out the candles.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/candle1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/candle2.png"}
The airplane is landing at dusk.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/airplane1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/airplane2.png"}
Sharks swim in a circle.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/sea1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/sea2.png"}
The man is walking along the path.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/walking1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/walking2.png"}
The cat slipped on a pair of sunglasses.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/catwglass1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/catwglass2.png"}
Attractive confident guy happily showing you gesture and winking on camera isolated.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/man1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/man2.png"}
Mother covering sleeping daughter with blanket.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/mother1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/mother2.png"}
Pink Chocolate bar exploding midair against blue background.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/chocolate1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/chocolate2.png"}
A guy is freeskate rollerblading.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/freeskate_1.png;/mnt/jfs-hdd2/sora/data/i2v_test_img/freeskate_2.png"}

+ 0
- 20
assets/texts/i2v/prompts_ori.txt View File

@@ -1,20 +0,0 @@
An astronaut transitions from being underwater to floating in space.
The Eiffel Tower transitions from a daytime scene to a nighttime scene.
A woman is seamlessly moving through different scenes with a magical effect.
A small kitten is playing in the snow.
A person is giving water to a dog, and the dog tilts its head back to drink.
A car is speeding down a narrow country road in a field.
Three daffodils quietly bloom, their delicate petals unfurling in serene harmony.
A butterfly, attracted by the fragrance of the flowers, flew over and landed on the scent.
A cyclist rides along the waves as they crash against the shore.
The earth is rotating.
The bread is baking in the oven, slowly expanding as it rises.
The little boy is celebrating his birthday and blowing out the candles.
The airplane is landing at dusk.
Sharks swim in a circle.
The man is walking along the path.
The cat slipped on a pair of sunglasses.
Attractive confident guy happily showing you gesture and winking on camera isolated.
Mother covering sleeping daughter with blanket.
Pink Chocolate bar exploding midair against blue background.
A guy is freeskate rollerblading.

+ 0
- 20
assets/texts/i2v/prompts_tail.txt View File

@@ -1,20 +0,0 @@
An astronaut transitions from being underwater to floating in space.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Astronaut_2.jpg"}
The Eiffel Tower transitions from a daytime scene to a nighttime scene.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/Effiel_2.jpeg"}
A woman is seamlessly moving through different scenes with a magical effect.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/MorphCharacters_2.jpg"}
A small kitten is playing in the snow.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cat2.png"}
A person is giving water to a dog, and the dog tilts its head back to drink.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/dog2.png"}
A car is speeding down a narrow country road in a field.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/car2.jpeg"}
Three daffodils quietly bloom, their delicate petals unfurling in serene harmony.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/flower2.png"}
A butterfly, attracted by the fragrance of the flowers, flew over and landed on the scent.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/butterfly2.png"}
A cyclist rides along the waves as they crash against the shore.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/cyclist2.png"}
The earth is rotating.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/earth2.png"}
The bread is baking in the oven, slowly expanding as it rises.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/bread2.png"}
The little boy is celebrating his birthday and blowing out the candles.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/candle2.png"}
The airplane is landing at dusk.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/airplane2.png"}
Sharks swim in a circle.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/sea2.png"}
The man is walking along the path.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/walking2.png"}
The cat slipped on a pair of sunglasses.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/catwglass2.png"}
Attractive confident guy happily showing you gesture and winking on camera isolated.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/man2.png"}
Mother covering sleeping daughter with blanket.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/mother2.png"}
Pink Chocolate bar exploding midair against blue background.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/chocolate2.png"}
A guy is freeskate rollerblading.{"reference_path": "/mnt/jfs-hdd2/sora/data/i2v_test_img/freeskate_2.png"}

+ 0
- 8
assets/texts/imagenet_labels.txt View File

@@ -1,8 +0,0 @@
golden retriever
otter
lesser panda
geyser
macaw
valley
balloon
golden panda

+ 0
- 24
assets/texts/internal_prompts_1.txt View File

@@ -1,24 +0,0 @@
A group of dolphins leaping out of the ocean, synchronized and playful, with crystal-clear blue water and a bright sunny sky.
A panda playing the violin on a grand stage, with an elegant backdrop and an attentive audience.
A dragon flying over a medieval village, breathing fire with villagers looking up in awe and fear.
A diverse group of people enjoying a picnic in the park, with a blonde baby playing on a blanket among them.
A child playing fetch with a golden retriever in a sunny backyard, both running and laughing with joy.
A couple dancing under the stars on a quiet beach, with gentle waves lapping at their feet and a full moon illuminating the scene.
A lone hiker standing on the edge of a cliff, looking out over a vast, misty valley at dawn.
A close-up of a person's face showing a range of emotions, with detailed expressions and lighting that highlights the contours of their face.
In ancient China, during the Han Dynasty, Zhaojun was leaving the Han capital of Chang'an, accompanied by a grand entourage. Dressed in luxurious attire, she wore a determined expression on her face. The procession moved along winding roads, heading towards the Xiongnu territory in the north. The atmosphere was solemn, filled with the sadness of parting.
In ancient China, the emperor attended court, accompanied by ministers and palace attendants. The emperor was discussing important state affairs with the ministers in the imperial court.
In early 20th century Paris, Marie Curie was conducting research on radioactivity in her laboratory. The lab was filled with various scientific instruments, and Curie was focused on her experiments. The atmosphere was charged with the rigor and passion of scientific exploration.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass. In the distance, on a park bench, a little girl is reading a book.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass. The camera slowly pans to a little girl dressed in colorful summer clothes, who is crouched beside the cat, happily playing with it.
The car moves along the road, with the wheels creating friction as they make contact with the ground.
The pendulum hanging from the clock swings periodically under the influence of gravity.
The two gases mix slowly and gracefully. The first gas is a light blue, while the second gas is bright green. As these gases converge, they create beautiful gradients and dynamic ripples in the air. The camera captures the details of the gas mixing, showcasing the various color transitions, bubbles, and flow effects.
On the vast prairie, an antelope is running at high speed. The camera focuses on its strong muscles, which rapidly shift between tension and relaxation with each stride. Its powerful limbs propel it forward. The wind on the prairie rustles the antelope's fur, and the entire scene is filled with natural vitality and a sense of freedom.
On a tranquil night, a night-blooming cereus quietly opens under the moonlight. In a softly focused shot, the petals slowly unfurl, revealing the pure white stamen. The fragrance of the flower fills the air, and the fleeting yet beautiful moment is captured with delicate detail. The atmosphere is serene and mysterious.
On a calm lake, an orange rubber duck floats gently, creating small ripples on the water's surface. Under the sunlight, the lake water displays a soft blue-green hue. The camera focuses on the small ripples around the rubber duck, showcasing a simple and relaxed dynamic effect.
In a volcanic eruption scene, lava erupts from the volcano's mouth, creating a spectacular fountain. The dynamic flow of the lava combines with the eruptive flames and smoke. The camera captures the high-speed movement of the lava and the dramatic changes in the surrounding environment, showcasing the awe-inspiring power of nature.
On a vibrant dance floor, several people are dancing. The camera moves with their steps, capturing the swift movements and jumps, with the background lights shifting and colors blending. The video showcases the fluidity of the dancers' movements and their sense of rhythm.
In a skydiving scene, the camera starts from the skydiver's perspective, rapidly descending through the clouds. It captures the free-fall motion of the skydiver, the changing airflow, and the passage through the mist. The skydiver performs spins and flips in the air, forming impressive formations with other jumpers. The background music is exhilarating and dynamic, enhancing the thrill of the skydiving experience.
On an extreme mountain bike race course, the camera tracks a rider navigating through rugged, uneven terrain. The rider swiftly descends steep slopes, jumps, and makes sharp turns. The camera captures the details of each bump, leap, and turn, as well as the rider's interaction with the natural environment.

+ 0
- 8
assets/texts/internal_prompts_2.txt View File

@@ -1,8 +0,0 @@
In the center of the image, a woman is the main focus. She is elegantly dressed in a long, shimmering silver dress that cascades down to her feet. The dress is sleeveless, revealing her arms and shoulders. Her hair, styled in loose waves, frames her face as she turns her head to the side, her gaze directed towards the camera. She is standing in front of a black car, which is parked on a gray surface. The car is sleek and modern, with a distinctive design that includes a large rear window and a silver grille. The woman's position in front of the car suggests she is the center of attention, perhaps at a car show or a similar event. The background is blurred, but it appears to be an indoor setting with a high ceiling and bright lights. The focus is clearly on the woman and the car, with the background serving to highlight their features. The image captures a moment of elegance and sophistication, with the woman's dress and the car's design both drawing the eye.
In the video, a young woman is the main subject, standing in front of a bush of pink flowers. She is dressed in a traditional Japanese kimono, which is predominantly white with red and gold accents. The kimono features a floral pattern, adding to the overall aesthetic of the scene. The woman's hair is styled in a bun, adorned with a hair accessory that matches her kimono. She is holding a pink flower in her hand, which she is gently touching with her lips. Her gaze is directed off to the side, and her expression is one of contemplation. The background of the video is blurred, but it appears to be a garden or park, suggesting a peaceful and serene setting. The video is shot from a low angle, looking up at the woman, which adds a sense of grandeur to the scene. The camera angle also allows for a clear view of the woman's kimono and the flowers she is holding. The overall composition of the video suggests a theme of beauty and tranquility.
In the image, a young woman is the main subject. She is seated on a gray chair, her legs crossed in a relaxed manner. She is dressed in a gray dress that features a floral pattern, adding a touch of elegance to her appearance. The dress is complemented by a black choker necklace that she is wearing. She is holding a white phone in her hand, which she is using to take a selfie. The phone is held up in front of her, capturing her image in the mirror behind her. The mirror reflects the room she is in, which is dimly lit, creating an intimate atmosphere. The woman's pose and the way she is holding the phone suggest that she is in the middle of taking a selfie. The image captures a moment of self-expression and confidence. The woman's outfit, the chair she is sitting on, and the phone she is using are all clearly visible, providing a detailed view of the scene.
The video is a vibrant and colorful music video featuring a female performer. The performer is dressed in a white dress with pink and white accents, and she has long blonde hair. She is dancing in front of a large clock with a red and gold design, which is set against a backdrop of red and white flowers. The performer is holding a pink flower in her hand and appears to be in motion, suggesting she is dancing. The overall style of the video is reminiscent of a fairy tale or fantasy theme, with the use of the clock and flowers adding to the whimsical atmosphere. The performer's outfit and the setting suggest that the video may be related to a specific theme or concept, but the details of this are not provided in the description. The video does not contain any text or additional elements that can be confidently described.
In the image, a young woman is the main subject. She is seated on a wooden bench, her body oriented towards the camera. She is wearing a white top and a blue hat adorned with bunny ears, adding a playful touch to her outfit. In her hands, she holds a white bowl and a spoon, suggesting she might be enjoying a meal or a snack. The bench she is sitting on is positioned in front of a wooden wall, which provides a simple and uncluttered background. The overall scene is casual and relaxed, with the woman appearing to be at ease in her surroundings.
A close-up shot of a couple sharing a gentle kiss in a softly lit room. The couple’s faces are the focus, with their eyes closed and lips meeting in a tender, affectionate moment. The background is softly blurred, with warm, ambient lighting enhancing the intimate atmosphere. The scene captures the connection between the couple, with no other objects or distractions in the frame, emphasizing the simplicity and emotion of the kiss. The overall mood is peaceful and loving, highlighting the deep affection between them.
A serene scene of a couple walking hand in hand along a quiet beach at sunset. The couple is silhouetted against the warm, golden light of the setting sun, which casts long shadows on the sand. The ocean waves gently lap at the shore, creating a rhythmic, soothing background. The sky is a mix of soft oranges, pinks, and purples, adding to the tranquil atmosphere. The couple’s steps are in sync, their hands clasped as they stroll along the water’s edge. The focus is on the connection between them, with no other people or distractions in the frame, emphasizing the peacefulness of their shared moment.
A playful scene featuring a Golden Retriever and a white cat in a cozy living room. The Golden Retriever, with its fluffy golden fur, is lying down on a soft rug, looking up with a friendly expression. Nearby, the white cat is perched gracefully on the armrest of a sofa, its sleek fur contrasting with the dog’s. The two animals are close to each other, with the cat gently reaching out a paw towards the dog, suggesting a moment of gentle interaction. The background includes a few scattered toys and a warm, homey ambiance created by soft lighting. The scene captures the peaceful coexistence and budding friendship between the two pets.

+ 0
- 32
assets/texts/internal_test.txt View File

@@ -1,32 +0,0 @@
A group of dolphins leaping out of the ocean, synchronized and playful, with crystal-clear blue water and a bright sunny sky.
A panda playing the violin on a grand stage, with an elegant backdrop and an attentive audience.
A dragon flying over a medieval village, breathing fire with villagers looking up in awe and fear.
A diverse group of people enjoying a picnic in the park, with a blonde baby playing on a blanket among them.
A child playing fetch with a golden retriever in a sunny backyard, both running and laughing with joy.
A couple dancing under the stars on a quiet beach, with gentle waves lapping at their feet and a full moon illuminating the scene.
A lone hiker standing on the edge of a cliff, looking out over a vast, misty valley at dawn.
A close-up of a person's face with changing emotions, with detailed expressions and lighting that highlights the contours of his or her face.
In ancient China, during the Han Dynasty, Zhaojun was leaving the Han capital of Chang'an, accompanied by a grand entourage. Dressed in luxurious attire, she wore a determined expression on her face. The procession moved along winding roads, heading towards the Xiongnu territory in the north. The atmosphere was solemn, filled with the sadness of parting.
In ancient China, the emperor attended court, accompanied by ministers and palace attendants. The emperor was discussing important state affairs with the ministers in the imperial court.
In early 20th century Paris, Marie Curie was conducting research on radioactivity in her laboratory. The lab was filled with various scientific instruments, and Curie was focused on her experiments. The atmosphere was charged with the rigor and passion of scientific exploration.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass. In the distance, on a park bench, a little girl is reading a book.
In a sunny city park, the camera captures a cat leisurely rolling around on the grass. The camera slowly pans to a little girl dressed in colorful summer clothes, who is crouched beside the cat, happily playing with it.
The car moves along the road, with the wheels creating friction as they make contact with the ground.
The pendulum hanging from the clock swings periodically under the influence of gravity.
The two gases mix slowly and gracefully. The first gas is a light blue, while the second gas is bright green. As these gases converge, they create beautiful gradients and dynamic ripples in the air. The camera captures the details of the gas mixing, showcasing the various color transitions, bubbles, and flow effects.
On the vast prairie, an antelope is running at high speed. The camera focuses on its strong muscles, which rapidly shift between tension and relaxation with each stride. Its powerful limbs propel it forward. The wind on the prairie rustles the antelope's fur, and the entire scene is filled with natural vitality and a sense of freedom.
On a tranquil night, a night-blooming cereus quietly opens under the moonlight. In a softly focused shot, the petals slowly unfurl, revealing the pure white stamen. The fragrance of the flower fills the air, and the fleeting yet beautiful moment is captured with delicate detail. The atmosphere is serene and mysterious.
On a calm lake, an orange rubber duck floats gently, creating small ripples on the water's surface. Under the sunlight, the lake water displays a soft blue-green hue. The camera focuses on the small ripples around the rubber duck, showcasing a simple and relaxed dynamic effect.
In a volcanic eruption scene, lava erupts from the volcano's mouth, creating a spectacular fountain. The dynamic flow of the lava combines with the eruptive flames and smoke. The camera captures the high-speed movement of the lava and the dramatic changes in the surrounding environment, showcasing the awe-inspiring power of nature.
In a vibrant dance floor, several people are dancing. The camera moves with their steps, capturing the swift movements and jumps, with the background lights shifting and colors blending. The video showcases the fluidity of the dancers' movements and their sense of rhythm.
In a skydiving scene, the camera starts from the skydiver's perspective, rapidly descending through the clouds. It captures the free-fall motion of the skydiver, the changing airflow, and the passage through the mist. The skydiver performs spins and flips in the air, forming impressive formations with other jumpers. The background music is exhilarating and dynamic, enhancing the thrill of the skydiving experience.
On an extreme mountain bike race course, the camera tracks a rider navigating through rugged, uneven terrain. The rider swiftly descends steep slopes, jumps, and makes sharp turns. The camera captures the details of each bump, leap, and turn, as well as the rider's interaction with the natural environment.
In the center of the image, a woman is the main focus. She is elegantly dressed in a long, shimmering silver dress that cascades down to her feet. The dress is sleeveless, revealing her arms and shoulders. Her hair, styled in loose waves, frames her face as she turns her head to the side, her gaze directed towards the camera. She is standing in front of a black car, which is parked on a gray surface. The car is sleek and modern, with a distinctive design that includes a large rear window and a silver grille. The woman's position in front of the car suggests she is the center of attention, perhaps at a car show or a similar event. The background is blurred, but it appears to be an indoor setting with a high ceiling and bright lights. The focus is clearly on the woman and the car, with the background serving to highlight their features. The image captures a moment of elegance and sophistication, with the woman's dress and the car's design both drawing the eye.
In the video, a young woman is the main subject, standing in front of a bush of pink flowers. She is dressed in a traditional Japanese kimono, which is predominantly white with red and gold accents. The kimono features a floral pattern, adding to the overall aesthetic of the scene. The woman's hair is styled in a bun, adorned with a hair accessory that matches her kimono. She is holding a pink flower in her hand, which she is gently touching with her lips. Her gaze is directed off to the side, and her expression is one of contemplation. The background of the video is blurred, but it appears to be a garden or park, suggesting a peaceful and serene setting. The video is shot from a low angle, looking up at the woman, which adds a sense of grandeur to the scene. The camera angle also allows for a clear view of the woman's kimono and the flowers she is holding. The overall composition of the video suggests a theme of beauty and tranquility.
In the image, a young woman is the main subject. She is seated on a gray chair, her legs crossed in a relaxed manner. She is dressed in a gray dress that features a floral pattern, adding a touch of elegance to her appearance. The dress is complemented by a black choker necklace that she is wearing. She is holding a white phone in her hand, which she is using to take a selfie. The phone is held up in front of her, capturing her image in the mirror behind her. The mirror reflects the room she is in, which is dimly lit, creating an intimate atmosphere. The woman's pose and the way she is holding the phone suggest that she is in the middle of taking a selfie. The image captures a moment of self-expression and confidence. The woman's outfit, the chair she is sitting on, and the phone she is using are all clearly visible, providing a detailed view of the scene.
The video is a vibrant and colorful music video featuring a female performer. The performer is dressed in a white dress with pink and white accents, and she has long blonde hair. She is dancing in front of a large clock with a red and gold design, which is set against a backdrop of red and white flowers. The performer is holding a pink flower in her hand and appears to be in motion, suggesting she is dancing. The overall style of the video is reminiscent of a fairy tale or fantasy theme, with the use of the clock and flowers adding to the whimsical atmosphere. The performer's outfit and the setting suggest that the video may be related to a specific theme or concept, but the details of this are not provided in the description. The video does not contain any text or additional elements that can be confidently described.
In the image, a young woman is the main subject. She is seated on a wooden bench, her body oriented towards the camera. She is wearing a white top and a blue hat adorned with bunny ears, adding a playful touch to her outfit. In her hands, she holds a white bowl and a spoon, suggesting she might be enjoying a meal or a snack. The bench she is sitting on is positioned in front of a wooden wall, which provides a simple and uncluttered background. The overall scene is casual and relaxed, with the woman appearing to be at ease in her surroundings.
A close-up shot of a couple sharing a gentle kiss in a softly lit room. The couple’s faces are the focus, with their eyes closed and lips meeting in a tender, affectionate moment. The background is softly blurred, with warm, ambient lighting enhancing the intimate atmosphere. The scene captures the connection between the couple, with no other objects or distractions in the frame, emphasizing the simplicity and emotion of the kiss. The overall mood is peaceful and loving, highlighting the deep affection between them.
A serene scene of a couple walking hand in hand along a quiet beach at sunset. The couple is silhouetted against the warm, golden light of the setting sun, which casts long shadows on the sand. The ocean waves gently lap at the shore, creating a rhythmic, soothing background. The sky is a mix of soft oranges, pinks, and purples, adding to the tranquil atmosphere. The couple’s steps are in sync, their hands clasped as they stroll along the water’s edge. The focus is on the connection between them, with no other people or distractions in the frame, emphasizing the peacefulness of their shared moment.
A playful scene featuring a Golden Retriever and a white cat in a cozy living room. The Golden Retriever, with its fluffy golden fur, is lying down on a soft rug, looking up with a friendly expression. Nearby, the white cat is perched gracefully on the armrest of a sofa, its sleek fur contrasting with the dog’s. The two animals are close to each other, with the cat gently reaching out a paw towards the dog, suggesting a moment of gentle interaction. The background includes a few scattered toys and a warm, homey ambiance created by soft lighting. The scene captures the peaceful coexistence and budding friendship between the two pets.

+ 0
- 10
assets/texts/multi_human.txt View File

@@ -1,10 +0,0 @@
In a modern kitchen, two chefs are cutting onions on red plastic cutting boards placed on metallic work benches. Both chefs wear black working attire, black caps, and black gloves, meticulously chopping the onions into small pieces. Between them are large bowls filled with boiled, de-shelled eggs. In the background, neatly stacked raw eggs can be seen. A man, likely the supervisor, stands facing away, looking out an open-glass door while holding papers. The scene captures a moment of focused kitchen activity without transitions.
Three guys are standing on a side road in front of a Chinese rural village, engaging in an informal interview. The guy on the left, wearing a white T-shirt and blue jeans, is facing the other two, gesturing with his hands as he talks. The middle guy, dressed in a black polo shirt, is nodding and listening attentively. The rightmost guy, in a blue polo shirt, occasionally glances at the camera. Behind them is a large gray stone stacked on a red square pillar, resembling a landmark, with shrubs further in the background. The video focuses on the interaction among the three characters.
The video captures a scene in a spacious, modern indoor sheep farm. A group of people stands in the middle passage, observing a black-and-white lamb. Fences dividing sheep into different areas are visible on the left side. The central figure is holding the lamb, with a man on the right trying to pat it, and another man on the left looking at it. A woman in a red-and-white dress is smiling at the lamb from a sideways angle, while another man's partial figure can be seen in the distance. All the men are wearing white long coats, and the lamb is kicking its hind legs while being held firmly by the central figure.
Two singers are walking slowly and singing on a stage during a performance. The male singer, wearing a beige scarf and white top, holds a microphone and sings attentively while holding the female singer's hand. The female singer, dressed in a white dress with long brown curly hair, smiles glamorously. The stage background features steel frames and is otherwise unclear, with an orange light in the background. The video is shot from the front at a slightly low angle, capturing both singers as they face slightly towards the left. The background is dark, but the spotlight illuminates the singers, making their faces and expressions clearly visible.
A distinguished musical performance is in progress at a classical concert. In the center, a man in a formal navy coat, white shirt, and cravat holds a violin and addresses the audience. Behind him, a full orchestra of seated musicians prepares with their string and percussion instruments, some smiling and looking towards the central figure. The setting is elegant, with decorative music stands featuring intricate gold designs, indicating a prestigious event. The atmosphere exudes grandeur and professionalism, characteristic of a high-caliber classical music performance. The scene captures a moment of anticipation and readiness before the music begins.
A dynamic scene at a musical concert featuring two women performing on stage. The woman on the left, with blonde wavy hair, is passionately singing into a golden microphone. She is dressed in a sparkly, embellished yellow outfit with dark glittering details, her eyes partially closed, and her hand raised expressively. The woman on the right, shown in profile, is wearing a sleeveless, glittering dark dress. Her long, wavy hair flows over her shoulders, and she appears composed, possibly preparing for her part or listening attentively. The background, slightly blurred, reveals musicians or orchestra members, enhancing the live performance atmosphere with focused and intense energy.
A lively moment on a talk show set with the focus on a guest and the host. On the left, the guest, a woman in an elegant black outfit with sparkling details, is gesturing expressively with both hands as she passionately shares a story. Her hair is tied back, and she wears a necklace, adding to her stylish appearance. On the right, the host, in a suit and tie, sits behind a wooden desk, leaning slightly forward with a smile, appearing attentive and engaged. The backdrop showcases a nighttime city skyline, typical of late-night talk shows. The scene captures a warm interaction, highlighting the positive and relaxed mood of their conversation.
A vibrant scene featuring a talented girl group performing a synchronized dance routine against a backdrop screen displaying soft clouds and a moonlit sky. The group members are dressed in stylish pastel outfits and knee-high boots, radiating energy and confidence. Their dynamic choreography and intricate formations showcase their skill and dedication. Expressive gestures and movements highlight their commitment to the craft. The combination of music, dance, and visual elements creates an electrifying atmosphere that captivates the audience, retaining focus on their performance without scene transitions.
A scene on a set of urban stairs where two girls are sitting, both intently focused on their mobile phones. The girl on the left is adjusting her hair with one hand while holding her phone with a protective case in the other, capturing a casual yet attentive demeanor. The girl on the right, dressed in a stylish outfit, is completely absorbed in her device. The background shows people walking by, emphasizing the bustling urban environment. Despite the activity around them, the girls are engaged in their own digital worlds. Their body language reflects a blend of relaxation and concentration as they interact with their screens.
A young girl in a colorful swimsuit and a cute sun hat is playfully covering a boy with sand on a beach. She is holding a small plastic shovel and has a joyful expression, clearly enjoying the activity. The boy, lying on a beach towel in a blue swimsuit and cap, appears relaxed and enjoying the sun. The sandy beach stretches out behind them, with the ocean visible in the background. The scene captures a moment of fun and carefree play, typical of a day at the beach. There is no scene transition, focusing on this single, joyous moment.

+ 0
- 8
assets/texts/rand_types.txt View File

@@ -1,8 +0,0 @@
随机风景镜头
随机电影镜头
随机游戏镜头
随机开车镜头
随机动物镜头
随机森林镜头
随机动漫镜头
随机舞蹈镜头

+ 49
- 0
assets/texts/sora.csv View File

@@ -0,0 +1,49 @@
text
"A stylish woman walks down a Tokyo street filled with warm glowing neon and animated city signage. She wears a black leather jacket, a long red dress, and black boots, and carries a black purse. She wears sunglasses and red lipstick. She walks confidently and casually. The street is damp and reflective, creating a mirror effect of the colorful lights. Many pedestrians walk about."
"Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field."
"A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors."
"Drone view of waves crashing against the rugged cliffs along Big Sur's garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff's edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway."
"Animated scene features a close-up of a short fluffy monster kneeling beside a melting red candle. The art style is 3D and realistic, with a focus on lighting and texture. The mood of the painting is one of wonder and curiosity, as the monster gazes at the flame with wide eyes and open mouth. Its pose and expression convey a sense of innocence and playfulness, as if it is exploring the world around it for the first time. The use of warm colors and dramatic lighting further enhances the cozy atmosphere of the image."
"A gorgeously rendered papercraft world of a coral reef, rife with colorful fish and sea creatures."
"This close-up shot of a Victoria crowned pigeon showcases its striking blue plumage and red chest. Its crest is made of delicate, lacy feathers, while its eye is a striking red color. The bird's head is tilted slightly to the side, giving the impression of it looking regal and majestic. The background is blurred, drawing attention to the bird's striking appearance."
Photorealistic closeup video of two pirate ships battling each other as they sail inside a cup of coffee.
"A young man at his 20s is sitting on a piece of cloud in the sky, reading a book."
Historical footage of California during the gold rush.
A close up view of a glass sphere that has a zen garden within it. There is a small dwarf in the sphere who is raking the zen garden and creating patterns in the sand.
"Extreme close up of a 24 year old woman's eye blinking, standing in Marrakech during magic hour, cinematic film shot in 70mm, depth of field, vivid colors, cinematic"
A cartoon kangaroo disco dances.
"A beautiful homemade video showing the people of Lagos, Nigeria in the year 2056. Shot with a mobile phone camera."
A petri dish with a bamboo forest growing within it that has tiny red pandas running around.
"The camera rotates around a large stack of vintage televisions all showing different programs — 1950s sci-fi movies, horror movies, news, static, a 1970s sitcom, etc, set inside a large New York museum gallery."
"3D animation of a small, round, fluffy creature with big, expressive eyes explores a vibrant, enchanted forest. The creature, a whimsical blend of a rabbit and a squirrel, has soft blue fur and a bushy, striped tail. It hops along a sparkling stream, its eyes wide with wonder. The forest is alive with magical elements: flowers that glow and change colors, trees with leaves in shades of purple and silver, and small floating lights that resemble fireflies. The creature stops to interact playfully with a group of tiny, fairy-like beings dancing around a mushroom ring. The creature looks up in awe at a large, glowing tree that seems to be the heart of the forest."
"The camera follows behind a white vintage SUV with a black roof rack as it speeds up a steep dirt road surrounded by pine trees on a steep mountain slope, dust kicks up from its tires, the sunlight shines on the SUV as it speeds along the dirt road, casting a warm glow over the scene. The dirt road curves gently into the distance, with no other cars or vehicles in sight. The trees on either side of the road are redwoods, with patches of greenery scattered throughout. The car is seen from the rear following the curve with ease, making it seem as if it is on a rugged drive through the rugged terrain. The dirt road itself is surrounded by steep hills and mountains, with a clear blue sky above with wispy clouds."
Reflections in the window of a train traveling through the Tokyo suburbs.
"A drone camera circles around a beautiful historic church built on a rocky outcropping along the Amalfi Coast, the view showcases historic and magnificent architectural details and tiered pathways and patios, waves are seen crashing against the rocks below as the view overlooks the horizon of the coastal waters and hilly landscapes of the Amalfi Coast Italy, several distant people are seen walking and enjoying vistas on patios of the dramatic ocean views, the warm glow of the afternoon sun creates a magical and romantic feeling to the scene, the view is stunning captured with beautiful photography."
"A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect."
"A flock of paper airplanes flutters through a dense jungle, weaving around trees as if they were migrating birds."
"A cat waking up its sleeping owner demanding breakfast. The owner tries to ignore the cat, but the cat tries new tactics and finally the owner pulls out a secret stash of treats from under the pillow to hold the cat off a little longer."
Borneo wildlife on the Kinabatangan River
A Chinese Lunar New Year celebration video with Chinese Dragon.
Tour of an art gallery with many beautiful works of art in different styles.
"Beautiful, snowy Tokyo city is bustling. The camera moves through the bustling city street, following several people enjoying the beautiful snowy weather and shopping at nearby stalls. Gorgeous sakura petals are flying through the wind along with snowflakes."
A stop motion animation of a flower growing out of the windowsill of a suburban house.
The story of a robot's life in a cyberpunk setting.
"An extreme close-up of a gray-haired man with a beard in his 60s, he is deep in thought pondering the history of the universe as he sits at a cafe in Paris, his eyes focus on people offscreen as they walk as he sits mostly motionless, he is dressed in a wool coat suit coat with a button-down shirt, he wears a brown beret and glasses and has a very professorial appearance, and at the end he offers a subtle closed-mouth smile as if he found the answer to the mystery of life, the lighting is very cinematic with the golden light and the Parisian streets and city in the background, depth of field, cinematic 35mm film."
"A beautiful silhouette animation shows a wolf howling at the moon, feeling lonely, until it finds its pack."
"New York City submerged like Atlantis. Fish, whales, sea turtles and sharks swim through the streets of New York."
"A litter of golden retriever puppies playing in the snow. Their heads pop out of the snow, covered in."
"Step-printing scene of a person running, cinematic film shot in 35mm."
"Five gray wolf pups frolicking and chasing each other around a remote gravel road, surrounded by grass. The pups run and leap, chasing each other, and nipping at each other, playing."
Basketball through hoop then explodes.
"Archeologists discover a generic plastic chair in the desert, excavating and dusting it with great care."
"A grandmother with neatly combed grey hair stands behind a colorful birthday cake with numerous candles at a wood dining room table, expression is one of pure joy and happiness, with a happy glow in her eye. She leans forward and blows out the candles with a gentle puff, the cake has pink frosting and sprinkles and the candles cease to flicker, the grandmother wears a light blue blouse adorned with floral patterns, several happy friends and family sitting at the table can be seen celebrating, out of focus. The scene is beautifully captured, cinematic, showing a 3/4 view of the grandmother and the dining room. Warm color tones and soft lighting enhance the mood."
The camera directly faces colorful buildings in Burano Italy. An adorable Dalmatian looks through a window on a building on the ground floor. Many people are walking and cycling along the canal streets in front of the buildings.
"An adorable happy otter confidently stands on a surfboard wearing a yellow lifejacket, riding along turquoise tropical waters near lush tropical islands, 3D digital render art style."
"This close-up shot of a chameleon showcases its striking color changing capabilities. The background is blurred, drawing attention to the animal's striking appearance."
A corgi vlogging itself in tropical Maui.
"A white and orange tabby cat is seen happily darting through a dense garden, as if chasing something. Its eyes are wide and happy as it jogs forward, scanning the branches, flowers, and leaves as it walks. The path is narrow as it makes its way between all the plants. the scene is captured from a ground-level angle, following the cat closely, giving a low and intimate perspective. The image is cinematic with warm tones and a grainy texture. The scattered daylight between the leaves and plants above creates a warm contrast, accentuating the cat's orange fur. The shot is clear and sharp, with a shallow depth of field."
"Aerial view of Santorini during the blue hour, showcasing the stunning architecture of white Cycladic buildings with blue domes. The caldera views are breathtaking, and the lighting creates a beautiful, serene atmosphere."
"Tiltshift of a construction site filled with workers, equipment, and heavy machinery."
"A giant, towering cloud in the shape of a man looms over the earth. The cloud man shoots lightning bolts down to the earth."
A Samoyed and a Golden Retriever dog are playfully romping through a futuristic neon city at night. The neon lights emitted from the nearby buildings glisten off of their fur.
"The Glenfinnan Viaduct is a historic railway bridge in Scotland, UK, that crosses over the west highland line between the towns of Mallaig and Fort William. It is a stunning sight as a steam train leaves the bridge, traveling over the arch-covered viaduct. The landscape is dotted with lush greenery and rocky mountains, creating a picturesque backdrop for the train journey. The sky is blue and the sun is shining, making for a beautiful day to explore this majestic spot."

+ 0
- 8
assets/texts/t2i_samples.txt View File

@@ -1,8 +0,0 @@
A small cactus with a happy face in the Sahara desert.
Bright scene, aerial view,ancient city, fantasy, gorgeous light, mirror reflection, high detail, wide angle lens.
Nature vs human nature, surreal, UHD, 8k, hyper details, rich colors, photograph.
Poster of a mechanical cat, technical Schematics viewed from front.
Luffy from ONEPIECE, handsome face, fantasy.
Real beautiful woman.
An alpaca made of colorful building blocks, cyberpunk.
artistic

+ 0
- 10
assets/texts/t2i_sigma.txt View File

@@ -1,10 +0,0 @@
Eiffel Tower was Made up of more than 2 million translucent straws to look like a cloud, with the bell tower at the top of the building, Michel installed huge foam-making machines in the forest to blow huge amounts of unpredictable wet clouds in the building's classic architecture.
A gorgeously rendered papercraft world of a coral reef, rife with colorful fish and sea creatures.
Full body shot, a French woman, Photography, French Streets background, backlighting, rim light, Fujifilm.
Close-up photos of models, hazy light and shadow, laser metal hair accessories, soft and beautiful, light gold pupils, white eyelashes, low saturation, real skin details, clear pores and fine lines, light reflection and refraction, ultra-clear, cinematography, award-winning works.
A litter of golden retriever puppies playing in the snow. Their heads pop out of the snow, covered in snow.
Lego model, future rocket station, intricate details, high resolution, unreal engine, UHD
One giant, sharp, metal square mirror in the center of the frame, four young people on the foreground, background sunny palm oil plantation, tropical, realistic style, photography, nostalgic, green tone, mysterious, dreamy, bright color.
Modern luxury contemporary luxury home interiors house, in the style of mimicking ruined materials, ray tracing, haunting houses, and stone, capture the essence of nature, gray and bronze, dynamic outdoor shots.
Over the shoulder game perspective, game screen of Diablo 4, Inside the gorgeous palace is the wet ground, The necromancer knelt before the king, and a horde of skeletons he summoned stood at his side, cinematic light.
A curvy timber house near a sea, designed by Zaha Hadid, represent the image of a cold, modern architecture, at night, white lighting, highly detailed.

+ 0
- 1
assets/texts/t2v_car.txt View File

@@ -1 +0,0 @@
|0|A car driving in the forest.|2|A car driving in the desert.|4|A car driving near the coast.|6|A car driving in the city.|8|A car driving near a mountain.|10|A car driving on the surface of a river.|12|A car driving on the surface of the earth.|14|A car driving in the universe.{"reference_path": "https://cdn.openai.com/tmp/s/interp/d0.mp4", "mask_strategy": "0,0,0,0,16,0.4"}

+ 0
- 12
assets/texts/t2v_demo.txt View File

@@ -1,12 +0,0 @@
a heartfelt moment in a church, where a bride and groom are standing in front of a priest. the bride, clad in a white dress, is holding a bouquet of flowers, while the groom is dressed in a black suit. the priest, standing between them, is holding a book, possibly a bible or a prayer book. the church is adorned with stained glass windows, adding a touch of color to the otherwise solemn setting. the couple, the priest, and the church form the main subjects of the video. the bride and groom are positioned in the foreground, while the priest is slightly behind them. the stained glass windows are located in the background, providing a beautiful backdrop to the scene. the relative positions of the objects suggest a typical church setting, with the priest officiating the ceremony. the bride and groom are the focal point of the video, indicating the importance of their union in this setting. the stained glass windows add a touch of color and artistry to the scene, enhancing the overall aesthetic of the video.
the breathtaking view of the vigelandsparken sculpture park in oslo, norway. the park is nestled amidst a lush forest of trees and is surrounded by towering mountains. the camera angle is from a low angle, providing a unique perspective of the park and its surroundings. the park is bustling with people, some of whom are seen walking on the paths, while others are admiring the sculptures. the sculptures themselves are made of wood and are scattered throughout the park, adding an artistic touch to the natural beauty of the place. the colors in the video are predominantly green and brown, reflecting the natural environment of the park. the overall scene is a harmonious blend of nature and art, with the park serving as a perfect spot for relaxation and contemplation.
a 3d animated scene featuring three animated characters. in the foreground, there is a character with green skin, wearing a red and green striped hat, a brown vest, and a pair of boots. this character is holding a mug and appears to be in a cheerful mood. to the right, there is a character with pink hair and a green crown, wearing a pink and green striped dress and holding a small card. this character is smiling and seems to be in a good mood. on the left, there is a character with brown skin, wearing a green and brown striped hat, a brown vest, and a pair of boots. this character is holding a mug and appears to be in a relaxed state. in the background, there is a colorful and whimsical setting that resembles a cozy room with a window, a fireplace, and a bookshelf. the room has a warm and inviting atmosphere, with a variety of objects and colors that add to the fantastical and playful nature of the scene.
a young man is the central figure, standing in a room with a wooden floor and a window in the background. he is dressed in a blue and white striped shirt, a blue bandana wrapped around his head, and a pair of black gloves. in his hands, he holds a small bottle of medicine, which he appears to be examining closely. his gaze is focused on the bottle, suggesting a sense of curiosity or concern. behind him, a woman is seated on a chair, her attention directed towards the man. she is wearing a white dress adorned with red flowers, and a white hat with a red ribbon. her posture and expression suggest she is observing the man's actions with interest. the room they are in is simple and uncluttered, with the wooden floor and window providing a natural contrast to the indoor setting. the window allows a glimpse of the outside world, where a tree can be seen, adding a touch of nature to the scene. the video captures a moment of quiet interaction between the two characters, with the man's examination of the medicine bottle as the main action.
a bustling scene in a commercial kitchen where a group of chefs are preparing a meal. the kitchen is well equipped with stainless steel appliances and counters, and the chefs are dressed in orange aprons, indicating their professional roles. in the foreground, a chef is seen stirring a large pot filled with a dark liquid, possibly a sauce or soup. the pot is placed on a stove, and the chef is using a long handled spoon to stir the contents. the pot is placed on a stove, and the chef is using a long handled spoon to stir the contents. in the background, another chef is seen preparing a dish on a tray. the dish appears to be a type of sushi, with various ingredients neatly arranged on a bed of rice. the chef is using a pair of chopsticks to pick up a piece of sushi, presumably to inspect its quality or to serve it to a customer. the kitchen is well lit, with bright lights illuminating the workspace and highlighting the chefs' focused expressions.
a bustling scene from a souk in marrakesh, morocco. the perspective is from a low angle, looking up at the vibrant market stalls and the people moving about. the stalls are filled with a variety of goods, including colorful textiles, leather bags, and intricately designed lanterns. the people, dressed in traditional moroccan clothing, add to the lively atmosphere of the souk. the sun is shining brightly, casting a warm glow on the scene and highlighting the rich colors of the market. the video is a lively depiction of daily life in marrakesh, capturing the energy and vibrancy of the city's markets.
we see a young man sitting in the backseat of a car. he is wearing a green shirt and has brown hair. the car is in motion, as suggested by the blurred background. the man is looking out of the window, his gaze directed towards something outside the frame. the interior of the car is visible, with the backseat where the man is sitting and the front seat in the foreground. the car appears to be a sedan, and the man is seated in the middle of the backseat.
a close-up shot of an older man wearing a military uniform, including a cap with a badge, a jacket with epaulettes, and a tie. he appears to be in a room with various mechanical parts and gauges in the background, suggesting a setting related to aviation or machinery. the man has a stern expression on his face and is looking directly at the camera. the style of the video is realistic, with a focus on the man's face and upper body, and the lighting is even, without harsh shadows. the background is out of focus, which draws attention to the man. the overall tone of the video is serious and intense.
a moment in a restaurant where a young woman is seated at a table, engrossed in her meal. she is dressed in a black dress, which contrasts with the vibrant yellow of the booth she is seated on. the table in front of her is adorned with a white tablecloth and holds a plate of food, from which she is eating with chopsticks. the restaurant itself is elegantly decorated with a chandelier hanging from the ceiling, casting a warm glow over the scene. a large fish tank is also visible in the background, adding to the ambiance of the place. the woman appears to be enjoying her meal, her attention fully on the food in front of her. the overall scene paints a picture of a quiet, intimate dining experience.
a vibrant scene of a flowering plant in full bloom. the plant is adorned with pink flowers that are in the process of opening, revealing their delicate petals. the flowers are not alone; they are accompanied by a group of bees, their black bodies contrasting with the pink hues of the flowers. the bees are engaged in the act of pollination, their movements creating a sense of life and activity. the background is a blur of green, suggesting a lush garden or park setting. the overall scene is a beautiful representation of nature in action, with the bees playing a crucial role in the life cycle of the plant.
a close-up shot of a woman sitting on a bed, holding a pastry with both hands. the pastry appears to be a round, golden-brown item with a cross-section design on top. the woman is wearing a white sweater and has long, wavy brown hair. she is looking at the pastry with a smile on her face. the background is blurred but suggests an indoor setting with warm lighting. the style of the video is casual and personal, likely intended for a lifestyle or food blog.
a serene beach scene under a cloudy sky. the sky is filled with clouds, casting a soft light over the scene. the water is a light blue color, gently lapping against the sandy beach. the beach is populated with a few birds, their small forms scattered across the sand. the waves are small, rolling onto the shore in a rhythmic pattern. the overall atmosphere of the video is peaceful and calm, a typical day at the beach.

+ 0
- 7
assets/texts/t2v_latte.txt View File

@@ -1,7 +0,0 @@
Yellow and black tropical fish dart through the sea.
An epic tornado attacking above a glowing city at night.
Slow pan upward of blazing oak fire in an indoor fireplace.
a cat wearing sunglasses and working as a lifeguard at a pool.
Sunset over the sea.
A dog in astronaut suit and sunglasses floating in space.
An astronaut flying in space, 4k, high resolution

+ 0
- 12
assets/texts/t2v_pllava.txt View File

@@ -1,12 +0,0 @@
a close-up shot of a woman standing in a room with a white wall and a plant on the left side. the woman has curly hair and is wearing a green tank top. she is looking to the side with a neutral expression on her face. the lighting in the room is soft and appears to be natural, coming from the left side of the frame. the focus is on the woman, with the background being out of focus. there are no texts or other objects in the video. the style of the video is a simple, candid portrait with a shallow depth of field.
a serene scene of a pond filled with water lilies. the water is a deep blue, providing a striking contrast to the pink and white flowers that float on its surface. the flowers, in full bloom, are the main focus of the video. they are scattered across the pond, with some closer to the camera and others further away, creating a sense of depth. the pond is surrounded by lush greenery, adding a touch of nature to the scene. the video is taken from a low angle, looking up at the flowers, which gives a unique perspective and emphasizes their beauty. the overall composition of the video suggests a peaceful and tranquil setting, likely a garden or a park.
a professional setting where a woman is presenting a slide from a presentation. she is standing in front of a projector screen, which displays a bar chart. the chart is colorful, with bars of different heights, indicating some sort of data comparison. the woman is holding a pointer, which she uses to highlight specific parts of the chart. she is dressed in a white blouse and black pants, and her hair is styled in a bun. the room has a modern design, with a sleek black floor and a white ceiling. the lighting is bright, illuminating the woman and the projector screen. the focus of the image is on the woman and the projector screen, with the background being out of focus. there are no texts visible in the image. the relative positions of the objects suggest that the woman is the main subject of the image, and the projector screen is the object of her attention. the image does not provide any information about the content of the presentation or the context of the meeting.
a bustling city street from the perspective of a car. the car, a sleek black sedan, is in motion, driving down the street. the dashboard of the car is visible in the foreground, providing a view of the road ahead. the street is lined with parked cars on both sides, their colors muted in the bright sunlight. buildings rise on either side of the street, their windows reflecting the sunlight. the sky above is a clear blue, and the sun is shining brightly, casting a warm glow on the scene. the street is busy with pedestrians and other vehicles, adding to the dynamic nature of the scene. the video does not contain any text. the relative positions of the objects suggest a typical city street scene with the car in the foreground, the parked cars on either side, and the buildings in the background. the sunlight illuminates the scene, highlighting the colors and details of the objects. the pedestrians and other vehicles are in motion, adding a sense of life and activity to the scene. the buildings provide a sense of depth and scale to the image. the video does not contain any text or countable objects. the
a serene scene in a park. the sun is shining brightly, casting a warm glow on the lush green trees and the grassy field. the camera is positioned low, looking up at the towering trees, which are the main focus of the image. the trees are dense and full of leaves, creating a canopy of green that fills the frame. the sunlight filters through the leaves, creating a beautiful pattern of light and shadow on the ground. the overall atmosphere of the video is peaceful and tranquil, evoking a sense of calm and relaxation.
a moment in a movie theater. a couple is seated in the middle of the theater, engrossed in the movie they are watching. the man is dressed in a casual outfit, complete with a pair of sunglasses, while the woman is wearing a cozy sweater. they are seated on a red theater seat, which stands out against the dark surroundings. the theater itself is dimly lit, with the screen displaying the movie they are watching. the couple appears to be enjoying the movie, their attention completely absorbed by the on-screen action. the theater is mostly empty, with only a few other seats visible in the background. the video does not contain any text or additional objects. the relative positions of the objects are such that the couple is in the foreground, while the screen and the other seats are in the background. the focus of the video is clearly on the couple and their shared experience of watching a movie in a theater.
a scene where a person is examining a dog. the person is wearing a blue shirt with the word "volunteer" printed on it. the dog is lying on its side, and the person is using a stethoscope to listen to the dog's heartbeat. the dog appears to be a golden retriever and is looking directly at the camera. the background is blurred, but it seems to be an indoor setting with a white wall. the person's focus is on the dog, and they seem to be checking its health. the dog's expression is calm, and it seems to be comfortable with the person's touch. the overall atmosphere of the video is calm and professional.
a close-up shot of a woman applying makeup. she is using a black brush to apply a dark powder to her face. the woman has blonde hair and is wearing a black top. the background is black, which contrasts with her skin tone and the makeup. the focus is on her face and the brush, with the rest of her body and the background being out of focus. the lighting is soft and even, highlighting the texture of the makeup and the woman's skin. there are no texts or other objects in the video. the woman's expression is neutral, and she is looking directly at the camera. the video does not contain any action, as it is a still shot of a woman applying makeup. the relative position of the woman and the brush is such that the brush is in her hand and is being used to apply the makeup to her face. the video does not contain any other objects or actions. the woman is the only person in the video, and she is the main subject. the video does not contain any sound. the description is based on the visible content of the video and does not include any assumptions or interpretations.
a young woman is seated in a black gaming chair in a room filled with computer monitors and other gaming equipment. she is wearing a red tank top and black pants, and her hair is styled in loose waves. the room is dimly lit, with the glow of the monitors casting a soft light on her face. she is holding a black game controller in her hands, and her attention is focused on the screen in front of her. the room is filled with other gaming equipment, including keyboards and mice, and there are other chairs and desks scattered around the room. the woman appears to be engrossed in her game, her posture relaxed yet focused. the room is quiet, the only sound coming from the beeps and boops of the game. the woman is the only person in the room, adding a sense of solitude to the scene. the video does not contain any text. the relative positions of the objects suggest a well-organized gaming setup, with the woman at the center, surrounded by her gaming equipment. the video does not contain any action, but the woman's focused expression suggests that she is in the middle of an intense g
a breathtaking aerial view of a coastal landscape at sunset. the sky, painted in hues of orange and pink, serves as a stunning backdrop to the scene. the sun, partially obscured by the horizon, casts a warm glow on the landscape below. the foreground of the image is dominated by a rocky cliff, its rugged surface adding a touch of raw beauty to the scene. the cliff's edge is adorned with patches of green vegetation, providing a stark contrast to the otherwise barren landscape. the middle ground of the image reveals a winding road that hugs the coastline. the road, appearing as a thin line against the vast expanse of the landscape, guides the viewer's eye towards the horizon. in the background, the silhouette of mountains can be seen, their peaks shrouded in a light mist. the mountains, along with the road, add depth to the image, creating a sense of distance and scale. overall, the video presents a serene and majestic coastal landscape, captured at the perfect moment of sunset. the colors
A close-up shot of a person eating noodles in a simple, well-lit kitchen. The person is seated at a wooden table with a bowl of steaming noodles in front of them, using chopsticks to lift the noodles to their mouth. The camera captures the motion of the noodles as they are lifted, with the focus on the person’s hand and the noodles. The background features a plain white wall and a few kitchen utensils hanging on the wall. The lighting is natural, coming from a window to the right, casting soft shadows. The overall atmosphere is warm and homey, emphasizing the simplicity and comfort of the meal.
A tender moment of a man and woman embracing in a quiet, softly lit room. The couple stands close, their arms wrapped around each other, with the man gently resting his chin on the woman’s shoulder. The background is blurred, highlighting the intimacy of their embrace, with warm, diffused lighting creating a cozy and serene atmosphere. The focus is on the expressions of comfort and connection between them, as they hold each other in a quiet moment of affection. There are no other objects or distractions in the frame, allowing the viewer to feel the depth of their bond.

+ 0
- 6
assets/texts/t2v_ref.txt View File

@@ -1,6 +0,0 @@
Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.
In an ornate, historical hall, a massive tidal wave peaks and begins to crash. Two surfers, seizing the moment, skillfully navigate the face of the wave.
Pirate ship in a cosmic maelstrom nebula.
Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff's edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.
A sad small cactus in the Sahara desert becomes happy.
A car driving on a road in the middle of a desert.

+ 0
- 10
assets/texts/t2v_samples.txt View File

@@ -1,10 +0,0 @@
A soaring drone footage captures the majestic beauty of a coastal cliff, its red and yellow stratified rock faces rich in color and against the vibrant turquoise of the sea. Seabirds can be seen taking flight around the cliff's precipices. As the drone slowly moves from different angles, the changing sunlight casts shifting shadows that highlight the rugged textures of the cliff and the surrounding calm sea. The water gently laps at the rock base and the greenery that clings to the top of the cliff, and the scene gives a sense of peaceful isolation at the fringes of the ocean. The video captures the essence of pristine natural beauty untouched by human structures.
A majestic beauty of a waterfall cascading down a cliff into a serene lake. The waterfall, with its powerful flow, is the central focus of the video. The surrounding landscape is lush and green, with trees and foliage adding to the natural beauty of the scene. The camera angle provides a bird's eye view of the waterfall, allowing viewers to appreciate the full height and grandeur of the waterfall. The video is a stunning representation of nature's power and beauty.
A vibrant scene of a snowy mountain landscape. The sky is filled with a multitude of colorful hot air balloons, each floating at different heights, creating a dynamic and lively atmosphere. The balloons are scattered across the sky, some closer to the viewer, others further away, adding depth to the scene. Below, the mountainous terrain is blanketed in a thick layer of snow, with a few patches of bare earth visible here and there. The snow-covered mountains provide a stark contrast to the colorful balloons, enhancing the visual appeal of the scene. In the foreground, a few cars can be seen driving along a winding road that cuts through the mountains. The cars are small compared to the vastness of the landscape, emphasizing the grandeur of the surroundings. The overall style of the video is a mix of adventure and tranquility, with the hot air balloons adding a touch of whimsy to the otherwise serene mountain landscape. The video is likely shot during the day, as the lighting is bright and even, casting soft shadows on the snow-covered mountains.
The vibrant beauty of a sunflower field. The sunflowers, with their bright yellow petals and dark brown centers, are in full bloom, creating a stunning contrast against the green leaves and stems. The sunflowers are arranged in neat rows, creating a sense of order and symmetry. The sun is shining brightly, casting a warm glow on the flowers and highlighting their intricate details. The video is shot from a low angle, looking up at the sunflowers, which adds a sense of grandeur and awe to the scene. The sunflowers are the main focus of the video, with no other objects or people present. The video is a celebration of nature's beauty and the simple joy of a sunny day in the countryside.
A serene underwater scene featuring a sea turtle swimming through a coral reef. The turtle, with its greenish-brown shell, is the main focus of the video, swimming gracefully towards the right side of the frame. The coral reef, teeming with life, is visible in the background, providing a vibrant and colorful backdrop to the turtle's journey. Several small fish, darting around the turtle, add a sense of movement and dynamism to the scene. The video is shot from a slightly elevated angle, providing a comprehensive view of the turtle's surroundings. The overall style of the video is calm and peaceful, capturing the beauty and tranquility of the underwater world.
A vibrant underwater scene. A group of blue fish, with yellow fins, are swimming around a coral reef. The coral reef is a mix of brown and green, providing a natural habitat for the fish. The water is a deep blue, indicating a depth of around 30 feet. The fish are swimming in a circular pattern around the coral reef, indicating a sense of motion and activity. The overall scene is a beautiful representation of marine life.
A bustling city street at night, filled with the glow of car headlights and the ambient light of streetlights. The scene is a blur of motion, with cars speeding by and pedestrians navigating the crosswalks. The cityscape is a mix of towering buildings and illuminated signs, creating a vibrant and dynamic atmosphere. The perspective of the video is from a high angle, providing a bird's eye view of the street and its surroundings. The overall style of the video is dynamic and energetic, capturing the essence of urban life at night.
A snowy forest landscape with a dirt road running through it. The road is flanked by trees covered in snow, and the ground is also covered in snow. The sun is shining, creating a bright and serene atmosphere. The road appears to be empty, and there are no people or animals visible in the video. The style of the video is a natural landscape shot, with a focus on the beauty of the snowy forest and the peacefulness of the road.
The dynamic movement of tall, wispy grasses swaying in the wind. The sky above is filled with clouds, creating a dramatic backdrop. The sunlight pierces through the clouds, casting a warm glow on the scene. The grasses are a mix of green and brown, indicating a change in seasons. The overall style of the video is naturalistic, capturing the beauty of the landscape in a realistic manner. The focus is on the grasses and their movement, with the sky serving as a secondary element. The video does not contain any human or animal elements.
A serene night scene in a forested area. The first frame shows a tranquil lake reflecting the star-filled sky above. The second frame reveals a beautiful sunset, casting a warm glow over the landscape. The third frame showcases the night sky, filled with stars and a vibrant Milky Way galaxy. The video is a time-lapse, capturing the transition from day to night, with the lake and forest serving as a constant backdrop. The style of the video is naturalistic, emphasizing the beauty of the night sky and the peacefulness of the forest.

+ 0
- 20
assets/texts/t2v_short.txt View File

@@ -1,20 +0,0 @@
A fat rabbit wearing a purple robe walking through a fantasy landscape
Waves crashing against a lone lighthouse, ominous lighting
A mystical forest showcasing the adventures of travelers who enter
A blue-haired mage singing
A surreal landscape with floating islands and waterfalls in the sky craft
A blue bird standing in water
A young man walks alone by the seaside
Pink rose on a glass surface with droplets, close-up
Drone viewpoint, a subway train coming out of a tunnel
Space with all planets green and pink color with background of bright white stars
A city floating in an astral space, with stars and nebulae
Sunrise on top of a high-rise building
Pink and cyan powder explosions
Deer in the woods gaze into the camera under the sunlight
In a flash of lightning, a wizard appeared from thin air, his long robes billowing in the wind
A futuristic cyberpunk cityscape at night with towering neon-lit skyscrapers
A scene where the trees, flowers, and animals come together to create a symphony of nature
A ghostly ship sailing through the clouds, navigating through a sea under a moonlit sky
A sunset with beautiful beach
A young man walking alone in the forest

+ 0
- 48
assets/texts/t2v_sora.txt View File

@@ -1,48 +0,0 @@
A stylish woman walks down a Tokyo street filled with warm glowing neon and animated city signage. She wears a black leather jacket, a long red dress, and black boots, and carries a black purse. She wears sunglasses and red lipstick. She walks confidently and casually. The street is damp and reflective, creating a mirror effect of the colorful lights. Many pedestrians walk about.
Several giant wooly mammoths approach treading through a snowy meadow, their long wooly fur lightly blows in the wind as they walk, snow covered trees and dramatic snow capped mountains in the distance, mid afternoon light with wispy clouds and a sun high in the distance creates a warm glow, the low camera view is stunning capturing the large furry mammal with beautiful photography, depth of field.
A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors.
Drone view of waves crashing against the rugged cliffs along Big Sur’s garay point beach. The crashing blue waters create white-tipped waves, while the golden light of the setting sun illuminates the rocky shore. A small island with a lighthouse sits in the distance, and green shrubbery covers the cliff’s edge. The steep drop from the road down to the beach is a dramatic feat, with the cliff’s edges jutting out over the sea. This is a view that captures the raw beauty of the coast and the rugged landscape of the Pacific Coast Highway.
Animated scene features a close-up of a short fluffy monster kneeling beside a melting red candle. The art style is 3D and realistic, with a focus on lighting and texture. The mood of the painting is one of wonder and curiosity, as the monster gazes at the flame with wide eyes and open mouth. Its pose and expression convey a sense of innocence and playfulness, as if it is exploring the world around it for the first time. The use of warm colors and dramatic lighting further enhances the cozy atmosphere of the image.
A gorgeously rendered papercraft world of a coral reef, rife with colorful fish and sea creatures.
This close-up shot of a Victoria crowned pigeon showcases its striking blue plumage and red chest. Its crest is made of delicate, lacy feathers, while its eye is a striking red color. The bird’s head is tilted slightly to the side, giving the impression of it looking regal and majestic. The background is blurred, drawing attention to the bird’s striking appearance.
Photorealistic closeup video of two pirate ships battling each other as they sail inside a cup of coffee.
A young man in his 20s is sitting on a piece of cloud in the sky, reading a book.
Historical footage of California during the gold rush.
A close up view of a glass sphere that has a zen garden within it. There is a small dwarf in the sphere who is raking the zen garden and creating patterns in the sand.
Extreme close up of a 24 year old woman’s eye blinking, standing in Marrakech during magic hour, cinematic film shot in 70mm, depth of field, vivid colors, cinematic
A cartoon kangaroo disco dances.
A beautiful homemade video showing the people of Lagos, Nigeria in the year 2056. Shot with a mobile phone camera.
A petri dish with a bamboo forest growing within it that has tiny red pandas running around.
The camera rotates around a large stack of vintage televisions all showing different programs — 1950s sci-fi movies, horror movies, news, static, a 1970s sitcom, etc, set inside a large New York museum gallery.
3D animation of a small, round, fluffy creature with big, expressive eyes explores a vibrant, enchanted forest. The creature, a whimsical blend of a rabbit and a squirrel, has soft blue fur and a bushy, striped tail. It hops along a sparkling stream, its eyes wide with wonder. The forest is alive with magical elements: flowers that glow and change colors, trees with leaves in shades of purple and silver, and small floating lights that resemble fireflies. The creature stops to interact playfully with a group of tiny, fairy-like beings dancing around a mushroom ring. The creature looks up in awe at a large, glowing tree that seems to be the heart of the forest.
The camera follows behind a white vintage SUV with a black roof rack as it speeds up a steep dirt road surrounded by pine trees on a steep mountain slope, dust kicks up from its tires, the sunlight shines on the SUV as it speeds along the dirt road, casting a warm glow over the scene. The dirt road curves gently into the distance, with no other cars or vehicles in sight. The trees on either side of the road are redwoods, with patches of greenery scattered throughout. The car is seen from the rear following the curve with ease, making it seem as if it is on a rugged drive through the rugged terrain. The dirt road itself is surrounded by steep hills and mountains, with a clear blue sky above with wispy clouds.
Reflections in the window of a train traveling through the Tokyo suburbs.
A drone camera circles around a beautiful historic church built on a rocky outcropping along the Amalfi Coast, the view showcases historic and magnificent architectural details and tiered pathways and patios, waves are seen crashing against the rocks below as the view overlooks the horizon of the coastal waters and hilly landscapes of the Amalfi Coast Italy, several distant people are seen walking and enjoying vistas on patios of the dramatic ocean views, the warm glow of the afternoon sun creates a magical and romantic feeling to the scene, the view is stunning captured with beautiful photography.
A large orange octopus is seen resting on the bottom of the ocean floor, blending in with the sandy and rocky terrain. Its tentacles are spread out around its body, and its eyes are closed. The octopus is unaware of a king crab that is crawling towards it from behind a rock, its claws raised and ready to attack. The crab is brown and spiny, with long legs and antennae. The scene is captured from a wide angle, showing the vastness and depth of the ocean. The water is clear and blue, with rays of sunlight filtering through. The shot is sharp and crisp, with a high dynamic range. The octopus and the crab are in focus, while the background is slightly blurred, creating a depth of field effect.
A flock of paper airplanes flutters through a dense jungle, weaving around trees as if they were migrating birds.
A cat waking up its sleeping owner demanding breakfast. The owner tries to ignore the cat, but the cat tries new tactics and finally the owner pulls out a secret stash of treats from under the pillow to hold the cat off a little longer.
Borneo wildlife on the Kinabatangan River
A Chinese Lunar New Year celebration video with Chinese Dragon.
Tour of an art gallery with many beautiful works of art in different styles.
Beautiful, snowy Tokyo city is bustling. The camera moves through the bustling city street, following several people enjoying the beautiful snowy weather and shopping at nearby stalls. Gorgeous sakura petals are flying through the wind along with snowflakes.
A stop motion animation of a flower growing out of the windowsill of a suburban house.
The story of a robot’s life in a cyberpunk setting.
An extreme close-up of a gray-haired man with a beard in his 60s, he is deep in thought pondering the history of the universe as he sits at a cafe in Paris, his eyes focus on people offscreen as they walk as he sits mostly motionless, he is dressed in a wool coat suit coat with a button-down shirt, he wears a brown beret and glasses and has a very professorial appearance, and at the end he offers a subtle closed-mouth smile as if he found the answer to the mystery of life, the lighting is very cinematic with the golden light and the Parisian streets and city in the background, depth of field, cinematic 35mm film.
A beautiful silhouette animation shows a wolf howling at the moon, feeling lonely, until it finds its pack.
New York City submerged like Atlantis. Fish, whales, sea turtles and sharks swim through the streets of New York.
A litter of golden retriever puppies playing in the snow. Their heads pop out of the snow, covered in snow.
Step-printing scene of a person running, cinematic film shot in 35mm.
Five gray wolf pups frolicking and chasing each other around a remote gravel road, surrounded by grass. The pups run and leap, chasing each other, and nipping at each other, playing.
Basketball through hoop then explodes.
Archeologists discover a generic plastic chair in the desert, excavating and dusting it with great care.
A grandmother with neatly combed grey hair stands behind a colorful birthday cake with numerous candles at a wood dining room table, expression is one of pure joy and happiness, with a happy glow in her eye. She leans forward and blows out the candles with a gentle puff, the cake has pink frosting and sprinkles and the candles cease to flicker, the grandmother wears a light blue blouse adorned with floral patterns, several happy friends and family sitting at the table can be seen celebrating, out of focus. The scene is beautifully captured, cinematic, showing a 3/4 view of the grandmother and the dining room. Warm color tones and soft lighting enhance the mood.
The camera directly faces colorful buildings in Burano Italy. An adorable Dalmatian looks through a window on a building on the ground floor. Many people are walking and cycling along the canal streets in front of the buildings.
An adorable happy otter confidently stands on a surfboard wearing a yellow lifejacket, riding along turquoise tropical waters near lush tropical islands, 3D digital render art style.
This close-up shot of a chameleon showcases its striking color changing capabilities. The background is blurred, drawing attention to the animal’s striking appearance.
A corgi vlogging itself in tropical Maui.
A white and orange tabby cat is seen happily darting through a dense garden, as if chasing something. Its eyes are wide and happy as it jogs forward, scanning the branches, flowers, and leaves as it walks. The path is narrow as it makes its way between all the plants. the scene is captured from a ground-level angle, following the cat closely, giving a low and intimate perspective. The image is cinematic with warm tones and a grainy texture. The scattered daylight between the leaves and plants above creates a warm contrast, accentuating the cat’s orange fur. The shot is clear and sharp, with a shallow depth of field.
Aerial view of Santorini during the blue hour, showcasing the stunning architecture of white Cycladic buildings with blue domes. The caldera views are breathtaking, and the lighting creates a beautiful, serene atmosphere.
Tiltshift of a construction site filled with workers, equipment, and heavy machinery.
A giant, towering cloud in the shape of a man looms over the earth. The cloud man shoots lightning bolts down to the earth.
A Samoyed and a Golden Retriever dog are playfully romping through a futuristic neon city at night. The neon lights emitted from the nearby buildings glisten off of their fur.
The Glenfinnan Viaduct is a historic railway bridge in Scotland, UK, that crosses over the west highland line between the towns of Mallaig and Fort William. It is a stunning sight as a steam train leaves the bridge, traveling over the arch-covered viaduct. The landscape is dotted with lush greenery and rocky mountains, creating a picturesque backdrop for the train journey. The sky is blue and the sun is shining, making for a beautiful day to explore this majestic spot.

+ 0
- 6
assets/texts/ucf101_id.txt View File

@@ -1,6 +0,0 @@
0
1
2
3
4
5

+ 0
- 6
assets/texts/ucf101_labels.txt View File

@@ -1,6 +0,0 @@
Apply Eye Makeup
Apply Lipstick
Archery
Baby Crawling
Balance Beam
Band Marching

+ 76
- 0
configs/diffusion/inference/256px.py View File

@@ -0,0 +1,76 @@
# Base inference configuration for 256px sampling.

# ---- Run-level settings ----
save_dir = "samples"  # directory where outputs are saved
seed = 42  # random seed (the seed for z is configured separately below)
batch_size = 1
dtype = "bf16"

# Conditioning mode. Options listed for this setting:
#   t2v            text-to-video
#   i2v_head       image-to-video (head)
#   i2v_tail       image-to-video (tail)
#   i2v_loop       connect images
#   v2v_head_half  video extension with first half
#   v2v_tail_half  video extension with second half
cond_type = "t2v"

dataset = {"type": "text"}

# ---- Sampling options ----
sampling_option = {
    "resolution": "256px",  # 256px or 768px
    "aspect_ratio": "16:9",  # 9:16 or 16:9 or 1:1
    "num_frames": 129,  # number of frames
    "num_steps": 50,  # number of steps
    "shift": True,
    "temporal_reduction": 4,
    "is_causal_vae": True,
    "guidance": 7.5,  # guidance for text-to-video
    "guidance_img": 3.0,  # guidance for image-to-video
    "text_osci": True,  # enable text guidance oscillation
    "image_osci": True,  # enable image guidance oscillation
    "scale_temporal_osci": True,
    "method": "i2v",  # hard-coded for now
    "seed": None,  # random seed for z
}
motion_score = "4"  # motion score for video generation
fps_save = 24  # fps for video generation and saving

# ---- Model components ----
model = {
    "type": "flux",
    "from_pretrained": "./ckpts/Open_Sora_v2.safetensors",
    "guidance_embed": False,
    "fused_qkv": False,
    "use_liger_rope": True,
    # model architecture
    "in_channels": 64,
    "vec_in_dim": 768,
    "context_in_dim": 4096,
    "hidden_size": 3072,
    "mlp_ratio": 4.0,
    "num_heads": 24,
    "depth": 19,
    "depth_single_blocks": 38,
    "axes_dim": [16, 56, 56],
    "theta": 10_000,
    "qkv_bias": True,
    "cond_embed": True,
}

# Autoencoder
ae = {
    "type": "hunyuan_vae",
    "from_pretrained": "./ckpts/hunyuan_vae.safetensors",
    "in_channels": 3,
    "out_channels": 3,
    "layers_per_block": 2,
    "latent_channels": 16,
    "use_spatial_tiling": True,
    "use_temporal_tiling": False,
}

# Text embedders
t5 = {
    "type": "text_embedder",
    "from_pretrained": "./ckpts/google/t5-v1_1-xxl",
    "max_length": 512,
    "shardformer": True,
}
clip = {
    "type": "text_embedder",
    "from_pretrained": "./ckpts/openai/clip-vit-large-patch14",
    "max_length": 77,
}

+ 4
- 0
configs/diffusion/inference/256px_tp.py View File

@@ -0,0 +1,4 @@
# Config inheritance uses mmengine's `_base_` grammar.
# "256px.py" is the base config; "plugins/tp.py" enables tensor parallel.
_base_ = ["256px.py", "plugins/tp.py"]

+ 8
- 0
configs/diffusion/inference/768px.py View File

@@ -0,0 +1,8 @@
# Config inheritance uses mmengine's `_base_` grammar.
# "256px.py" is the base config; "plugins/sp.py" enables sequence parallel.
_base_ = ["256px.py", "plugins/sp.py"]

# Only the resolution is set here.
sampling_option = {"resolution": "768px"}

+ 35
- 0
configs/diffusion/inference/high_compression.py View File

@@ -0,0 +1,35 @@
# High-compression inference config, built on the t2i2v 768px config.
_base_ = ["t2i2v_768px.py"]

# No parallelism is needed, so every plugin setting is cleared.
plugin = None
plugin_config = None
plugin_ae = None
plugin_config_ae = None

# ---- Model settings ----
patch_size = 1
model = {
    "from_pretrained": "./ckpts/Open_Sora_v2_Video_DC_AE.safetensors",
    "in_channels": 128,
    "cond_embed": True,
    "patch_size": 1,
}

# ---- AE settings ----
ae = {
    "_delete_": True,  # mmengine key: replace the inherited `ae` dict instead of merging
    "type": "dc_ae",
    "from_scratch": True,
    "model_name": "dc-ae-f32t4c128",
    "from_pretrained": "./ckpts/F32T4C128_AE.safetensors",
    "use_spatial_tiling": True,
    "use_temporal_tiling": True,
    "spatial_tile_size": 256,
    "temporal_tile_size": 32,
    "tile_overlap_factor": 0.25,
}
ae_spatial_compression = 32

sampling_option = {"num_frames": 128}

+ 20
- 0
configs/diffusion/inference/plugins/sp.py View File

@@ -0,0 +1,20 @@
# Plugin settings with sequence parallelism enabled for the diffusion model.
plugin = "hybrid"
plugin_config = {
    "tp_size": 1,
    "pp_size": 1,
    "sp_size": 8,
    "sequence_parallelism_mode": "ring_attn",
    "enable_sequence_parallelism": True,
    "static_graph": True,
    "zero_stage": 2,
    "overlap_allgather": False,
}

# Plugin settings for the autoencoder (tp_size=8, sp_size=1).
plugin_ae = "hybrid"
plugin_config_ae = {
    "tp_size": 8,
    "pp_size": 1,
    "sp_size": 1,
    "zero_stage": 2,
    "overlap_allgather": False,
}

+ 36
- 0
configs/diffusion/inference/plugins/t2i2v.py View File

@@ -0,0 +1,36 @@
# Plugin config that switches on the t2i2v path and describes the image model.
use_t2i2v = True

# ---- flux configurations ----
img_flux = {
    "type": "flux",
    "from_pretrained": "./ckpts/flux1-dev.safetensors",
    "guidance_embed": True,
    # model architecture
    "in_channels": 64,
    "vec_in_dim": 768,
    "context_in_dim": 4096,
    "hidden_size": 3072,
    "mlp_ratio": 4.0,
    "num_heads": 24,
    "depth": 19,
    "depth_single_blocks": 38,
    "axes_dim": [16, 56, 56],
    "theta": 10_000,
    "qkv_bias": True,
    # Original note on cond_embed: pass i2v & v2v info; for t2v this layer is
    # needed too, but with x_cond and mask all set to 0.
    "cond_embed": False,
}

# ---- autoencoder for the image model ----
img_flux_ae = {
    "type": "autoencoder_2d",
    "from_pretrained": "./ckpts/flux1-dev-ae.safetensors",
    "resolution": 256,
    "in_channels": 3,
    "ch": 128,
    "out_ch": 3,
    "ch_mult": [1, 2, 4, 4],
    "num_res_blocks": 2,
    "z_channels": 16,
    "scale_factor": 0.3611,
    "shift_factor": 0.1159,
}
img_resolution = "768px"

+ 17
- 0
configs/diffusion/inference/plugins/tp.py View File

@@ -0,0 +1,17 @@
# Plugin settings for the diffusion model (tp_size=8, sp_size=1).
plugin = "hybrid"
plugin_config = {
    "tp_size": 8,
    "pp_size": 1,
    "sp_size": 1,
    "zero_stage": 2,
    "overlap_allgather": False,
}

# Plugin settings for the autoencoder; same values as above.
plugin_ae = "hybrid"
plugin_config_ae = {
    "tp_size": 8,
    "pp_size": 1,
    "sp_size": 1,
    "zero_stage": 2,
    "overlap_allgather": False,
}

+ 4
- 0
configs/diffusion/inference/t2i2v_256px.py View File

@@ -0,0 +1,4 @@
# Config inheritance uses mmengine's `_base_` grammar.
# Combines the 256px base config with the t2i2v plugin config.
_base_ = ["256px.py", "plugins/t2i2v.py"]

+ 4
- 0
configs/diffusion/inference/t2i2v_768px.py View File

@@ -0,0 +1,4 @@
# Config inheritance uses mmengine's `_base_` grammar.
# Combines the 768px config with the t2i2v plugin config.
_base_ = ["768px.py", "plugins/t2i2v.py"]

+ 12
- 0
configs/diffusion/train/demo.py View File

@@ -0,0 +1,12 @@
# Demo training config derived from stage1.
_base_ = ["stage1.py"]

# Replaces the inherited bucket table ("_delete_" is mmengine's replace-not-merge key).
# Only a 256px table with four frame counts is defined here.
bucket_config = {
    "_delete_": True,
    "256px": {1: (1.0, 1), 33: (1.0, 1), 97: (1.0, 1), 129: (1.0, 1)},
}

+ 71
- 0
configs/diffusion/train/high_compression.py View File

@@ -0,0 +1,71 @@
# High-compression training config derived from image.py.
_base_ = ["image.py"]

# Replaces the inherited bucket table ("_delete_" is mmengine's replace-not-merge key).
# Keys of the inner dict are frame counts; values are 2-tuples copied unchanged.
bucket_config = {
    "_delete_": True,
    "768px": {
        1: (1.0, 20),
        16: (1.0, 8), 20: (1.0, 8), 24: (1.0, 8), 28: (1.0, 8), 32: (1.0, 8),
        36: (1.0, 4), 40: (1.0, 4), 44: (1.0, 4), 48: (1.0, 4),
        52: (1.0, 4), 56: (1.0, 4), 60: (1.0, 4), 64: (1.0, 4),
        68: (1.0, 3), 72: (1.0, 3), 76: (1.0, 3), 80: (1.0, 3),
        84: (1.0, 3), 88: (1.0, 3), 92: (1.0, 3), 96: (1.0, 3),
        100: (1.0, 2), 104: (1.0, 2), 108: (1.0, 2), 112: (1.0, 2),
        116: (1.0, 2), 120: (1.0, 2), 124: (1.0, 2),
        128: (1.0, 2),  # 30s
    },
}

# Weights per conditioning type.
condition_config = {"t2v": 1, "i2v_head": 7}

# ---- Model ----
grad_ckpt_settings = (100, 100)
patch_size = 1
model = {
    "from_pretrained": None,
    "grad_ckpt_settings": grad_ckpt_settings,
    "in_channels": 128,
    "cond_embed": True,
    "patch_size": patch_size,
}

# ---- Autoencoder (replaces the inherited `ae` dict) ----
ae = {
    "_delete_": True,
    "type": "dc_ae",
    "model_name": "dc-ae-f32t4c128",
    "from_pretrained": "./ckpts/F32T4C128_AE.safetensors",
    "from_scratch": True,
    "scaling_factor": 0.493,
    "use_spatial_tiling": True,
    "use_temporal_tiling": True,
    "spatial_tile_size": 256,
    "temporal_tile_size": 32,
    "tile_overlap_factor": 0.25,
}
is_causal_vae = False
ae_spatial_compression = 32

# ---- Checkpointing and optimizer overrides ----
ckpt_every = 250
lr = 3e-5
optim = {"lr": lr}

+ 114
- 0
configs/diffusion/train/image.py View File

@@ -0,0 +1,114 @@
# Base training configuration; the other train configs in this directory inherit from it.

# ---- Dataset settings ----
dataset = {
    "type": "video_text",
    "transform_name": "resize_crop",
    "fps_max": 24,  # the desired fps for training
    "vmaf": True,  # load vmaf scores into text
}

grad_ckpt_settings = (8, 100)  # grad checkpoint settings

# Bucket table: resolution name -> {frame count: 2-tuple}.
bucket_config = {
    "256px": {1: (1.0, 50)},
    "768px": {1: (0.5, 11)},
    "1024px": {1: (0.5, 7)},
}

# ---- Model components ----
model = {
    "type": "flux",
    "from_pretrained": None,
    "strict_load": False,
    "guidance_embed": False,
    "fused_qkv": False,
    "use_liger_rope": True,
    "grad_ckpt_settings": grad_ckpt_settings,
    # model architecture
    "in_channels": 64,
    "vec_in_dim": 768,
    "context_in_dim": 4096,
    "hidden_size": 3072,
    "mlp_ratio": 4.0,
    "num_heads": 24,
    "depth": 19,
    "depth_single_blocks": 38,
    "axes_dim": [16, 56, 56],
    "theta": 10_000,
    "qkv_bias": True,
}

# Probability for dropping out each text embedding.
dropout_ratio = {"t5": 0.31622777, "clip": 0.31622777}

ae = {
    "type": "hunyuan_vae",
    "from_pretrained": "./ckpts/hunyuan_vae.safetensors",
    "in_channels": 3,
    "out_channels": 3,
    "layers_per_block": 2,
    "latent_channels": 16,
    "use_spatial_tiling": True,
    "use_temporal_tiling": False,
}
is_causal_vae = True

# NOTE(review): both text embedders point at an absolute, site-specific cache_dir.
t5 = {
    "type": "text_embedder",
    "from_pretrained": "google/t5-v1_1-xxl",
    "cache_dir": "/mnt/ddn/sora/tmp_load/huggingface/hub/",
    "max_length": 512,
    "shardformer": True,
}
clip = {
    "type": "text_embedder",
    "from_pretrained": "openai/clip-vit-large-patch14",
    "cache_dir": "/mnt/ddn/sora/tmp_load/huggingface/hub/",
    "max_length": 77,
}

# ---- Optimization settings ----
lr = 1e-5
eps = 1e-15
optim = {
    "cls": "HybridAdam",
    "lr": lr,
    "eps": eps,
    "weight_decay": 0.0,
    "adamw_mode": True,
}
warmup_steps = 0
update_warmup_steps = True

grad_clip = 1.0
accumulation_steps = 1
ema_decay = None

# ---- Acceleration settings ----
prefetch_factor = 2
num_workers = 12
num_bucket_build_workers = 64
dtype = "bf16"
plugin = "zero2"
grad_checkpoint = True
plugin_config = {
    "reduce_bucket_size_in_m": 128,
    "overlap_allgather": False,
}
# 24 entries of (260 + 20) * 1024 * 1024 followed by 4 entries of (34 + 20) * 1024 * 1024.
pin_memory_cache_pre_alloc_numels = (
    24 * [(260 + 20) * 1024 * 1024] + 4 * [(34 + 20) * 1024 * 1024]
)
async_io = False

# ---- Other settings ----
seed = 42
outputs = "outputs"
epochs = 1000
log_every = 10
ckpt_every = 100
keep_n_latest = 20
wandb_project = "mmdit"

save_master_weights = True
load_master_weights = True

# Debugging switches (disabled):
# record_time = True
# record_barrier = True

+ 56
- 0
configs/diffusion/train/stage1.py View File

@@ -0,0 +1,56 @@
# Stage-1 training config derived from image.py.
_base_ = ["image.py"]

dataset = {"memory_efficient": False}

# new config
grad_ckpt_settings = (8, 100)

# Replaces the inherited bucket table ("_delete_" is mmengine's replace-not-merge key).
# Inner keys are frame counts (1, then 5..129 in steps of 4); values are copied unchanged.
bucket_config = {
    "_delete_": True,
    "256px": {
        1: (1.0, 45),
        5: (1.0, 12), 9: (1.0, 12), 13: (1.0, 12), 17: (1.0, 12),
        21: (1.0, 12), 25: (1.0, 12), 29: (1.0, 12), 33: (1.0, 12),
        37: (1.0, 6), 41: (1.0, 6), 45: (1.0, 6), 49: (1.0, 6),
        53: (1.0, 6), 57: (1.0, 6), 61: (1.0, 6), 65: (1.0, 6),
        69: (1.0, 4), 73: (1.0, 4), 77: (1.0, 4), 81: (1.0, 4),
        85: (1.0, 4), 89: (1.0, 4), 93: (1.0, 4), 97: (1.0, 4),
        101: (1.0, 3), 105: (1.0, 3), 109: (1.0, 3), 113: (1.0, 3),
        117: (1.0, 3), 121: (1.0, 3), 125: (1.0, 3), 129: (1.0, 3),
    },
    "768px": {1: (0.5, 13)},
    "1024px": {1: (0.5, 7)},
}

# Overrides merged into the inherited model / optimizer settings.
model = {"grad_ckpt_settings": grad_ckpt_settings}
lr = 5e-5
optim = {"lr": lr}
ckpt_every = 2000
keep_n_latest = 20

+ 14
- 0
configs/diffusion/train/stage1_i2v.py View File

@@ -0,0 +1,14 @@
# Stage-1 image-to-video training config derived from stage1.py.
_base_ = ["stage1.py"]

# Model component override.
model = {"cond_embed": True}

# Training weight per conditioning type.
condition_config = {
    "t2v": 1,
    "i2v_head": 5,  # train i2v (image as first frame) with weight 5
    "i2v_loop": 1,  # train image connection with weight 1
    "i2v_tail": 1,  # train i2v (image as last frame) with weight 1
}

lr = 1e-5
optim = {"lr": lr}

+ 94
- 0
configs/diffusion/train/stage2.py View File

@@ -0,0 +1,94 @@
# Stage-2 training config derived from image.py.
_base_ = ["image.py"]

# new config
grad_ckpt_settings = (100, 100)

# Hybrid plugin with sequence parallelism enabled (sp_size=4, ring attention mode).
plugin = "hybrid"
plugin_config = dict(
    tp_size=1,
    pp_size=1,
    sp_size=4,
    sequence_parallelism_mode="ring_attn",
    enable_sequence_parallelism=True,
    static_graph=True,
    zero_stage=2,
)

# Replaces the inherited bucket table ("_delete_" is mmengine's replace-not-merge key).
# Inner keys are frame counts (1, then 5..129 in steps of 4); values are 2-tuples.
bucket_config = {
    "_delete_": True,
    "256px": {
        1: (1.0, 130),
        5: (1.0, 14),
        9: (1.0, 14),
        13: (1.0, 14),
        17: (1.0, 14),
        21: (1.0, 14),
        25: (1.0, 14),
        29: (1.0, 14),
        33: (1.0, 14),
        37: (1.0, 10),
        41: (1.0, 10),
        45: (1.0, 10),
        49: (1.0, 10),
        53: (1.0, 10),
        57: (1.0, 10),
        61: (1.0, 10),
        65: (1.0, 10),
        # The 69-frame entry was missing from this table although the 768px
        # table below lists it; it takes the same value as the 73..97 group.
        69: (1.0, 7),
        73: (1.0, 7),
        77: (1.0, 7),
        81: (1.0, 7),
        85: (1.0, 7),
        89: (1.0, 7),
        93: (1.0, 7),
        97: (1.0, 7),
        101: (1.0, 6),
        105: (1.0, 6),
        109: (1.0, 6),
        113: (1.0, 6),
        117: (1.0, 6),
        121: (1.0, 6),
        125: (1.0, 6),
        129: (1.0, 6),
    },
    "768px": {
        1: (1.0, 38),
        5: (1.0, 6),
        9: (1.0, 6),
        13: (1.0, 6),
        17: (1.0, 6),
        21: (1.0, 6),
        25: (1.0, 6),
        29: (1.0, 6),
        33: (1.0, 6),
        37: (1.0, 4),
        41: (1.0, 4),
        45: (1.0, 4),
        49: (1.0, 4),
        53: (1.0, 4),
        57: (1.0, 4),
        61: (1.0, 4),
        65: (1.0, 4),
        69: (1.0, 3),
        73: (1.0, 3),
        77: (1.0, 3),
        81: (1.0, 3),
        85: (1.0, 3),
        89: (1.0, 3),
        93: (1.0, 3),
        97: (1.0, 3),
        101: (1.0, 2),
        105: (1.0, 2),
        109: (1.0, 2),
        113: (1.0, 2),
        117: (1.0, 2),
        121: (1.0, 2),
        125: (1.0, 2),
        129: (1.0, 2),
    },
}

# Overrides merged into the inherited model / optimizer settings.
model = dict(grad_ckpt_settings=grad_ckpt_settings)
lr = 5e-5
optim = dict(lr=lr)
ckpt_every = 200
keep_n_latest = 20

+ 87
- 0
configs/diffusion/train/stage2_i2v.py View File

@@ -0,0 +1,87 @@
# Stage-2 image-to-video training config derived from stage2.py.
_base_ = ["stage2.py"]

# Model component override.
model = {"cond_embed": True}
grad_ckpt_buffer_size = 25 * 1024**3

# Weights per conditioning type.
condition_config = {"t2v": 1, "i2v_head": 5, "i2v_loop": 1, "i2v_tail": 1}
is_causal_vae = True

# Replaces the inherited bucket table ("_delete_" is mmengine's replace-not-merge key).
# Inner keys are frame counts (1, then 5..129 in steps of 4); values are copied unchanged.
bucket_config = {
    "_delete_": True,
    "256px": {
        1: (1.0, 195),
        5: (1.0, 80), 9: (1.0, 80), 13: (1.0, 80), 17: (1.0, 80),
        21: (1.0, 80), 25: (1.0, 80), 29: (1.0, 80), 33: (1.0, 80),
        37: (1.0, 40), 41: (1.0, 40), 45: (1.0, 40), 49: (1.0, 40),
        53: (1.0, 40), 57: (1.0, 40), 61: (1.0, 40), 65: (1.0, 40),
        69: (1.0, 28), 73: (1.0, 28), 77: (1.0, 28), 81: (1.0, 28),
        85: (1.0, 28), 89: (1.0, 28), 93: (1.0, 28), 97: (1.0, 28),
        101: (1.0, 23), 105: (1.0, 23), 109: (1.0, 23), 113: (1.0, 23),
        117: (1.0, 23), 121: (1.0, 23), 125: (1.0, 23), 129: (1.0, 23),
    },
    "768px": {
        1: (0.5, 38),
        5: (0.5, 10), 9: (0.5, 10), 13: (0.5, 10), 17: (0.5, 10),
        21: (0.5, 10), 25: (0.5, 10), 29: (0.5, 10), 33: (0.5, 10),
        37: (0.5, 5), 41: (0.5, 5), 45: (0.5, 5), 49: (0.5, 5),
        53: (0.5, 5), 57: (0.5, 5), 61: (0.5, 5), 65: (0.5, 5),
        69: (0.5, 3), 73: (0.5, 3), 77: (0.5, 3), 81: (0.5, 3),
        85: (0.5, 3), 89: (0.5, 3), 93: (0.5, 3), 97: (0.5, 3),
        101: (0.5, 2), 105: (0.5, 2), 109: (0.5, 2), 113: (0.5, 2),
        117: (0.5, 2), 121: (0.5, 2), 125: (0.5, 2), 129: (0.5, 2),
    },
}
}

+ 0
- 31
configs/dit/inference/16x256x256.py View File

@@ -1,31 +0,0 @@
# DiT inference config: 16 frames at 256x256, text-conditioned.
num_frames = 16
fps = 8
image_size = (256, 256)

# ---- Model ----
model = {
    "type": "DiT-XL/2",
    "condition": "text",
    "from_pretrained": "PRETRAINED_MODEL",
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "dpm-solver",
    "num_sampling_steps": 20,
    "cfg_scale": 4.0,
}
dtype = "bf16"

# ---- Others ----
batch_size = 2
seed = 42
prompt_path = "./assets/texts/ucf101_labels.txt"
save_dir = "./samples/samples/"

+ 0
- 31
configs/dit/inference/1x256x256-class.py View File

@@ -1,31 +0,0 @@
# DiT inference config: single frame at 256x256, class-label conditioned.
num_frames = 1
fps = 1
image_size = (256, 256)

# ---- Model ----
model = {
    "type": "DiT-XL/2",
    "no_temporal_pos_emb": True,
    "condition": "label_1000",
    "from_pretrained": "DiT-XL-2-256x256.pt",
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "classes",
    "num_classes": 1000,
}
scheduler = {
    "type": "dpm-solver",
    "num_sampling_steps": 20,
    "cfg_scale": 4.0,
}
dtype = "bf16"

# ---- Others ----
batch_size = 2
seed = 42
prompt_path = "./assets/texts/imagenet_id.txt"
save_dir = "./samples/samples/"

+ 0
- 32
configs/dit/inference/1x256x256.py View File

@@ -1,32 +0,0 @@
# DiT inference config: single frame at 256x256, text-conditioned.
num_frames = 1
fps = 1
image_size = (256, 256)

# ---- Model ----
model = {
    "type": "DiT-XL/2",
    "no_temporal_pos_emb": True,
    "condition": "text",
    "from_pretrained": "PRETRAINED_MODEL",
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "dpm-solver",
    "num_sampling_steps": 20,
    "cfg_scale": 4.0,
}
dtype = "bf16"

# ---- Others ----
batch_size = 2
seed = 42
prompt_path = "./assets/texts/imagenet_labels.txt"
save_dir = "./samples/samples/"

+ 0
- 50
configs/dit/train/16x256x256.py View File

@@ -1,50 +0,0 @@
# DiT training config: 16 frames at 256x256.

# ---- Dataset ----
dataset = {
    "type": "VideoTextDataset",
    "data_path": None,
    "num_frames": 16,
    "frame_interval": 3,
    "image_size": (256, 256),
}

# ---- Acceleration ----
num_workers = 4
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# ---- Model ----
model = {
    "type": "DiT-XL/2",
    "from_pretrained": "DiT-XL-2-256x256.pt",
    "enable_flash_attn": True,
    "enable_layernorm_kernel": True,
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "iddpm",
    "timestep_respacing": "",
}

# ---- Others ----
seed = 42
outputs = "outputs"
wandb = False

epochs = 1000
log_every = 10
ckpt_every = 1000
load = None

batch_size = 8
lr = 2e-5
grad_clip = 1.0

+ 0
- 51
configs/dit/train/1x256x256.py View File

@@ -1,51 +0,0 @@
# DiT training config: single frame at 256x256.

# ---- Dataset ----
dataset = {
    "type": "VideoTextDataset",
    "data_path": None,
    "num_frames": 1,
    "frame_interval": 1,
    "image_size": (256, 256),
    "transform_name": "center",
}

# ---- Acceleration ----
num_workers = 4
dtype = "bf16"
grad_checkpoint = False
plugin = "zero2"
sp_size = 1

# ---- Model ----
model = {
    "type": "DiT-XL/2",
    "no_temporal_pos_emb": True,
    "enable_flash_attn": True,
    "enable_layernorm_kernel": True,
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "iddpm",
    "timestep_respacing": "",
}

# ---- Others ----
seed = 42
outputs = "outputs"
wandb = False

epochs = 1000
log_every = 10
ckpt_every = 1000
load = None

batch_size = 128
lr = 1e-4  # according to DiT repo
grad_clip = 1.0

+ 0
- 30
configs/latte/inference/16x256x256-class.py View File

@@ -1,30 +0,0 @@
# Latte inference config: 16 frames at 256x256, class-label conditioned.
num_frames = 16
fps = 8
image_size = (256, 256)

# ---- Model ----
model = {
    "type": "Latte-XL/2",
    "condition": "label_101",
    "from_pretrained": "Latte-XL-2-256x256-ucf101.pt",
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "classes",
    "num_classes": 101,
}
scheduler = {
    "type": "dpm-solver",
    "num_sampling_steps": 20,
    "cfg_scale": 4.0,
}
dtype = "bf16"

# ---- Others ----
batch_size = 2
seed = 42
prompt_path = "./assets/texts/ucf101_id.txt"
save_dir = "./samples/samples/"

+ 0
- 31
configs/latte/inference/16x256x256.py View File

@@ -1,31 +0,0 @@
# Latte inference config: 16 frames at 256x256, text-conditioned.
num_frames = 16
fps = 8
image_size = (256, 256)

# ---- Model ----
model = {
    "type": "Latte-XL/2",
    "condition": "text",
    "from_pretrained": "PRETRAINED_MODEL",
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "dpm-solver",
    "num_sampling_steps": 20,
    "cfg_scale": 4.0,
}
dtype = "bf16"

# ---- Others ----
batch_size = 2
seed = 42
prompt_path = "./assets/texts/ucf101_labels.txt"
save_dir = "./samples/samples/"

+ 0
- 49
configs/latte/train/16x256x256.py View File

@@ -1,49 +0,0 @@
# Latte training config: 16 frames at 256x256.

# ---- Dataset ----
dataset = {
    "type": "VideoTextDataset",
    "data_path": None,
    "num_frames": 16,
    "frame_interval": 3,
    "image_size": (256, 256),
}

# ---- Acceleration ----
num_workers = 4
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# ---- Model ----
model = {
    "type": "Latte-XL/2",
    "enable_flash_attn": True,
    "enable_layernorm_kernel": True,
}
vae = {
    "type": "VideoAutoencoderKL",
    "from_pretrained": "stabilityai/sd-vae-ft-ema",
}
text_encoder = {
    "type": "clip",
    "from_pretrained": "openai/clip-vit-base-patch32",
    "model_max_length": 77,
}
scheduler = {
    "type": "iddpm",
    "timestep_respacing": "",
}

# ---- Others ----
seed = 42
outputs = "outputs"
wandb = False

epochs = 1000
log_every = 10
ckpt_every = 1000
load = None

batch_size = 8
lr = 2e-5
grad_clip = 1.0

+ 0
- 64
configs/opensora-v1-1/inference/sample-ref.py View File

@@ -1,64 +0,0 @@
# Open-Sora 1.1 sampling config with reference images / videos embedded in the prompts.
num_frames = 16
frame_interval = 3
fps = 24
image_size = (240, 426)
multi_resolution = "STDiT2"

# Conditioning: prompts are given inline, so no prompt file is used.
# Each prompt is free text followed by a JSON object carrying "reference_path"
# and "mask_strategy"; several references / strategies are separated by ";".
prompt_path = None
prompt = [
    'Drone view of waves crashing against the rugged cliffs along Big Sur\'s garay point beach. {"reference_path": "https://github.com/hpcaitech/Open-Sora-Demo/blob/main/images/condition/cliff.png", "mask_strategy": "0"}',
    'A breathtaking sunrise scene.{"reference_path": "https://github.com/hpcaitech/Open-Sora-Demo/blob/main/images/condition/sunset1.png","mask_strategy": "0"}',
    'A car driving on the ocean.{"reference_path": "https://cdn.openai.com/tmp/s/interp/d0.mp4","mask_strategy": "0,0,-8,0,8"}',
    'A snowy forest.{"reference_path": "https://cdn.pixabay.com/video/2021/04/25/72171-542991404_large.mp4","mask_strategy": "0,0,0,0,15,0.8"}',
    'A breathtaking sunrise scene.{"reference_path": "https://github.com/hpcaitech/Open-Sora-Demo/blob/main/images/condition/sunset1.png;https://github.com/hpcaitech/Open-Sora-Demo/blob/main/images/condition/sunset2.png","mask_strategy": "0;0,1,0,-1,1"}',
    '|0|a white jeep equipped with a roof rack driving on a dirt road in a coniferous forest.|2|a white jeep equipped with a roof rack driving on a dirt road in the desert.|4|a white jeep equipped with a roof rack driving on a dirt road in a mountain.|6|A white jeep equipped with a roof rack driving on a dirt road in a city.|8|a white jeep equipped with a roof rack driving on a dirt road on the surface of a river.|10|a white jeep equipped with a roof rack driving on a dirt road under the lake.|12|a white jeep equipped with a roof rack flying into the sky.|14|a white jeep equipped with a roof rack driving in the universe. Earth is the background.{"reference_path": "https://cdn.openai.com/tmp/s/interp/d0.mp4", "mask_strategy": "0,0,0,0,15"}',
]

loop = 2
condition_frame_length = 4
# Fields of one comma-separated group (presumably the "mask_strategy" values in
# the prompts above — confirm against the linked docs), in order:
#   loop id          - the loop index of the condition image or video
#   reference id     - the index of the condition image or video in the reference_path
#   reference start  - the start frame of the condition image or video
#   target start     - the location to insert
#   length           - the number of frames to insert
#   edit_ratio       - the edit rate of the condition image or video
# See https://github.com/hpcaitech/Open-Sora/blob/main/docs/config.md#advanced-inference-config for more details
# See https://github.com/hpcaitech/Open-Sora/blob/main/docs/commands.md#inference-with-open-sora-11 for more examples

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained="hpcai-tech/OpenSora-STDiT-v2-stage3",
    input_sq_size=512,
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    cache_dir=None,  # "/mnt/hdd/cached_models",
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    cache_dir=None,  # "/mnt/hdd/cached_models",
    model_max_length=200,
)
scheduler = dict(
    type="iddpm",
    num_sampling_steps=100,
    cfg_scale=7.0,
    cfg_channel=3,  # or None
)
dtype = "bf16"

# Run settings
batch_size = 1
seed = 42
save_dir = "./samples/samples/"

+ 0
- 44
configs/opensora-v1-1/inference/sample.py View File

@@ -1,44 +0,0 @@
# Open-Sora 1.1 text-to-video sampling config: 16 frames at 240x426.
num_frames = 16
frame_interval = 3
fps = 24
image_size = (240, 426)
multi_resolution = "STDiT2"

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained="hpcai-tech/OpenSora-STDiT-v2-stage3",
    input_sq_size=512,
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    cache_dir=None,  # "/mnt/hdd/cached_models",
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    cache_dir=None,  # "/mnt/hdd/cached_models",
    model_max_length=200,
)
scheduler = dict(
    type="iddpm",
    num_sampling_steps=100,
    cfg_scale=7.0,
    cfg_channel=3,  # or None
)
dtype = "bf16"

# Conditioning: prompts come from a text file unless `prompt` is set.
prompt_path = "./assets/texts/t2v_samples.txt"
prompt = None  # prompt has higher priority than prompt_path

# Run settings
batch_size = 1
seed = 42
save_dir = "./samples/samples/"

+ 0
- 102
configs/opensora-v1-1/train/benchmark.py View File

@@ -1,102 +0,0 @@
# This file is only for batch size search and is not used for training.

# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)

# Accepted bucket_config layouts:
#   1. { resolution: {num_frames: (prob, batch_size)} }
#        batch_size is ignored when searching
#   2. { resolution: {num_frames: (prob, (max_batch_size, ))} }
#        batch_size is searched in [batch_size_start, max_batch_size);
#        batch_size_start is configured via CLI
#   3. { resolution: {num_frames: (prob, (min_batch_size, max_batch_size))} }
#        batch_size is searched in [min_batch_size, max_batch_size)
#   4. { resolution: {num_frames: (prob, (min_batch_size, max_batch_size, step_size))} }
#        batch_size is searched in [min_batch_size, max_batch_size) with step_size (grid search)
#   5. { resolution: {num_frames: (0.0, None)} }
#        this bucket will not be used

bucket_config = {
    # == manual search ==
    # "240p": {128: (1.0, 2)}, # 4.28s/it
    # "240p": {64: (1.0, 4)},
    # "240p": {32: (1.0, 8)},  # 4.6s/it
    # "240p": {16: (1.0, 16)},  # 4.6s/it
    # "480p": {16: (1.0, 4)},  # 4.6s/it
    # "720p": {16: (1.0, 2)},  # 5.89s/it
    # "256": {1: (1.0, 256)},  # 4.5s/it
    # "512": {1: (1.0, 96)}, # 4.7s/it
    # "512": {1: (1.0, 128)}, # 6.3s/it
    # "480p": {1: (1.0, 50)},  # 4.0s/it
    # "1024": {1: (1.0, 32)},  # 6.8s/it
    # "1024": {1: (1.0, 20)}, # 4.3s/it
    # "1080p": {1: (1.0, 16)}, # 8.6s/it
    # "1080p": {1: (1.0, 8)},  # 4.4s/it
    # == stage 2 ==
    # "240p": {
    #     16: (1.0, (2, 32)),
    #     32: (1.0, (2, 16)),
    #     64: (1.0, (2, 8)),
    #     128: (1.0, (2, 6)),
    # },
    # "256": {1: (1.0, (128, 300))},
    # "512": {1: (0.5, (64, 128))},
    # "480p": {1: (0.4, (32, 128)), 16: (0.4, (2, 32)), 32: (0.0, None)},
    # "720p": {16: (0.1, (2, 16)), 32: (0.0, None)},  # No examples now
    # "1024": {1: (0.3, (8, 64))},
    # "1080p": {1: (0.3, (2, 32))},
    # == stage 3 ==
    # NOTE(review): the `1: (20, 40)` entry has 20 in the `prob` position, unlike the
    # layouts documented above — confirm whether (prob, (20, 40)) was intended.
    "720p": {1: (20, 40), 32: (0.5, (2, 4)), 64: (0.5, (1, 1))},
}


# Acceleration
num_workers = 4
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 1000
load = None

# Optimisation (batch_size is None here; sizes come from bucket_config)
batch_size = None
lr = 2e-5
grad_clip = 1.0

+ 0
- 66
configs/opensora-v1-1/train/image.py View File

@@ -1,66 +0,0 @@
# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)
# Image-only buckets: every resolution has a single num_frames=1 entry.
bucket_config = {  # 6s/it
    "256": {1: (1.0, 256)},
    "512": {1: (1.0, 80)},
    "480p": {1: (1.0, 52)},
    "1024": {1: (1.0, 20)},
    "1080p": {1: (1.0, 8)},
}

# Acceleration
num_workers = 4
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 500
load = None

# Optimisation
batch_size = 10  # only for logging
lr = 2e-5
grad_clip = 1.0

+ 0
- 88
configs/opensora-v1-1/train/image_rflow.py View File

@@ -1,88 +0,0 @@
# Dataset
# Disabled alternative (variable-size dataset):
# dataset = dict(
#     type="VariableVideoTextDataset",
#     data_path=None,
#     num_frames=None,
#     frame_interval=3,
#     image_size=(None, None),
#     transform_name="resize_crop",
# )
# Active setting: single frames, centre transform, 256x256.
dataset = dict(
    type="VideoTextDataset",
    data_path=None,
    num_frames=1,
    frame_interval=1,
    image_size=(256, 256),
    transform_name="center",
)
# NOTE(review): bucket_config is kept although the active dataset has a fixed
# size — presumably only relevant to the disabled variable-size dataset; confirm.
bucket_config = {  # 6s/it
    "256": {1: (1.0, 256)},
    "512": {1: (1.0, 80)},
    "480p": {1: (1.0, 52)},
    "1024": {1: (1.0, 20)},
    "1080p": {1: (1.0, 8)},
}

# Acceleration
num_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
# Disabled alternative (DiT-XL/2 from a local checkpoint path):
# model = dict(
#     type="DiT-XL/2",
#     from_pretrained="/home/zhaowangbo/wangbo/PixArt-alpha/pretrained_models/PixArt-XL-2-512x512.pth",
#     # input_sq_size=512,  # pretrained model is trained on 512x512
#     enable_flash_attn=True,
#     enable_layernorm_kernel=True,
# )
# Active setting: PixArt-XL/2 without temporal position embedding.
model = dict(
    type="PixArt-XL/2",
    space_scale=1.0,
    time_scale=1.0,
    no_temporal_pos_emb=True,
    from_pretrained="PixArt-XL-2-512x512.pth",
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
# Disabled alternative (DiT-XL/2 without temporal position embedding):
# model = dict(
#     type="DiT-XL/2",
#     # space_scale=1.0,
#     # time_scale=1.0,
#     no_temporal_pos_emb=True,
#     # from_pretrained="PixArt-XL-2-512x512.pth",
#     from_pretrained="/home/zhaowangbo/wangbo/PixArt-alpha/pretrained_models/PixArt-XL-2-512x512.pth",
#     enable_flash_attn=True,
#     enable_layernorm_kernel=True,
# )
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
)
scheduler = dict(
    type="rflow",
    # timestep_respacing="",
)

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 10
log_every = 10
ckpt_every = 500
load = None

# Optimisation
batch_size = 100  # only for logging
lr = 2e-5
grad_clip = 1.0

+ 0
- 78
configs/opensora-v1-1/train/stage1.py View File

@@ -1,78 +0,0 @@
# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)
# IMG: 1024 (20%) 512 (30%) 256 (50%) drop (50%)
bucket_config = {  # 1s/it
    "144p": {1: (0.5, 48), 16: (1.0, 6), 32: (1.0, 3), 96: (1.0, 1)},
    "256": {1: (0.5, 24), 16: (0.5, 3), 48: (0.5, 1), 64: (0.0, None)},
    "240p": {16: (0.3, 2), 32: (0.3, 1), 64: (0.0, None)},
    "512": {1: (0.4, 12)},
    "1024": {1: (0.3, 3)},
}
# Mask-type ratios; the listed values add up to 1.0.
mask_ratios = {
    "identity": 0.75,
    "quarter_random": 0.025,
    "quarter_head": 0.025,
    "quarter_tail": 0.025,
    "quarter_head_tail": 0.05,
    "image_random": 0.025,
    "image_head": 0.025,
    "image_tail": 0.025,
    "image_head_tail": 0.05,
}

# Acceleration
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = False
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 500
load = None

# Optimisation (batch_size is None here; sizes come from bucket_config)
batch_size = None
lr = 2e-5
grad_clip = 1.0

+ 0
- 80
configs/opensora-v1-1/train/stage2.py View File

@@ -1,80 +0,0 @@
# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)
# Buckets keyed by resolution, then by number of frames.
bucket_config = {  # 7s/it
    "144p": {1: (1.0, 48), 16: (1.0, 17), 32: (1.0, 9), 64: (1.0, 4), 128: (1.0, 1)},
    "256": {1: (0.8, 254), 16: (0.5, 17), 32: (0.5, 9), 64: (0.5, 4), 128: (0.5, 1)},
    "240p": {1: (0.1, 20), 16: (0.9, 17), 32: (0.8, 9), 64: (0.8, 4), 128: (0.8, 2)},
    "512": {1: (0.5, 86), 16: (0.2, 4), 32: (0.2, 2), 64: (0.2, 1), 128: (0.0, None)},
    "480p": {1: (0.4, 54), 16: (0.4, 4), 32: (0.0, None)},
    "720p": {1: (0.1, 20), 16: (0.1, 2), 32: (0.0, None)},
    "1024": {1: (0.3, 20)},
    "1080p": {1: (0.4, 8)},
}
# Mask-type ratios; the listed values add up to 1.0.
mask_ratios = {
    "identity": 0.75,
    "quarter_random": 0.025,
    "quarter_head": 0.025,
    "quarter_tail": 0.025,
    "quarter_head_tail": 0.05,
    "image_random": 0.025,
    "image_head": 0.025,
    "image_tail": 0.025,
    "image_head_tail": 0.05,
}

# Acceleration
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 500
load = None

# Optimisation (batch_size is None here; sizes come from bucket_config)
batch_size = None
lr = 2e-5
grad_clip = 1.0

+ 0
- 80
configs/opensora-v1-1/train/stage3.py View File

@@ -1,80 +0,0 @@
# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)
# Buckets keyed by resolution, then by number of frames.
bucket_config = {  # 13s/it
    "144p": {1: (1.0, 200), 16: (1.0, 36), 32: (1.0, 18), 64: (1.0, 9), 128: (1.0, 4)},
    "256": {1: (0.8, 200), 16: (0.5, 22), 32: (0.5, 11), 64: (0.5, 6), 128: (0.8, 4)},
    "240p": {1: (0.8, 200), 16: (0.5, 22), 32: (0.5, 10), 64: (0.5, 6), 128: (0.5, 3)},
    "360p": {1: (0.5, 120), 16: (0.5, 9), 32: (0.5, 4), 64: (0.5, 2), 128: (0.5, 1)},
    "512": {1: (0.5, 120), 16: (0.5, 9), 32: (0.5, 4), 64: (0.5, 2), 128: (0.8, 1)},
    "480p": {1: (0.4, 80), 16: (0.6, 6), 32: (0.6, 3), 64: (0.6, 1), 128: (0.0, None)},
    "720p": {1: (0.4, 40), 16: (0.6, 3), 32: (0.6, 1), 96: (0.0, None)},
    "1024": {1: (0.3, 40)},
}
# Mask-type ratios; the listed values add up to 1.0.
mask_ratios = {
    "identity": 0.75,
    "quarter_random": 0.025,
    "quarter_head": 0.025,
    "quarter_tail": 0.025,
    "quarter_head_tail": 0.05,
    "image_random": 0.025,
    "image_head": 0.025,
    "image_tail": 0.025,
    "image_head_tail": 0.05,
}

# Acceleration
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 500
load = None

# Optimisation (batch_size is None here; sizes come from bucket_config)
batch_size = None
lr = 2e-5
grad_clip = 1.0

+ 0
- 68
configs/opensora-v1-1/train/video.py View File

@@ -1,68 +0,0 @@
# Dataset: variable-size samples (no fixed frame count or image size).
dataset = dict(
    type="VariableVideoTextDataset",
    data_path=None,
    num_frames=None,
    frame_interval=3,
    image_size=(None, None),
    transform_name="resize_crop",
)
# Buckets keyed by resolution, then by number of frames.
bucket_config = {  # 6s/it
    "240p": {16: (1.0, 16), 32: (1.0, 8), 64: (1.0, 4), 128: (1.0, 2)},
    "256": {1: (1.0, 256)},
    "512": {1: (0.5, 80)},
    "480p": {1: (0.4, 52), 16: (0.4, 4), 32: (0.0, None)},
    "720p": {16: (0.1, 2), 32: (0.0, None)},  # No examples now
    "1024": {1: (0.3, 20)},
    "1080p": {1: (0.3, 8)},
}

# Acceleration
num_workers = 4
num_bucket_build_workers = 16
dtype = "bf16"
grad_checkpoint = True
plugin = "zero2"
sp_size = 1

# Model components
model = dict(
    type="STDiT2-XL/2",
    from_pretrained=None,
    input_sq_size=512,  # pretrained model is trained on 512x512
    qk_norm=True,
    qk_norm_legacy=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="stabilityai/sd-vae-ft-ema",
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=200,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="iddpm", timestep_respacing="")

# Run bookkeeping
seed = 42
outputs = "outputs"
wandb = False

# Schedule
epochs = 1000
log_every = 10
ckpt_every = 500
load = None

# Optimisation
batch_size = 10  # only for logging
lr = 2e-5
grad_clip = 1.0

+ 0
- 42
configs/opensora-v1-2/inference/sample.py View File

@@ -1,42 +0,0 @@
# Open-Sora 1.2 sampling config: 51 frames, "240p" at 9:16, 24 fps in and out.
resolution = "240p"
aspect_ratio = "9:16"
num_frames = 51
fps = 24
frame_interval = 1
save_fps = 24

# Run settings
save_dir = "./samples/samples/"
seed = 42
batch_size = 1
# NOTE(review): value is "STDiT2" although the model below is STDiT3-XL/2 —
# presumably a shared mode name; confirm.
multi_resolution = "STDiT2"
dtype = "bf16"
condition_frame_length = 5
align = 5

# Model components
model = dict(
    type="STDiT3-XL/2",
    from_pretrained="hpcai-tech/OpenSora-STDiT-v3",
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(type="t5", from_pretrained="DeepFloyd/t5-v1_1-xxl", model_max_length=300)
scheduler = dict(
    type="rflow",
    use_timestep_transform=True,
    num_sampling_steps=30,
    cfg_scale=7.0,
)

# Extra conditioning scores (flow is left unset).
aes = 6.5
flow = None

+ 0
- 44
configs/opensora-v1-2/inference/sample_hf.py View File

@@ -1,44 +0,0 @@
# Open-Sora 1.2 sampling config with force_huggingface=True on the model and VAE.
resolution = "240p"
aspect_ratio = "9:16"
num_frames = 51
fps = 24
frame_interval = 1
save_fps = 24

# Run settings
save_dir = "./samples/samples/"
seed = 42
batch_size = 1
multi_resolution = "STDiT2"
dtype = "bf16"
condition_frame_length = 5
align = 5

# Model components
model = dict(
    type="STDiT3-XL/2",
    from_pretrained="hpcai-tech/OpenSora-STDiT-v3",
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
    force_huggingface=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
    force_huggingface=True,
)
text_encoder = dict(type="t5", from_pretrained="DeepFloyd/t5-v1_1-xxl", model_max_length=300)
scheduler = dict(
    type="rflow",
    use_timestep_transform=True,
    num_sampling_steps=30,
    cfg_scale=7.0,
)

# Extra conditioning scores (flow is left unset).
aes = 6.5
flow = None

+ 0
- 117
configs/opensora-v1-2/misc/bs.py View File

@@ -1,117 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# == Config 1: Webvid ==
# base: (512, 408), 12s/it
grad_checkpoint = True
base = ("512", "408")
base_step_time = 12
# NOTE(review): entries here are 2-tuples of integers; their meaning is not
# documented in this file — confirm against the script that consumes them.
bucket_config = {
    "144p": {
        1: (475, 0),
        51: (51, 0),
        102: (27, 0),
        204: (13, 0),
        408: (6, 0),
    },
    # ---
    "240p": {
        1: (297, 200),  # 8.25
        51: (20, 0),
        102: (10, 0),
        204: (5, 0),
        408: (2, 0),
    },
    # ---
    "512": {
        1: (141, 0),
        51: (8, 0),
        102: (4, 0),
        204: (2, 0),
        408: (1, 0),
    },
    # ---
    "480p": {
        1: (89, 0),
        51: (5, 0),
        102: (2, 0),
        204: (1, 0),
    },
    # ---
    "1024": {
        1: (36, 0),
        51: (1, 0),
    },
    # ---
    "1080p": {1: (5, 0)},
    # ---
    "2048": {1: (5, 0)},
}

# == Config 1 ==
# base: (512, 408), 16s/it
# NOTE(review): this second "Config 1" header has no settings under it —
# looks like a leftover; confirm before removing.

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Mask settings
# NOTE(review): the "intepolate" spelling is a runtime key — keep it unless the
# code that reads this mapping is renamed too.
mask_ratios = {
    "random": 0.2,
    "intepolate": 0.01,
    "quarter_random": 0.01,
    "quarter_head": 0.01,
    "quarter_tail": 0.01,
    "quarter_head_tail": 0.01,
    "image_random": 0.05,
    "image_head": 0.1,
    "image_tail": 0.05,
    "image_head_tail": 0.05,
}

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 500

# Optimization settings
load = None
grad_clip = 1.0
lr = 2e-4
ema_decay = 0.99
adam_eps = 1e-15

+ 0
- 49
configs/opensora-v1-2/misc/eval_loss.py View File

@@ -1,49 +0,0 @@
# Loss-evaluation config: loader / precision settings and number of evaluated timesteps.
num_workers = 8
dtype = "bf16"
seed = 42
num_eval_timesteps = 10

# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# Buckets keyed by resolution, then by number of frames; the first tuple slot is
# None throughout and the second is an integer.
bucket_config = {
    "144p": {1: (None, 100), 51: (None, 30), 102: (None, 20), 204: (None, 8), 408: (None, 4)},
    # ---
    "240p": {1: (None, 100), 51: (None, 24), 102: (None, 12), 204: (None, 4), 408: (None, 2)},
    # ---
    "360p": {1: (None, 60), 51: (None, 12), 102: (None, 6), 204: (None, 2), 408: (None, 1)},
    # ---
    "480p": {1: (None, 40), 51: (None, 6), 102: (None, 3), 204: (None, 1)},
    # ---
    "720p": {1: (None, 20), 51: (None, 2), 102: (None, 1)},
    # ---
    "1080p": {1: (None, 10)},
    # ---
    "2048": {1: (None, 5)},
}

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
    local_files_only=True,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    local_files_only=True,
)
scheduler = dict(type="rflow")

+ 0
- 62
configs/opensora-v1-2/misc/extract.py View File

@@ -1,62 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# webvid
# Buckets keyed by resolution, then by number of frames. In some entries the
# first tuple slot is itself a pair, e.g. ((1.0, 0.33), 27).
bucket_config = {  # 12s/it
    "144p": {1: (1.0, 475), 51: (1.0, 51), 102: ((1.0, 0.33), 27), 204: ((1.0, 0.1), 13), 408: ((1.0, 0.1), 6)},
    # ---
    "256": {1: (0.4, 297), 51: (0.5, 20), 102: ((0.5, 0.33), 10), 204: ((0.5, 0.1), 5), 408: ((0.5, 0.1), 2)},
    "240p": {1: (0.3, 297), 51: (0.4, 20), 102: ((0.4, 0.33), 10), 204: ((0.4, 0.1), 5), 408: ((0.4, 0.1), 2)},
    # ---
    "360p": {1: (0.2, 141), 51: (0.15, 8), 102: ((0.15, 0.33), 4), 204: ((0.15, 0.1), 2), 408: ((0.15, 0.1), 1)},
    "512": {1: (0.1, 141)},
    # ---
    "480p": {1: (0.1, 89)},
    # ---
    "720p": {1: (0.05, 36)},
    "1024": {1: (0.05, 36)},
    # ---
    "1080p": {1: (0.1, 5)},
    # ---
    "2048": {1: (0.1, 5)},
}

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
seed = 42
outputs = "outputs"
wandb = False


# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained="hpcai-tech/OpenSora-STDiT-v3",
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=32,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
    local_files_only=True,
)

# Feature extraction settings
save_text_features = True
save_compressed_text_features = True
bin_size = 250  # 1GB, 4195 bins
log_time = False

+ 0
- 94
configs/opensora-v1-2/misc/feat.py View File

@@ -1,94 +0,0 @@
# Dataset settings
dataset = dict(
    type="VariableVideoTextDataset",
    transform_name="resize_crop",
    dummy_text_feature=True,
)

# webvid
# Buckets keyed by resolution, then by number of frames. In some entries the
# first tuple slot is itself a pair, e.g. ((1.0, 0.33), 27).
bucket_config = {  # 12s/it
    "144p": {1: (1.0, 475), 51: (1.0, 51), 102: ((1.0, 0.33), 27), 204: ((1.0, 0.1), 13), 408: ((1.0, 0.1), 6)},
    # ---
    "256": {1: (0.4, 297), 51: (0.5, 20), 102: ((0.5, 0.33), 10), 204: ((0.5, 0.1), 5), 408: ((0.5, 0.1), 2)},
    "240p": {1: (0.3, 297), 51: (0.4, 20), 102: ((0.4, 0.33), 10), 204: ((0.4, 0.1), 5), 408: ((0.4, 0.1), 2)},
    # ---
    "360p": {1: (0.2, 141), 51: (0.15, 8), 102: ((0.15, 0.33), 4), 204: ((0.15, 0.1), 2), 408: ((0.15, 0.1), 1)},
    "512": {1: (0.1, 141)},
    # ---
    "480p": {1: (0.1, 89)},
    # ---
    "720p": {1: (0.05, 36)},
    "1024": {1: (0.05, 36)},
    # ---
    "1080p": {1: (0.1, 5)},
    # ---
    "2048": {1: (0.1, 5)},
}

grad_checkpoint = True

load_text_features = True

# Acceleration settings
num_workers = 0
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
    freeze_y_embedder=True,
    skip_y_embedder=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
    local_files_only=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Mask settings
# NOTE(review): the "intepolate" spelling is a runtime key — keep it unless the
# code that reads this mapping is renamed too.
mask_ratios = {
    "random": 0.2,
    "intepolate": 0.01,
    "quarter_random": 0.01,
    "quarter_head": 0.01,
    "quarter_tail": 0.01,
    "quarter_head_tail": 0.01,
    "image_random": 0.05,
    "image_head": 0.1,
    "image_tail": 0.05,
    "image_head_tail": 0.05,
}

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 1

# Optimization settings
load = None
grad_clip = 1.0
lr = 2e-4
ema_decay = 0.99
adam_eps = 1e-15

+ 0
- 83
configs/opensora-v1-2/train/adapt.py View File

@@ -1,83 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")
# Buckets keyed by resolution, then by number of frames.
bucket_config = {  # 2s/it
    "144p": {1: (0.5, 48), 34: (1.0, 2), 51: (1.0, 4), 102: (1.0, 2), 204: (1.0, 1)},
    # ---
    "256": {1: (0.6, 20), 34: (0.5, 2), 51: (0.5, 1), 68: (0.5, 1), 136: (0.0, None)},
    "240p": {1: (0.6, 20), 34: (0.5, 2), 51: (0.5, 1), 68: (0.5, 1), 136: (0.0, None)},
    # ---
    "360p": {1: (0.5, 8), 34: (0.2, 1), 102: (0.0, None)},
    "512": {1: (0.5, 8), 34: (0.2, 1), 102: (0.0, None)},
    # ---
    "480p": {1: (0.2, 4), 17: (0.3, 1), 68: (0.0, None)},
    # ---
    "720p": {1: (0.1, 2)},
    "1024": {1: (0.1, 2)},
    # ---
    "1080p": {1: (0.1, 1)},
}
grad_checkpoint = False

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Mask settings
# NOTE(review): the "intepolate" spelling is a runtime key — keep it unless the
# code that reads this mapping is renamed too.
mask_ratios = {
    "random": 0.2,
    "intepolate": 0.01,
    "quarter_random": 0.01,
    "quarter_head": 0.01,
    "quarter_tail": 0.01,
    "quarter_head_tail": 0.01,
    "image_random": 0.05,
    "image_head": 0.1,
    "image_tail": 0.05,
    "image_head_tail": 0.05,
}

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 500

# Optimization settings
load = None
grad_clip = 1.0
lr = 1e-4
ema_decay = 0.99
adam_eps = 1e-15

+ 0
- 58
configs/opensora-v1-2/train/demo_360p.py View File

@@ -1,58 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# webvid
# Single bucket: "360p" with 102 frames.
bucket_config = {"360p": {102: (1.0, 1)}}
grad_checkpoint = True

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
    freeze_y_embedder=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 200

# Optimization settings
load = None
grad_clip = 1.0
lr = 1e-4
ema_decay = 0.99
adam_eps = 1e-15
warmup_steps = 1000

+ 0
- 58
configs/opensora-v1-2/train/demo_480p.py View File

@@ -1,58 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# webvid
# Single bucket: "480p" with 51 frames.
bucket_config = {"480p": {51: (0.5, 5)}}
grad_checkpoint = True

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
    freeze_y_embedder=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 200

# Optimization settings
load = None
grad_clip = 1.0
lr = 1e-4
ema_decay = 0.99
adam_eps = 1e-15
warmup_steps = 1000

+ 0
- 110
configs/opensora-v1-2/train/stage1.py View File

@@ -1,110 +0,0 @@
# Dataset settings
dataset = dict(type="VariableVideoTextDataset", transform_name="resize_crop")

# Backup bucket table (disabled), kept for reference:
# bucket_config = {  # 20s/it
#     "144p": {1: (1.0, 100), 51: (1.0, 30), 102: (1.0, 20), 204: (1.0, 8), 408: (1.0, 4)},
#     # ---
#     "256": {1: (0.5, 100), 51: (0.3, 24), 102: (0.3, 12), 204: (0.3, 4), 408: (0.3, 2)},
#     "240p": {1: (0.5, 100), 51: (0.3, 24), 102: (0.3, 12), 204: (0.3, 4), 408: (0.3, 2)},
#     # ---
#     "360p": {1: (0.5, 60), 51: (0.3, 12), 102: (0.3, 6), 204: (0.3, 2), 408: (0.3, 1)},
#     "512": {1: (0.5, 60), 51: (0.3, 12), 102: (0.3, 6), 204: (0.3, 2), 408: (0.3, 1)},
#     # ---
#     "480p": {1: (0.5, 40), 51: (0.3, 6), 102: (0.3, 3), 204: (0.3, 1), 408: (0.0, None)},
#     # ---
#     "720p": {1: (0.2, 20), 51: (0.3, 2), 102: (0.3, 1), 204: (0.0, None)},
#     "1024": {1: (0.1, 20), 51: (0.3, 2), 102: (0.3, 1), 204: (0.0, None)},
#     # ---
#     "1080p": {1: (0.1, 10)},
#     # ---
#     "2048": {1: (0.1, 5)},
# }

# webvid
# Active bucket table, keyed by resolution, then by number of frames. In some
# entries the first tuple slot is itself a pair, e.g. ((1.0, 0.33), 27).
bucket_config = {  # 12s/it
    "144p": {1: (1.0, 475), 51: (1.0, 51), 102: ((1.0, 0.33), 27), 204: ((1.0, 0.1), 13), 408: ((1.0, 0.1), 6)},
    # ---
    "256": {1: (0.4, 297), 51: (0.5, 20), 102: ((0.5, 0.33), 10), 204: ((0.5, 0.1), 5), 408: ((0.5, 0.1), 2)},
    "240p": {1: (0.3, 297), 51: (0.4, 20), 102: ((0.4, 0.33), 10), 204: ((0.4, 0.1), 5), 408: ((0.4, 0.1), 2)},
    # ---
    "360p": {1: (0.2, 141), 51: (0.15, 8), 102: ((0.15, 0.33), 4), 204: ((0.15, 0.1), 2), 408: ((0.15, 0.1), 1)},
    "512": {1: (0.1, 141)},
    # ---
    "480p": {1: (0.1, 89)},
    # ---
    "720p": {1: (0.05, 36)},
    "1024": {1: (0.05, 36)},
    # ---
    "1080p": {1: (0.1, 5)},
    # ---
    "2048": {1: (0.1, 5)},
}

grad_checkpoint = True

# Acceleration settings
num_workers = 8
num_bucket_build_workers = 16
dtype = "bf16"
plugin = "zero2"

# Model settings
model = dict(
    type="STDiT3-XL/2",
    from_pretrained=None,
    qk_norm=True,
    enable_flash_attn=True,
    enable_layernorm_kernel=True,
    freeze_y_embedder=True,
)
vae = dict(
    type="OpenSoraVAE_V1_2",
    from_pretrained="hpcai-tech/OpenSora-VAE-v1.2",
    micro_frame_size=17,
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",
    model_max_length=300,
    shardformer=True,
)
scheduler = dict(type="rflow", use_timestep_transform=True, sample_method="logit-normal")

# Mask settings
# NOTE(review): the "intepolate" spelling is a runtime key — keep it unless the
# code that reads this mapping is renamed too.
mask_ratios = {
    "random": 0.05,
    "intepolate": 0.005,
    "quarter_random": 0.005,
    "quarter_head": 0.005,
    "quarter_tail": 0.005,
    "quarter_head_tail": 0.005,
    "image_random": 0.025,
    "image_head": 0.05,
    "image_tail": 0.025,
    "image_head_tail": 0.025,
}

# Log settings
seed = 42
outputs = "outputs"
wandb = False
epochs = 1000
log_every = 10
ckpt_every = 200

# Optimization settings
load = None
grad_clip = 1.0
lr = 1e-4
ema_decay = 0.99
adam_eps = 1e-15
warmup_steps = 1000

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save
Baidu
map