TF-T2V (CVPR 2024)
This repo includes the checkpoints for TF-T2V and VideoLCM.
"models/midasv3dptlarge.pth": the checkpoint for depth extraction. "models/openclippytorchmodel.bin": the checkpoint for clip embedding. "models/sketchsimplificationgan.pth": the checkpoint for sketch extraction. "models/srstep110000ema.pth": the checkpoint for super-resolution model. "models/table5pidinet.pth": the checkpoint for sketch extraction. "models/tft2vt2v32framesnonema560000.pth": the checkpoint for text-to-video generation in TF-T2V (32 frames). "models/tft2vt2vnonema512000.pth": the checkpoint for text-to-video generation in TF-T2V (16 frames). "models/tft2vvcomposernonema254000.pth": the checkpoint for compositional video synthesis in TF-T2V (448x256 resulotion). "models/tft2vvcomposernonema270000896x512.pth": the checkpoint for compositional video synthesis in TF-T2V (896x512 resulotion). "models/v2-1512-ema-pruned.ckpt": the checkpoint for Stable Diffusion. "models/videolcmt2vnonema544000.pth": the checkpoint for text-to-video generation in VideoLCM. "models/videolcmvcomposernonema_914000.pth": the checkpoint for compositional video synthesis in VideoLCM.
BibTeX
If this repo is useful to you, please cite the corresponding technical papers.
@article{2023videocomposer,
title={VideoComposer: Compositional Video Synthesis with Motion Controllability},
author={Wang, Xiang and Yuan, Hangjie and Zhang, Shiwei and Chen, Dayou and Wang, Jiuniu and Zhang, Yingya and Shen, Yujun and Zhao, Deli and Zhou, Jingren},
journal={arXiv preprint arXiv:2306.02018},
year={2023}
}
@article{2023i2vgenxl,
title={I2VGen-XL: High-Quality Image-to-Video Synthesis via Cascaded Diffusion Models},
author={Zhang, Shiwei and Wang, Jiayu and Zhang, Yingya and Zhao, Kang and Yuan, Hangjie and Qing, Zhiwu and Wang, Xiang and Zhao, Deli and Zhou, Jingren},
journal={arXiv preprint arXiv:2311.04145},
year={2023}
}
@article{wang2023modelscope,
title={ModelScope Text-to-Video Technical Report},
author={Wang, Jiuniu and Yuan, Hangjie and Chen, Dayou and Zhang, Yingya and Wang, Xiang and Zhang, Shiwei},
journal={arXiv preprint arXiv:2308.06571},
year={2023}
}
@article{dreamvideo,
title={DreamVideo: Composing Your Dream Videos with Customized Subject and Motion},
author={Wei, Yujie and Zhang, Shiwei and Qing, Zhiwu and Yuan, Hangjie and Liu, Zhiheng and Liu, Yu and Zhang, Yingya and Zhou, Jingren and Shan, Hongming},
journal={arXiv preprint arXiv:2312.04433},
year={2023}
}
@article{qing2023higen,
title={Hierarchical Spatio-temporal Decoupling for Text-to-Video Generation},
author={Qing, Zhiwu and Zhang, Shiwei and Wang, Jiayu and Wang, Xiang and Wei, Yujie and Zhang, Yingya and Gao, Changxin and Sang, Nong},
journal={arXiv preprint arXiv:2312.04483},
year={2023}
}
@article{wang2023videolcm,
title={VideoLCM: Video Latent Consistency Model},
author={Wang, Xiang and Zhang, Shiwei and Zhang, Han and Liu, Yu and Zhang, Yingya and Gao, Changxin and Sang, Nong},
journal={arXiv preprint arXiv:2312.09109},
year={2023}
}
@article{ma2023dreamtalk,
title={DreamTalk: When Expressive Talking Head Generation Meets Diffusion Probabilistic Models},
author={Ma, Yifeng and Zhang, Shiwei and Wang, Jiayu and Wang, Xiang and Zhang, Yingya and Deng, Zhidong},
journal={arXiv preprint arXiv:2312.09767},
year={2023}
}
@article{2023InstructVideo,
title={InstructVideo: Instructing Video Diffusion Models with Human Feedback},
author={Yuan, Hangjie and Zhang, Shiwei and Wang, Xiang and Wei, Yujie and Feng, Tao and Pan, Yining and Zhang, Yingya and Liu, Ziwei and Albanie, Samuel and Ni, Dong},
journal={arXiv preprint arXiv:2312.12490},
year={2023}
}
@article{TFT2V,
title={A Recipe for Scaling up Text-to-Video Generation with Text-free Videos},
author={Wang, Xiang and Zhang, Shiwei and Yuan, Hangjie and Qing, Zhiwu and Gong, Biao and Zhang, Yingya and Shen, Yujun and Gao, Changxin and Sang, Nong},
journal={arXiv preprint arXiv:2312.15770},
year={2023}
}