# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL)
# Copyright (C) 2023 NEC Laboratories America, Inc. ("NECLA"). All rights reserved.
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-License-Identifier: BSD-2-Clause
#
# Code adapted from https://github.com/nihaomiao/CVPR23_LFDM/tree/main/demo -- BSD-2-Clause License
# Demo for TI2V-Zero
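#
# This demo turns a single image into a video autoregressively: the input
# image is repeated to form a conditioning clip, and each pipeline call
# synthesizes one new frame conditioned on a sliding window of the most
# recent frames (DDPM inversion plus temporal resampling, per TI2V-Zero).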
import os
from copy import deepcopy
import imageio
import numpy as np
import torch
from PIL import Image
from modelscope_t2v_pipeline import TextToVideoSynthesisPipeline, tensor2vid
from util import center_crop
# PARAMETER SETTINGS
# Choose your GPU device. This must be set before the first CUDA call below,
# otherwise it has no effect on which device torch sees.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

print(torch.cuda.is_available())
print("Num GPUs available: ", torch.cuda.device_count())
# Text input examples
input = "A mesmerizing display of the northern lights in the Arctic."
# input = "A panda is dancing in the Times Square."
# Image input examples
img_path = "examples/northern_lights_sd.jpg"
# img_path = "./examples/panda_dancing_sd.png"
# After running initialization.py, set the config path to your ModelScope path
config = {"model": "./weights", "device": "gpu"}
# Set your output path
output_dir = "./example-video"
output_img_dir = "./example-image"
os.makedirs(output_dir, exist_ok=True)
os.makedirs(output_img_dir, exist_ok=True)
# Set parameters for temporal resampling and DDIM
resample_iter = 4
ddim_step = 10
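# resample_iter is the number of temporal-resampling passes per denoising
# step (higher tends to improve temporal consistency at extra compute cost);
# ddim_step is the number of DDIM sampling steps.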
# Set the number of new frames
NUM_NEW_FRAMES = 15
print("#new_frame:", NUM_NEW_FRAMES)
# Set the number of generated videos
NUM_SAMPLES = 1
postfix = "-resample%02d-s%02d-mean%04d" % (resample_iter, ddim_step, np.random.randint(low=0, high=10000))
add_vid_cond = True
use_ddpm_inversion = True
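# add_vid_cond conditions each call on the previous frames; use_ddpm_inversion
# initializes sampling from a DDPM-inverted (noised) version of the
# conditioning video rather than from pure noise.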
print(img_path)
print(input, postfix)
print("video_cond:", add_vid_cond, "ddpm_inv:", use_ddpm_inversion, "#resample:", resample_iter)
# default parameters
IMG_H = 256
IMG_W = 256
NUM_FRAMES = 16
NUM_COND_FRAMES = 15
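# The backbone generates NUM_FRAMES-frame clips; NUM_COND_FRAMES of them are
# conditioning frames, so each call below contributes exactly one new frame.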
# read image
first_img_npy = imageio.v2.imread(img_path)
# crop image
first_img_npy = center_crop(first_img_npy)
# resize image (PIL's resize expects (width, height); equal for a square crop)
first_img_npy = np.asarray(Image.fromarray(first_img_npy).resize((IMG_W, IMG_H)))
# repeat the image to form the initial conditioning video
first_img_npy_list = [first_img_npy for _ in range(NUM_COND_FRAMES)]
cond_vid_npy = np.stack(first_img_npy_list, axis=0)  # (NUM_COND_FRAMES, H, W, 3)
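
# build the pipeline and encode the text prompt once; both are reused below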
t2v_pipeline = TextToVideoSynthesisPipeline(**config)
processed_input = t2v_pipeline.preprocess([input])
for sample_idx in range(NUM_SAMPLES):
    newpostfix = postfix + "-%02d" % sample_idx
    vid_tensor = t2v_pipeline.preprocess_vid(deepcopy(cond_vid_npy))
    new_output_tensor = vid_tensor.clone().detach().cpu()
    output_filename = input.replace(" ", "_")[:-1] + "%s-%02d.gif" % (newpostfix, NUM_NEW_FRAMES)
    video_name = os.path.basename(output_filename)[:-4]
    save_img_dir = os.path.join(output_img_dir, video_name)
    os.makedirs(save_img_dir, exist_ok=True)
    # save the input image as frame 0
    img_name = video_name + "%03d.jpg" % 0
    img_path = os.path.join(save_img_dir, img_name)
    imageio.v2.imsave(img_path, first_img_npy)
    # image-to-video generation
    for i in range(NUM_NEW_FRAMES):
        print("i:", i, input, newpostfix)
        output = t2v_pipeline.forward_with_vid_resample(
            processed_input,
            vid=vid_tensor,
            add_vid_cond=add_vid_cond,
            use_ddpm_inversion=use_ddpm_inversion,
            resample_iter=resample_iter,
            ddim_step=ddim_step,
            guide_scale=9.0,
        )
        # decode only the newest latent frame back to pixel space
        with torch.no_grad():
            new_frame = t2v_pipeline.model.autoencoder.decode(output[:, :, -1].cuda())
            new_frame = new_frame.data.cpu().unsqueeze(dim=2)
        img_npy = tensor2vid(new_frame.clone().detach())[0]
        img_name = video_name + "%03d.jpg" % (i + 1)
        img_path = os.path.join(save_img_dir, img_name)
        imageio.v2.imsave(img_path, img_npy)
        # append the new frame and slide the conditioning window forward by one
        new_output_tensor = torch.cat((new_output_tensor, new_frame), dim=2)
        vid_tensor = new_output_tensor[:, :, (i + 1) :]
        assert vid_tensor.size(2) == NUM_COND_FRAMES
    # drop the duplicated conditioning frames, keeping one copy of the input
    # frame followed by the generated frames, and write the GIF
    output_video = t2v_pipeline.postprocess(
        new_output_tensor[:, :, (NUM_COND_FRAMES - 1) :], os.path.join(output_dir, output_filename)
    )
    print("saving to", save_img_dir)
    print("saving video to", os.path.join(output_dir, output_filename))
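
# Usage sketch (assumes the ModelScope weights were downloaded to ./weights by
# initialization.py, as noted above, and that a CUDA GPU is available):
#   python demo_img2vid.py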