osanseviero fffiloni committed on
Commit
6783bbb
0 Parent(s):

Duplicate from fffiloni/RAFT


Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>

Files changed (9)
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +178 -0
  4. basket1.jpg +0 -0
  5. basket2.jpg +0 -0
  6. frame1.jpg +0 -0
  7. frame2.jpg +0 -0
  8. requirements.txt +13 -0
  9. spacex.mp4 +0 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: RAFT Optical Flow
+ emoji: 😻
+ colorFrom: green
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.19.1
+ app_file: app.py
+ pinned: false
+ duplicated_from: fffiloni/RAFT
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,178 @@
+ import gradio as gr
+
+ """
+ =====================================================
+ Optical Flow: Predicting movement with the RAFT model
+ =====================================================
+
+ Optical flow is the task of predicting movement between two images, usually two
+ consecutive frames of a video. Optical flow models take two images as input, and
+ predict a flow: the flow indicates the displacement of every single pixel in the
+ first image, and maps it to its corresponding pixel in the second image. Flows
+ are (2, H, W)-dimensional tensors, where the first axis corresponds to the
+ predicted horizontal and vertical displacements.
+
+ The following example illustrates how torchvision can be used to predict flows
+ using our implementation of the RAFT model. We will also see how to convert the
+ predicted flows to RGB images for visualization.
+ """
+
+ import cv2
+ import numpy as np
+ import os
+ import sys
+ import torch
+ from PIL import Image
+ import matplotlib.pyplot as plt
+ import torchvision.transforms.functional as F
+ from torchvision.io import read_video, read_image, ImageReadMode
+ from torchvision.models.optical_flow import Raft_Large_Weights
+ from torchvision.models.optical_flow import raft_large
+ from torchvision.io import write_jpeg
+ import torchvision.transforms as T
+
+ import tempfile
+ from pathlib import Path
+ from urllib.request import urlretrieve
+
+ from scipy.interpolate import LinearNDInterpolator
+ from imageio import imread, imwrite
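As a quick illustration of the (2, H, W) convention described in the module docstring above — a minimal sketch, not part of the committed file: flow[0, y, x] is the horizontal displacement of the pixel at (y, x) and flow[1, y, x] its vertical displacement.

import torch

flow = torch.zeros(2, 4, 4)   # dummy flow for a 4x4 image (illustration only)
flow[0, 1, 2] = 3.0           # pixel (y=1, x=2) moves 3 px to the right...
flow[1, 1, 2] = -1.0          # ...and 1 px up

y, x = 1, 2
new_x = x + flow[0, y, x].item()
new_y = y + flow[1, y, x].item()
print(new_x, new_y)  # 5.0 0.0 -> predicted position in the second frame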
+
+
+
+ def write_flo(flow, filename):
+     """
+     Write optical flow in Middlebury .flo format
+
+     :param flow: optical flow map
+     :param filename: optical flow file path to be saved
+     :return: None
+
+     from https://github.com/liruoteng/OpticalFlowToolkit/
+
+     """
+     # forcing conversion to float32 precision
+     flow = flow.cpu().data.numpy()
+     flow = flow.astype(np.float32)
+     # RAFT flows are (2, H, W); the .flo format stores (H, W, 2) with the
+     # horizontal (u) and vertical (v) components interleaved per pixel.
+     if flow.ndim == 3 and flow.shape[0] == 2:
+         flow = np.transpose(flow, (1, 2, 0))
+     f = open(filename, 'wb')
+     magic = np.array([202021.25], dtype=np.float32)
+     (height, width) = flow.shape[0:2]
+     w = np.array([width], dtype=np.int32)
+     h = np.array([height], dtype=np.int32)
+     magic.tofile(f)
+     w.tofile(f)
+     h.tofile(f)
+     flow.tofile(f)
+     f.close()
+
+
+
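For completeness, a matching reader — a hedged sketch of a hypothetical read_flo helper, not part of this Space, that inverts the layout written above:

def read_flo(filename):
    # Parse the Middlebury .flo layout: magic, width, height, then
    # 2 * width * height float32 values, (u, v) interleaved per pixel.
    with open(filename, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        assert magic == 202021.25, "invalid .flo magic number"
        width = int(np.fromfile(f, np.int32, count=1)[0])
        height = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * width * height)
    return data.reshape(height, width, 2)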
+ def infer(frameA, frameB):
+     #video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
+     #video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
+     #_ = urlretrieve(video_url, video_path)
+
+     #frames, _, _ = read_video(str("./spacex.mp4"), output_format="TCHW")
+     #print(f"FRAME BEFORE stack: {frames[100]}")
+
+     input_frame_1 = read_image(str(frameA), ImageReadMode.UNCHANGED)
+     print(f"FRAME 1: {input_frame_1}")
+     input_frame_2 = read_image(str(frameB), ImageReadMode.UNCHANGED)
+     print(f"FRAME 2: {input_frame_2}")
+
+     #img1_batch = torch.stack([frames[0]])
+     #img2_batch = torch.stack([frames[1]])
+
+     img1_batch = torch.stack([input_frame_1])
+     img2_batch = torch.stack([input_frame_2])
+
+     print(f"FRAME AFTER stack: {img1_batch}")
+
+     weights = Raft_Large_Weights.DEFAULT
+     transforms = weights.transforms()
+
+     def preprocess(img1_batch, img2_batch):
+         # RAFT expects spatial dimensions divisible by 8; 520x960 satisfies this.
+         img1_batch = F.resize(img1_batch, size=[520, 960])
+         img2_batch = F.resize(img2_batch, size=[520, 960])
+         return transforms(img1_batch, img2_batch)
+
+     img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
+
+     print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")
+
+     ####################################
+     # Estimating Optical flow using RAFT
+     # ----------------------------------
+     # We will use our RAFT implementation from
+     # :func:`~torchvision.models.optical_flow.raft_large`, which follows the same
+     # architecture as the one described in the `original paper <https://arxiv.org/abs/2003.12039>`_.
+     # We also provide the :func:`~torchvision.models.optical_flow.raft_small` model
+     # builder, which is smaller and faster to run, sacrificing a bit of accuracy.
+
+     # If you can, run this example on a GPU; it will be a lot faster.
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
+     model = model.eval()
+
+     list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
+     print(f"list_of_flows type = {type(list_of_flows)}")
+     print(f"list_of_flows length = {len(list_of_flows)} = number of iterations of the model")
+
+     ####################################
+     # The RAFT model outputs lists of predicted flows where each entry is a
+     # (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
+     # in the model. For more details on the iterative nature of the model, please
+     # refer to the `original paper <https://arxiv.org/abs/2003.12039>`_. Here, we
+     # are only interested in the final predicted flows (they are the most accurate
+     # ones), so we will just retrieve the last item in the list.
+     #
+     # As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H,
+     # W) for batches of flows) where each entry corresponds to the horizontal and
+     # vertical displacement of each pixel from the first image to the second image.
+     # Note that the predicted flows are in "pixel" units; they are not normalized
+     # w.r.t. the dimensions of the images.
+     predicted_flows = list_of_flows[-1]
+     print(f"predicted_flows dtype = {predicted_flows.dtype}")
+     print(f"predicted_flows shape = {predicted_flows.shape} = (N, 2, H, W)")
+     print(f"predicted_flows min = {predicted_flows.min()}, predicted_flows max = {predicted_flows.max()}")
+
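Because the flows are in pixel units, using them with torch.nn.functional.grid_sample (which expects coordinates normalized to [-1, 1]) requires an explicit conversion. A minimal sketch, not part of the committed file, that backward-warps the second frame onto the first under the standard convention that sampling frame 2 at (x + u, y + v) reconstructs frame 1:

import torch
from torch.nn.functional import grid_sample

def backward_warp(img2, flow):
    # img2: (N, C, H, W); flow: (N, 2, H, W) in pixel units.
    n, _, h, w = img2.shape
    ys, xs = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
    grid_x = xs[None] + flow[:, 0]          # where each pixel lands in frame 2
    grid_y = ys[None] + flow[:, 1]
    grid_x = 2.0 * grid_x / (w - 1) - 1.0   # pixel units -> [-1, 1]
    grid_y = 2.0 * grid_y / (h - 1) - 1.0
    grid = torch.stack([grid_x, grid_y], dim=-1)  # (N, H, W, 2), (x, y) order
    return grid_sample(img2.float(), grid, align_corners=True)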
+     ####################################
+     # Visualizing predicted flows
+     # ---------------------------
+     # Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
+     # convert a flow into an RGB image. It also supports batches of flows.
+     # Each "direction" in the flow will be mapped to a given RGB color. In the
+     # images below, pixels with similar colors are assumed by the model to be moving
+     # in similar directions. The model is properly able to predict the movement of
+     # the ball and the player. Note in particular the different predicted direction
+     # of the ball in the first image (going to the left) and in the second image
+     # (going up).
+
+     from torchvision.utils import flow_to_image
+
+     #flow_imgs = flow_to_image(predicted_flows)
+
+     #print(flow_imgs)
+
+     predicted_flow = list_of_flows[-1][0]
+     print(f"predicted flow dtype = {predicted_flow.dtype}")
+     print(f"predicted flow shape = {predicted_flow.shape}")
+
+     flow_img = flow_to_image(predicted_flow).to("cpu")
+     write_jpeg(flow_img, "predicted_flow.jpg")
+
+     # write_flo returns None; it writes "flofile.flo" to disk as a side effect
+     write_flo(predicted_flow, "flofile.flo")
+
+     return "predicted_flow.jpg", ["flofile.flo"]
+
+ description = "<p style='text-align:center'>Generate an optical flow image and a .flo file from two consecutive frames with PyTorch's RAFT model</p>"
+
+ gr.Interface(fn=infer, inputs=[gr.Image(source="upload", type="filepath", label="frame 1"), gr.Image(source="upload", type="filepath", label="frame 2")], outputs=[gr.Image(label="flow image"), gr.Files(label="flow file")], title="RAFT Optical Flow", description=description).launch()
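For a quick local check — a hypothetical smoke test, not part of app.py, assuming infer() is importable and the example frames frame1.jpg / frame2.jpg from this repository are in the working directory (note that importing app.py as written also launches the Gradio interface):

flow_jpg, flo_files = infer("frame1.jpg", "frame2.jpg")
print(flow_jpg)    # "predicted_flow.jpg"
print(flo_files)   # ["flofile.flo"]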
basket1.jpg ADDED
basket2.jpg ADDED
frame1.jpg ADDED
frame2.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ --extra-index-url https://download.pytorch.org/whl/cu113
+ av
+ torch
+ torchvision
+ pathlib
+ matplotlib
+ opencv-contrib-python
+ scipy
+ imageio
+ git+https://github.com/huggingface/diffusers.git
+ transformers
+ git+https://github.com/huggingface/accelerate
+ xformers==0.0.16
spacex.mp4 ADDED
Binary file (539 kB)