Commit 6783bbb (0 parents)

Duplicate from fffiloni/RAFT

Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>

Files changed:
- .gitattributes +34 -0
- README.md +13 -0
- app.py +178 -0
- basket1.jpg +0 -0
- basket2.jpg +0 -0
- frame1.jpg +0 -0
- frame2.jpg +0 -0
- requirements.txt +13 -0
- spacex.mp4 +0 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: RAFT Optical Flow
+emoji: 😻
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: 3.19.1
+app_file: app.py
+pinned: false
+duplicated_from: fffiloni/RAFT
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,178 @@
+import gradio as gr
+
+"""
+=====================================================
+Optical Flow: Predicting movement with the RAFT model
+=====================================================
+
+Optical flow is the task of predicting movement between two images, usually two
+consecutive frames of a video. Optical flow models take two images as input, and
+predict a flow: the flow indicates the displacement of every single pixel in the
+first image, and maps it to its corresponding pixel in the second image. Flows
+are (2, H, W)-dimensional tensors, where the first axis corresponds to the
+predicted horizontal and vertical displacements.
+
+The following example illustrates how torchvision can be used to predict flows
+using our implementation of the RAFT model. We will also see how to convert the
+predicted flows to RGB images for visualization.
+"""
+
+import cv2
+import numpy as np
+import os
+import sys
+import torch
+from PIL import Image
+import matplotlib.pyplot as plt
+import torchvision.transforms.functional as F
+from torchvision.io import read_video, read_image, ImageReadMode
+from torchvision.models.optical_flow import Raft_Large_Weights
+from torchvision.models.optical_flow import raft_large
+from torchvision.io import write_jpeg
+import torchvision.transforms as T
+
+import tempfile
+from pathlib import Path
+from urllib.request import urlretrieve
+
+from scipy.interpolate import LinearNDInterpolator
+from imageio import imread, imwrite
+
+
+def write_flo(flow, filename):
+    """
+    Write optical flow in Middlebury .flo format
+
+    :param flow: optical flow map with shape (H, W, 2)
+    :param filename: optical flow file path to be saved
+    :return: None
+
+    from https://github.com/liruoteng/OpticalFlowToolkit/
+    """
+    # forcing conversion to float32 precision
+    flow = flow.cpu().data.numpy()
+    flow = flow.astype(np.float32)
+    f = open(filename, 'wb')
+    magic = np.array([202021.25], dtype=np.float32)
+    (height, width) = flow.shape[0:2]
+    w = np.array([width], dtype=np.int32)
+    h = np.array([height], dtype=np.int32)
+    magic.tofile(f)
+    w.tofile(f)
+    h.tofile(f)
+    flow.tofile(f)
+    f.close()
+
+
+def infer(frameA, frameB):
+    #video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
+    #video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
+    #_ = urlretrieve(video_url, video_path)
+
+    #frames, _, _ = read_video(str("./spacex.mp4"), output_format="TCHW")
+    #print(f"FRAME BEFORE stack: {frames[100]}")
+
+    input_frame_1 = read_image(str(frameA), ImageReadMode.UNCHANGED)
+    print(f"FRAME 1: {input_frame_1}")
+    input_frame_2 = read_image(str(frameB), ImageReadMode.UNCHANGED)
+    print(f"FRAME 2: {input_frame_2}")
+
+    #img1_batch = torch.stack([frames[0]])
+    #img2_batch = torch.stack([frames[1]])
+
+    img1_batch = torch.stack([input_frame_1])
+    img2_batch = torch.stack([input_frame_2])
+
+    print(f"FRAME AFTER stack: {img1_batch}")
+
+    weights = Raft_Large_Weights.DEFAULT
+    transforms = weights.transforms()
+
+    def preprocess(img1_batch, img2_batch):
+        img1_batch = F.resize(img1_batch, size=[520, 960])
+        img2_batch = F.resize(img2_batch, size=[520, 960])
+        return transforms(img1_batch, img2_batch)
+
+    img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
+
+    print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")
+
+    ####################################
+    # Estimating optical flow using RAFT
+    # ----------------------------------
+    # We will use our RAFT implementation from
+    # :func:`~torchvision.models.optical_flow.raft_large`, which follows the same
+    # architecture as the one described in the `original paper <https://arxiv.org/abs/2003.12039>`_.
+    # We also provide the :func:`~torchvision.models.optical_flow.raft_small` model
+    # builder, which is smaller and faster to run, sacrificing a bit of accuracy.
+
+    # If you can, run this example on a GPU: it will be a lot faster.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
+    model = model.eval()
+
+    list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
+    print(f"list_of_flows type = {type(list_of_flows)}")
+    print(f"list_of_flows length = {len(list_of_flows)} = number of iterations of the model")
+
+    ####################################
+    # The RAFT model outputs lists of predicted flows where each entry is a
+    # (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
+    # in the model. For more details on the iterative nature of the model, please
+    # refer to the `original paper <https://arxiv.org/abs/2003.12039>`_. Here, we
+    # are only interested in the final predicted flows (they are the most accurate
+    # ones), so we will just retrieve the last item in the list.
+    #
+    # As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H,
+    # W) for batches of flows) where each entry corresponds to the horizontal and
+    # vertical displacement of each pixel from the first image to the second image.
+    # Note that the predicted flows are in "pixel" units; they are not normalized
+    # w.r.t. the dimensions of the images.
+    predicted_flows = list_of_flows[-1]
+    print(f"predicted_flows dtype = {predicted_flows.dtype}")
+    print(f"predicted_flows shape = {predicted_flows.shape} = (N, 2, H, W)")
+    print(f"predicted_flows min = {predicted_flows.min()}, predicted_flows max = {predicted_flows.max()}")
+
+    ####################################
+    # Visualizing predicted flows
+    # ---------------------------
+    # Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
+    # convert a flow into an RGB image. It also supports batches of flows.
+    # Each "direction" in the flow will be mapped to a given RGB color. In the
+    # images below, pixels with similar colors are assumed by the model to be moving
+    # in similar directions. The model is properly able to predict the movement of
+    # the ball and the player. Note in particular the different predicted direction
+    # of the ball in the first image (going to the left) and in the second image
+    # (going up).
+
+    from torchvision.utils import flow_to_image
+
+    #flow_imgs = flow_to_image(predicted_flows)
+    #print(flow_imgs)
+
+    predicted_flow = list_of_flows[-1][0]
+    print(f"predicted flow dtype = {predicted_flow.dtype}")
+    print(f"predicted flow shape = {predicted_flow.shape}")
+
+    flow_img = flow_to_image(predicted_flow).to("cpu")
+    write_jpeg(flow_img, "predicted_flow.jpg")
+
+    # write_flo expects an (H, W, 2) flow map, so move the channel axis last
+    write_flo(predicted_flow.permute(1, 2, 0), "flofile.flo")
+
+    return "predicted_flow.jpg", ["flofile.flo"]
+
+description = "<p style='text-align:center'>PyTorch way to generate an optical flow image & .flo file from 2 consecutive frames with the RAFT model</p>"
+
+gr.Interface(fn=infer, inputs=[gr.Image(source="upload", type="filepath", label="frame 1"), gr.Image(source="upload", type="filepath", label="frame 2")], outputs=[gr.Image(label="flow image"), gr.Files(label="flow file")], title="RAFT Optical Flow", description=description).launch()
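For reference, here is a minimal sketch of the inverse operation: reading a Middlebury .flo file back into a NumPy array. This helper is hypothetical (not part of this commit) and only assumes the layout written by write_flo above: a float32 magic number 202021.25, an int32 width and height, then interleaved per-pixel (u, v) float32 values.

# Hypothetical helper, not part of this commit: read a Middlebury .flo file
# back into an (H, W, 2) float32 array, matching the layout written by write_flo.
import numpy as np

def read_flo(filename):
    with open(filename, "rb") as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        if magic != 202021.25:
            raise ValueError("invalid .flo magic number")
        width = int(np.fromfile(f, np.int32, count=1)[0])
        height = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * width * height)
    # values are stored row-major, interleaved as (u, v) per pixel
    return data.reshape(height, width, 2)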
basket1.jpg
ADDED
basket2.jpg
ADDED
frame1.jpg
ADDED
frame2.jpg
ADDED
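A quick sanity check for a predicted flow on frame pairs like the ones above is to backward-warp the second frame with it; the warped result should resemble the first frame. Below is a minimal sketch of that check (hypothetical, not part of this commit), using torch.nn.functional.grid_sample and assuming a RAFT-style flow in pixel units.

# Hypothetical check, not part of this commit: sample img2 at (x + u, y + v);
# the output should resemble img1 wherever the flow is accurate.
import torch
import torch.nn.functional as nnF

def backward_warp(img2, flow):
    # img2: (N, C, H, W) float tensor; flow: (N, 2, H, W) in pixel units
    n, _, h, w = flow.shape
    ys, xs = torch.meshgrid(
        torch.arange(h, device=flow.device),
        torch.arange(w, device=flow.device),
        indexing="ij",
    )
    base = torch.stack((xs, ys)).float()   # (2, H, W) pixel coordinates
    coords = base.unsqueeze(0) + flow      # target position of every pixel
    # grid_sample expects an (N, H, W, 2) grid normalized to [-1, 1]
    gx = 2.0 * coords[:, 0] / (w - 1) - 1.0
    gy = 2.0 * coords[:, 1] / (h - 1) - 1.0
    grid = torch.stack((gx, gy), dim=-1)
    return nnF.grid_sample(img2, grid, align_corners=True)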
requirements.txt
ADDED
@@ -0,0 +1,13 @@
+--extra-index-url https://download.pytorch.org/whl/cu113
+av
+torch
+torchvision
+pathlib
+matplotlib
+opencv-contrib-python
+scipy
+imageio
+git+https://github.com/huggingface/diffusers.git
+transformers
+git+https://github.com/huggingface/accelerate
+xformers==0.0.16
spacex.mp4
ADDED
Binary file (539 kB)