kutipense committed on
Commit 7fff87a · 1 Parent(s): 18dbcb5
.gitattributes CHANGED
@@ -33,8 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- *.jpg filter=lfs diff=lfs merge=lfs -text
- *.md filter=lfs diff=lfs merge=lfs -text
- *.py filter=lfs diff=lfs merge=lfs -text
- *.txt filter=lfs diff=lfs merge=lfs -text
  examples/demo19.jpg filter=lfs diff=lfs merge=lfs -text
 
README.md CHANGED
@@ -1,3 +1,13 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8d2a8df0a0165c80105ab7e07422628914c75a185ab3112df72d0e4128481473
- size 266
+ ---
+ title: Depth Anything V2 tinygrad
+ emoji: 🌖
+ colorFrom: red
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.36.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,3 +1,158 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:eedd9efc509f1d9870c4b9f655124348ef968df3b9c8004417bd20e275e179ad
- size 4822
+ import os
+ import tempfile
+ from typing import Literal
+
+ import gradio as gr
+ import matplotlib
+ import numpy as np
+ import safetensors as st
+ import spaces
+ import torch
+ from gradio_imageslider import ImageSlider
+ from huggingface_hub import hf_hub_download
+ from PIL import Image
+ from tinygrad import Tensor, nn
+ from transform import image2tensor
+
+ from dpt import DPTv2, DPTv2Config
+
+ css = """
+ #img-display-container {
+     max-height: 100vh;
+ }
+ #img-display-input {
+     max-height: 80vh;
+ }
+ #img-display-output {
+     max-height: 80vh;
+ }
+ #download {
+     height: 62px;
+ }
+ """
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ def get_config(m_size: Literal["vits", "vitb", "vitl", "vitg"]):
+     return DPTv2Config(img_size=518, patch_size=14, in_channels=3, mlp_ratio=4, **model_configs[m_size])
+
+
+ model_configs = {
+     "vits": {
+         "indermediate_layers": [2, 5, 8, 11],
+         "depth": 12,
+         "num_heads": 6,
+         "embed_dim": 384,
+         "features": 64,
+         "out_channels": [48, 96, 192, 384],
+     },
+     "vitb": {
+         "indermediate_layers": [2, 5, 8, 11],
+         "depth": 12,
+         "num_heads": 12,
+         "embed_dim": 768,
+         "features": 128,
+         "out_channels": [96, 192, 384, 768],
+     },
+     "vitl": {
+         "indermediate_layers": [4, 11, 17, 23],
+         "depth": 24,
+         "num_heads": 16,
+         "embed_dim": 1024,
+         "features": 256,
+         "out_channels": [256, 512, 1024, 1024],
+     },
+ }
+
+ encoder2name = {
+     "vits": "Small",
+     "vitb": "Base",
+     "vitl": "Large",
+ }
+
+ encoder = "vits"
+ filepath = hf_hub_download(
+     repo_id=f"depth-anything/Depth-Anything-V2-{encoder2name[encoder]}-hf",
+     filename=f"{encoder}.safetensors",
+     repo_type="model",
+ )
+
+ config = get_config(encoder)
+ model = DPTv2(config)
+
+ Tensor.no_grad = True
+ with st.safe_open(filepath, "numpy") as f:
+     tensors = {key: Tensor(f.get_tensor(key)) for key in f.keys()}
+     nn.state.load_state_dict(model, tensors, verbose=False, strict=True, consume=True)
+
+
+ title = "# Depth Anything V2 tinygrad"
+ description = """Demo for **Depth Anything V2 tinygrad**. Refer to [github](https://github.com/kutipense/Depth-Anything-V2-tinygrad) for the source.
+ Please also refer to the original [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), and [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""
+
+
+ @spaces.GPU
+ def predict_depth(image):
+     image, (h, w) = image2tensor(image, input_size=config.img_size)
+     output = model(image)
+     output = output.interpolate((h, w), mode="linear", align_corners=True).realize()
+     output = output.numpy()[0, 0]
+     return output
+
+
+ with gr.Blocks(css=css) as demo:
+     gr.Markdown(title)
+     gr.Markdown(description)
+     gr.Markdown("### Depth Prediction demo")
+
+     with gr.Row():
+         input_image = gr.Image(label="Input Image", type="numpy", elem_id="img-display-input")
+         depth_image_slider = ImageSlider(
+             label="Depth Map with Slider View", elem_id="img-display-output", position=0.5
+         )
+     submit = gr.Button(value="Compute Depth")
+     gray_depth_file = gr.File(
+         label="Grayscale depth map",
+         elem_id="download",
+     )
+     raw_file = gr.File(
+         label="16-bit raw output (can be considered as disparity)",
+         elem_id="download",
+     )
+
+     cmap = matplotlib.colormaps.get_cmap("Spectral_r")
+
+     def on_submit(image):
+         original_image = image.copy()
+
+         depth = predict_depth(image[:, :, ::-1])
+
+         raw_depth = Image.fromarray(depth.astype("uint16"))
+         tmp_raw_depth = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+         raw_depth.save(tmp_raw_depth.name)
+
+         depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+         depth = depth.astype(np.uint8)
+         colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)
+
+         gray_depth = Image.fromarray(depth)
+         tmp_gray_depth = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+         gray_depth.save(tmp_gray_depth.name)
+
+         return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]
+
+     submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])
+
+     example_files = os.listdir("assets/examples")
+     example_files.sort()
+     example_files = [os.path.join("assets/examples", filename) for filename in example_files]
+     examples = gr.Examples(
+         examples=example_files,
+         inputs=[input_image],
+         outputs=[depth_image_slider, gray_depth_file, raw_file],
+         fn=on_submit,
+     )
+
+
+ if __name__ == "__main__":
+     demo.queue().launch(share=True)
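For reference, a minimal sketch (not part of the commit) of exercising the inference path without launching the Gradio UI. It assumes the Space's files are checked out locally, that the weight download in app.py succeeds, and that importing app (which builds the model at import time) is acceptable:

```python
# Hypothetical local smoke test: run the depth model on one of the bundled example images.
import cv2

from app import predict_depth  # importing app builds the model and downloads the weights

bgr = cv2.imread("examples/demo01.jpg")  # OpenCV reads BGR, which image2tensor expects
depth = predict_depth(bgr)               # (H, W) float map at the original resolution
print(depth.shape, float(depth.min()), float(depth.max()))
```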
dpt.py CHANGED
@@ -1,3 +1,240 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:813608b8524baebc1081e684fdab88b93160a95634591cd4273607fe022d3862
- size 8535
+ from dataclasses import dataclass
+
+ from tinygrad import Tensor, nn
+
+
+ @dataclass
+ class DPTv2Config:
+     img_size: int
+     patch_size: int
+     in_channels: int
+     embed_dim: int
+     depth: int
+     mlp_ratio: int
+     num_heads: int
+     features: int
+     out_channels: list[int]
+     indermediate_layers: list[int]
+
+
+ class PatchEmbeddings:
+     def __init__(self, config: DPTv2Config):
+         self.projection = nn.Conv2d(
+             config.in_channels, config.embed_dim, kernel_size=config.patch_size, stride=config.patch_size
+         )
+
+     def __call__(self, x: Tensor) -> Tensor:
+         return self.projection(x).flatten(2).transpose(1, 2)
+
+
+ class Embeddings:
+     def __init__(self, config: DPTv2Config, num_tokens=1):
+         num_patches = (config.img_size // config.patch_size) ** 2
+
+         self.patch_embeddings = PatchEmbeddings(config)
+         self.cls_token = Tensor.zeros(1, 1, config.embed_dim)
+         self.mask_token = Tensor.zeros(1, config.embed_dim)  # unused
+         self.position_embeddings = Tensor.zeros(1, num_patches + num_tokens, config.embed_dim)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         x = self.patch_embeddings(x)
+         x = Tensor.cat(self.cls_token.expand(x.shape[0], -1, -1), x, dim=1)
+         x = x + self.position_embeddings
+
+         return x
+
+
+ class Attention:
+     def __init__(self, config: DPTv2Config):
+         self.num_heads = config.num_heads
+         self.scale = (config.embed_dim // config.num_heads) ** -0.5
+
+         self.query = nn.Linear(config.embed_dim, config.embed_dim)
+         self.key = nn.Linear(config.embed_dim, config.embed_dim)
+         self.value = nn.Linear(config.embed_dim, config.embed_dim)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         B, N, C = x.shape
+         ch = C // self.num_heads
+         q = self.query(x).reshape(B, N, self.num_heads, ch).transpose(2, 1)
+         k = self.key(x).reshape(B, N, self.num_heads, ch).transpose(2, 1)
+         v = self.value(x).reshape(B, N, self.num_heads, ch).transpose(2, 1)
+
+         attn: Tensor = (q @ k.transpose(-2, -1)) * self.scale
+         attn = attn.softmax(axis=-1)
+         x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+
+         return x
+
+
+ class MLP:
+     def __init__(self, config: DPTv2Config):
+         in_features = config.embed_dim
+         hidden_features = int(config.embed_dim * config.mlp_ratio)
+         self.fc1 = nn.Linear(in_features, hidden_features)
+         self.fc2 = nn.Linear(hidden_features, in_features)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         return self.fc2(self.fc1(x).gelu())
+
+
+ class Layer:
+     def __init__(self, config: DPTv2Config):
+         self.attention = Attention(config)
+         self.dense = nn.Linear(config.embed_dim, config.embed_dim)
+         self.layer_scales = [Tensor.ones(config.embed_dim) * 1e-5 for _ in range(2)]
+         self.norms = [nn.LayerNorm(config.embed_dim, eps=1e-6) for _ in range(2)]
+         self.mlp = MLP(config)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         x = x + self.layer_scales[0] * self.dense(self.attention(self.norms[0](x)))
+         x = x + self.layer_scales[1] * self.mlp(self.norms[1](x))
+         return x
+
+     def _asdict(self):
+         return {
+             "attention.attention": self.attention,
+             "attention.output.dense": self.dense,
+             "layer_scale1.lambda1": self.layer_scales[0],
+             "layer_scale2.lambda1": self.layer_scales[1],
+             "mlp": self.mlp,
+             "norm1": self.norms[0],
+             "norm2": self.norms[1],
+         }
+
+
+ class Encoder:
+     def __init__(self, config: DPTv2Config):
+         self.layer = [Layer(config) for _ in range(config.depth)]
+
+     def __call__(self, x: Tensor) -> list[Tensor]:
+         outputs = []
+         for layer in self.layer:
+             x = layer(x)
+             outputs.append(x)
+         return outputs
+
+
+ class Backbone:
+     def __init__(self, config: DPTv2Config):
+         self.indermediate_layers = config.indermediate_layers
+         self.embeddings = Embeddings(config)
+         self.encoder = Encoder(config)
+         self.layernorm = nn.LayerNorm(config.embed_dim, eps=1e-6)
+
+     def __call__(self, x: Tensor) -> list[Tensor]:
+         x = self.encoder(self.embeddings(x))
+         return [self.layernorm(x[ind]) for ind in self.indermediate_layers]
+
+
+ class Head:
+     def __init__(self, config: DPTv2Config):
+         in_feats, out_feats = config.features, config.features // 2
+         self.conv1 = nn.Conv2d(in_feats, out_feats, kernel_size=3, padding=1)
+         self.conv2 = nn.Conv2d(out_feats, 32, kernel_size=3, padding=1)
+         self.conv3 = nn.Conv2d(32, 1, kernel_size=1)
+
+         self.patch_h = self.patch_w = config.img_size // config.patch_size
+         self.patch_h = self.patch_w = self.patch_h * config.patch_size
+
+     def __call__(self, x: Tensor) -> Tensor:
+         x = self.conv1(x).interpolate((self.patch_h, self.patch_w), align_corners=True)
+         x = self.conv3(self.conv2(x).relu()).relu()
+         return x
+
+
+ class ResidualLayer:
+     def __init__(self, config: DPTv2Config):
+         in_feats = config.features
+         self.convolution1 = nn.Conv2d(in_feats, in_feats, kernel_size=3, padding=1)
+         self.convolution2 = nn.Conv2d(in_feats, in_feats, kernel_size=3, padding=1)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         return self.convolution2(self.convolution1(x.relu()).relu()) + x
+
+
+ class FusionStage:
+     def __init__(self, config: DPTv2Config):
+         in_feats = config.features
+         self.residual_layer1 = ResidualLayer(config)
+         self.residual_layer2 = ResidualLayer(config)
+         self.projection = nn.Conv2d(in_feats, in_feats, kernel_size=1)
+
+     def __call__(self, layer0: Tensor, layer1: Tensor = None, size=None) -> Tensor:
+         if layer1 is not None:
+             layer0 = layer0 + self.residual_layer1(layer1)
+
+         layer0 = self.residual_layer2(layer0)
+         size = list(map(lambda x: x * 2, layer0.shape[2:])) if size is None else size
+         return self.projection(layer0.interpolate(size, align_corners=True))
+
+
+ class ReassembleStage:
+     def __init__(self, config: DPTv2Config):
+         ins, outs = config.embed_dim, config.out_channels
+
+         self.projection = [
+             nn.Conv2d(in_channels=ins, out_channels=out_channel, kernel_size=1) for out_channel in outs
+         ]
+
+         self.resize_layers = [
+             nn.ConvTranspose2d(in_channels=outs[0], out_channels=outs[0], kernel_size=4, stride=4),
+             nn.ConvTranspose2d(in_channels=outs[1], out_channels=outs[1], kernel_size=2, stride=2),
+             lambda x: x,
+             nn.Conv2d(in_channels=outs[3], out_channels=outs[3], kernel_size=3, stride=2, padding=1),
+         ]
+
+         self.patch_h = self.patch_w = config.img_size // config.patch_size
+
+     def __call__(self, inputs: list[Tensor]) -> list[Tensor]:
+         outputs = []
+         for i, out in enumerate(inputs):
+             x = out[:, 1:]  # remove the cls token
+             x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], self.patch_h, self.patch_w))
+             x = self.resize_layers[i](self.projection[i](x))
+             outputs.append(x)
+         return outputs
+
+     def _asdict(self):
+         return {
+             "layers": [{"projection": p, "resize": r} for p, r in zip(self.projection, self.resize_layers)]
+         }
+
+
+ class Neck:
+     def __init__(self, config: DPTv2Config):
+         self.convs = [
+             nn.Conv2d(in_channels=ch, out_channels=config.features, kernel_size=3, padding=1, bias=False)
+             for ch in config.out_channels
+         ]
+
+         self.reassemble_stage = ReassembleStage(config)
+         self.fusion_stage = [FusionStage(config) for _ in range(4)]
+
+     def __call__(self, x: list[Tensor]) -> Tensor:
+         outputs = self.reassemble_stage(x)
+         outputs = [conv(out) for out, conv in zip(outputs, self.convs)]
+
+         path_4 = self.fusion_stage[0](outputs[3], size=outputs[2].shape[2:])
+         path_3 = self.fusion_stage[1](path_4, outputs[2], size=outputs[1].shape[2:])
+         path_2 = self.fusion_stage[2](path_3, outputs[1], size=outputs[0].shape[2:])
+         path_1 = self.fusion_stage[3](path_2, outputs[0])
+
+         return path_1
+
+     def _asdict(self):
+         return {
+             "convs": self.convs,
+             "fusion_stage.layers": self.fusion_stage,
+             "reassemble_stage": self.reassemble_stage,
+         }
+
+
+ class DPTv2:
+     def __init__(self, config: DPTv2Config):
+         self.backbone = Backbone(config)
+         self.head = Head(config)
+         self.neck = Neck(config)
+
+     def __call__(self, x: Tensor) -> Tensor:
+         return self.head(self.neck(self.backbone(x)))
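As a quick sanity check of the module above, here is a minimal sketch (not part of the commit) that instantiates the model with the "vits" hyperparameters used in app.py and verifies the output shape on random, untrained weights:

```python
# Hypothetical shape check, using the "vits" config values from app.py.
from tinygrad import Tensor

from dpt import DPTv2, DPTv2Config

config = DPTv2Config(
    img_size=518, patch_size=14, in_channels=3, mlp_ratio=4,
    embed_dim=384, depth=12, num_heads=6, features=64,
    out_channels=[48, 96, 192, 384], indermediate_layers=[2, 5, 8, 11],
)
model = DPTv2(config)
out = model(Tensor.randn(1, 3, 518, 518))
print(out.shape)  # (1, 1, 518, 518): one depth channel at the 518x518 input resolution
```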
examples/demo01.jpg CHANGED

Git LFS Details

  • SHA256: 35ef1bbb63f6540e49aa9b6302b9b938be4fe8b9c08c07c3694b02396b0e87e0
  • Pointer size: 131 Bytes
  • Size of remote file: 488 kB
examples/demo02.jpg CHANGED

Git LFS Details

  • SHA256: c1f116034aa5abd5b5470226be2bb03bd938c8affe90389c52d10fe8b1ac7e21
  • Pointer size: 131 Bytes
  • Size of remote file: 511 kB
examples/demo03.jpg CHANGED

Git LFS Details

  • SHA256: 764dffd4d97bbacd620bc005fa86837018393ccb5ffd1059c2245a3cacff7782
  • Pointer size: 131 Bytes
  • Size of remote file: 465 kB
examples/demo04.jpg CHANGED

Git LFS Details

  • SHA256: 3a301f4e0361fe75ca4d256a35062f87eecc3f7655d747c9def3259c86e26a45
  • Pointer size: 131 Bytes
  • Size of remote file: 300 kB
examples/demo05.jpg CHANGED

Git LFS Details

  • SHA256: 50e7e2f057c5a2d27bb09b0b3e814147966e30139ddaf54362c72746a5320339
  • Pointer size: 131 Bytes
  • Size of remote file: 353 kB
examples/demo06.jpg CHANGED

Git LFS Details

  • SHA256: 0fd815bddeab139e7477c948a22fffdf84d9b87f81d77dcf6fd8ef39ebaaafb5
  • Pointer size: 131 Bytes
  • Size of remote file: 783 kB
examples/demo07.jpg CHANGED

Git LFS Details

  • SHA256: 345bec735adc4c238bf14ddf1d182c4881f8ba08814c4f4074c1d79e9e4adc52
  • Pointer size: 131 Bytes
  • Size of remote file: 400 kB
examples/demo08.jpg CHANGED

Git LFS Details

  • SHA256: d32b480349013be5f84521b0df1d6590139163aef8457f051076ed03c7371e6f
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB
examples/demo09.jpg CHANGED

Git LFS Details

  • SHA256: 6a64033ba69bb408c092dbff811abfbcb0196f1f87541902d03d2a909a0b8ea9
  • Pointer size: 131 Bytes
  • Size of remote file: 410 kB
examples/demo10.jpg CHANGED

Git LFS Details

  • SHA256: bc77f215081f58de8d079e821e2808f6ee2727dfa729c10a5921c186a32c7638
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
examples/demo11.jpg CHANGED

Git LFS Details

  • SHA256: 150ef98e997ee6ff705bd06105c343f76a8f181ef93ff9ceebbd62a3ab6b592b
  • Pointer size: 131 Bytes
  • Size of remote file: 244 kB
examples/demo12.jpg CHANGED

Git LFS Details

  • SHA256: 264458adcf5af6e3733dfda7ef4628c4a1dc49ed249aa8896256d9534a8377c4
  • Pointer size: 131 Bytes
  • Size of remote file: 263 kB
examples/demo13.jpg CHANGED

Git LFS Details

  • SHA256: 9168fc752a002d50138a56621e8de5fab7fed125a978dd293319d28d30993564
  • Pointer size: 131 Bytes
  • Size of remote file: 421 kB
examples/demo14.jpg CHANGED

Git LFS Details

  • SHA256: 01480d952bc950332f0eea31da0777f66d5f285d8edfe2a5f47508f4b260a99f
  • Pointer size: 131 Bytes
  • Size of remote file: 643 kB
examples/demo15.jpg CHANGED

Git LFS Details

  • SHA256: bf60ce3879f627e8886280cc61442174c91908894a5b059681341fed600f7db3
  • Pointer size: 131 Bytes
  • Size of remote file: 769 kB
examples/demo16.jpg CHANGED

Git LFS Details

  • SHA256: a92e51732b38ad8b21b5cbbc6883374bd5ab56bb4907d6c4f1e13307970480ee
  • Pointer size: 131 Bytes
  • Size of remote file: 378 kB
examples/demo17.jpg CHANGED

Git LFS Details

  • SHA256: 7174dcfbbb95a2e581ebf1e14cfbb4bef7a1295ae9cece405c87145223dcb32d
  • Pointer size: 131 Bytes
  • Size of remote file: 153 kB
examples/demo18.jpg CHANGED

Git LFS Details

  • SHA256: 4deeb16dbee40108f194bd87c8621416110427c8ab5fc5ad6a1d9002b2b620c2
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
examples/demo20.jpg CHANGED

Git LFS Details

  • SHA256: 2958fd1b7018e40b68ccc8d74ff8e50bf143f5046711d57c54eec2a479550ace
  • Pointer size: 131 Bytes
  • Size of remote file: 498 kB
requirements.txt CHANGED
@@ -1,3 +1,7 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:29ed775b4357eee3c4f3c7b591810521d94c058e1a7b19600e1b2563b73c6eea
- size 95
+ gradio_imageslider
+ gradio==4.36.0
+ tinygrad
+ safetensors
+ opencv-python
+ matplotlib
+ huggingface_hub
transform.py CHANGED
@@ -1,3 +1,185 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:ff3a6169f6ca0bd3677861765d49e8dfaddcfd57e2797f880a339bf862e05b75
- size 7018
+ import cv2
+ import numpy as np
+ from tinygrad import Tensor
+
+
+ class Resize(object):
+     """Resize sample to given size (width, height)."""
+
+     def __init__(
+         self,
+         width,
+         height,
+         resize_target=True,
+         keep_aspect_ratio=False,
+         ensure_multiple_of=1,
+         resize_method="lower_bound",
+         image_interpolation_method=cv2.INTER_AREA,
+     ):
+         """Init.
+
+         Args:
+             width (int): desired output width
+             height (int): desired output height
+             resize_target (bool, optional):
+                 True: Resize the full sample (image, mask, target).
+                 False: Resize image only.
+                 Defaults to True.
+             keep_aspect_ratio (bool, optional):
+                 True: Keep the aspect ratio of the input sample.
+                 The output sample might not have the given width and height, and
+                 the resize behaviour depends on the parameter 'resize_method'.
+                 Defaults to False.
+             ensure_multiple_of (int, optional):
+                 Output width and height are constrained to be a multiple of this parameter.
+                 Defaults to 1.
+             resize_method (str, optional):
+                 "lower_bound": Output will be at least as large as the given size.
+                 "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than the given size.)
+                 "minimal": Scale as little as possible. (Output size might be smaller than the given size.)
+                 Defaults to "lower_bound".
+         """
+         self.__width = width
+         self.__height = height
+
+         self.__resize_target = resize_target
+         self.__keep_aspect_ratio = keep_aspect_ratio
+         self.__multiple_of = ensure_multiple_of
+         self.__resize_method = resize_method
+         self.__image_interpolation_method = image_interpolation_method
+
+     def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
+         y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         if max_val is not None and y > max_val:
+             y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         if y < min_val:
+             y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         return y
+
+     def get_size(self, width, height):
+         # determine new height and width
+         scale_height = self.__height / height
+         scale_width = self.__width / width
+
+         if self.__keep_aspect_ratio:
+             if self.__resize_method == "lower_bound":
+                 # scale such that output size is lower bound
+                 if scale_width > scale_height:
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             elif self.__resize_method == "upper_bound":
+                 # scale such that output size is upper bound
+                 if scale_width < scale_height:
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             elif self.__resize_method == "minimal":
+                 # scale as little as possible
+                 if abs(1 - scale_width) < abs(1 - scale_height):
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             else:
+                 raise ValueError(f"resize_method {self.__resize_method} not implemented")
+
+         if self.__resize_method == "lower_bound":
+             new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
+             new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
+         elif self.__resize_method == "upper_bound":
+             new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
+             new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
+         elif self.__resize_method == "minimal":
+             new_height = self.constrain_to_multiple_of(scale_height * height)
+             new_width = self.constrain_to_multiple_of(scale_width * width)
+         else:
+             raise ValueError(f"resize_method {self.__resize_method} not implemented")
+
+         return (new_width, new_height)
+
+     def __call__(self, sample):
+         width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])
+
+         # resize sample
+         sample["image"] = cv2.resize(
+             sample["image"], (width, height), interpolation=self.__image_interpolation_method
+         )
+
+         if self.__resize_target:
+             if "depth" in sample:
+                 sample["depth"] = cv2.resize(
+                     sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
+                 )
+
+             if "mask" in sample:
+                 sample["mask"] = cv2.resize(
+                     sample["mask"].astype(np.float32), (width, height), interpolation=cv2.INTER_NEAREST
+                 )
+
+         return sample
+
+
+ class NormalizeImage(object):
+     """Normalize image by given mean and std."""
+
+     def __init__(self, mean, std):
+         self.__mean = mean
+         self.__std = std
+
+     def __call__(self, sample):
+         sample["image"] = (sample["image"] - self.__mean) / self.__std
+
+         return sample
+
+
+ class PrepareForNet(object):
+     """Prepare sample for usage as network input."""
+
+     def __init__(self):
+         pass
+
+     def __call__(self, sample):
+         image = np.transpose(sample["image"], (2, 0, 1))
+         sample["image"] = np.ascontiguousarray(image).astype(np.float32)
+
+         if "depth" in sample:
+             depth = sample["depth"].astype(np.float32)
+             sample["depth"] = np.ascontiguousarray(depth)
+
+         if "mask" in sample:
+             sample["mask"] = sample["mask"].astype(np.float32)
+             sample["mask"] = np.ascontiguousarray(sample["mask"])
+
+         return sample
+
+
+ def image2tensor(raw_image, input_size=518):
+     transforms = [
+         Resize(
+             width=input_size,
+             height=input_size,
+             resize_target=False,
+             keep_aspect_ratio=False,
+             ensure_multiple_of=14,
+             resize_method="lower_bound",
+             image_interpolation_method=cv2.INTER_CUBIC,
+         ),
+         NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+         PrepareForNet(),
+     ]
+
+     image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0
+     for transform in transforms:
+         image = transform({"image": image})["image"]
+     image = Tensor(image).unsqueeze(0)
+
+     return image, raw_image.shape[:2]
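A small sketch (not part of the commit) illustrating what image2tensor produces: with keep_aspect_ratio=False and ensure_multiple_of=14, a 480x640 BGR frame is resized so both sides land on 518, giving a (1, 3, 518, 518) tensor plus the original (h, w):

```python
# Hypothetical sanity check of the preprocessing pipeline above.
import numpy as np

from transform import image2tensor

frame = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)  # dummy BGR image
tensor, (h, w) = image2tensor(frame)
print(tensor.shape, (h, w))  # (1, 3, 518, 518) and (480, 640)
```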