CodeGoat24 committed on
Commit
9a89940
·
verified ·
1 Parent(s): 9f99a98

Update leaderboard_data.json

Browse files
Files changed (1) hide show
  1. leaderboard_data.json +410 -0
leaderboard_data.json CHANGED
@@ -1,5 +1,415 @@
1
  {
2
  "leaderboard": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "model": "GPT-4o",
5
  "link": "https://platform.openai.com/docs/guides/image-generation",
 
1
  {
2
  "leaderboard": [
3
+ {
4
+ "model": "BLIP-3o",
5
+ "link": "https://arxiv.org/pdf/2505.09568",
6
+ "hf": "https://huggingface.co/BLIP3o/BLIP3o-Model-8B",
7
+ "open_source": true,
8
+ "release_date": "2025-05",
9
+
10
+ "Overall": 59.87,
11
+ "Style": 92.80,
12
+ "World Knowledge": 80.22,
13
+ "Attribute-Overall": 63.89,
14
+ "Quantity": 51.39,
15
+ "Expression": 60.26,
16
+ "Material": 64.62,
17
+ "Size": 75.00,
18
+ "Shape": 54.37,
19
+ "Color": 81.67,
20
+
21
+ "Action-Overall": 63.97,
22
+ "Hand": 58.33,
23
+ "Full body": 70.11,
24
+ "Animal": 70.59,
25
+ "Non Contact": 60.20,
26
+ "Contact": 51.79,
27
+ "State": 71.70,
28
+
29
+ "Relationship-Overall": 66.50,
30
+ "Composition": 70.61,
31
+ "Similarity": 60.00,
32
+ "Inclusion": 67.39,
33
+ "Comparison": 64.84,
34
+
35
+ "Compound-Overall": 53.74,
36
+ "Imagination": 61.73,
37
+ "Feature matching": 45.57,
38
+
39
+ "Grammar-Overall": 68.58,
40
+ "Pronoun Reference": 79.04,
41
+ "Consistency": 61.11,
42
+ "Negation": 63.85,
43
+
44
+ "Layout-Overall": 68.47,
45
+ "2D": 72.79,
46
+ "3D": 64.02,
47
+
48
+ "Logical Reasoning": 39.55,
49
+
50
+ "Text": 1.15
51
+ },
52
+ {
53
+ "model": "CogView4",
54
+ "link": "https://arxiv.org/pdf/2403.05121",
55
+ "hf": "https://huggingface.co/zai-org/CogView4-6B",
56
+ "open_source": true,
57
+ "release_date": "2024-03",
58
+
59
+ "Overall": 56.30,
60
+ "Style": 82.00,
61
+ "World Knowledge": 83.07,
62
+ "Attribute-Overall": 63.25,
63
+ "Quantity": 71.53,
64
+ "Expression": 44.23,
65
+ "Material": 55.19,
66
+ "Size": 72.22,
67
+ "Shape": 57.50,
68
+ "Color": 89.17,
69
+
70
+ "Action-Overall": 57.51,
71
+ "Hand": 53.85,
72
+ "Full body": 59.78,
73
+ "Animal": 68.38,
74
+ "Non Contact": 50.51,
75
+ "Contact": 51.19,
76
+ "State": 62.74,
77
+
78
+ "Relationship-Overall": 62.44,
79
+ "Composition": 60.47,
80
+ "Similarity": 60.00,
81
+ "Inclusion": 69.57,
82
+ "Comparison": 60.16,
83
+
84
+ "Compound-Overall": 44.72,
85
+ "Imagination": 47.19,
86
+ "Feature matching": 42.19,
87
+
88
+ "Grammar-Overall": 54.81,
89
+ "Pronoun Reference": 69.49,
90
+ "Consistency": 56.02,
91
+ "Negation": 38.46,
92
+
93
+ "Layout-Overall": 69.22,
94
+ "2D": 77.21,
95
+ "3D": 60.98,
96
+
97
+ "Logical Reasoning": 28.18,
98
+
99
+ "Text": 17.82
100
+ },
101
+ {
102
+ "model": "Hunyuan-DiT",
103
+ "link": "https://arxiv.org/pdf/2405.08748",
104
+ "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT",
105
+ "open_source": true,
106
+ "release_date": "2024-05",
107
+
108
+ "Overall": 51.38,
109
+
110
+ "Style": 94.10,
111
+
112
+ "World Knowledge": 80.70,
113
+
114
+ "Attribute-Overall": 62.71,
115
+ "Quantity": 67.36,
116
+ "Expression": 44.23,
117
+ "Material": 71.70,
118
+ "Size": 61.81,
119
+ "Shape": 47.50,
120
+ "Color": 86.67,
121
+
122
+ "Action-Overall": 49.05,
123
+ "Hand": 35.90,
124
+ "Full body": 54.89,
125
+ "Animal": 54.41,
126
+ "Non Contact": 46.94,
127
+ "Contact": 35.71,
128
+ "State": 62.74,
129
+
130
+ "Relationship-Overall": 59.64,
131
+ "Composition": 60.14,
132
+ "Similarity": 64.44,
133
+ "Inclusion": 60.33,
134
+ "Comparison": 50.78,
135
+
136
+ "Compound-Overall": 41.62,
137
+ "Imagination": 46.68,
138
+ "Feature matching": 36.46,
139
+
140
+ "Grammar-Overall": 55.48,
141
+ "Pronoun Reference": 62.87,
142
+ "Consistency": 57.87,
143
+ "Negation": 45.77,
144
+
145
+ "Layout-Overall": 44.78,
146
+ "2D": 39.34,
147
+ "3D": 50.38,
148
+
149
+ "Logical Reasoning": 24.55,
150
+
151
+ "Text": 1.15
152
+ },
153
+ {
154
+ "model": "Janus",
155
+ "link": "https://arxiv.org/pdf/2410.13848",
156
+ "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B",
157
+ "open_source": true,
158
+ "release_date": "2024-10",
159
+
160
+ "Overall": 51.23,
161
+
162
+ "Style": 89.90,
163
+
164
+ "World Knowledge": 73.58,
165
+
166
+ "Attribute-Overall": 54.81,
167
+ "Quantity": 37.50,
168
+ "Expression": 37.82,
169
+ "Material": 58.96,
170
+ "Size": 65.97,
171
+ "Shape": 47.50,
172
+ "Color": 86.67,
173
+
174
+ "Action-Overall": 50.38,
175
+ "Hand": 32.69,
176
+ "Full body": 51.63,
177
+ "Animal": 61.76,
178
+ "Non Contact": 48.47,
179
+ "Contact": 38.10,
180
+ "State": 66.51,
181
+
182
+ "Relationship-Overall": 55.08,
183
+ "Composition": 56.76,
184
+ "Similarity": 53.89,
185
+ "Inclusion": 59.24,
186
+ "Comparison": 46.88,
187
+
188
+ "Compound-Overall": 46.65,
189
+ "Imagination": 58.16,
190
+ "Feature matching": 34.90,
191
+
192
+ "Grammar-Overall": 59.09,
193
+ "Pronoun Reference": 66.18,
194
+ "Consistency": 51.39,
195
+ "Negation": 58.08,
196
+
197
+ "Layout-Overall": 54.85,
198
+ "2D": 57.72,
199
+ "3D": 51.89,
200
+
201
+ "Logical Reasoning": 26.82,
202
+
203
+ "Text": 1.15
204
+ },
205
+ {
206
+ "model": "Janus-flow",
207
+ "link": "https://arxiv.org/pdf/2411.07975",
208
+ "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B",
209
+ "open_source": true,
210
+ "release_date": "2024-11",
211
+
212
+ "Overall": 46.39,
213
+
214
+ "Style": 86.20,
215
+
216
+ "World Knowledge": 62.50,
217
+
218
+ "Attribute-Overall": 47.97,
219
+ "Quantity": 43.06,
220
+ "Expression": 30.77,
221
+ "Material": 55.19,
222
+ "Size": 55.56,
223
+ "Shape": 30.00,
224
+ "Color": 78.33,
225
+
226
+ "Action-Overall": 43.35,
227
+ "Hand": 23.08,
228
+ "Full body": 48.37,
229
+ "Animal": 58.82,
230
+ "Non Contact": 36.73,
231
+ "Contact": 36.31,
232
+ "State": 55.66,
233
+
234
+ "Relationship-Overall": 50.00,
235
+ "Composition": 59.80,
236
+ "Similarity": 38.89,
237
+ "Inclusion": 51.63,
238
+ "Comparison": 40.62,
239
+
240
+ "Compound-Overall": 45.10,
241
+ "Imagination": 57.65,
242
+ "Feature matching": 32.29,
243
+
244
+ "Grammar-Overall": 60.29,
245
+ "Pronoun Reference": 66.18,
246
+ "Consistency": 48.61,
247
+ "Negation": 63.85,
248
+
249
+ "Layout-Overall": 46.46,
250
+ "2D": 49.26,
251
+ "3D": 43.56,
252
+
253
+ "Logical Reasoning": 21.14,
254
+
255
+ "Text": 0.86
256
+ },
257
+ {
258
+ "model": "Emu-3",
259
+ "link": "https://arxiv.org/pdf/2409.18869",
260
+ "hf": "https://huggingface.co/BAAI/Emu3-Gen",
261
+ "open_source": true,
262
+ "release_date": "2024-09",
263
+
264
+ "Overall": 46.02,
265
+
266
+ "Style": 86.80,
267
+
268
+ "World Knowledge": 77.06,
269
+
270
+ "Attribute-Overall": 51.39,
271
+ "Quantity": 44.44,
272
+ "Expression": 45.51,
273
+ "Material": 53.77,
274
+ "Size": 43.06,
275
+ "Shape": 46.25,
276
+ "Color": 80.00,
277
+
278
+ "Action-Overall": 40.11,
279
+ "Hand": 25.00,
280
+ "Full body": 47.28,
281
+ "Animal": 50.74,
282
+ "Non Contact": 35.20,
283
+ "Contact": 27.98,
284
+ "State": 52.36,
285
+
286
+ "Relationship-Overall": 49.75,
287
+ "Composition": 56.76,
288
+ "Similarity": 46.67,
289
+ "Inclusion": 48.37,
290
+ "Comparison": 39.84,
291
+
292
+ "Compound-Overall": 36.86,
293
+ "Imagination": 41.33,
294
+ "Feature matching": 32.29,
295
+
296
+ "Grammar-Overall": 52.94,
297
+ "Pronoun Reference": 59.56,
298
+ "Consistency": 53.70,
299
+ "Negation": 45.38,
300
+
301
+ "Layout-Overall": 44.78,
302
+ "2D": 45.22,
303
+ "3D": 44.32,
304
+
305
+ "Logical Reasoning": 19.32,
306
+
307
+ "Text": 1.15
308
+ },
309
+ {
310
+ "model": "Playground2.5",
311
+ "link": "https://arxiv.org/pdf/2402.17245",
312
+ "hf": "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
313
+ "open_source": true,
314
+ "release_date": "2024-02",
315
+
316
+ "Overall": 45.61,
317
+
318
+ "Style": 89.50,
319
+
320
+ "World Knowledge": 76.11,
321
+
322
+ "Attribute-Overall": 52.78,
323
+ "Quantity": 58.33,
324
+ "Expression": 43.59,
325
+ "Material": 57.08,
326
+ "Size": 44.44,
327
+ "Shape": 41.25,
328
+ "Color": 75.83,
329
+
330
+ "Action-Overall": 42.68,
331
+ "Hand": 28.85,
332
+ "Full body": 50.00,
333
+ "Animal": 52.21,
334
+ "Non Contact": 35.20,
335
+ "Contact": 29.17,
336
+ "State": 58.02,
337
+
338
+ "Relationship-Overall": 51.52,
339
+ "Composition": 60.14,
340
+ "Similarity": 49.44,
341
+ "Inclusion": 48.37,
342
+ "Comparison": 39.06,
343
+
344
+ "Compound-Overall": 35.44,
345
+ "Imagination": 43.88,
346
+ "Feature matching": 26.82,
347
+
348
+ "Grammar-Overall": 53.21,
349
+ "Pronoun Reference": 58.82,
350
+ "Consistency": 50.00,
351
+ "Negation": 50.00,
352
+
353
+ "Layout-Overall": 37.13,
354
+ "2D": 34.56,
355
+ "3D": 39.77,
356
+
357
+ "Logical Reasoning": 16.59,
358
+
359
+ "Text": 1.15
360
+ },
361
+ {
362
+ "model": "SDXL",
363
+ "link": "https://arxiv.org/pdf/2307.01952",
364
+ "hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
365
+ "open_source": true,
366
+ "release_date": "2023-07",
367
+
368
+ "Overall": 39.75,
369
+
370
+ "Style": 87.40,
371
+
372
+ "World Knowledge": 72.63,
373
+
374
+ "Attribute-Overall": 44.34,
375
+ "Quantity": 44.44,
376
+ "Expression": 25.00,
377
+ "Material": 52.83,
378
+ "Size": 44.44,
379
+ "Shape": 33.75,
380
+ "Color": 68.33,
381
+
382
+ "Action-Overall": 34.22,
383
+ "Hand": 19.23,
384
+ "Full body": 35.33,
385
+ "Animal": 43.38,
386
+ "Non Contact": 26.53,
387
+ "Contact": 24.40,
388
+ "State": 53.30,
389
+
390
+ "Relationship-Overall": 44.92,
391
+ "Composition": 53.72,
392
+ "Similarity": 38.33,
393
+ "Inclusion": 39.67,
394
+ "Comparison": 41.41,
395
+
396
+ "Compound-Overall": 26.68,
397
+ "Imagination": 33.93,
398
+ "Feature matching": 19.27,
399
+
400
+ "Grammar-Overall": 47.33,
401
+ "Pronoun Reference": 50.37,
402
+ "Consistency": 42.59,
403
+ "Negation": 48.08,
404
+
405
+ "Layout-Overall": 29.85,
406
+ "2D": 26.47,
407
+ "3D": 33.33,
408
+
409
+ "Logical Reasoning": 9.55,
410
+
411
+ "Text": 1.15
412
+ },
413
  {
414
  "model": "GPT-4o",
415
  "link": "https://platform.openai.com/docs/guides/image-generation",