hmb HF Staff committed on
Commit
e3245dd
·
1 Parent(s): 671aa5a
Files changed (5) hide show
  1. package.json +4 -0
  2. pnpm-lock.yaml +0 -0
  3. requirements.txt +3 -0
  4. src/App.svelte +244 -37
  5. src/app.css +1 -0
package.json CHANGED
@@ -16,5 +16,9 @@
16
  "svelte-check": "^4.1.6",
17
  "typescript": "~5.8.3",
18
  "vite": "^6.3.5"
 
 
 
 
19
  }
20
  }
 
16
  "svelte-check": "^4.1.6",
17
  "typescript": "~5.8.3",
18
  "vite": "^6.3.5"
19
+ },
20
+ "dependencies": {
21
+ "@gradio/dataframe": "^0.18.8",
22
+ "@xenova/transformers": "^2.17.2"
23
  }
24
  }
pnpm-lock.yaml ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ transformers
3
+ uvicorn
src/App.svelte CHANGED
@@ -1,47 +1,254 @@
1
  <script lang="ts">
2
- import svelteLogo from './assets/svelte.svg'
3
- import viteLogo from '/vite.svg'
4
- import Counter from './lib/Counter.svelte'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  </script>
6
 
7
- <main>
8
- <div>
9
- <a href="https://vite.dev" target="_blank" rel="noreferrer">
10
- <img src={viteLogo} class="logo" alt="Vite Logo" />
11
- </a>
12
- <a href="https://svelte.dev" target="_blank" rel="noreferrer">
13
- <img src={svelteLogo} class="logo svelte" alt="Svelte Logo" />
14
- </a>
15
- </div>
16
- <h1>Vite + Svelte</h1>
17
-
18
- <div class="card">
19
- <Counter />
20
- </div>
21
-
22
- <p>
23
- Check out <a href="https://github.com/sveltejs/kit#readme" target="_blank" rel="noreferrer">SvelteKit</a>, the official Svelte app framework powered by Vite!
24
- </p>
25
-
26
- <p class="read-the-docs">
27
- Click on the Vite and Svelte logos to learn more
28
- </p>
29
- </main>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  <style>
32
- .logo {
33
- height: 6em;
34
- padding: 1.5em;
35
- will-change: filter;
36
- transition: filter 300ms;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
38
- .logo:hover {
39
- filter: drop-shadow(0 0 2em #646cffaa);
 
 
 
 
 
 
 
 
40
  }
41
- .logo.svelte:hover {
42
- filter: drop-shadow(0 0 2em #ff3e00aa);
43
  }
44
- .read-the-docs {
45
- color: #888;
 
46
  }
47
  </style>
 
1
  <script lang="ts">
2
+ import Dataframe from '@gradio/dataframe';
3
+
4
/** Table shape passed to the Dataframe components via `bind:value`. */
type TableValue = { data: string[][]; headers: string[] };

/** Builds a fresh empty table (one empty row, no headers) so state objects are never shared. */
const emptyTable = (): TableValue => ({ data: [[]], headers: [] });

// Text currently held by the textarea (`bind:value={rawData}`).
let rawData = '';
// Comma-joined rows of the cleaned table; written by analyzeAndClean.
let cleanedData = '';
// Entries of the form { step, accepted } rendered as a checkbox list.
let cleaningSteps: any[] = [];
// Controls whether the suggested-steps section is rendered.
let showSteps = false;
// Reference to the file <input> element (`bind:this={fileInput}`).
let fileInput: HTMLInputElement;

// Table parsed from the raw text, and the table produced by the cleaning run.
let inputValue: TableValue = emptyTable();
let cleanedValue: TableValue = emptyTable();
13
+
14
+
15
/**
 * Splits one delimited line into trimmed cells.
 *
 * A cell that starts with a double quote is read as a quoted field: the
 * separator is taken literally inside it and `""` stands for one quote
 * character. A quote appearing in the middle of an unquoted cell is kept
 * as-is. Quoted fields spanning several lines are not supported.
 */
function splitDelimitedLine(line: string, sep: string): string[] {
  const cells: string[] = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line.charAt(i);
    if (inQuotes) {
      if (ch !== '"') {
        current += ch;
      } else if (line.charAt(i + 1) === '"') {
        // Escaped quote inside a quoted field.
        current += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && current.trim() === '') {
      // Opening quote; any whitespace before it is dropped.
      current = '';
      inQuotes = true;
    } else if (ch === sep) {
      cells.push(current.trim());
      current = '';
    } else {
      current += ch;
    }
  }
  cells.push(current.trim());
  return cells;
}

/**
 * Parses pasted CSV or TSV text into a header row and data rows.
 *
 * The separator is a tab when the first line contains one, otherwise a comma.
 * Every cell is trimmed. Empty or whitespace-only input yields the empty
 * table `{ data: [[]], headers: [] }`.
 *
 * @param text Raw text from the textarea or an uploaded file.
 * @returns The first line as `headers` and the remaining lines as `data`.
 */
function parseCSVorTSV(text: string): { data: string[][]; headers: string[] } {
  const trimmed = text ? text.trim() : '';
  if (!trimmed) return { data: [[]], headers: [] };
  const [headerLine = '', ...bodyLines] = trimmed.split(/\r?\n/);
  const sep = headerLine.includes('\t') ? '\t' : ',';
  return {
    headers: splitDelimitedLine(headerLine, sep),
    data: bodyLines.map(line => splitDelimitedLine(line, sep)),
  };
}
24
+
25
/** Re-parses the current raw text and stores the result as the input table. */
function updateInputValueFromRaw() {
  const parsedTable = parseCSVorTSV(rawData);
  inputValue = parsedTable;
}
28
+
29
/**
 * Serialises the input table back into comma-separated text and stores it in
 * `rawData`. Does nothing while the table has no headers.
 */
function updateRawFromInputValue() {
  const { headers, data } = inputValue;
  if (headers.length === 0) return;
  const delimiter = ',';
  // Header row first, then every data row, each joined with the delimiter.
  const textLines = [headers, ...data].map(cells => cells.join(delimiter));
  rawData = textLines.join('\n');
}
36
+
37
/**
 * Change handler for the file input: reads the first selected file as text,
 * stores it in `rawData` and refreshes the parsed input table.
 */
function handleFileUpload(event: Event) {
  const selected = (event.target as HTMLInputElement).files?.[0];
  // Nothing to do when the selection is empty.
  if (!selected) return;
  const reader = new FileReader();
  reader.onload = (loadEvent) => {
    rawData = loadEvent.target?.result as string;
    updateInputValueFromRaw();
  };
  reader.readAsText(selected);
}
48
+
49
/**
 * Handles a `change` event from the editable Dataframe: adopts the event
 * payload as the new input table and regenerates the raw text from it.
 */
function handleInputChange(e: CustomEvent) {
  const { detail } = e;
  inputValue = detail;
  updateRawFromInputValue();
}
53
+
54
+ // In-browser AI cleaning using transformers.js (Xenova/gpt2 as example)
55
+ import { pipeline } from '@xenova/transformers';
56
// Lazily created text-generation pipeline, reused across runs.
let generator: any = null;
// True while the pipeline is being created.
let loadingModel = false;

/** Validated reply from the model: suggested steps plus the cleaned table (first row is headers). */
type CleaningResult = { steps: string[]; cleaned: string[][] };

/**
 * Extracts and validates the JSON object embedded in the model output.
 *
 * @param text Generated text that may contain a `{ ... }` JSON object.
 * @returns The steps and cleaned rows with every value coerced to a string,
 *   or `null` when no JSON object is found, it cannot be parsed, `steps` is
 *   not an array, or `cleaned` is not a non-empty array of arrays.
 */
function extractCleaningResult(text: string): CleaningResult | null {
  const jsonText = text.match(/\{[\s\S]*\}/)?.[0];
  if (!jsonText) return null;
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonText);
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null) return null;
  const { steps, cleaned } = parsed as { steps?: unknown; cleaned?: unknown };
  if (!Array.isArray(steps) || !Array.isArray(cleaned)) return null;
  if (cleaned.length === 0 || !cleaned.every(row => Array.isArray(row))) return null;
  return {
    steps: steps.map(step => String(step)),
    cleaned: (cleaned as unknown[][]).map(row =>
      row.map(cell => (cell == null ? '' : String(cell)))
    ),
  };
}

/**
 * Runs the in-browser model on the current input table and, when the reply
 * contains valid JSON, fills the suggested steps and the cleaned preview.
 *
 * Previous results are cleared first. The pipeline is created on first use
 * and cached in `generator`. A malformed reply shows the "did not return
 * valid cleaning suggestions" alert; a failure to load or run the model
 * shows the "Failed to load or run the model" alert.
 */
async function analyzeAndClean() {
  showSteps = false;
  cleaningSteps = [];
  cleanedValue = { data: [[]], headers: [] };
  cleanedData = '';
  loadingModel = true;
  try {
    if (!generator) {
      generator = await pipeline('text-generation', 'Xenova/gpt2');
    }
    loadingModel = false;
    // Prepare a prompt for the model
    const tableString = [inputValue.headers, ...inputValue.data].map(row => row.join(',')).join('\n');
    const prompt = `Clean this table and suggest steps. Table:\n${tableString}\nReturn JSON with keys steps (array of strings) and cleaned (array of arrays, first row is headers).`;
    const output = await generator(prompt, { max_new_tokens: 128 });
    const generated: unknown = output?.[0]?.generated_text;
    const fullText = typeof generated === 'string' ? generated : '';
    // The generated text may echo the prompt; drop it so braces inside the
    // user's own table cannot be mistaken for the start of the JSON reply.
    const content = fullText.startsWith(prompt) ? fullText.slice(prompt.length) : fullText;
    const result = extractCleaningResult(content);
    if (!result) {
      alert('AI did not return valid cleaning suggestions.');
      return;
    }
    const [headers = [], ...rows] = result.cleaned;
    cleaningSteps = result.steps.map(step => ({ step, accepted: true }));
    cleanedValue = { headers, data: rows };
    cleanedData = rows.map(row => row.join(',')).join('\n');
    showSteps = true;
  } catch (err) {
    // Keep the underlying cause available for debugging.
    console.error('analyzeAndClean failed:', err);
    alert('Failed to load or run the model. Please check your internet connection and model support.');
    loadingModel = false;
  }
}
98
+
99
/**
 * Flips the `accepted` flag of the cleaning step at position `idx`.
 */
function toggleStep(idx: number) {
  const wasAccepted = cleaningSteps[idx].accepted;
  // Assign through `cleaningSteps` itself (not a local alias) so the
  // component sees the array as changed.
  cleaningSteps[idx].accepted = !wasAccepted;
}
102
+
103
/**
 * Downloads the cleaned table as `cleaned_data.csv`.
 *
 * Does nothing while the cleaned table has no headers. Cells containing a
 * comma, a double quote or a line break are wrapped in double quotes with
 * embedded quotes doubled, so the file stays valid CSV.
 */
function exportCleaned() {
  // Export cleanedValue as CSV
  if (!cleanedValue.headers.length) return;
  const sep = ',';
  const escapeCell = (cell: unknown): string => {
    const text = cell == null ? '' : String(cell);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  const toLine = (row: readonly unknown[]): string => row.map(escapeCell).join(sep);
  const csv = [cleanedValue.headers, ...cleanedValue.data].map(toLine).join('\n');
  const blob = new Blob([csv], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);
  // Trigger the download through a temporary anchor element.
  const a = document.createElement('a');
  a.href = url;
  a.download = 'cleaned_data.csv';
  a.click();
  URL.revokeObjectURL(url);
}
117
+
118
+ $: updateInputValueFromRaw();
119
  </script>
120
 
121
<!-- Page layout: data input, AI-suggested steps, side-by-side tables, export. -->
<div class="df-theme">
  <main>
    <h1>AI Data Cleaning Playground</h1>
    <section class="input-section">
      <label for="data-input">Paste your tabular data (CSV/TSV):</label>
      <textarea id="data-input" bind:value={rawData} rows="8" cols="60" placeholder="Paste CSV or TSV data here..." on:input={updateInputValueFromRaw}></textarea>
      <div>
        <input type="file" accept=".csv,.tsv,.txt" bind:this={fileInput} on:change={handleFileUpload} />
      </div>
      <div style="margin: 1rem 0; width: 100%;">
        <Dataframe
          bind:value={inputValue}
          show_search="search"
          show_row_numbers={true}
          show_copy_button={true}
          show_fullscreen_button={true}
          editable={true}
          on:change={handleInputChange}
        />
      </div>
      <button on:click={analyzeAndClean}>Analyze &amp; Clean</button>
    </section>

    {#if showSteps}
      <section class="steps-section">
        <h2>AI-Suggested Cleaning Steps</h2>
        <ul>
          {#each cleaningSteps as step, idx}
            <li>
              <!-- One-way `checked` plus the change handler: combining
                   `bind:checked` with toggleStep updated the flag twice per
                   click, which cancelled the toggle. -->
              <input type="checkbox" checked={step.accepted} on:change={() => toggleStep(idx)} />
              {step.step}
            </li>
          {/each}
        </ul>
      </section>
    {/if}

    <section class="dataframes-section">
      <div class="dataframe original">
        <h3>Original Data</h3>
        <Dataframe
          bind:value={inputValue}
          show_search="search"
          show_row_numbers={true}
          show_copy_button={true}
          show_fullscreen_button={true}
          editable={true}
          on:change={handleInputChange}
        />
      </div>
      <div class="dataframe cleaned">
        <h3>Cleaned Data (Preview)</h3>
        <Dataframe
          bind:value={cleanedValue}
          show_search="search"
          show_row_numbers={true}
          show_copy_button={true}
          show_fullscreen_button={true}
          editable={false}
        />
      </div>
    </section>

    <button on:click={exportCleaned}>Export Cleaned Data</button>
  </main>
</div>
187
 
188
  <style>
189
+ main {
190
+ max-width: 900px;
191
+ margin: 2rem auto;
192
+ padding: 2rem;
193
+ background: #fff;
194
+ border-radius: 12px;
195
+ box-shadow: 0 2px 8px rgba(0,0,0,0.07);
196
+ }
197
+ h1 {
198
+ text-align: center;
199
+ margin-bottom: 2rem;
200
+ }
201
+ .input-section {
202
+ margin-bottom: 2rem;
203
+ display: flex;
204
+ flex-direction: column;
205
+ gap: 0.5rem;
206
+ align-items: flex-start;
207
+ }
208
+ .steps-section {
209
+ margin-bottom: 2rem;
210
+ background: #f8f9fa;
211
+ padding: 1rem;
212
+ border-radius: 8px;
213
+ }
214
+ .dataframes-section {
215
+ display: flex;
216
+ gap: 2rem;
217
+ justify-content: center;
218
+ margin-bottom: 2rem;
219
+ }
220
+ .dataframe {
221
+ flex: 1;
222
+ display: flex;
223
+ flex-direction: column;
224
+ align-items: center;
225
+ }
226
+ textarea {
227
+ width: 100%;
228
+ font-family: monospace;
229
+ font-size: 1rem;
230
+ border-radius: 6px;
231
+ border: 1px solid #ccc;
232
+ padding: 0.5rem;
233
+ margin-top: 0.5rem;
234
+ resize: vertical;
235
  }
236
+ button {
237
+ margin-top: 1rem;
238
+ padding: 0.5rem 1.5rem;
239
+ font-size: 1rem;
240
+ border-radius: 6px;
241
+ border: none;
242
+ background: #3f51b5;
243
+ color: #fff;
244
+ cursor: pointer;
245
+ transition: background 0.2s;
246
  }
247
+ button:hover {
248
+ background: #283593;
249
  }
250
+ .df-theme {
251
+ --gr-df-table-text: #222 !important;
252
+ background: #fff;
253
  }
254
  </style>
src/app.css CHANGED
@@ -11,6 +11,7 @@
11
  text-rendering: optimizeLegibility;
12
  -webkit-font-smoothing: antialiased;
13
  -moz-osx-font-smoothing: grayscale;
 
14
  }
15
 
16
  a {
 
11
  text-rendering: optimizeLegibility;
12
  -webkit-font-smoothing: antialiased;
13
  -moz-osx-font-smoothing: grayscale;
14
+ color: black;
15
  }
16
 
17
  a {