Spaces:
Sleeping
Sleeping
Update app.py
#16
by
MrUnknown420
- opened
app.py
CHANGED
@@ -2,7 +2,9 @@
|
|
2 |
|
3 |
import os
|
4 |
import json
|
|
|
5 |
import datetime
|
|
|
6 |
import gradio as gr
|
7 |
from transformers import (
|
8 |
AutoTokenizer, AutoModelForSequenceClassification,
|
@@ -19,61 +21,87 @@ os.makedirs("logs", exist_ok=True)
|
|
19 |
os.makedirs("memory", exist_ok=True)
|
20 |
|
21 |
# =========================
|
22 |
-
#
|
23 |
# =========================
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
27 |
|
28 |
-
def
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
if os.path.exists(file):
|
31 |
with open(file, "r") as f:
|
32 |
return json.load(f)
|
33 |
return []
|
34 |
|
35 |
-
def save_memory(
|
36 |
-
file = get_memory_file(
|
37 |
-
memory = load_memory(
|
38 |
memory.append(conversation)
|
39 |
with open(file, "w") as f:
|
40 |
json.dump(memory, f, indent=2)
|
41 |
|
42 |
# =========================
|
43 |
-
#
|
44 |
# =========================
|
45 |
-
def
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
|
50 |
# =========================
|
51 |
# Training Pipeline
|
52 |
# =========================
|
53 |
-
|
54 |
-
|
55 |
-
logging.basicConfig(
|
56 |
-
filename=os.path.join("logs", "app.log"),
|
57 |
-
level=logging.INFO,
|
58 |
-
format="%(asctime)s [%(levelname)s] %(message)s"
|
59 |
-
)
|
60 |
-
log = logging.getLogger(__name__)
|
61 |
-
|
62 |
-
def train_model(model_name, dataset_name, epochs):
|
63 |
try:
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
parts = dataset_name.split(" ")
|
66 |
-
|
67 |
if len(parts) == 2:
|
68 |
dataset_repo, dataset_config = parts
|
69 |
-
dataset = load_dataset(dataset_repo, dataset_config, split="train[:200]") # CPU-friendly
|
70 |
else:
|
71 |
dataset = load_dataset(dataset_name, split="train[:200]")
|
72 |
|
73 |
-
|
74 |
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
77 |
|
78 |
def tokenize_fn(examples):
|
79 |
return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)
|
@@ -81,131 +109,105 @@ def train_model(model_name, dataset_name, epochs):
|
|
81 |
dataset = dataset.map(tokenize_fn, batched=True)
|
82 |
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
|
83 |
|
|
|
|
|
|
|
84 |
training_args = TrainingArguments(
|
85 |
-
output_dir=
|
86 |
-
|
|
|
87 |
save_strategy="epoch",
|
88 |
-
learning_rate=2e-5,
|
89 |
-
per_device_train_batch_size=4,
|
90 |
num_train_epochs=int(epochs),
|
|
|
91 |
logging_dir="./logs",
|
92 |
logging_steps=10,
|
|
|
|
|
93 |
)
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
trainer = Trainer(
|
96 |
model=model,
|
97 |
args=training_args,
|
98 |
train_dataset=dataset,
|
99 |
tokenizer=tokenizer,
|
|
|
|
|
100 |
)
|
101 |
|
102 |
trainer.train()
|
103 |
-
return "Training complete โ
"
|
104 |
-
except Exception as e:
|
105 |
-
log.error(f"Training failed: {e}")
|
106 |
-
return f"Error during training: {e}"
|
107 |
-
|
108 |
-
# Load model
|
109 |
-
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
|
110 |
-
|
111 |
-
# Training arguments
|
112 |
-
training_args = TrainingArguments(
|
113 |
-
output_dir=os.path.join(output_dir, model_name.replace("/", "_")),
|
114 |
-
overwrite_output_dir=True,
|
115 |
-
evaluation_strategy="epoch",
|
116 |
-
save_strategy="epoch",
|
117 |
-
num_train_epochs=epochs,
|
118 |
-
per_device_train_batch_size=8,
|
119 |
-
logging_dir="./logs",
|
120 |
-
logging_steps=10,
|
121 |
-
report_to="none", # prevent wandb errors
|
122 |
-
no_cuda=True # force CPU
|
123 |
-
)
|
124 |
-
|
125 |
-
# Progress tracking
|
126 |
-
progress = {"epoch": 0, "loss": []}
|
127 |
-
|
128 |
-
def compute_metrics(eval_pred):
|
129 |
-
logits, labels = eval_pred
|
130 |
-
preds = logits.argmax(-1)
|
131 |
-
acc = (preds == labels).astype(float).mean().item()
|
132 |
-
return {"accuracy": acc}
|
133 |
-
|
134 |
-
def log_callback(trainer, state, control, **kwargs):
|
135 |
-
if state.is_local_process_zero and state.log_history:
|
136 |
-
last_log = state.log_history[-1]
|
137 |
-
if "loss" in last_log:
|
138 |
-
progress["epoch"] = state.epoch
|
139 |
-
progress["loss"].append(last_log["loss"])
|
140 |
-
log_event(f"Epoch {state.epoch} - Loss: {last_log['loss']}")
|
141 |
-
|
142 |
-
# Trainer
|
143 |
-
trainer = Trainer(
|
144 |
-
model=model,
|
145 |
-
args=training_args,
|
146 |
-
train_dataset=dataset,
|
147 |
-
tokenizer=tokenizer,
|
148 |
-
compute_metrics=compute_metrics,
|
149 |
-
callbacks=[log_callback]
|
150 |
-
)
|
151 |
-
|
152 |
-
trainer.train()
|
153 |
-
|
154 |
-
# Save trained model
|
155 |
-
save_dir = os.path.join(output_dir, model_name.replace("/", "_"))
|
156 |
-
model.save_pretrained(save_dir)
|
157 |
-
tokenizer.save_pretrained(save_dir)
|
158 |
-
|
159 |
-
log_event(f"Training finished: model saved at {save_dir}")
|
160 |
-
return f"โ
Training complete. Model saved at {save_dir}", progress
|
161 |
-
|
162 |
-
# app.py (Part 2 of 2) โ UI
|
163 |
|
164 |
-
|
|
|
|
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
# =========================
|
169 |
-
TOP_MODELS = [
|
170 |
-
"distilbert-base-uncased", "bert-base-uncased", "roberta-base",
|
171 |
-
"google/electra-base-discriminator", "albert-base-v2",
|
172 |
-
"facebook/bart-base", "gpt2", "t5-small",
|
173 |
-
"microsoft/deberta-base", "xlnet-base-cased"
|
174 |
-
]
|
175 |
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
|
|
182 |
|
183 |
# =========================
|
184 |
-
# Inference (
|
185 |
# =========================
|
186 |
-
def chat_with_model(
|
187 |
-
|
|
|
188 |
if not os.path.exists(model_dir):
|
189 |
return "โ Model not trained yet. Train it first."
|
190 |
|
191 |
-
|
192 |
-
|
|
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
|
202 |
-
|
|
|
|
|
|
|
203 |
|
204 |
# =========================
|
205 |
# View Memory
|
206 |
# =========================
|
207 |
-
def view_memory(
|
208 |
-
memory = load_memory(
|
209 |
if not memory:
|
210 |
return "๐ญ No memory yet for this model."
|
211 |
return json.dumps(memory, indent=2)
|
@@ -233,31 +235,34 @@ This app allows you to **train, fine-tune, test, and manage AI models** directly
|
|
233 |
|
234 |
## ๐น Step 1: Training a Model
|
235 |
1. Go to the **Training Tab**.
|
236 |
-
2. Select a **model** from
|
237 |
-
3. Select a **dataset** from
|
238 |
-
4.
|
239 |
-
5.
|
240 |
-
6.
|
|
|
241 |
|
242 |
---
|
243 |
|
244 |
## ๐น Step 2: Testing Your Model
|
245 |
1. Switch to the **Testing Tab**.
|
246 |
-
2.
|
247 |
-
3.
|
248 |
-
4.
|
|
|
249 |
|
250 |
---
|
251 |
|
252 |
## ๐น Step 3: Viewing Memory
|
253 |
- Go to the **Memory Tab**.
|
254 |
-
-
|
|
|
255 |
|
256 |
---
|
257 |
|
258 |
## ๐น Step 4: Viewing Logs
|
259 |
- All activity is logged.
|
260 |
-
- Open the **Logs Tab** to
|
261 |
|
262 |
---
|
263 |
|
@@ -265,63 +270,94 @@ This app allows you to **train, fine-tune, test, and manage AI models** directly
|
|
265 |
- Training runs on **CPU** (slower but free).
|
266 |
- Uses Hugging Face **Transformers + Datasets**.
|
267 |
- Stores:
|
268 |
-
- Models โ `trained_models
|
269 |
- Logs โ `logs/events.log`
|
270 |
-
- Memory โ `memory/memory_{
|
271 |
"""
|
272 |
|
273 |
# =========================
|
274 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
# =========================
|
276 |
with gr.Blocks() as demo:
|
277 |
gr.Markdown("# ๐ง AI Model Builder\nTrain, Fine-tune, Test, and Manage Your Own AI Models")
|
278 |
|
|
|
279 |
with gr.Tab("๐ ๏ธ Training"):
|
280 |
with gr.Row():
|
281 |
-
model_dropdown = gr.Dropdown(choices=TOP_MODELS, label="Select Model", interactive=True)
|
282 |
model_textbox = gr.Textbox(label="Or enter custom model ID")
|
283 |
with gr.Row():
|
284 |
dataset_dropdown = gr.Dropdown(choices=TOP_DATASETS, label="Select Dataset", interactive=True)
|
285 |
dataset_textbox = gr.Textbox(label="Or enter custom dataset ID")
|
|
|
286 |
epochs = gr.Slider(1, 5, value=1, step=1, label="Epochs (Training Cycles)")
|
287 |
train_button = gr.Button("๐ Start Training")
|
288 |
train_output = gr.Textbox(label="Training Status")
|
289 |
progress_output = gr.JSON(label="Progress Details")
|
290 |
|
291 |
-
def run_training(model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, epochs):
|
292 |
-
|
293 |
dataset_name = dataset_textbox if dataset_textbox else dataset_dropdown
|
294 |
-
|
|
|
|
|
295 |
|
296 |
train_button.click(
|
297 |
run_training,
|
298 |
-
inputs=[model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, epochs],
|
299 |
outputs=[train_output, progress_output]
|
300 |
)
|
301 |
|
|
|
302 |
with gr.Tab("๐ฌ Testing"):
|
303 |
-
|
|
|
304 |
test_input = gr.Textbox(label="Your Message")
|
305 |
test_button = gr.Button("๐ก Predict")
|
306 |
test_output = gr.Textbox(label="Model Response")
|
307 |
-
test_button.click(chat_with_model, inputs=[test_model_name, test_input], outputs=test_output)
|
308 |
|
|
|
|
|
|
|
|
|
309 |
with gr.Tab("๐งพ Memory"):
|
310 |
-
|
|
|
311 |
mem_button = gr.Button("๐ Load Memory")
|
312 |
mem_output = gr.Textbox(label="Conversation Memory", lines=15)
|
313 |
-
mem_button.click(view_memory, inputs=mem_model_name, outputs=mem_output)
|
314 |
|
|
|
|
|
|
|
|
|
315 |
with gr.Tab("๐ Logs"):
|
316 |
log_button = gr.Button("๐ Show Logs")
|
317 |
log_output = gr.Textbox(label="Logs", lines=20)
|
318 |
log_button.click(view_logs, outputs=log_output)
|
319 |
|
|
|
320 |
with gr.Tab("๐ Guide"):
|
321 |
gr.Markdown(USER_GUIDE)
|
322 |
|
323 |
# =========================
|
324 |
-
# Launch
|
325 |
# =========================
|
326 |
if __name__ == "__main__":
|
327 |
demo.launch()
|
|
|
2 |
|
3 |
import os
|
4 |
import json
|
5 |
+
import time
|
6 |
import datetime
|
7 |
+
import logging
|
8 |
import gradio as gr
|
9 |
from transformers import (
|
10 |
AutoTokenizer, AutoModelForSequenceClassification,
|
|
|
21 |
os.makedirs("memory", exist_ok=True)
|
22 |
|
23 |
# =========================
|
24 |
+
# Logging Setup
|
25 |
# =========================
|
26 |
+
# Send records from the standard logging module to logs/app.log (INFO and
# above); each line carries a timestamp and the severity in brackets.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    filename=os.path.join("logs", "app.log"),
)
log = logging.getLogger(__name__)
|
32 |
|
33 |
+
def log_event(event: str):
    """Record *event* in ``logs/events.log`` and echo it to stdout.

    The line appended to the file is prefixed with the current local
    timestamp in square brackets; the console copy is the bare message.

    Args:
        event: Human-readable message to record.
    """
    log_file = os.path.join("logs", "events.log")
    # Callers pass messages containing non-ASCII symbols, so pin the encoding
    # instead of relying on the platform's default locale encoding.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"[{datetime.datetime.now()}] {event}\n")
    print(event)
|
39 |
+
|
40 |
+
# =========================
|
41 |
+
# Memory System
|
42 |
+
# =========================
|
43 |
+
def get_memory_file(model_run: str):
    """Return the path of the JSON memory file that belongs to *model_run*."""
    filename = "memory_" + f"{model_run}" + ".json"
    return os.path.join("memory", filename)
|
45 |
+
|
46 |
+
def load_memory(model_run: str):
    """Load the saved conversation history for *model_run*.

    Args:
        model_run: Run identifier used to locate the memory file.

    Returns:
        The decoded content of the run's memory file, or an empty list when
        the file does not exist or does not contain valid JSON.
    """
    file = get_memory_file(model_run)
    try:
        with open(file, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return []
    except json.JSONDecodeError as e:
        # An empty or truncated file would otherwise make every later read
        # (and therefore every save) fail; report it and start fresh.
        log_event(f"Memory file {file} is not valid JSON ({e}); ignoring it")
        return []
|
52 |
|
53 |
+
def save_memory(model_run: str, conversation: dict):
    """Append *conversation* to the memory file of *model_run*.

    The existing history is loaded, extended with the new entry and written
    back as indented JSON.

    Args:
        model_run: Run identifier used to locate the memory file.
        conversation: Entry to append to the stored history.
    """
    file = get_memory_file(model_run)
    memory = load_memory(model_run)
    memory.append(conversation)
    # Write to a sibling temp file and swap it in, so an interrupted write
    # cannot leave a truncated JSON file where the history used to be.
    tmp_file = f"{file}.tmp"
    with open(tmp_file, "w") as f:
        json.dump(memory, f, indent=2)
    os.replace(tmp_file, file)
|
59 |
|
60 |
# =========================
|
61 |
+
# Helper: List Trained Models
|
62 |
# =========================
|
63 |
+
def list_trained_models():
    """Return the names of the run directories found under ``trained_models/``.

    Returns:
        The sub-directory names in sorted order, or a one-item list holding a
        placeholder label when there are none (including when the
        ``trained_models`` folder itself does not exist).
    """
    root = "trained_models"
    try:
        entries = os.listdir(root)
    except FileNotFoundError:
        # No folder yet simply means nothing has been trained.
        entries = []
    # os.listdir gives no ordering guarantee; sort so the list is stable.
    models = sorted(d for d in entries if os.path.isdir(os.path.join(root, d)))
    return models if models else ["โ No trained models yet"]
|
71 |
|
72 |
# =========================
|
73 |
# Training Pipeline
|
74 |
# =========================
|
75 |
+
def train_model(base_model, dataset_name, custom_name, epochs):
|
76 |
+
"""Train Hugging Face model with progress + logging."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
try:
|
78 |
+
# -------------------------
|
79 |
+
# Prepare run identifiers
|
80 |
+
# -------------------------
|
81 |
+
run_id = f"{base_model.replace('/', '_')}__{custom_name.strip()}"
|
82 |
+
save_dir = os.path.join("trained_models", run_id)
|
83 |
+
os.makedirs(save_dir, exist_ok=True)
|
84 |
+
progress = {"status": "starting", "loss": [], "accuracy": [], "time": 0}
|
85 |
+
|
86 |
+
log_event(f"๐ Starting training run: {run_id} on dataset {dataset_name}")
|
87 |
+
|
88 |
+
# -------------------------
|
89 |
+
# Load dataset
|
90 |
+
# -------------------------
|
91 |
parts = dataset_name.split(" ")
|
|
|
92 |
if len(parts) == 2:
|
93 |
dataset_repo, dataset_config = parts
|
94 |
+
dataset = load_dataset(dataset_repo, dataset_config, split="train[:200]") # CPU-friendly
|
95 |
else:
|
96 |
dataset = load_dataset(dataset_name, split="train[:200]")
|
97 |
|
98 |
+
log_event("๐ Dataset loaded successfully")
|
99 |
|
100 |
+
# -------------------------
|
101 |
+
# Tokenizer + Model
|
102 |
+
# -------------------------
|
103 |
+
tokenizer = AutoTokenizer.from_pretrained(base_model)
|
104 |
+
model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=2)
|
105 |
|
106 |
def tokenize_fn(examples):
|
107 |
return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)
|
|
|
109 |
dataset = dataset.map(tokenize_fn, batched=True)
|
110 |
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
|
111 |
|
112 |
+
# -------------------------
|
113 |
+
# Training Args
|
114 |
+
# -------------------------
|
115 |
training_args = TrainingArguments(
|
116 |
+
output_dir=save_dir,
|
117 |
+
overwrite_output_dir=True,
|
118 |
+
evaluation_strategy="epoch",
|
119 |
save_strategy="epoch",
|
|
|
|
|
120 |
num_train_epochs=int(epochs),
|
121 |
+
per_device_train_batch_size=4,
|
122 |
logging_dir="./logs",
|
123 |
logging_steps=10,
|
124 |
+
report_to="none",
|
125 |
+
no_cuda=True # force CPU
|
126 |
)
|
127 |
|
128 |
+
# -------------------------
|
129 |
+
# Metrics
|
130 |
+
# -------------------------
|
131 |
+
def compute_metrics(eval_pred):
|
132 |
+
logits, labels = eval_pred
|
133 |
+
preds = logits.argmax(-1)
|
134 |
+
acc = (preds == labels).astype(float).mean().item()
|
135 |
+
return {"accuracy": acc}
|
136 |
+
|
137 |
+
# -------------------------
|
138 |
+
# Custom Progress Callback
|
139 |
+
# -------------------------
|
140 |
+
start_time = time.time()
|
141 |
+
|
142 |
+
def log_callback(trainer, state, control, **kwargs):
|
143 |
+
if state.is_local_process_zero and state.log_history:
|
144 |
+
last_log = state.log_history[-1]
|
145 |
+
if "loss" in last_log:
|
146 |
+
progress["status"] = "running"
|
147 |
+
progress["loss"].append(last_log["loss"])
|
148 |
+
progress["time"] = round(time.time() - start_time, 2)
|
149 |
+
log_event(f"๐ Epoch {state.epoch} - Loss: {last_log['loss']}")
|
150 |
+
|
151 |
+
# -------------------------
|
152 |
+
# Trainer
|
153 |
+
# -------------------------
|
154 |
trainer = Trainer(
|
155 |
model=model,
|
156 |
args=training_args,
|
157 |
train_dataset=dataset,
|
158 |
tokenizer=tokenizer,
|
159 |
+
compute_metrics=compute_metrics,
|
160 |
+
callbacks=[log_callback]
|
161 |
)
|
162 |
|
163 |
trainer.train()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
# Save artifacts
|
166 |
+
model.save_pretrained(save_dir)
|
167 |
+
tokenizer.save_pretrained(save_dir)
|
168 |
|
169 |
+
progress["status"] = "done"
|
170 |
+
log_event(f"โ
Training finished: model saved at {save_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
+
return f"โ
Training complete: {run_id}", progress
|
173 |
+
|
174 |
+
except Exception as e:
|
175 |
+
log_event(f"โ Training failed: {e}")
|
176 |
+
return f"Error during training: {e}", {"status": "error"}
|
177 |
+
|
178 |
+
# app.py (Part 2 of 2)
|
179 |
|
180 |
# =========================
|
181 |
+
# Inference (Testing / Chat)
|
182 |
# =========================
|
183 |
+
def chat_with_model(model_run, user_input):
    """Classify *user_input* with a saved model run and remember the exchange.

    Args:
        model_run: Name of a directory under ``trained_models/``.
        user_input: Text to classify.

    Returns:
        A status string: the predicted class index on success, a hint to
        train first when the run directory is missing or no run was given,
        or the error text when loading or inference fails.
    """
    # An unselected dropdown can hand us None, which os.path.join rejects
    # with a TypeError; treat "no run chosen" like "run not found".
    model_dir = os.path.join("trained_models", model_run) if model_run else None
    if model_dir is None or not os.path.exists(model_dir):
        return "โ Model not trained yet. Train it first."

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)

        inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True)
        # Inference only: no gradients are needed for a forward pass.
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=-1).item()

        # Save memory
        conversation = {"input": user_input, "prediction": prediction}
        save_memory(model_run, conversation)

        return f"๐ฎ Prediction: {prediction}"
    except Exception as e:
        log_event(f"โ Inference failed: {e}")
        return f"Error during inference: {e}"
|
205 |
|
206 |
# =========================
|
207 |
# View Memory
|
208 |
# =========================
|
209 |
+
def view_memory(model_run):
    """Return the stored history of *model_run* as indented JSON text.

    Falls back to a short notice when the run has no stored history.
    """
    history = load_memory(model_run)
    if history:
        return json.dumps(history, indent=2)
    return "๐ญ No memory yet for this model."
|
|
|
235 |
|
236 |
## ๐น Step 1: Training a Model
|
237 |
1. Go to the **Training Tab**.
|
238 |
+
2. Select a **base model** (from dropdown or enter manually).
|
239 |
+
3. Select a **dataset** (from dropdown or enter manually).
|
240 |
+
4. Enter a **custom run name** (to keep multiple versions without overwriting).
|
241 |
+
5. Choose the number of **epochs**.
|
242 |
+
6. Click **Start Training**.
|
243 |
+
7. Training progress will appear, and the model will be saved under `trained_models/{run_id}`.
|
244 |
|
245 |
---
|
246 |
|
247 |
## ๐น Step 2: Testing Your Model
|
248 |
1. Switch to the **Testing Tab**.
|
249 |
+
2. Select a trained model run.
|
250 |
+
3. Enter any input text.
|
251 |
+
4. The app will return a **prediction**.
|
252 |
+
5. Each chat is saved in per-model **memory**.
|
253 |
|
254 |
---
|
255 |
|
256 |
## ๐น Step 3: Viewing Memory
|
257 |
- Go to the **Memory Tab**.
|
258 |
+
- Select a trained model run.
|
259 |
+
- View past chats + predictions.
|
260 |
|
261 |
---
|
262 |
|
263 |
## ๐น Step 4: Viewing Logs
|
264 |
- All activity is logged.
|
265 |
+
- Open the **Logs Tab** to see training sessions, progress, and errors.
|
266 |
|
267 |
---
|
268 |
|
|
|
270 |
- Training runs on **CPU** (slower but free).
|
271 |
- Uses Hugging Face **Transformers + Datasets**.
|
272 |
- Stores:
|
273 |
+
- Models โ `trained_models/{run_id}`
|
274 |
- Logs โ `logs/events.log`
|
275 |
+
- Memory โ `memory/memory_{run_id}.json`
|
276 |
"""
|
277 |
|
278 |
# =========================
|
279 |
+
# UI Defaults
|
280 |
+
# =========================
|
281 |
+
# Base-model IDs offered in the training tab's model dropdown.
TOP_MODELS = [
    "distilbert-base-uncased",
    "bert-base-uncased",
    "roberta-base",
    "google/electra-base-discriminator",
    "albert-base-v2",
    "facebook/bart-base",
    "gpt2",
    "t5-small",
    "microsoft/deberta-base",
    "xlnet-base-cased",
]
|
287 |
+
|
288 |
+
# Dataset IDs offered in the training tab's dataset dropdown.
# train_model accepts either "repo" or "repo config" (split on a space).
# NOTE(review): train_model tokenizes a "text" column, keeps a "label"
# column and requests num_labels=2 -- confirm each entry below fits that.
TOP_DATASETS = [
    "imdb",
    "ag_news",
    "yelp_polarity",
    "dbpedia_14",
    "amazon_polarity",
    "tweet_eval",
    "glue",
    "sst2",
    "cnn_dailymail",
    "emotion",
]
|
294 |
+
|
295 |
+
# =========================
|
296 |
+
# Gradio UI
|
297 |
# =========================
|
298 |
with gr.Blocks() as demo:
|
299 |
gr.Markdown("# ๐ง AI Model Builder\nTrain, Fine-tune, Test, and Manage Your Own AI Models")
|
300 |
|
301 |
+
# ---- Training Tab ----
|
302 |
with gr.Tab("๐ ๏ธ Training"):
|
303 |
with gr.Row():
|
304 |
+
model_dropdown = gr.Dropdown(choices=TOP_MODELS, label="Select Base Model", interactive=True)
|
305 |
model_textbox = gr.Textbox(label="Or enter custom model ID")
|
306 |
with gr.Row():
|
307 |
dataset_dropdown = gr.Dropdown(choices=TOP_DATASETS, label="Select Dataset", interactive=True)
|
308 |
dataset_textbox = gr.Textbox(label="Or enter custom dataset ID")
|
309 |
+
run_name = gr.Textbox(label="Custom Run Name (required)")
|
310 |
epochs = gr.Slider(1, 5, value=1, step=1, label="Epochs (Training Cycles)")
|
311 |
train_button = gr.Button("๐ Start Training")
|
312 |
train_output = gr.Textbox(label="Training Status")
|
313 |
progress_output = gr.JSON(label="Progress Details")
|
314 |
|
315 |
+
def run_training(model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, run_name, epochs):
|
316 |
+
base_model = model_textbox if model_textbox else model_dropdown
|
317 |
dataset_name = dataset_textbox if dataset_textbox else dataset_dropdown
|
318 |
+
if not base_model or not dataset_name or not run_name:
|
319 |
+
return "โ Please provide base model, dataset, and run name", {"status": "error"}
|
320 |
+
return train_model(base_model, dataset_name, run_name, epochs)
|
321 |
|
322 |
train_button.click(
|
323 |
run_training,
|
324 |
+
inputs=[model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, run_name, epochs],
|
325 |
outputs=[train_output, progress_output]
|
326 |
)
|
327 |
|
328 |
+
# ---- Testing Tab ----
|
329 |
with gr.Tab("๐ฌ Testing"):
|
330 |
+
test_model_dropdown = gr.Dropdown(choices=list_trained_models(), label="Select Trained Model Run", interactive=True)
|
331 |
+
refresh_button = gr.Button("๐ Refresh Model List")
|
332 |
test_input = gr.Textbox(label="Your Message")
|
333 |
test_button = gr.Button("๐ก Predict")
|
334 |
test_output = gr.Textbox(label="Model Response")
|
|
|
335 |
|
336 |
+
refresh_button.click(lambda: gr.update(choices=list_trained_models()), None, test_model_dropdown)
|
337 |
+
test_button.click(chat_with_model, inputs=[test_model_dropdown, test_input], outputs=test_output)
|
338 |
+
|
339 |
+
# ---- Memory Tab ----
|
340 |
with gr.Tab("๐งพ Memory"):
|
341 |
+
mem_model_dropdown = gr.Dropdown(choices=list_trained_models(), label="Select Trained Model Run", interactive=True)
|
342 |
+
mem_refresh = gr.Button("๐ Refresh Model List")
|
343 |
mem_button = gr.Button("๐ Load Memory")
|
344 |
mem_output = gr.Textbox(label="Conversation Memory", lines=15)
|
|
|
345 |
|
346 |
+
mem_refresh.click(lambda: gr.update(choices=list_trained_models()), None, mem_model_dropdown)
|
347 |
+
mem_button.click(view_memory, inputs=mem_model_dropdown, outputs=mem_output)
|
348 |
+
|
349 |
+
# ---- Logs Tab ----
|
350 |
with gr.Tab("๐ Logs"):
|
351 |
log_button = gr.Button("๐ Show Logs")
|
352 |
log_output = gr.Textbox(label="Logs", lines=20)
|
353 |
log_button.click(view_logs, outputs=log_output)
|
354 |
|
355 |
+
# ---- Guide Tab ----
|
356 |
with gr.Tab("๐ Guide"):
|
357 |
gr.Markdown(USER_GUIDE)
|
358 |
|
359 |
# =========================
|
360 |
+
# Launch App
|
361 |
# =========================
|
362 |
if __name__ == "__main__":
|
363 |
demo.launch()
|