import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# 1) Load your tokenizer
tokenizer = AutoTokenizer.from_pretrained("tunna123/ahma-3b-lora-elektromart")
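# Note: Llama-style tokenizers often ship without a pad token. Single-prompt
# generation (as below) does not need one, but if inputs are ever batched, a
# common workaround is to reuse the EOS token (an optional tweak, not part of
# the original setup):
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token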
# 2) Load the base model onto CPU (full-precision)
base = AutoModelForCausalLM.from_pretrained("finnish-nlp/ahma-3b")
# 3) Apply your LoRA adapter without any device dispatching
model = PeftModel.from_pretrained(
    base,
    "tunna123/ahma-3b-lora-elektromart",
    device_map=None,
)
model.to("cpu")
model.eval()
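# Optional: for faster CPU inference the LoRA weights could be folded into the
# base model with PEFT's merge_and_unload(); left commented out since the
# setup above keeps the adapter separate:
# model = model.merge_and_unload()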
# 4) Define the chat function
def chat_fn(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    # Llama-style models do not accept token_type_ids, so drop them if present
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to("cpu") for k, v in inputs.items()}
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
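# Sketch of a sampling variant (assumed parameters, not from the original):
# the greedy decoding above is deterministic; for more varied replies, standard
# generate() sampling arguments could be passed instead, e.g.
#     outputs = model.generate(**inputs, max_new_tokens=100,
#                              do_sample=True, temperature=0.7, top_p=0.9)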
# 5) Launch Gradio
gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(placeholder="Kysy jotain…"),  # Finnish: "Ask something…"
    outputs="text",
    title="ElektroMart Chatbot",
).launch()