import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# 1) Load the tokenizer (saved in the adapter repo)
tokenizer = AutoTokenizer.from_pretrained("tunna123/ahma-3b-lora-elektromart")
# 2) Load the base model onto CPU (full-precision) | |
base = AutoModelForCausalLM.from_pretrained("finnish-nlp/ahma-3b") | |
# 3) Apply the LoRA adapter without any device dispatching
model = PeftModel.from_pretrained(
    base,
    "tunna123/ahma-3b-lora-elektromart",
    device_map=None,
)
model.to("cpu")
model.eval()
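# Optional (not part of the original app): merging the adapter weights into
# the base model with PEFT's merge_and_unload() removes the LoRA indirection
# and can speed up CPU inference, at the cost of no longer being able to
# swap adapters at runtime:
# model = model.merge_and_unload()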
# 4) Define the chat function
def chat_fn(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    # Some tokenizers emit token_type_ids, which the underlying causal LM's
    # forward() does not accept during generate(), so drop them if present.
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to("cpu") for k, v in inputs.items()}
    outputs = model.generate(**inputs, max_new_tokens=100)
    # Note: decoding outputs[0] returns the prompt followed by the reply.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
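# Note: with no sampling arguments, generate() defaults to greedy decoding.
# For more varied answers you could pass sampling options, e.g. (illustrative
# values, not taken from the original app):
# outputs = model.generate(**inputs, max_new_tokens=100,
#                          do_sample=True, temperature=0.7, top_p=0.9)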
# 5) Launch the Gradio UI
gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(placeholder="Kysy jotain…"),  # Finnish: "Ask something…"
    outputs="text",
    title="ElektroMart Chatbot",
).launch()
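# Generation on a CPU-only Space can be slow enough that requests time out.
# A common mitigation is Gradio's request queue; recent Gradio versions
# enable it by default, while older ones need an explicit call (assumes a
# Gradio version where Interface exposes .queue()):
# gr.Interface(fn=chat_fn, inputs=gr.Textbox(placeholder="Kysy jotain…"),
#              outputs="text", title="ElektroMart Chatbot").queue().launch()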