alphaoumardev committed
Commit 51e03da · verified · 1 Parent(s): da0937c

Update app.py

Files changed (1)
  1. app.py +14 -15
app.py CHANGED
@@ -1,23 +1,27 @@
+import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-# Replace with your actual model ID
+# Get the HF token from environment
+hf_token = os.getenv("HUGGINGFACE_TOKEN")
+
+# Your fine-tuned model
 model_id = "alphaoumardev/Llama3-8B-noryu-instruct"
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
+# Authenticate with token when loading tokenizer/model
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_token)
 model.eval()
 
-# If you're using GPU on HF Spaces with GPU enabled
+# Device setup
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
 def chat(user_input, history=[]):
-    # Add user input to the history
     history.append({"role": "user", "content": user_input})
 
-    # Format prompt (adjust as needed depending on your training)
+    # Format the prompt
     prompt = ""
     for turn in history:
         role = turn["role"]
@@ -38,24 +42,19 @@ def chat(user_input, history=[]):
     )
 
     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Extract just the assistant's reply
     assistant_reply = output_text.split("assistant:")[-1].strip()
     history.append({"role": "assistant", "content": assistant_reply})
 
-    # Return response and updated history for Gradio
+    # Gradio expects tuple list format for Chatbot display
     chat_history = [(h["content"], history[i + 1]["content"]) for i, h in enumerate(history[:-1]) if h["role"] == "user"]
     return chat_history, history
 
-# Set up Gradio ChatInterface
+# Gradio Blocks UI
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
-    state = gr.State([])  # for storing history
+    state = gr.State([])  # memory of the conversation
     txt = gr.Textbox(show_label=False, placeholder="Type your message...")
 
-    def user_submit(user_message, history):
-        return chat(user_message, history)
+    txt.submit(chat, [txt, state], [chatbot, state])
 
-    txt.submit(user_submit, [txt, state], [chatbot, state])
-
 demo.launch()
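
A note on the authentication change above: recent transformers releases deprecate the `use_auth_token` argument in favor of `token`. A minimal sketch of the same loading step with the newer parameter, assuming the `HUGGINGFACE_TOKEN` secret is configured in the Space settings:

```python
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumes a HUGGINGFACE_TOKEN secret is set in the Space settings
hf_token = os.getenv("HUGGINGFACE_TOKEN")
model_id = "alphaoumardev/Llama3-8B-noryu-instruct"

# `token=` supersedes the deprecated `use_auth_token=` in newer transformers
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
```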
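
The `chat` signature keeps the mutable default `history=[]`. Gradio always passes the `state` value, so it is mostly harmless here, but a `None` default avoids the classic shared-list pitfall if the function is ever called without an argument. A sketch of just the signature change:

```python
def chat(user_input, history=None):
    # A mutable default list is shared across calls; create a fresh one instead
    if history is None:
        history = []
    history.append({"role": "user", "content": user_input})
    ...  # rest of the function unchanged
```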
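
Since the prompt is still built by hand ("role: content" lines) and the reply is recovered by splitting on "assistant:", a tokenizer that ships a Llama-3 chat template offers a more robust alternative. A sketch, assuming the fine-tuned tokenizer inherits the base model's chat template:

```python
# Build the prompt from structured messages via the tokenizer's chat template
messages = [{"role": "user", "content": "Hello!"}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header so the model answers
    return_tensors="pt",
).to(device)

output_ids = model.generate(input_ids, max_new_tokens=256)
# Decode only the newly generated tokens instead of string-splitting the output
reply = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
```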
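
One small refinement the new single-handler wiring could take: clearing the textbox after each submit. A sketch using standard Blocks event chaining (`.then()` on the submit event); everything else matches the committed UI:

```python
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])  # memory of the conversation
    txt = gr.Textbox(show_label=False, placeholder="Type your message...")

    # Run chat(), then reset the input box to an empty string
    txt.submit(chat, [txt, state], [chatbot, state]).then(lambda: "", None, txt)

demo.launch()
```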