abrakjamson committed on
Commit
d8d631a
·
1 Parent(s): fa4b963

zerogpu updates

Browse files
Files changed (2) hide show
  1. app.py +25 -14
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,6 +6,7 @@ import torch
6
  import re
7
  import tempfile
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
9
  from repeng import ControlVector, ControlModel, DatasetEntry
10
  import gradio as gr
11
 
@@ -21,19 +22,28 @@ login(access_token)
21
  tokenizer = AutoTokenizer.from_pretrained(mistral_path)
22
  tokenizer.pad_token_id = 0
23
 
24
- model = AutoModelForCausalLM.from_pretrained(
25
- mistral_path,
26
- torch_dtype=torch.float16,
27
- trust_remote_code=True,
28
- use_safetensors=True
29
- )
30
- cuda = torch.cuda.is_available()
31
- print(f"Is CUDA available: {cuda}")
32
- model = model.to("cuda:0" if cuda else "cpu")
33
- if cuda:
34
- print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
35
-
36
- model = ControlModel(model, list(range(-5, -18, -1)))
 
 
 
 
 
 
 
 
 
37
 
38
  # Generation settings
39
  # Generation settings
@@ -86,6 +96,7 @@ def construct_prompt(history, system_prompt, user_message):
86
  formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
87
  return formatted_prompt
88
 
 
89
  def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, user_model, input_checkbox, input_slider, *args):
90
  """
91
  Applies the control vectors and calls the language model.
@@ -115,7 +126,7 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
115
  control_vectors.append(ControlVector.import_gguf(f"control_models/{cv_file}") * weight)
116
  assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
117
 
118
-
119
  # The control model takes a sum of positive and negative control vectors
120
  model.reset()
121
  combined_vector = None
 
6
  import re
7
  import tempfile
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
+ import spaces
10
  from repeng import ControlVector, ControlModel, DatasetEntry
11
  import gradio as gr
12
 
 
22
  tokenizer = AutoTokenizer.from_pretrained(mistral_path)
23
  tokenizer.pad_token_id = 0
24
 
25
+ global model
26
+
27
+
28
+ global isModelDefined
29
+ isModelDefined = False
30
+
31
+ def defineModel():
32
+ global model
33
+ global isModelDefined
34
+ cuda = torch.cuda.is_available()
35
+ if not isModelDefined:
36
+ model = AutoModelForCausalLM.from_pretrained(
37
+ mistral_path,
38
+ torch_dtype=torch.float16,
39
+ trust_remote_code=True,
40
+ use_safetensors=True
41
+ )
42
+ print(f"Is CUDA available: {cuda}")
43
+ model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")
44
+ # range(-5, -18, -1) yields -5 through -17; these values were found to work well for this model
45
+ model = ControlModel(model, list(range(-5, -18, -1)))
46
+ isModelDefined = True
47
 
48
  # Generation settings
49
  # Generation settings
 
96
  formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
97
  return formatted_prompt
98
 
99
+ @spaces.GPU
100
  def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, user_model, input_checkbox, input_slider, *args):
101
  """
102
  Applies the control vectors and calls the language model.
 
126
  control_vectors.append(ControlVector.import_gguf(f"control_models/{cv_file}") * weight)
127
  assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
128
 
129
+ defineModel()
130
  # The control model takes a sum of positive and negative control vectors
131
  model.reset()
132
  combined_vector = None
requirements.txt CHANGED
@@ -93,4 +93,5 @@ tzdata==2024.2
93
  urllib3==2.2.3
94
  uvicorn==0.30.6
95
  websockets==12.0
96
- xformers==0.0.27.post2
 
 
93
  urllib3==2.2.3
94
  uvicorn==0.30.6
95
  websockets==12.0
96
+ xformers==0.0.27.post2
97
+ spaces