Spaces:

Abrak
/

Controlled_Chat

Running on Zero

App Files Files Community

abrakjamson committed on 7 days ago

Commit

d8d631a

1 Parent(s): fa4b963

zerogpu updates

Browse files

Files changed (2) hide show

app.py +25 -14
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import torch
 import re
 import tempfile
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from repeng import ControlVector, ControlModel, DatasetEntry
 import gradio as gr
@@ -21,19 +22,28 @@ login(access_token)
 tokenizer = AutoTokenizer.from_pretrained(mistral_path)
 tokenizer.pad_token_id = 0
-model = AutoModelForCausalLM.from_pretrained(
-    mistral_path,
-    torch_dtype=torch.float16,
-    trust_remote_code=True,
-    use_safetensors=True
-)
-cuda = torch.cuda.is_available()
-print(f"Is CUDA available: {cuda}")
-model = model.to("cuda:0" if cuda else "cpu")
-if cuda:
-    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-model = ControlModel(model, list(range(-5, -18, -1)))
 # Generation settings
 # Generation settings
@@ -86,6 +96,7 @@ def construct_prompt(history, system_prompt, user_message):
     formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
     return formatted_prompt
 def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, user_model, input_checkbox, input_slider, *args):
     """
     Applies the control vectors and calls the language model.
@@ -115,7 +126,7 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
             control_vectors.append(ControlVector.import_gguf(f"control_models/{cv_file}") * weight)
             assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
     # The control model takes a sum of positive and negative control vectors
     model.reset()
     combined_vector = None

 import re
 import tempfile
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import spaces
 from repeng import ControlVector, ControlModel, DatasetEntry
 import gradio as gr
 tokenizer = AutoTokenizer.from_pretrained(mistral_path)
 tokenizer.pad_token_id = 0
+global model  # NOTE: `global` at module scope has no effect; `model` is only bound once defineModel() runs
+global isModelDefined  # NOTE: likewise a no-op here; the assignment below is what creates the flag
+isModelDefined = False  # flag checked by defineModel() so the model is built at most once per process
+def defineModel():  # Lazily build the module-level `model` on first call; called from the @spaces.GPU-decorated generate_response
+    global model  # rebinds the module-level `model`
+    global isModelDefined  # rebinds the module-level flag
+    cuda = torch.cuda.is_available()  # evaluated on every call, but only used by the print below
+    if not isModelDefined:  # skip all loading work once the flag has been set
+        model = AutoModelForCausalLM.from_pretrained(  # load the causal LM from `mistral_path` (defined elsewhere in the file)
+            mistral_path,
+            torch_dtype=torch.float16,  # half-precision weights
+            trust_remote_code=True,  # NOTE(review): permits custom code shipped with the model repo to run - confirm the source is trusted
+            use_safetensors=True
+        )
+        print(f"Is CUDA available: {cuda}")
+        model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")  # re-queries CUDA instead of reusing `cuda`; falls back to CPU
+        # these are good magic numbers for this model (the list is -5 down to -17; presumably layer indices for ControlModel - confirm against repeng)
+        model = ControlModel(model, list(range(-5, -18, -1)))  # wrap the loaded model so control vectors can be applied
+        isModelDefined = True  # later calls return without reloading
 # Generation settings
 # Generation settings
     formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
     return formatted_prompt
+@spaces.GPU
 def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, user_model, input_checkbox, input_slider, *args):
     """
     Applies the control vectors and calls the language model.
             control_vectors.append(ControlVector.import_gguf(f"control_models/{cv_file}") * weight)
             assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
+    defineModel()
     # The control model takes a sum of positive and negative control vectors
     model.reset()
     combined_vector = None

requirements.txt CHANGED Viewed

@@ -93,4 +93,5 @@ tzdata==2024.2
 urllib3==2.2.3
 uvicorn==0.30.6
 websockets==12.0
-xformers==0.0.27.post2

 urllib3==2.2.3
 uvicorn==0.30.6
 websockets==12.0
+xformers==0.0.27.post2
+spaces