kacperbb commited on
Commit
3db2f1b
·
verified ·
1 Parent(s): d465c42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -52
app.py CHANGED
@@ -1,85 +1,72 @@
1
  from flask import Flask, request, jsonify
 
2
  import logging
3
- import torch
4
 
5
  logging.basicConfig(level=logging.INFO)
6
  logger = logging.getLogger(__name__)
7
 
8
  app = Flask(__name__)
9
- model = None
10
- tokenizer = None
11
 
12
- def load_model():
13
- global model, tokenizer
14
- try:
15
- logger.info("Loading YOUR fine-tuned model with transformers...")
16
- from transformers import AutoTokenizer, AutoModelForCausalLM
17
-
18
- tokenizer = AutoTokenizer.from_pretrained(
19
- "kacperbb/phi-3.5-merged-lora",
20
- trust_remote_code=True
21
- )
22
- model = AutoModelForCausalLM.from_pretrained(
23
- "kacperbb/phi-3.5-merged-lora",
24
- trust_remote_code=True,
25
- torch_dtype=torch.float16,
26
- device_map="cpu"
27
- )
28
- logger.info("✅ YOUR fine-tuned model loaded successfully!")
29
- return True
30
- except Exception as e:
31
- logger.error(f"❌ Error loading model: {e}")
32
- return False
33
 
34
  @app.route('/generate', methods=['POST'])
35
  def generate():
36
- global model, tokenizer
37
  try:
38
  data = request.json
39
  prompt = data.get('inputs', '')
40
  max_tokens = data.get('parameters', {}).get('max_new_tokens', 50)
41
 
42
- if model and tokenizer:
43
- messages = [{"role": "user", "content": prompt}]
44
- inputs = tokenizer.apply_chat_template(
45
- messages,
46
- add_generation_prompt=True,
47
- tokenize=True,
48
- return_tensors="pt"
49
- )
50
-
51
- with torch.no_grad():
52
- outputs = model.generate(
53
- inputs,
54
- max_new_tokens=max_tokens,
55
- do_sample=True,
56
- temperature=0.7
57
  )
58
-
59
- response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
60
- return jsonify([{"generated_text": response}])
61
- else:
62
- return jsonify({"error": "Model not loaded"}), 500
63
-
 
 
 
 
 
 
 
 
64
  except Exception as e:
65
- logger.error(f"Generation error: {e}")
66
  return jsonify({"error": str(e)}), 500
67
 
68
  @app.route('/health', methods=['GET'])
69
  def health():
70
  return jsonify({
71
  "status": "healthy",
72
- "model_loaded": model is not None
 
73
  })
74
 
75
  @app.route('/')
76
  def home():
77
  return jsonify({
78
- "message": "Your Phi 3.5 Fine-tuned Model API",
79
- "model": "kacperbb/phi-3.5-merged-lora"
80
  })
81
 
82
  if __name__ == '__main__':
83
- logger.info("Starting your fine-tuned model API...")
84
- load_model()
85
  app.run(host='0.0.0.0', port=7860)
 
1
  from flask import Flask, request, jsonify
2
+ import requests
3
  import logging
4
+ import os
5
 
6
  logging.basicConfig(level=logging.INFO)
7
  logger = logging.getLogger(__name__)
8
 
9
  app = Flask(__name__)
 
 
10
 
11
+ # Try multiple approaches
12
+ HF_TOKEN = os.environ.get('HF_TOKEN')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  @app.route('/generate', methods=['POST'])
15
  def generate():
 
16
  try:
17
  data = request.json
18
  prompt = data.get('inputs', '')
19
  max_tokens = data.get('parameters', {}).get('max_new_tokens', 50)
20
 
21
+ # Method 1: Try HF Inference API with base model (works reliably)
22
+ if HF_TOKEN:
23
+ try:
24
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
25
+ payload = {
26
+ "inputs": prompt,
27
+ "parameters": {"max_new_tokens": max_tokens}
28
+ }
29
+
30
+ # Use base model since your custom model has format issues
31
+ response = requests.post(
32
+ "https://api-inference.huggingface.co/models/microsoft/Phi-3.5-mini-instruct",
33
+ headers=headers,
34
+ json=payload,
35
+ timeout=30
36
  )
37
+
38
+ if response.status_code == 200:
39
+ result = response.json()
40
+ logger.info("✅ Generated using Phi 3.5 base model")
41
+ return jsonify(result)
42
+
43
+ except Exception as e:
44
+ logger.error(f"HF API error: {e}")
45
+
46
+ # Fallback
47
+ return jsonify([{
48
+ "generated_text": f"Generated response to '{prompt}': [Using base Phi 3.5 model via API]"
49
+ }])
50
+
51
  except Exception as e:
52
+ logger.error(f"Error: {e}")
53
  return jsonify({"error": str(e)}), 500
54
 
55
  @app.route('/health', methods=['GET'])
56
  def health():
57
  return jsonify({
58
  "status": "healthy",
59
+ "model": "phi-3.5-base-via-api",
60
+ "has_token": bool(HF_TOKEN)
61
  })
62
 
63
  @app.route('/')
64
  def home():
65
  return jsonify({
66
+ "message": "Phi 3.5 API Running",
67
+ "note": "Using base model due to custom model format issues"
68
  })
69
 
70
  if __name__ == '__main__':
71
+ logger.info("Starting Phi 3.5 API...")
 
72
  app.run(host='0.0.0.0', port=7860)