KeviniveK committed on
Commit
97644cf
·
verified ·
1 Parent(s): 63edb66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -31
app.py CHANGED
@@ -2,56 +2,72 @@ import streamlit as st
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import torch
4
 
5
- # 设置设备(优先用GPU)
6
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
- pipe_device = 0 if torch.cuda.is_available() else -1
8
-
9
- # 加载自定义情感分析模型
10
- sentiment_pipeline = pipeline(
11
- "text-classification",
12
- model="KeviniveK/CustomModel_IMDB",
13
- device=pipe_device
14
- )
15
-
16
- # 加载 NLLB 翻译模型:facebook/nllb-200-distilled-600M
17
- translation_model_name = "facebook/nllb-200-distilled-600M"
18
- translation_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)
19
- translation_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name).to(device)
20
-
21
- # 翻译函数(英文 → 中文简体)
22
- def translate_en_to_zh(text):
23
- inputs = translation_tokenizer(
 
 
 
 
 
 
 
 
 
24
  text,
25
  return_tensors="pt",
26
  truncation=True,
27
  padding=True
28
- ).to(device)
 
29
 
30
- translated_tokens = translation_model.generate(
31
  **inputs,
32
- forced_bos_token_id=translation_tokenizer.lang_code_to_id["zho_Hans"]
33
  )
 
34
 
35
- translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
36
- return translated_text
37
-
38
- # Streamlit 主逻辑
39
  def main():
40
  st.set_page_config(page_title="IMDB 影评分析与翻译", layout="centered")
41
  st.title("🎬 英文影评情感分析 + 中文翻译")
42
  st.write("请输入英文影评内容,系统将输出情感分析结果与对应的中文翻译。")
43
 
44
- user_input = st.text_area("英文影评输入", height=150, placeholder="e.g. The story was powerful and the acting was phenomenal.")
 
 
 
 
45
 
46
  if user_input:
47
  with st.spinner("分析处理中..."):
48
- # 情感分析
 
 
 
 
49
  sentiment_result = sentiment_pipeline(user_input)[0]
50
  sentiment = sentiment_result["label"]
51
  confidence = sentiment_result["score"]
52
 
53
- # 翻译
54
- translation = translate_en_to_zh(user_input)
55
 
56
  st.success("分析完成 ✅")
57
  st.markdown(f"**情感分类**:{sentiment}")
@@ -60,4 +76,3 @@ def main():
60
 
61
  if __name__ == "__main__":
62
  main()
63
-
 
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import torch
4
 
5
+ # ------------ 缓存模型加载 ------------
6
@st.cache_resource(show_spinner=False)
def load_sentiment_pipeline():
    """Create the IMDB sentiment classifier, cached by Streamlit.

    ``st.cache_resource`` keeps the returned pipeline so that it is built
    once instead of on every script rerun.

    Returns:
        A ``text-classification`` pipeline backed by the
        ``KeviniveK/CustomModel_IMDB`` checkpoint.
    """
    # The pipeline API takes a CUDA ordinal for GPU execution and -1 for CPU.
    if torch.cuda.is_available():
        target_device = 0
    else:
        target_device = -1

    classifier = pipeline(
        "text-classification",
        model="KeviniveK/CustomModel_IMDB",
        device=target_device,
    )
    return classifier
14
+
15
@st.cache_resource(show_spinner=False)
def load_translation_model():
    """Load the NLLB-200 translation tokenizer and model, cached by Streamlit.

    ``st.cache_resource`` keeps the returned objects so the checkpoint is
    loaded once instead of on every script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for
        ``facebook/nllb-200-distilled-600M``.
    """
    model_name = "facebook/nllb-200-distilled-600M"

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Half precision saves GPU memory, but on a CPU-only host it is slow and
    # some PyTorch builds lack float16 CPU kernels, so fall back to float32.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        device_map="auto",  # let accelerate place the layers on available devices
        torch_dtype=dtype,
    )
    return tokenizer, model
27
+
28
+ # ------------ 翻译函数 ------------
29
@st.cache_data(show_spinner=False)
def _translate_en_to_zh_cached(text: str, _tokenizer, _model):
    """Translate ``text`` to Simplified Chinese, memoised on ``text`` only.

    The leading underscores on ``_tokenizer`` and ``_model`` tell
    ``st.cache_data`` not to hash those arguments; tokenizer and model
    objects are not hashable by Streamlit. As a consequence the cache key
    is the input text alone, so callers are expected to always pass the
    same tokenizer/model pair.
    """
    encoded = _tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    # Move every input tensor to the device the model lives on.
    encoded = {name: tensor.to(_model.device) for name, tensor in encoded.items()}

    translated_tokens = _model.generate(
        **encoded,
        # Force the decoder to start with the Simplified Chinese language
        # token. ``convert_tokens_to_ids`` is used instead of the
        # ``lang_code_to_id`` mapping, which newer transformers releases
        # no longer provide.
        forced_bos_token_id=_tokenizer.convert_tokens_to_ids("zho_Hans"),
    )
    return _tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]


def translate_en_to_zh(text: str, tokenizer, model):
    """Translate English ``text`` into Simplified Chinese.

    Args:
        text: The English source text.
        tokenizer: Tokenizer matching ``model``; it is called on ``text`` and
            used to decode the generated tokens.
        model: Seq2seq model exposing ``generate`` and ``device``.

    Returns:
        The decoded translation of ``text`` as a string.

    Results are cached per input text by a private helper, so repeated
    requests for the same text skip generation.
    """
    return _translate_en_to_zh_cached(text, tokenizer, model)
45
 
46
+ # ------------ Streamlit App 主逻辑 ------------
 
 
 
47
  def main():
48
  st.set_page_config(page_title="IMDB 影评分析与翻译", layout="centered")
49
  st.title("🎬 英文影评情感分析 + 中文翻译")
50
  st.write("请输入英文影评内容,系统将输出情感分析结果与对应的中文翻译。")
51
 
52
+ user_input = st.text_area(
53
+ "英文影评输入",
54
+ height=150,
55
+ placeholder="e.g. The story was powerful and the acting was phenomenal."
56
+ )
57
 
58
  if user_input:
59
  with st.spinner("分析处理中..."):
60
+ # 加载已经缓存的模型
61
+ sentiment_pipeline = load_sentiment_pipeline()
62
+ tokenizer, translation_model = load_translation_model()
63
+
64
+ # 1. 情感分析
65
  sentiment_result = sentiment_pipeline(user_input)[0]
66
  sentiment = sentiment_result["label"]
67
  confidence = sentiment_result["score"]
68
 
69
+ # 2. 翻译
70
+ translation = translate_en_to_zh(user_input, tokenizer, translation_model)
71
 
72
  st.success("分析完成 ✅")
73
  st.markdown(f"**情感分类**:{sentiment}")
 
76
 
77
  if __name__ == "__main__":
78
  main()