Spaces: Running
Update app.py
app.py CHANGED
@@ -2,56 +2,72 @@ import streamlit as st
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
 
-#
-… [old lines 6-23, also removed, are not expanded in the diff view]
+# ------------ Cached model loading ------------
+@st.cache_resource(show_spinner=False)
+def load_sentiment_pipeline():
+    device = 0 if torch.cuda.is_available() else -1
+    return pipeline(
+        "text-classification",
+        model="KeviniveK/CustomModel_IMDB",
+        device=device
+    )
+
+@st.cache_resource(show_spinner=False)
+def load_translation_model():
+    model_name = "facebook/nllb-200-distilled-600M"
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # Load the model and let accelerate place each layer on a suitable device
+    model = AutoModelForSeq2SeqLM.from_pretrained(
+        model_name,
+        device_map="auto",          # assign devices automatically
+        torch_dtype=torch.float16   # half precision to reduce GPU memory use (optional)
+    )
+    return tokenizer, model
+
+# ------------ Translation function ------------
+@st.cache_data(show_spinner=False)
+def translate_en_to_zh(text: str, tokenizer, model):
+    # Put the inputs directly on model.device
+    inputs = tokenizer(
         text,
         return_tensors="pt",
         truncation=True,
         padding=True
-    )
+    )
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
-    translated_tokens =
+    translated_tokens = model.generate(
         **inputs,
-        forced_bos_token_id=
+        forced_bos_token_id=tokenizer.lang_code_to_id["zho_Hans"]
     )
+    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
 
-    … [old line 35, also removed, is not expanded in the diff view]
-    return translated_text
-
-# Streamlit main logic
+# ------------ Streamlit app main logic ------------
 def main():
     st.set_page_config(page_title="IMDB 影评分析与翻译", layout="centered")
     st.title("🎬 英文影评情感分析 + 中文翻译")
     st.write("请输入英文影评内容,系统将输出情感分析结果与对应的中文翻译。")
 
-    user_input = st.text_area(
+    user_input = st.text_area(
+        "英文影评输入",
+        height=150,
+        placeholder="e.g. The story was powerful and the acting was phenomenal."
+    )
 
     if user_input:
         with st.spinner("分析处理中..."):
-            #
+            # Load the cached models
+            sentiment_pipeline = load_sentiment_pipeline()
+            tokenizer, translation_model = load_translation_model()
+
+            # 1. Sentiment analysis
             sentiment_result = sentiment_pipeline(user_input)[0]
             sentiment = sentiment_result["label"]
             confidence = sentiment_result["score"]
 
-            # Translation
-            translation = translate_en_to_zh(user_input)
+            # 2. Translation
+            translation = translate_en_to_zh(user_input, tokenizer, translation_model)
 
             st.success("分析完成 ✅")
             st.markdown(f"**情感分类**:{sentiment}")
@@ -60,4 +76,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
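Two notes on the new translate_en_to_zh as committed. First, st.cache_data hashes every function argument to build the cache key, and a transformers tokenizer or torch model is not hashable, so the first call will typically fail with Streamlit's unhashable-parameter error. A minimal sketch of the usual workaround, keeping the same body but prefixing the unhashable parameters with an underscore so Streamlit skips them when hashing (the names _tokenizer and _model are illustrative):

import streamlit as st

@st.cache_data(show_spinner=False)
def translate_en_to_zh(text: str, _tokenizer, _model):
    # Leading underscores tell Streamlit not to hash these arguments;
    # only `text` participates in the cache key.
    inputs = _tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(_model.device) for k, v in inputs.items()}
    translated_tokens = _model.generate(
        **inputs,
        forced_bos_token_id=_tokenizer.lang_code_to_id["zho_Hans"]
    )
    return _tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

The call in main() passes the tokenizer and model positionally, so it would not need to change.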
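Second, the lang_code_to_id mapping on the NLLB tokenizer has been deprecated and is absent in newer transformers releases, so tokenizer.lang_code_to_id["zho_Hans"] may raise AttributeError depending on the installed version. A hedged sketch of a version-tolerant lookup; target_lang_id is a hypothetical helper, not part of the committed file:

def target_lang_id(tokenizer, lang_code: str = "zho_Hans") -> int:
    # Hypothetical helper (not in app.py): resolve the forced-BOS token id
    # for the target language across transformers versions.
    mapping = getattr(tokenizer, "lang_code_to_id", None)
    if mapping is not None:
        return mapping[lang_code]
    # Newer NLLB tokenizers expose the language codes as ordinary tokens.
    return tokenizer.convert_tokens_to_ids(lang_code)

With this helper, model.generate(...) would receive forced_bos_token_id=target_lang_id(tokenizer) instead of the direct dictionary lookup.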