AIEcosystem committed on
Commit
494c66d
·
verified ·
1 Parent(s): 0ed8010

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +79 -113
src/streamlit_app.py CHANGED
@@ -5,12 +5,22 @@ import streamlit as st
5
  import pandas as pd
6
  import io
7
  import plotly.express as px
8
- import zipfile
9
  import hashlib
10
  from gliner import GLiNER
11
  from streamlit_extras.stylable_container import stylable_container
12
  from comet_ml import Experiment
13
 
 
 
 
 
 
 
 
 
 
 
 
14
  st.markdown(
15
  """
16
  <style>
@@ -55,31 +65,13 @@ st.markdown(
55
  }
56
  </style>
57
  """,
58
- unsafe_allow_html=True
59
- )
60
-
61
-
62
  # --- Page Configuration and UI Elements
63
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
64
  st.subheader("InfoFinder", divider="violet")
65
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
66
-
67
  expander = st.expander("**Important notes**")
68
- expander.write("""**How to Use:**
69
- 1. Type or paste your text into the text area below, then press Ctrl + Enter.
70
- 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one.
71
- 3. Click the 'Extract Answers' button to extract the answer to your question.
72
-
73
- Results are presented in an easy-to-read table, visualized in an interactive tree map and are available for download.
74
-
75
- **Usage Limits:** You can request results unlimited times for one (1) month.
76
-
77
- **Supported Languages:** English
78
-
79
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
80
-
81
- For any errors or inquiries, please contact us at info@nlpblogs.com""")
82
-
83
  with st.sidebar:
84
  st.write("Use the following code to embed the InfoFinder web app on your website. Feel free to adjust the width and height values to fit your page.")
85
  code = '''
@@ -89,28 +81,23 @@ with st.sidebar:
89
  width="850"
90
  height="450"
91
  ></iframe>
92
-
93
- '''
94
  st.code(code, language="html")
95
  st.text("")
96
  st.text("")
97
  st.divider()
98
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
99
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
100
-
101
  # --- Comet ML Setup ---
102
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
103
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
104
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
105
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
106
-
107
  if not comet_initialized:
108
  st.warning("Comet ML not initialized. Check environment variables.")
109
-
110
  # --- Initialize session state for labels
111
  if 'user_labels' not in st.session_state:
112
  st.session_state.user_labels = []
113
-
114
  # --- Model Loading and Caching ---
115
  @st.cache_resource
116
  def load_gliner_model():
@@ -119,26 +106,20 @@ def load_gliner_model():
119
  This ensures the model is only loaded once, improving performance.
120
  """
121
  try:
122
- return GLiNER.from_pretrained("knowledgator/gliner-multitask-v1.0", device="cpu")
123
  except Exception as e:
124
  st.error(f"Error loading the GLiNER model: {e}")
125
  st.stop()
126
-
127
  # Load the model
128
  model = load_gliner_model()
129
-
130
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
131
-
132
  def clear_text():
133
  """Clears the text area by resetting its value in session state."""
134
  st.session_state['my_text_area'] = ""
135
  st.button("Clear text", on_click=clear_text)
136
-
137
  st.subheader("Question-Answering", divider = "violet")
138
-
139
  # Replaced two columns with a single text input
140
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
141
-
142
  if st.button("Add Question"):
143
  if question_input:
144
  if question_input not in st.session_state.user_labels:
@@ -148,88 +129,73 @@ if st.button("Add Question"):
148
  st.warning("This question has already been added.")
149
  else:
150
  st.warning("Please enter a question.")
151
-
152
- st.markdown("---")
153
- st.subheader("Record of Questions", divider="green")
154
-
155
- if st.session_state.user_labels:
156
- for i, label in enumerate(st.session_state.user_labels):
157
- col_list, col_delete = st.columns([0.9, 0.1])
158
- with col_list:
159
- st.write(f"- {label}", key=f"label_{i}")
160
- with col_delete:
161
- if st.button("Delete", key=f"delete_{i}"):
162
- st.session_state.user_labels.pop(i)
163
- st.rerun()
 
 
 
 
 
 
164
  else:
165
- st.info("No questions defined yet. Use the input above to add one.")
166
-
167
- st.divider()
168
-
169
- if st.button("Extract Answers"):
170
- if not user_text.strip():
171
- st.warning("Please enter some text to analyze.")
172
- elif not st.session_state.user_labels:
173
- st.warning("Please define at least one question.")
174
- else:
175
- if comet_initialized:
176
- experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
177
- experiment.log_parameter("input_text_length", len(user_text))
178
- experiment.log_parameter("defined_labels", st.session_state.user_labels)
179
-
180
- start_time = time.time()
181
- with st.spinner("Analyzing text...", show_time=True):
182
- try:
183
- entities = model_qa.predict_entities(user_text, st.session_state.user_labels)
184
- end_time = time.time()
185
- elapsed_time = end_time - start_time
186
- st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
187
-
188
- if entities:
189
- df1 = pd.DataFrame(entities)
190
- df2 = df1[['label', 'text', 'score']]
191
- df = df2.rename(columns={'label': 'question', 'text': 'answer'})
192
-
193
- st.subheader("Extracted Answers", divider="green")
194
- st.dataframe(df, use_container_width=True)
195
-
196
- st.subheader("Tree map", divider="green")
197
- all_labels = df['question'].unique()
198
- label_color_map = {label: get_stable_color(label) for label in all_labels}
199
- fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
200
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
201
- st.plotly_chart(fig_treemap)
202
-
203
- csv_data = df.to_csv(index=False).encode('utf-8')
204
- st.download_button(
205
- label="Download CSV",
206
- data=csv_data,
207
- file_name="nlpblogs_questions_answers.csv",
208
- mime="text/csv",
209
- )
210
-
211
- if comet_initialized:
212
- experiment.log_metric("processing_time_seconds", elapsed_time)
213
- experiment.log_table("predicted_entities", df)
214
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
215
- experiment.end()
216
- else:
217
- st.info("No answers were found in the text with the defined questions.")
218
- if comet_initialized:
219
- experiment.end()
220
- except Exception as e:
221
- st.error(f"An error occurred during processing: {e}")
222
- st.write(f"Error details: {e}")
223
  if comet_initialized:
224
- experiment.log_text(f"Error: {e}")
 
 
225
  experiment.end()
226
-
227
-
228
-
229
-
230
-
231
-
232
-
 
 
 
233
 
234
 
235
 
 
5
  import pandas as pd
6
  import io
7
  import plotly.express as px
 
8
  import hashlib
9
  from gliner import GLiNER
10
  from streamlit_extras.stylable_container import stylable_container
11
  from comet_ml import Experiment
12
 
13
+ # A new function to generate a stable color for a given string (label)
14
+ def get_stable_color(s):
15
+ """
16
+ Generates a consistent, stable color for a given string.
17
+ This ensures the same label always has the same color in the treemap.
18
+ """
19
+ hash_object = hashlib.sha256(s.encode('utf-8'))
20
+ hex_digest = hash_object.hexdigest()
21
+ # Use the first 6 hex digits for RGB color
22
+ return f'#{hex_digest[:6]}'
23
+
24
  st.markdown(
25
  """
26
  <style>
 
65
  }
66
  </style>
67
  """,
68
+ unsafe_allow_html=True)
 
 
 
69
  # --- Page Configuration and UI Elements
70
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
71
  st.subheader("InfoFinder", divider="violet")
72
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
73
  expander = st.expander("**Important notes**")
74
+ expander.write("""**How to Use:** 1. Type or paste your text into the text area below, then press Ctrl + Enter. 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one.3. Click the 'Extract Answers' button to extract the answer to your question. Results are presented in an easy-to-read table, visualized in an interactive tree map and are available for download.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  with st.sidebar:
76
  st.write("Use the following code to embed the InfoFinder web app on your website. Feel free to adjust the width and height values to fit your page.")
77
  code = '''
 
81
  width="850"
82
  height="450"
83
  ></iframe>
84
+ '''
 
85
  st.code(code, language="html")
86
  st.text("")
87
  st.text("")
88
  st.divider()
89
  st.subheader("🚀 Ready to build your own AI Web App?", divider="violet")
90
  st.link_button("AI Web App Builder", "https://nlpblogs.com/build-your-named-entity-recognition-app/", type="primary")
 
91
  # --- Comet ML Setup ---
92
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
93
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
94
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
95
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
96
  if not comet_initialized:
97
  st.warning("Comet ML not initialized. Check environment variables.")
 
98
  # --- Initialize session state for labels
99
  if 'user_labels' not in st.session_state:
100
  st.session_state.user_labels = []
 
101
  # --- Model Loading and Caching ---
102
  @st.cache_resource
103
  def load_gliner_model():
 
106
  This ensures the model is only loaded once, improving performance.
107
  """
108
  try:
109
+ return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", device="cpu")
110
  except Exception as e:
111
  st.error(f"Error loading the GLiNER model: {e}")
112
  st.stop()
 
113
  # Load the model
114
  model = load_gliner_model()
 
115
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area')
 
116
  def clear_text():
117
  """Clears the text area by resetting its value in session state."""
118
  st.session_state['my_text_area'] = ""
119
  st.button("Clear text", on_click=clear_text)
 
120
  st.subheader("Question-Answering", divider = "violet")
 
121
  # Replaced two columns with a single text input
122
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
 
123
  if st.button("Add Question"):
124
  if question_input:
125
  if question_input not in st.session_state.user_labels:
 
129
  st.warning("This question has already been added.")
130
  else:
131
  st.warning("Please enter a question.")
132
+ st.markdown("---")
133
+ st.subheader("Record of Questions", divider="green")
134
+ if st.session_state.user_labels:
135
+ for i, label in enumerate(st.session_state.user_labels):
136
+ col_list, col_delete = st.columns([0.9, 0.1])
137
+ with col_list:
138
+ st.write(f"- {label}", key=f"label_{i}")
139
+ with col_delete:
140
+ if st.button("Delete", key=f"delete_{i}"):
141
+ st.session_state.user_labels.pop(i)
142
+ st.rerun()
143
+ else:
144
+ st.info("No questions defined yet. Use the input above to add one.")
145
+ st.divider()
146
+ if st.button("Extract Answers"):
147
+ if not user_text.strip():
148
+ st.warning("Please enter some text to analyze.")
149
+ elif not st.session_state.user_labels:
150
+ st.warning("Please define at least one question.")
151
  else:
152
+ if comet_initialized:
153
+ experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
154
+ experiment.log_parameter("input_text_length", len(user_text))
155
+ experiment.log_parameter("defined_labels", st.session_state.user_labels)
156
+ start_time = time.time()
157
+ with st.spinner("Analyzing text...", show_time=True):
158
+ try:
159
+ # Corrected: Changed model_qa to model
160
+ entities = model.predict_entities(user_text, st.session_state.user_labels)
161
+ end_time = time.time()
162
+ elapsed_time = end_time - start_time
163
+ st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
164
+ if entities:
165
+ df1 = pd.DataFrame(entities)
166
+ df2 = df1[['label', 'text', 'score']]
167
+ df = df2.rename(columns={'label': 'question', 'text': 'answer'})
168
+
169
+ st.subheader("Extracted Answers", divider="green")
170
+ st.dataframe(df, use_container_width=True)
171
+ st.subheader("Tree map", divider="green")
172
+ all_labels = df['question'].unique()
173
+ label_color_map = {label: get_stable_color(label) for label in all_labels}
174
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
175
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
176
+ st.plotly_chart(fig_treemap)
177
+ csv_data = df.to_csv(index=False).encode('utf-8')
178
+ st.download_button(
179
+ label="Download CSV",
180
+ data=csv_data,
181
+ file_name="nlpblogs_questions_answers.csv",
182
+ mime="text/csv",
183
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  if comet_initialized:
185
+ experiment.log_metric("processing_time_seconds", elapsed_time)
186
+ experiment.log_table("predicted_entities", df)
187
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
188
  experiment.end()
189
+ else:
190
+ st.info("No answers were found in the text with the defined questions.")
191
+ if comet_initialized:
192
+ experiment.end()
193
+ except Exception as e:
194
+ st.error(f"An error occurred during processing: {e}")
195
+ st.write(f"Error details: {e}")
196
+ if comet_initialized:
197
+ experiment.log_text(f"Error: {e}")
198
+ experiment.end()
199
 
200
 
201