ToluClassics committed on
Commit
79ca28d
1 Parent(s): cce51a6
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -144,9 +144,21 @@ def scisearch(query, language, num_results=10):
144
 
145
  return results, highlight_terms
146
 
 
 
 
 
 
 
 
 
 
 
 
147
def highlight_string(paragraph: str, highlight_terms: list) -> str:
    """Wrap whole-word occurrences of the highlight terms in ``<mark>`` tags.

    Matching is case-insensitive, and the matched text keeps the casing it
    has in ``paragraph``.

    Args:
        paragraph: Text to annotate.
        highlight_terms: Terms to highlight. Each term is matched literally,
            never interpreted as a regular expression. Empty terms are
            ignored.

    Returns:
        ``paragraph`` with every match wrapped in ``<mark>...</mark>``, or
        ``paragraph`` unchanged when there is nothing to highlight.
    """
    literals = [str(term) for term in highlight_terms]
    # Escape so that metacharacters in a term ("c++", "a.b") are matched as
    # plain text instead of raising re.error or matching unintended text.
    escaped = [re.escape(literal) for literal in literals if literal]
    if not escaped:
        return paragraph

    # Deduplicate, then try longer terms first so that "new york" wins over
    # "york" when both are requested.
    alternatives = sorted(dict.fromkeys(escaped), key=len, reverse=True)

    # One combined pass: markup inserted for one term is never re-scanned for
    # another term (e.g. the term "mark" must not match inside "<mark>").
    # Lookarounds behave like \b for terms that start/end with a word
    # character and still work for terms that start/end with punctuation.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
        flags=re.I,
    )
    # A callable replacement keeps the document's own casing and avoids
    # backslash/group-reference interpretation of the term.
    return pattern.sub(lambda match: f"<mark>{match.group(0)}</mark>", paragraph)
151
 
152
  def process_results(hits: list, highlight_terms: list) -> str:
@@ -154,7 +166,7 @@ def process_results(hits: list, highlight_terms: list) -> str:
154
  for i, hit in enumerate(hits):
155
  res_head = f"""
156
  <div class="searchresult">
157
- <h2>{i}. Document ID: {hit['docid']}</h2>
158
  <p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
159
  """
160
  for subhit in hit['meta']['docs']:
@@ -221,8 +233,8 @@ if st.sidebar.button("Search"):
221
  }
222
 
223
  .searchresult h2 {
224
- font-size: 15px;
225
- line-height: 14px;
226
  font-weight: normal;
227
  color: rgb(7, 111, 222);
228
  margin-bottom: 0px;
 
144
 
145
  return results, highlight_terms
146
 
147
# Marker prefix and tag names; a marker in the text is the prefix followed by
# one of the tags, e.g. "PI:EMAIL".
PII_PREFIX = "PI:"
PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}


def process_pii(text):
    """Replace each PII marker in ``text`` with a styled "REDACTED <tag>" label.

    Every occurrence of ``PII_PREFIX + tag`` (for each tag in ``PII_TAGS``)
    is swapped for an HTML snippet naming the redacted tag. Text without
    markers is returned unchanged.
    """
    label_template = '<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>'
    redacted = text
    for pii_tag in PII_TAGS:
        marker = PII_PREFIX + pii_tag
        redacted = redacted.replace(marker, label_template.format(pii_tag))
    return redacted
157
+
158
def highlight_string(paragraph: str, highlight_terms: list) -> str:
    """Bold whole-word occurrences of the highlight terms, then restyle PII markers.

    Matching is case-insensitive, and the matched text keeps the casing it
    has in ``paragraph``. After highlighting, the text is passed through
    ``process_pii``.

    Args:
        paragraph: Text to annotate.
        highlight_terms: Terms to highlight. Each term is matched literally,
            never interpreted as a regular expression. Empty terms are
            ignored.

    Returns:
        The result of ``process_pii`` applied to ``paragraph`` with every
        term match wrapped in ``<b>...</b>``.
    """
    literals = [str(term) for term in highlight_terms]
    # Escape so that metacharacters in a term ("c++", "a.b") are matched as
    # plain text instead of raising re.error or matching unintended text.
    escaped = [re.escape(literal) for literal in literals if literal]

    if escaped:
        # Deduplicate, then try longer terms first so that "new york" wins
        # over "york" when both are requested.
        alternatives = sorted(dict.fromkeys(escaped), key=len, reverse=True)

        # One combined pass: markup inserted for one term is never re-scanned
        # for another term (e.g. the term "b" must not match inside "<b>").
        # Lookarounds behave like \b for terms that start/end with a word
        # character and still work for terms that start/end with punctuation.
        pattern = re.compile(
            r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
            flags=re.I,
        )
        # A callable replacement keeps the document's own casing and avoids
        # backslash/group-reference interpretation of the term.
        paragraph = pattern.sub(lambda match: f"<b>{match.group(0)}</b>", paragraph)

    # PII markers are restyled last, so the markup process_pii inserts is
    # never itself scanned for highlight terms.
    return process_pii(paragraph)
163
 
164
  def process_results(hits: list, highlight_terms: list) -> str:
 
166
  for i, hit in enumerate(hits):
167
  res_head = f"""
168
  <div class="searchresult">
169
+ <h2>{i+1}. Document ID: {hit['docid']}</h2>
170
  <p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
171
  """
172
  for subhit in hit['meta']['docs']:
 
233
  }
234
 
235
  .searchresult h2 {
236
+ font-size: 19px;
237
+ line-height: 18px;
238
  font-weight: normal;
239
  color: rgb(7, 111, 222);
240
  margin-bottom: 0px;