Change article titles to actual names, change some text
- SecondExampleParsing.svg +93 -0
- app.py +138 -56
- dependency-images/{article11.txt → Mark Levinson Airpods.txt} +0 -0
- dependency-images/{article9.txt → Novak Djokovic.txt} +0 -0
- dependency-images/{article13.txt → OnePlus 10 Pro.txt} +0 -0
- dependency-images/{article16.txt → Tencent Holdings.txt} +0 -0
- dependency-images/{article4.txt → White House.txt} +0 -0
- dependency-specific-text/{article11.txt → Mark Levinson Airpods.txt} +3 -3
- dependency-specific-text/{article9.txt → Novak Djokovic.txt} +4 -4
- dependency-specific-text/{article13.txt → OnePlus 10 Pro.txt} +6 -6
- dependency-specific-text/{article16.txt → Tencent Holdings.txt} +0 -0
- dependency-specific-text/{article4.txt → White House.txt} +3 -3
- entity-specific-text/Mark Levinson Airpods.txt +2 -0
- entity-specific-text/{article4.txt → Novak Djokovic.txt} +0 -0
- entity-specific-text/OnePlus 10 Pro.txt +3 -0
- entity-specific-text/Tencent Holdings.txt +2 -0
- entity-specific-text/{article9.txt → White House.txt} +0 -0
- entity-specific-text/article11.txt +0 -4
- entity-specific-text/article13.txt +0 -5
- entity-specific-text/article16.txt +0 -4
- sample-articles-temp/numbers_to_text.txt +6 -0
- sample-articles/{article11.txt → Mark Levinson Airpods.txt} +0 -0
- sample-articles/{article9.txt → Novak Djokovic.txt} +0 -0
- sample-articles/{article13.txt → OnePlus 10 Pro.txt} +0 -0
- sample-articles/{article16.txt → Tencent Holdings.txt} +0 -0
- sample-articles/{article4.txt → White House.txt} +0 -0
- sample-summaries/{article11.txt → Mark Levinson Airpods.txt} +0 -0
- sample-summaries/{article9.txt → Novak Djokovic.txt} +0 -0
- sample-summaries/{article13.txt → OnePlus 10 Pro.txt} +0 -0
- sample-summaries/{article16.txt → Tencent Holdings.txt} +0 -0
- sample-summaries/{article4.txt → White House.txt} +0 -0
SecondExampleParsing.svg
ADDED
app.py
CHANGED
@@ -22,6 +22,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
 margin-bottom: 2.5rem">{}</div> """

+
 @st.experimental_singleton
 def get_sentence_embedding_model():
     return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
@@ -76,31 +77,31 @@ def list_all_article_names() -> list:
 def fetch_article_contents(filename: str) -> AnyStr:
     if filename == "Provide your own input":
         return " "
-    with open(f'./sample-articles/{filename
+    with open(f'./sample-articles/{filename}.txt', 'r') as f:
         data = f.read()
     return data


 def fetch_summary_contents(filename: str) -> AnyStr:
-    with open(f'./sample-summaries/{filename
+    with open(f'./sample-summaries/{filename}.txt', 'r') as f:
         data = f.read()
     return data


 def fetch_entity_specific_contents(filename: str) -> AnyStr:
-    with open(f'./entity-specific-text/{filename
+    with open(f'./entity-specific-text/{filename}.txt', 'r') as f:
         data = f.read()
     return data


 def fetch_dependency_specific_contents(filename: str) -> AnyStr:
-    with open(f'./dependency-specific-text/{filename
+    with open(f'./dependency-specific-text/{filename}.txt', 'r') as f:
         data = f.read()
     return data


 def fetch_dependency_svg(filename: str) -> AnyStr:
-    with open(f'./dependency-images/{filename
+    with open(f'./dependency-images/{filename}.txt', 'r') as f:
         lines = [line.rstrip() for line in f]
     return lines

@@ -145,10 +146,14 @@ def get_all_entities(text):
     return list(itertools.chain.from_iterable(all_entities_per_sentence))


-def get_and_compare_entities():
-
-
-
+def get_and_compare_entities(first_time: bool):
+    if first_time:
+        article_content = st.session_state.article_text
+        all_entities_per_sentence = get_all_entities_per_sentence(article_content)
+        entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
+        st.session_state.entities_article = entities_article
+    else:
+        entities_article = st.session_state.entities_article

     summary_content = st.session_state.summary_output
     all_entities_per_sentence = get_all_entities_per_sentence(summary_content)
@@ -166,6 +171,18 @@ def get_and_compare_entities():
             matched_entities.append(entity)
         else:
             unmatched_entities.append(entity)
+
+    matched_entities = list(dict.fromkeys(matched_entities))
+    unmatched_entities = list(dict.fromkeys(unmatched_entities))
+    for entity in matched_entities:
+        for substring_entity in matched_entities:
+            if entity != substring_entity and entity.lower() in substring_entity.lower():
+                matched_entities.remove(entity)
+
+    for entity in unmatched_entities:
+        for substring_entity in unmatched_entities:
+            if entity != substring_entity and entity.lower() in substring_entity.lower():
+                unmatched_entities.remove(entity)
     return matched_entities, unmatched_entities


@@ -175,7 +192,7 @@ def highlight_entities():
     markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
     markdown_end = "</mark>"

-    matched_entities, unmatched_entities = get_and_compare_entities()
+    matched_entities, unmatched_entities = get_and_compare_entities(True)

     for entity in matched_entities:
         summary_content = summary_content.replace(entity, markdown_start_green + entity + markdown_end)
@@ -263,32 +280,43 @@ def generate_abstractive_summary(text, type, min_len=120, max_len=512, **kwargs)
     return summary


-
-# Page
-st.title('π Summarization fact checker π')
-
-# INTRODUCTION
-st.header("Introduction")
-st.markdown("""Recent work using transformers on large text corpora has shown great success when fine-tuned on
-several different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization
-is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
-and abstractive. **Extractive summarization** merely copies informative fragments from the input,
-whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
-information in the input and has to be linguistically fluent. This interactive blogpost will focus on this more difficult task of
-abstractive summary generation.""")
-
-st.markdown("""To generate summaries we will use the [PEGASUS] (https://huggingface.co/google/pegasus-cnn_dailymail)
-model, producing abstractive summaries from large articles. These summaries often contain sentences with different
-kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to detect errors
-from the generated summaries. Throughout this blog, we will also explain the results for some methods on specific
-examples. These text blocks will be indicated and they change according to the currently selected article.""")
-
 # Load all different models (cached) at start time of the hugginface space
 sentence_embedding_model = get_sentence_embedding_model()
 ner_model = get_transformer_pipeline()
 nlp = get_spacy()
 summarization_model = get_summarizer_model()

+# Page
+st.title('π Summarization fact checker π')
+
+# INTRODUCTION
+st.header("π§βπ« Introduction")
+
+introduction_checkbox = st.checkbox("Show introduction text", value = True)
+if introduction_checkbox:
+    st.markdown("""
+Recent work using π€ **transformers** π€ on large text corpora has shown great success when fine-tuned on
+several different downstream NLP tasks. One such task is that of text summarization. The goal of text summarization
+is to generate concise and accurate summaries from input document(s). There are 2 types of summarization:
+
+- **Extractive summarization** merely copies informative fragments from the input
+- **Abstractive summarization** may generate novel words. A good abstractive summary should cover principal
+information in the input and has to be linguistically fluent. This interactive blogpost will focus on this more difficult task of
+abstractive summary generation. Furthermore we will focus on factual errors in summaries, and less sentence fluency.""")
+
+    st.markdown("###")
+    st.markdown("π€ **Why is this important?** π€ Let's say we want to summarize news articles for a popular "
+                "newspaper. If an article tells the story of **Putin** invading Ukraine, we don't want our summarization "
+                "model to say that **Biden** is invading Ukraine. Summarization could also be done for financial reports "
+                "for example. In such environments, these errors can be very critical, so we want to find a way to "
+                "detect them.")
+    st.markdown("###")
+    st.markdown("""To generate summaries we will use the π [PEGASUS](https://huggingface.co/google/pegasus-cnn_dailymail) π
+model, producing abstractive summaries from large articles. These summaries often contain sentences with different
+kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to detect errors
+from the generated summaries. Throughout this blog, we will also explain the results for some methods on specific
+examples. These text blocks will be indicated and they change according to the currently selected article.""")
+
 # GENERATING SUMMARIES PART
 st.header("πͺΆ Generating summaries")
 st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
@@ -296,26 +324,28 @@ st.markdown("Let’s start by selecting an article text for which we want to gen
             "summary generated from it might not be optimal, leading to suboptimal performance of the post-processing "
             "steps. However, too long articles will be truncated and might miss information in the summary.")

+st.markdown("####")
 selected_article = st.selectbox('Select an article or provide your own:',
-                                list_all_article_names())
+                                list_all_article_names(), index=2)
 st.session_state.article_text = fetch_article_contents(selected_article)
 article_text = st.text_area(
     label='Full article text',
     value=st.session_state.article_text,
-    height=
+    height=250
 )

-summarize_button = st.button(label='Process article content',
+summarize_button = st.button(label='π€― Process article content',
                              help="Start interactive blogpost")

 if summarize_button:
     st.session_state.article_text = article_text
+    st.markdown("####")
     st.markdown(
-        "Below you can find the generated summary for the article. We will discuss two approaches that we found are "
+        "*Below you can find the generated summary for the article. We will discuss two approaches that we found are "
         "able to detect some common errors. Based on errors, one could then score different summaries, indicating how "
         "factual a summary is for a given article. The idea is that in production, you could generate a set of "
        "summaries for the same article, with different parameters (or even different models). By using "
-        "post-processing error detection, we can then select the best possible summary
+        "post-processing error detection, we can then select the best possible summary.*")
     if st.session_state.article_text:
         with st.spinner('Generating summary, this might take a while...'):
             if selected_article != "Provide your own input" and article_text == fetch_article_contents(
@@ -327,18 +357,25 @@ if summarize_button:
                                                                            no_repeat_ngram_size=4)
                 st.session_state.unchanged_text = False
             summary_displayed = display_summary(summary_content)
-            st.write("**Generated summary:**", summary_displayed, unsafe_allow_html=True)
+            st.write("β **Generated summary:** β", summary_displayed, unsafe_allow_html=True)
     else:
         st.error('**Error**: No comment to classify. Please provide a comment.')

     # ENTITY MATCHING PART
-    st.header("Entity matching")
+    st.header("1οΈβ£ Entity matching")
     st.markdown("The first method we will discuss is called **Named Entity Recognition** (NER). NER is the task of "
                 "identifying and categorising key information (entities) in text. An entity can be a singular word or a "
                 "series of words that consistently refers to the same thing. Common entity classes are person names, "
                 "organisations, locations and so on. By applying NER to both the article and its summary, we can spot "
-                "possible **hallucinations**.
-
+                "possible **hallucinations**. ")
+
+    st.markdown("Hallucinations are words generated by the model that are not supported by "
+                "the source input. Deep learning based generation is [prone to hallucinate]("
+                "https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade "
+                "system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can improve this problem"
+                " for the downstream task of summary generation.")
+
+    st.markdown(" In theory all entities in the summary (such as dates, locations and so on), "
                 "should also be present in the article. Thus we can extract all entities from the summary and compare "
                 "them to the entities of the original article, spotting potential hallucinations. The more unmatched "
                 "entities we find, the lower the factualness score of the summary. ")
@@ -354,7 +391,7 @@ if summarize_button:
     markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
     markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
     st.markdown(
-        "We call this technique
+        "We call this technique **entity matching** and here you can see what this looks like when we apply this "
         "method on the summary. Entities in the summary are marked " + green_text + " when the entity also "
         "exists in the article, "
         "while unmatched entities "
@@ -365,10 +402,11 @@ if summarize_button:
     if st.session_state.unchanged_text:
         entity_specific_text = fetch_entity_specific_contents(selected_article)
         soup = BeautifulSoup(entity_specific_text, features="html.parser")
+        st.markdown("####")
         st.write("π‘π **Specific example explanation** ππ‘", HTML_WRAPPER.format(soup), unsafe_allow_html=True)

     # DEPENDENCY PARSING PART
-    st.header("Dependency comparison")
+    st.header("2οΈβ£ Dependency comparison")
     st.markdown(
         "The second method we use for post-processing is called **Dependency parsing**: the process in which the "
         "grammatical structure in a sentence is analysed, to find out related words as well as the type of the "
@@ -378,16 +416,23 @@ if summarize_button:
     # TODO: I wonder why the first doesn't work but the second does (it doesn't show deps otherwise)
     # st.image("ExampleParsing.svg")
     st.write(render_svg('ExampleParsing.svg'), unsafe_allow_html=True)
-    st.markdown(
-
-
-
-
-
-
-
-
-        "
+    st.markdown(
+        "Here, *“Jan”* is the *“poss”* (possession modifier) of *“wife”*. If suddenly the summary would read *“Jan’s"
+        " husband…”*, there would be a dependency in the summary that is non-existent in the article itself (namely "
+        "*“Jan”* is the “poss” of *“husband”*)."
+        "However, often new dependencies are introduced in the summary that "
+        "are still correct, as can be seen in the example below. ")
+    st.write(render_svg('SecondExampleParsing.svg'), unsafe_allow_html=True)
+
+    st.markdown("*“The borders of Ukraine”* have a different dependency between *“borders”* and "
+                "*“Ukraine”* "
+                "than *“Ukraine’s borders”*, while both descriptions have the same meaning. So just matching all "
+                "dependencies between article and summary (as we did with entity matching) would not be a robust method."
+                " More on the different sorts of dependencies and their description can be found [here](https://universaldependencies.org/docs/en/dep/).")
+    st.markdown("However, we have found that **there are specific dependencies that are often an "
+                "indication of a wrongly constructed sentence** -when there is no article match. We (currently) use 2 "
+                "common dependencies which - when present in the summary but not in the article - are highly "
+                "indicative of factualness errors. "
                 "Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
                 "Below we highlight all unmatched dependencies that satisfy the discussed constraints. We also "
                 "discuss the specific results for the currently selected example article.")
@@ -410,14 +455,51 @@ if summarize_button:
         render_dependency_parsing(current_drawing_list)

     # OUTRO/CONCLUSION
-    st.header("
+    st.header("π€ Bringing it together")
     st.markdown("We have presented 2 methods that try to detect errors in summaries via post-processing steps. Entity "
                 "matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
                 "some bad sentences (and thus worse summaries). These methods highlight the possibilities of "
                 "post-processing AI-made summaries, but are only a first introduction. As the methods were "
                 "empirically tested they are definitely not sufficiently robust for general use-cases.")
     st.markdown("####")
-    st.markdown(
-
-
-
+    st.markdown(
+        "(TODO) Below we generated 5 different kind of summaries from the article in which their ranks are estimated, "
+        "and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
+        "will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
+        "the actual parameters or something? ")
+
+    # with st.spinner("Calculating more summaries and scoring them, might take while..."):
+    #     # ENTITIES
+    #     _, amount_unmatched = get_and_compare_entities(False)
+    #     st.write(len(amount_unmatched))
+    #     st.write(amount_unmatched)
+    #
+    #     # DEPS
+    #     summary_deps = check_dependency(False)
+    #     article_deps = check_dependency(True)
+    #     total_unmatched_deps = []
+    #     for summ_dep in summary_deps:
+    #         if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
+    #             total_unmatched_deps.append(summ_dep)
+    #
+    #     st.write(len(total_unmatched_deps))
+    #     st.write(total_unmatched_deps)
+    #
+    #     # FOR NEW GENERATED SUMMARY
+    #     st.session_state.summary_output = generate_abstractive_summary(st.session_state.article_text,
+    #                                                                     type="beam",
+    #                                                                     do_sample=True, num_beams=15,
+    #                                                                     no_repeat_ngram_size=5)
+    #     _, amount_unmatched = get_and_compare_entities(False)
+    #     st.write(len(amount_unmatched))
+    #     st.write(amount_unmatched)
+    #
+    #     summary_deps = check_dependency(False)
+    #     article_deps = check_dependency(True)
+    #     total_unmatched_deps = []
+    #     for summ_dep in summary_deps:
+    #         if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
+    #             total_unmatched_deps.append(summ_dep)
+    #
+    #     st.write(len(total_unmatched_deps))
+    #     st.write(total_unmatched_deps)
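The updated `get_and_compare_entities` now caches the article entities in session state and then filters duplicate and substring entities before reporting matches. Below is a condensed, standalone sketch of that flow for readers who want to try it outside Streamlit; `extract_entities` is a stand-in for the app's NER pipeline, and the membership test itself is an assumption, since this commit does not show that part of the function.

```python
# Standalone sketch of the entity-matching flow in the updated get_and_compare_entities();
# Streamlit session-state caching is dropped and extract_entities() stands in for the
# app's transformer NER pipeline. The exact "does this entity occur in the article"
# test is an assumption: the commit does not show that part of the function.
import spacy

nlp = spacy.load("en_core_web_sm")  # assumption: any pipeline with an NER component

def extract_entities(text: str) -> list:
    return [ent.text for ent in nlp(text).ents]

def compare_entities(article: str, summary: str) -> tuple:
    entities_article = extract_entities(article)
    matched, unmatched = [], []
    for entity in extract_entities(summary):
        if entity in entities_article or entity.lower() in article.lower():
            matched.append(entity)
        else:
            unmatched.append(entity)  # candidate hallucination

    # Mirrors the committed post-processing: drop duplicates (order-preserving) and
    # drop entities that are substrings of a longer entity in the same list, but build
    # new lists instead of calling list.remove() on a list that is being iterated.
    matched = list(dict.fromkeys(matched))
    unmatched = list(dict.fromkeys(unmatched))
    matched = [e for e in matched
               if not any(e != other and e.lower() in other.lower() for other in matched)]
    unmatched = [e for e in unmatched
                 if not any(e != other and e.lower() in other.lower() for other in unmatched)]
    return matched, unmatched
```

The comprehension-based filter keeps the same intent as the nested `list.remove()` loops in the commit while avoiding mutation of a list during iteration.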
dependency-images/{article11.txt → Mark Levinson Airpods.txt}
RENAMED
File without changes

dependency-images/{article9.txt → Novak Djokovic.txt}
RENAMED
File without changes

dependency-images/{article13.txt → OnePlus 10 Pro.txt}
RENAMED
File without changes

dependency-images/{article16.txt → Tencent Holdings.txt}
RENAMED
File without changes

dependency-images/{article4.txt → White House.txt}
RENAMED
File without changes

dependency-specific-text/{article11.txt → Mark Levinson Airpods.txt}
RENAMED
@@ -1,4 +1,4 @@
-One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "pobj"
-Furthermore, we only match *pobj* dependencies when the target word is "in"
-In this case it's obvious that "in U.S." is not found in the article, as "U.S." is a hallucinated entity itself as discussed in the entity matching paragraph.
+One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the *"pobj"* (object of preposition) dependency.
+Furthermore, we only match *pobj* dependencies when the target word is *"in"*, as in this example.
+In this case it's obvious that *"in U.S."* is not found in the article, as *"U.S."* is a hallucinated entity itself as discussed in the entity matching paragraph.
 So technically we don't need dependency comparison to spot the error from this summary.
dependency-specific-text/{article9.txt → Novak Djokovic.txt}
RENAMED
@@ -1,4 +1,4 @@
-One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "pobj" (object of preposition) dependency.
-Furthermore, we only match *pobj* dependencies when the target word is "in"
-The sentence here is not a factual error per se, but rather a readability issue. The "in" should be dropped to make the sentence correct.
-For better examples with this specific dependency, try choosing another article.
+One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the *"pobj"* (object of preposition) dependency.
+Furthermore, we only match *pobj* dependencies when the target word is *"in"*, as in this example.
+The sentence here is not a factual error per se, but rather a readability issue. The *"in"* should be dropped to make the sentence correct.
+For better examples with this specific dependency, try choosing another article.
dependency-specific-text/{article13.txt → OnePlus 10 Pro.txt}
RENAMED
@@ -1,9 +1,9 @@
-One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "amod" (adjectival modifier) dependency.
-Applied to this summary, we have "First" as the entity, and it is the adjectival modifier of the word "phone"
+One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the *"amod"* (adjectival modifier) dependency.
+Applied to this summary, we have *"First"* as the entity, and it is the adjectival modifier of the word *"phone"*.
 And indeed, this unmatched dependency indicates an actual error here. The sentence is not factual, since the article talks about a **new** type of flagship phone,
 and not the **first** flagship phone. This error was found by filtering on this specific kind of dependency. Empirical results showed that unmatched *amod* dependencies often suggest
 that the summary sentence contains an error. <br> <br>
-Another dependency that we use is the "pobj" (object of preposition) dependency.
-Furthermore, we only match *pobj* dependencies when the target word is "in"
-In this case the sentence itself contains a factual error (because the article states "there's no word on a US release date yet").
-However, this could have been found
+Another dependency that we use is the *"pobj"* (object of preposition) dependency.
+Furthermore, we only match *pobj* dependencies when the target word is *"in"*, as in this example.
+In this case the sentence itself contains a factual error (because the article states *"there's no word on a US release date yet"*).
+However, this could have already been found with entity matching (as *January 18* is unmatched), and the unmatched dependency can not be completely blamed for this error here.
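The *amod* and *pobj*-with-*"in"* cues described in these explanation files can be reproduced with spaCy, which app.py already loads through `get_spacy()`. The sketch below is illustrative only: it is not the app's `check_dependency` implementation (not shown in this commit), it assumes the standard `en_core_web_sm` pipeline, and the two sentences are toy paraphrases of the OnePlus example rather than the real article and summary.

```python
# Illustrative sketch of the dependency comparison idea; NOT the app's check_dependency().
# Assumes the standard spaCy English model: python -m spacy download en_core_web_sm
import spacy

nlp = spacy.load("en_core_web_sm")

def suspicious_dependencies(text):
    """Collect (child, relation, head) triples for the two cues discussed above."""
    triples = set()
    for token in nlp(text):
        # "amod": an adjectival modifier that is itself tagged as (part of) an entity,
        # e.g. "First" modifying "phone".
        if token.dep_ == "amod" and token.ent_type_:
            triples.add((token.text.lower(), "amod", token.head.text.lower()))
        # "pobj": object of a preposition, but only when that preposition is "in",
        # e.g. "U.S." in "in U.S.".
        elif token.dep_ == "pobj" and token.head.text.lower() == "in" and token.ent_type_:
            triples.add((token.text.lower(), "pobj", token.head.text.lower()))
    return triples

# Toy paraphrases of the example above, not the actual article/summary text.
article = "OnePlus will launch a new flagship phone, but there is no word on a US release date yet."
summary = "OnePlus launches its first flagship phone in U.S. on January 18."

# Triples that appear in the summary but nowhere in the article are flagged.
print(suspicious_dependencies(summary) - suspicious_dependencies(article))
```

Comparing sets of entity-anchored triples rather than raw strings is what keeps the check narrow enough that harmless rephrasings are usually not penalised.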
dependency-specific-text/{article16.txt → Tencent Holdings.txt}
RENAMED
File without changes

dependency-specific-text/{article4.txt → White House.txt}
RENAMED
@@ -1,7 +1,7 @@
-One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the "amod" (adjectival modifier) dependency.
-Applied to this summary, we have "Democratic" as the entity, and it is the adjectival modifier of the word "member"
+One of the dependencies that, when found in the summary but not in the article, indicates a possible error is the *"amod"* (adjectival modifier) dependency.
+Applied to this summary, we have *"Democratic"* as the entity, and it is the adjectival modifier of the word *"member"*.
 And indeed, this unmatched dependency indicates an actual error here. The sentence is not factual for two reasons. <br> <br>
-First, the article talks about "democrats" and "members of the committee"
+First, the article talks about *"democrats"* and *"members of the committee"*, which are two separate things. The summary combines those two in a way
 that can be seen as not completely factual. Second, the statement itself was not made by a democrat (nor a member of the committee), and even though the dependency can't be
 directly linked to this error, empirical results showed that unmatched *amod* dependencies often suggest
 that the summary sentence is incorrect.
entity-specific-text/Mark Levinson Airpods.txt
ADDED
@@ -0,0 +1,2 @@
+As you can see we have 1 unmatched entity: *"U.S."* is a hallucinated entity in the summary, that does not exist in the article.
+This error can be found/solved by applying entity matching.

entity-specific-text/{article4.txt → Novak Djokovic.txt}
RENAMED
File without changes

entity-specific-text/OnePlus 10 Pro.txt
ADDED
@@ -0,0 +1,3 @@
+As you can see we have 2 unmatched entities: *"January 18"* and *"U.S"*. The first one is a hallucinated entity in the summary, that does not exist in the article.
+*U.S*. **does** occur in the article, but as *"US"* instead of *"U.S."*. This could be solved
+by comparing to a list of abbreviations or with a specific embedder for abbreviations but is currently not implemented.
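A lightweight way to realise the abbreviation idea mentioned above (not part of this commit, purely a sketch) is to normalise entity strings before comparing them, so that *"U.S."* and *"US"* collapse to the same key:

```python
def normalise_entity(entity: str) -> str:
    # Hypothetical helper, not in the committed code: strip punctuation and case so
    # that simple abbreviation variants compare as equal.
    return "".join(ch for ch in entity.lower() if ch.isalnum())

assert normalise_entity("U.S.") == normalise_entity("US")        # both become "us"
assert normalise_entity("OnePlus 10 Pro") == "oneplus10pro"      # spaces dropped too
```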
entity-specific-text/Tencent Holdings.txt
ADDED
@@ -0,0 +1,2 @@
+As you can see we have 1 unmatched entity: *"Six9"* is a hallucinated entity in the summary, that does not exist in the article.
+This error can be found/solved by applying entity matching.

entity-specific-text/{article9.txt → White House.txt}
RENAMED
File without changes
entity-specific-text/article11.txt
DELETED
@@ -1,4 +0,0 @@
-As you can see we have 1 unmatched entity: "U.S." is a hallucinated entity in the summary, that does not exist in the article.
-Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
-system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can improve this problem
-for the downstream task of summary generation.

entity-specific-text/article13.txt
DELETED
@@ -1,5 +0,0 @@
-As you can see we have 2 unmatched entities: "January 18" and "U.S". The first one is a hallucinated entity in the summary, that does not exist in the article.
-Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
-system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can improve this problem
-for the downstream task of summary generation. U.S. **does** occur in the article, but as "US" instead of "U.S.". This could be solved
-by comparing to a list of abbreviations or with a specific embedder for abbreviations but is currently not implemented.

entity-specific-text/article16.txt
DELETED
@@ -1,4 +0,0 @@
-As you can see we have 1 unmatched entity: "Six9" is a hallucinated entity in the summary, that does not exist in the article.
-Deep learning based generation is [prone to hallucinate](https://arxiv.org/pdf/2202.03629.pdf) unintended text. These hallucinations degrade
-system performance and fail to meet user expectations in many real-world scenarios. By applying entity matching, we can improve this problem
-for the downstream task of summary generation.
sample-articles-temp/numbers_to_text.txt
ADDED
@@ -0,0 +1,6 @@
+4: White House
+9: Novak Djokovic
+11: Mark Levinson Airpods
+13: OnePlus 10 Pro
+16: Tencent Holdings
+
sample-articles/{article11.txt → Mark Levinson Airpods.txt}
RENAMED
File without changes

sample-articles/{article9.txt → Novak Djokovic.txt}
RENAMED
File without changes

sample-articles/{article13.txt → OnePlus 10 Pro.txt}
RENAMED
File without changes

sample-articles/{article16.txt → Tencent Holdings.txt}
RENAMED
File without changes

sample-articles/{article4.txt → White House.txt}
RENAMED
File without changes

sample-summaries/{article11.txt → Mark Levinson Airpods.txt}
RENAMED
File without changes

sample-summaries/{article9.txt → Novak Djokovic.txt}
RENAMED
File without changes

sample-summaries/{article13.txt → OnePlus 10 Pro.txt}
RENAMED
File without changes

sample-summaries/{article16.txt → Tencent Holdings.txt}
RENAMED
File without changes

sample-summaries/{article4.txt → White House.txt}
RENAMED
File without changes