Spaces:

CZLC
/

rouge_raw

Runtime error

App Files Files Community

Martin Dočekal commited on Feb 2, 2024

Commit

732e363

1 Parent(s): d38d998

init. code for ROUGERaw wrapper

Browse files

Files changed (3) hide show

README.md +87 -6
app.py +12 -0
rouge_raw.py +231 -0

README.md CHANGED Viewed

@@ -1,12 +1,93 @@
 ---
-title: Rouge Raw
-emoji: 🏢
-colorFrom: yellow
-colorTo: gray
 sdk: gradio
-sdk_version: 4.16.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: RougeRaw
+emoji: 🤗
+colorFrom: blue
+colorTo: red
 sdk: gradio
+sdk_version: 3.19.1
 app_file: app.py
 pinned: false
+tags:
+- evaluate
+- metric
+description: >-
+  ROUGE RAW is a language-agnostic variant of ROUGE without stemmer, stop words and synonyms.
+  This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
 ---
+# Metric Card for RougeRaw
+## Metric Description
+ROUGE RAW is a language-agnostic variant of ROUGE without stemmer, stop words and synonyms.
+  This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
+## How to Use
+```python
+>>> rougeraw = evaluate.load('CZLC/rouge_raw')
+>>> predictions = ["the cat is on the mat", "hello there"]
+>>> references = ["the cat is on the mat", "hello there"]
+>>> results = rougeraw.compute(predictions=predictions, references=references)
+>>> print(results)
+{'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
+```
+### Inputs
+predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces.
+references: list of references, one for each prediction. Each reference should be a string with tokens separated by spaces.
+### Output Values
+- rougeraw1_precision
+- rougeraw1_recall
+- rougeraw1_fmeasure
+- rougeraw2_precision
+- rougeraw2_recall
+- rougeraw2_fmeasure
+- rougerawl_precision
+- rougerawl_recall
+- rougerawl_fmeasure
+Output Example(s):
+```python
+{'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
+```
+This metric outputs a dictionary, containing the scores.
+## Citation(s)
+```bibtex
+@inproceedings{straka-etal-2018-sumeczech,
+    title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
+    author = "Straka, Milan  and
+      Mediankin, Nikita  and
+      Kocmi, Tom  and
+      {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k  and
+      Hude{\v{c}}ek, Vojt{\v{e}}ch  and
+      Haji{\v{c}}, Jan",
+    editor = "Calzolari, Nicoletta  and
+      Choukri, Khalid  and
+      Cieri, Christopher  and
+      Declerck, Thierry  and
+      Goggi, Sara  and
+      Hasida, Koiti  and
+      Isahara, Hitoshi  and
+      Maegaard, Bente  and
+      Mariani, Joseph  and
+      Mazo, H{\'e}l{\`e}ne  and
+      Moreno, Asuncion  and
+      Odijk, Jan  and
+      Piperidis, Stelios  and
+      Tokunaga, Takenobu",
+    booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
+    month = may,
+    year = "2018",
+    address = "Miyazaki, Japan",
+    publisher = "European Language Resources Association (ELRA)",
+    url = "https://aclanthology.org/L18-1551",
+}
+```

app.py ADDED Viewed

	@@ -0,0 +1,12 @@

# -*- coding: UTF-8 -*-
"""
Created on 02.02.24
Entry point of the Space: launches the Gradio widget for the RougeRaw metric.

:author:     Martin Dočekal
"""
import evaluate
from evaluate.utils import launch_gradio_widget

# Load the metric published by this Space (same identifier as in the README
# usage example) instead of the unrelated built-in "accuracy" metric.
module = evaluate.load("CZLC/rouge_raw")
launch_gradio_widget(module)

rouge_raw.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# -*- coding: UTF-8 -*-
+r"""
+Created on 02.02.24
+Module for raw ROUGE score calculation from:
+@inproceedings{straka-etal-2018-sumeczech,
+    title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
+    author = "Straka, Milan  and
+      Mediankin, Nikita  and
+      Kocmi, Tom  and
+      {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k  and
+      Hude{\v{c}}ek, Vojt{\v{e}}ch  and
+      Haji{\v{c}}, Jan",
+    editor = "Calzolari, Nicoletta  and
+      Choukri, Khalid  and
+      Cieri, Christopher  and
+      Declerck, Thierry  and
+      Goggi, Sara  and
+      Hasida, Koiti  and
+      Isahara, Hitoshi  and
+      Maegaard, Bente  and
+      Mariani, Joseph  and
+      Mazo, H{\'e}l{\`e}ne  and
+      Moreno, Asuncion  and
+      Odijk, Jan  and
+      Piperidis, Stelios  and
+      Tokunaga, Takenobu",
+    booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
+    month = may,
+    year = "2018",
+    address = "Miyazaki, Japan",
+    publisher = "European Language Resources Association (ELRA)",
+    url = "https://aclanthology.org/L18-1551",
+}
+:author:     Martin Dočekal
+"""
+import re
+from typing import Sequence
+import datasets
+import evaluate
class RougeRaw:
    """
    Compute RougeRAW-1, RougeRAW-2 and RougeRAW-L metrics.

    Texts are split on word boundaries and whitespace and lower-cased; no
    stemming, stop-word removal or synonym matching is applied.

    Unlike the script this class is derived from, the tokenizer applies its
    substitutions to the whole text (see ``_tokenize``), so scores of long
    texts can differ from that script.
    """

    # Compiled once and shared by all instances. re.UNICODE is the default for
    # str patterns and is kept only to make the intent explicit.
    _WORD_BOUNDARY = re.compile(r"\b", re.UNICODE)
    _WHITESPACE = re.compile(r"\s+", re.UNICODE)

    class FScore:
        """Precision (``p``), recall (``r``) and F1 (``f``) of one comparison.

        The attributes are plain floats and are mutated in place by
        ``RougeRaw.corpus`` while averaging.
        """

        def __init__(self, correct, gold, system):
            """
            :param correct: number of matched units
            :param gold: number of units in the reference
            :param system: number of units in the prediction
            """
            # Every ratio falls back to 0 when its denominator is 0.
            self.p = correct / system if system else 0.
            self.r = correct / gold if gold else 0.
            self.f = 2 * correct / (system + gold) if system + gold else 0.

        def __repr__(self):
            return f"{type(self).__name__}(p={self.p!r}, r={self.r!r}, f={self.f!r})"

    @staticmethod
    def _ngram_counts(n, words):
        """Return ``(counts, total)`` for all n-grams of ``words``.

        ``counts`` maps each n-gram (a tuple of words) to its number of
        occurrences; ``total`` is the number of n-gram positions.
        """
        counts = {}
        total = 0
        for start in range(len(words) - n + 1):
            ngram = tuple(words[start:start + n])
            counts[ngram] = counts.get(ngram, 0) + 1
            total += 1
        return counts, total

    def _rouge_n(self, n, gold_words, system_words):
        """Compute Rouge-n for given words.

        :param n: n-gram size
        :param gold_words: tokens of the reference
        :param system_words: tokens of the prediction
        :return: FScore built from the clipped n-gram overlap
        """
        gold_ngrams, gold_total = self._ngram_counts(n, gold_words)
        system_ngrams, system_total = self._ngram_counts(n, system_words)

        # Clipped overlap: an n-gram counts at most as often as it occurs in
        # the reference.
        intersection = sum(
            min(count, gold_ngrams.get(ngram, 0))
            for ngram, count in system_ngrams.items()
        )
        return self.FScore(intersection, gold_total, system_total)

    def _rouge_l(self, gold_words, system_words):
        """Compute Rouge-L for given words.

        :param gold_words: tokens of the reference
        :param system_words: tokens of the prediction
        :return: FScore built from the longest-common-subsequence length
        """
        # Rolling-row LCS with a leading zero column. The padding removes the
        # border special cases and makes empty inputs yield an LCS of 0
        # instead of raising IndexError.
        previous = [0] * (len(system_words) + 1)
        for gold_word in gold_words:
            current = [0]
            for col, system_word in enumerate(system_words, start=1):
                if gold_word == system_word:
                    current.append(previous[col - 1] + 1)
                else:
                    current.append(max(previous[col], current[col - 1]))
            previous = current
        return self.FScore(previous[-1], len(gold_words), len(system_words))

    def _tokenize(self, text):
        """Tokenize given text.

        A space is inserted at every word boundary, whitespace runs are
        collapsed to a single space and the result is split on spaces, so
        punctuation becomes separate tokens. An empty or whitespace-only text
        yields ``[""]``.
        """
        # The patterns are applied through compiled objects so the flag cannot
        # be mistaken for the positional ``count`` argument of ``re.sub``,
        # which would limit the number of substitutions.
        spaced = self._WORD_BOUNDARY.sub(" ", text)
        return self._WHITESPACE.sub(" ", spaced).strip().split(" ")

    def document(self, gold, system):
        """Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given documents.

        Each document should be a string.

        :param gold: reference text
        :param system: predicted text
        :return: dict with keys "1", "2" and "L" mapping to FScore objects
        :raises AssertionError: when an argument is not a string
        """
        assert isinstance(gold, str) and isinstance(system, str), "Expected string arguments"

        lc_gold_words = [word.lower() for word in self._tokenize(gold)]
        lc_system_words = [word.lower() for word in self._tokenize(system)]

        return {
            "1": self._rouge_n(1, lc_gold_words, lc_system_words),
            "2": self._rouge_n(2, lc_gold_words, lc_system_words),
            "L": self._rouge_l(lc_gold_words, lc_system_words),
        }

    def corpus(self, gold, system):
        """Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given corpora.

        Each corpus should be a collection of documents, each document a string.
        The result is the per-document average of precision, recall and F1;
        empty corpora give all-zero scores.

        :param gold: list of reference texts
        :param system: list of predicted texts, aligned with ``gold``
        :return: dict with keys "1", "2" and "L" mapping to FScore objects
        :raises AssertionError: when an argument is not a list or the lengths
            differ
        """
        assert isinstance(gold, list) and isinstance(system, list), "Expected list arguments"
        assert len(gold) == len(system), "Given corpora should be of the same length"

        rouge = {key: self.FScore(0, 0, 0) for key in ("1", "2", "L")}
        if not gold:
            return rouge

        # Sum the per-document scores ...
        for gold_document, system_document in zip(gold, system):
            for key, value in self.document(gold_document, system_document).items():
                rouge[key].p += value.p
                rouge[key].r += value.r
                rouge[key].f += value.f

        # ... and turn the sums into averages.
        documents = len(gold)
        for score in rouge.values():
            score.p /= documents
            score.r /= documents
            score.f /= documents

        return rouge
+_CITATION = """\
+@inproceedings{straka-etal-2018-sumeczech,
+    title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
+    author = "Straka, Milan  and
+      Mediankin, Nikita  and
+      Kocmi, Tom  and
+      {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k  and
+      Hude{\v{c}}ek, Vojt{\v{e}}ch  and
+      Haji{\v{c}}, Jan",
+    editor = "Calzolari, Nicoletta  and
+      Choukri, Khalid  and
+      Cieri, Christopher  and
+      Declerck, Thierry  and
+      Goggi, Sara  and
+      Hasida, Koiti  and
+      Isahara, Hitoshi  and
+      Maegaard, Bente  and
+      Mariani, Joseph  and
+      Mazo, H{\'e}l{\`e}ne  and
+      Moreno, Asuncion  and
+      Odijk, Jan  and
+      Piperidis, Stelios  and
+      Tokunaga, Takenobu",
+    booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
+    month = may,
+    year = "2018",
+    address = "Miyazaki, Japan",
+    publisher = "European Language Resources Association (ELRA)",
+    url = "https://aclanthology.org/L18-1551",
+}
+"""
+_DESCRIPTION = """\
+ROUGE RAW is language-agnostic variant of ROUGE without stemmer, stop words and synonymas.
+This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
+"""
+_KWARGS_DESCRIPTION = """
+ROCUE RAW metric for list of predictions and references.
+Args:
+    predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces.
+    references: list of reference for each prediction. Each reference should be a string with tokens separated by spaces.
+Returns:
+    rougeraw1_precision
+    rougeraw1_recall
+    rougeraw1_fmeasure
+    rougeraw2_precision
+    rougeraw2_recall
+    rougeraw2_fmeasure
+    rougerawl_precision
+    rougerawl_recall
+    rougerawl_fmeasure
+Examples:
+    >>> rougeraw = evaluate.load('CZLC/rouge_raw')
+    >>> predictions = ["the cat is on the mat", "hello there"]
+    >>> references = ["the cat is on the mat", "hello there"]
+    >>> results = rougeraw.compute(predictions=predictions, references=references)
+    >>> print(results)
+    {'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
+"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Rouge(evaluate.Metric):
    """`evaluate` metric wrapper that delegates the scoring to RougeRaw."""

    def _info(self):
        """Return the metric metadata: description, citation and input features.

        Both inputs are declared as plain string values, i.e. one prediction
        string and one reference string per example.
        """
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            reference_urls=[
                "http://hdl.handle.net/11234/1-2615",
            ],
        )

    def _compute(self, predictions: Sequence[str], references: Sequence[str]):
        """Compute the corpus-level RougeRAW scores.

        :param predictions: predicted texts
        :param references: reference texts, aligned with ``predictions``
        :return: dict with precision, recall and f-measure of RougeRAW-1,
            RougeRAW-2 and RougeRAW-L, averaged over all examples
        """
        # RougeRaw.corpus asserts that both arguments are real lists, while
        # this method accepts any sequence; convert so tuples and other
        # sequence types do not trip that assertion.
        # The references are the gold corpus, the predictions the system one.
        res = RougeRaw().corpus(list(references), list(predictions))

        return {
            "rougeraw1_precision": res["1"].p,
            "rougeraw1_recall": res["1"].r,
            "rougeraw1_fmeasure": res["1"].f,
            "rougeraw2_precision": res["2"].p,
            "rougeraw2_recall": res["2"].r,
            "rougeraw2_fmeasure": res["2"].f,
            "rougerawl_precision": res["L"].p,
            "rougerawl_recall": res["L"].r,
            "rougerawl_fmeasure": res["L"].f,
        }