Spaces:
Build error
Build error
Add logger
Browse filesAdd more debug messages
Add more information
Add logo and header
- app.py +13 -3
- assets/header.html +1 -0
- assets/logo.png +0 -0
- npc_bert_models/app_logger.py +34 -0
- npc_bert_models/cls_module.py +4 -0
- npc_bert_models/mlm_module.py +4 -0
- npc_bert_models/summary_module.py +6 -3
app.py
CHANGED
|
@@ -1,20 +1,23 @@
|
|
| 1 |
from logging import PlaceHolder
|
| 2 |
import gradio as gr
|
| 3 |
-
import os
|
| 4 |
from npc_bert_models.gradio_demo import *
|
| 5 |
from npc_bert_models.mlm_module import NpcBertMLM
|
| 6 |
from npc_bert_models.cls_module import NpcBertCLS
|
| 7 |
from npc_bert_models.summary_module import NpcBertGPT2
|
|
|
|
| 8 |
import json
|
| 9 |
|
| 10 |
-
|
| 11 |
class main_window():
|
|
|
|
| 12 |
def __init__(self):
|
| 13 |
self.interface = None
|
| 14 |
self.examples = json.load(open("examples.json", 'r'))
|
|
|
|
| 15 |
|
| 16 |
def initialize(self):
|
| 17 |
#! Initialize MLM
|
|
|
|
| 18 |
self.npc_mlm = NpcBertMLM()
|
| 19 |
self.npc_mlm.load()
|
| 20 |
|
|
@@ -40,6 +43,7 @@ class main_window():
|
|
| 40 |
inp.submit(fn=self.npc_mlm.__call__, inputs=inp, outputs=out)
|
| 41 |
|
| 42 |
#! Initialize report classification
|
|
|
|
| 43 |
self.npc_cls = NpcBertCLS()
|
| 44 |
self.npc_cls.load()
|
| 45 |
|
|
@@ -75,6 +79,7 @@ class main_window():
|
|
| 75 |
inp.submit(fn=self.npc_cls.__call__, inputs=inp, outputs=out)
|
| 76 |
|
| 77 |
#! Initialize report conclusion generation
|
|
|
|
| 78 |
self.npc_summary = NpcBertGPT2()
|
| 79 |
self.npc_summary.load()
|
| 80 |
|
|
@@ -85,6 +90,8 @@ class main_window():
|
|
| 85 |
In this example we explored how the fine-tuned BERT can aid summarizing the reported items and
|
| 86 |
generates a conclusion, which includes providing stages of the written reports.
|
| 87 |
|
|
|
|
|
|
|
| 88 |
# Disclaimer
|
| 89 |
|
| 90 |
Again, similar to the last experiment, the examples we list here are mock reports that are created
|
|
@@ -112,6 +119,9 @@ class main_window():
|
|
| 112 |
|
| 113 |
#! Create tab interface
|
| 114 |
with gr.Blocks() as self.interface:
|
|
|
|
|
|
|
|
|
|
| 115 |
gr.Markdown("""
|
| 116 |
# Introduction
|
| 117 |
|
|
@@ -137,7 +147,7 @@ class main_window():
|
|
| 137 |
tab_names=["Masked Language Model", "Report classification", "Report conclusion generation"])
|
| 138 |
|
| 139 |
def lauch(self):
|
| 140 |
-
self.interface.launch()
|
| 141 |
pass
|
| 142 |
|
| 143 |
def _set_report_file_helper(self, file_in):
|
|
|
|
| 1 |
from logging import PlaceHolder
|
| 2 |
import gradio as gr
|
| 3 |
+
import os, sys
|
| 4 |
from npc_bert_models.gradio_demo import *
|
| 5 |
from npc_bert_models.mlm_module import NpcBertMLM
|
| 6 |
from npc_bert_models.cls_module import NpcBertCLS
|
| 7 |
from npc_bert_models.summary_module import NpcBertGPT2
|
| 8 |
+
from npc_bert_models.app_logger import get_logger
|
| 9 |
import json
|
| 10 |
|
|
|
|
| 11 |
class main_window():
|
| 12 |
+
logger = get_logger('main')
|
| 13 |
def __init__(self):
|
| 14 |
self.interface = None
|
| 15 |
self.examples = json.load(open("examples.json", 'r'))
|
| 16 |
+
self.logger.info(f"Created {__class__.__name__} instance.")
|
| 17 |
|
| 18 |
def initialize(self):
|
| 19 |
#! Initialize MLM
|
| 20 |
+
self.logger.info("Loading MLM interface...")
|
| 21 |
self.npc_mlm = NpcBertMLM()
|
| 22 |
self.npc_mlm.load()
|
| 23 |
|
|
|
|
| 43 |
inp.submit(fn=self.npc_mlm.__call__, inputs=inp, outputs=out)
|
| 44 |
|
| 45 |
#! Initialize report classification
|
| 46 |
+
self.logger.info("Loading BERTCLS interface...")
|
| 47 |
self.npc_cls = NpcBertCLS()
|
| 48 |
self.npc_cls.load()
|
| 49 |
|
|
|
|
| 79 |
inp.submit(fn=self.npc_cls.__call__, inputs=inp, outputs=out)
|
| 80 |
|
| 81 |
#! Initialize report conclusion generation
|
| 82 |
+
self.logger.info("Loading Bert-GPT2 interface...")
|
| 83 |
self.npc_summary = NpcBertGPT2()
|
| 84 |
self.npc_summary.load()
|
| 85 |
|
|
|
|
| 90 |
In this example we explored how the fine-tuned BERT can aid summarizing the reported items and
|
| 91 |
generates a conclusion, which includes providing stages of the written reports.
|
| 92 |
|
| 93 |
+
> On this cloud node with only 2 CPUs, it takes ~60 seconds for this task.
|
| 94 |
+
|
| 95 |
# Disclaimer
|
| 96 |
|
| 97 |
Again, similar to the last experiment, the examples we list here are mock reports that are created
|
|
|
|
| 119 |
|
| 120 |
#! Create tab interface
|
| 121 |
with gr.Blocks() as self.interface:
|
| 122 |
+
# Logo
|
| 123 |
+
gr.HTML(open("./assets/header.html", 'r').read())
|
| 124 |
+
|
| 125 |
gr.Markdown("""
|
| 126 |
# Introduction
|
| 127 |
|
|
|
|
| 147 |
tab_names=["Masked Language Model", "Report classification", "Report conclusion generation"])
|
| 148 |
|
| 149 |
def lauch(self):
|
| 150 |
+
self.interface.launch(allowed_paths=['assets'])
|
| 151 |
pass
|
| 152 |
|
| 153 |
def _set_report_file_helper(self, file_in):
|
assets/header.html
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
<h1 style="border-bottom: 0.1em solid;background-image: url('/file=assets/logo.png');background-position: center right;background-size: auto 100%;background-repeat: no-repeat;padding-top:0.5em;font-size:32pt"> CUHK NPC-NLP Pilot Demo </h1>
|
assets/logo.png
ADDED
|
npc_bert_models/app_logger.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import inspect
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
log_format = "[%(asctime)-12s-%(levelname)s] (%(name)s) %(message)s"

def get_logger(logger_name: str = None, log_level: str = 'info') -> logging.Logger:
    """Create and return a logger that writes formatted records to stdout.

    Args:
        logger_name (str, optional):
            Name for the logger. Defaults to the caller's file name
            (with a trailing ``.py`` removed).
        log_level (str, optional):
            Case-insensitive level name (``'debug'``, ``'info'``, ...).
            Defaults to ``'info'``.

    Returns:
        logging.Logger: The configured logger.

    Raises:
        KeyError: If ``log_level`` is not a recognized level name.
    """
    # Map the textual level name to its numeric value.
    # NOTE(review): logging._nameToLevel is a private attribute — kept for
    # compatibility with the original, but a public mapping would be safer.
    log_levels = logging._nameToLevel
    log_level = log_levels[log_level.upper()]

    # Default the logger name to the caller's source file name.
    logger_name = logger_name or Path(inspect.getmodule(inspect.stack()[1][0]).__file__).name
    # BUG FIX: the original used logger_name.rstrip('.py'), which strips ANY
    # trailing '.', 'p' or 'y' characters (e.g. 'happy.py' -> 'ha',
    # 'summary.py' -> 'summar'). Remove exactly the '.py' suffix instead.
    if logger_name.endswith('.py'):
        logger_name = logger_name[:-3]
    logger = logging.getLogger(logger_name)
    logger.setLevel(log_level)
    formatter = logging.Formatter(fmt=log_format)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Install a global exception hook only for the main application logger,
    # so uncaught exceptions get logged. Skipped when a debugger/tracer is
    # active (sys.gettrace() returns a trace function while debugging).
    if logger_name == 'main':
        def exception_hook(*args):
            gettrace = getattr(sys, 'gettrace', None)
            if not gettrace():
                logger.error('Uncaught exception:')
                logger.exception(args[-1], exc_info=args)
        sys.excepthook = exception_hook

    # BUG FIX: guard against stacking duplicate handlers — the original added
    # a new StreamHandler on every call, so calling get_logger() twice with
    # the same name duplicated every log line.
    if not logger.handlers:
        logger.addHandler(handler)
    logger.info(f"Created {logger = }")
    return logger
|
npc_bert_models/cls_module.py
CHANGED
|
@@ -2,6 +2,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Any, Dict
|
|
|
|
| 5 |
|
| 6 |
class NpcBertCLS():
|
| 7 |
r"""A class for performing report classification with BERT.
|
|
@@ -21,12 +22,14 @@ class NpcBertCLS():
|
|
| 21 |
pretrained_model (str):
|
| 22 |
The path to the directory containing the fine-tuned model.
|
| 23 |
"""
|
|
|
|
| 24 |
def __init__(self):
|
| 25 |
self.model = None
|
| 26 |
self.tokenizer = None
|
| 27 |
self.pipeline = None
|
| 28 |
# relative to app.py
|
| 29 |
self.pretrained_model = "./models/npc-bert-cls"
|
|
|
|
| 30 |
|
| 31 |
def load(self) -> None:
|
| 32 |
"""Loads the fine-tuned BERT model and related components.
|
|
@@ -61,6 +64,7 @@ class NpcBertCLS():
|
|
| 61 |
Raises:
|
| 62 |
BrokenPipeError: If the model has not been loaded before calling this method.
|
| 63 |
"""
|
|
|
|
| 64 |
if self.pipeline is None:
|
| 65 |
msg = "Model was not initialized, have you run load()?"
|
| 66 |
raise BrokenPipeError(msg)
|
|
|
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Any, Dict
|
| 5 |
+
from .app_logger import get_logger
|
| 6 |
|
| 7 |
class NpcBertCLS():
|
| 8 |
r"""A class for performing report classification with BERT.
|
|
|
|
| 22 |
pretrained_model (str):
|
| 23 |
The path to the directory containing the fine-tuned model.
|
| 24 |
"""
|
| 25 |
+
logger = get_logger()
|
| 26 |
def __init__(self):
|
| 27 |
self.model = None
|
| 28 |
self.tokenizer = None
|
| 29 |
self.pipeline = None
|
| 30 |
# relative to app.py
|
| 31 |
self.pretrained_model = "./models/npc-bert-cls"
|
| 32 |
+
self.logger.info(f"Created {__class__.__name__} instance.")
|
| 33 |
|
| 34 |
def load(self) -> None:
|
| 35 |
"""Loads the fine-tuned BERT model and related components.
|
|
|
|
| 64 |
Raises:
|
| 65 |
BrokenPipeError: If the model has not been loaded before calling this method.
|
| 66 |
"""
|
| 67 |
+
self.logger.info(f"Called with {args = }")
|
| 68 |
if self.pipeline is None:
|
| 69 |
msg = "Model was not initialized, have you run load()?"
|
| 70 |
raise BrokenPipeError(msg)
|
npc_bert_models/mlm_module.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
|
|
|
| 4 |
|
| 5 |
class NpcBertMLM():
|
| 6 |
r"""A class for performing masked language modeling with BERT.
|
|
@@ -21,12 +22,14 @@ class NpcBertMLM():
|
|
| 21 |
pretrained_model (str): The path to
|
| 22 |
the directory containing the fine-tuned model.
|
| 23 |
"""
|
|
|
|
| 24 |
def __init__(self):
|
| 25 |
self.model = None
|
| 26 |
self.tokenizer = None
|
| 27 |
self.pipeline = None
|
| 28 |
# relative to app.py
|
| 29 |
self.pretrained_model = "./models/npc-bert-best"
|
|
|
|
| 30 |
|
| 31 |
def load(self):
|
| 32 |
"""Loads the fine-tuned BERT model and related components.
|
|
@@ -61,6 +64,7 @@ class NpcBertMLM():
|
|
| 61 |
Raises:
|
| 62 |
BrokenPipeError: If the model has not been loaded before calling this method.
|
| 63 |
"""
|
|
|
|
| 64 |
if self.pipeline is None:
|
| 65 |
msg = "Model was not initialized, have you run load()?"
|
| 66 |
raise BrokenPipeError(msg)
|
|
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
| 4 |
+
from .app_logger import get_logger
|
| 5 |
|
| 6 |
class NpcBertMLM():
|
| 7 |
r"""A class for performing masked language modeling with BERT.
|
|
|
|
| 22 |
pretrained_model (str): The path to
|
| 23 |
the directory containing the fine-tuned model.
|
| 24 |
"""
|
| 25 |
+
logger = get_logger()
|
| 26 |
def __init__(self):
|
| 27 |
self.model = None
|
| 28 |
self.tokenizer = None
|
| 29 |
self.pipeline = None
|
| 30 |
# relative to app.py
|
| 31 |
self.pretrained_model = "./models/npc-bert-best"
|
| 32 |
+
self.logger.info(f"Created {__class__.__name__} instance.")
|
| 33 |
|
| 34 |
def load(self):
|
| 35 |
"""Loads the fine-tuned BERT model and related components.
|
|
|
|
| 64 |
Raises:
|
| 65 |
BrokenPipeError: If the model has not been loaded before calling this method.
|
| 66 |
"""
|
| 67 |
+
self.logger.info(f"Called with {args = }")
|
| 68 |
if self.pipeline is None:
|
| 69 |
msg = "Model was not initialized, have you run load()?"
|
| 70 |
raise BrokenPipeError(msg)
|
npc_bert_models/summary_module.py
CHANGED
|
@@ -2,16 +2,17 @@ from transformers import AutoTokenizer, EncoderDecoderModel
|
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
| 4 |
import re
|
| 5 |
-
|
| 6 |
|
| 7 |
class NpcBertGPT2():
|
| 8 |
-
|
| 9 |
def __init__(self):
|
| 10 |
self.model = None
|
| 11 |
self.tokenizer = None
|
| 12 |
self.pipeline = None
|
| 13 |
# relative to app.py
|
| 14 |
self.pretrained_model = "./models/npc-bert-gpt2-best"
|
|
|
|
| 15 |
|
| 16 |
def load(self):
|
| 17 |
"""Loads the fine-tuned EncoderDecoder model and related components.
|
|
@@ -59,8 +60,10 @@ class NpcBertGPT2():
|
|
| 59 |
if self.pipeline is None:
|
| 60 |
msg = "Model was not initialized, have you run load()?"
|
| 61 |
raise BrokenPipeError(msg)
|
|
|
|
|
|
|
|
|
|
| 62 |
pipe_out, = self.pipeline(*args)
|
| 63 |
-
|
| 64 |
pipe_out = pipe_out['generated_text']
|
| 65 |
|
| 66 |
# remove repeated lines by hard coding
|
|
|
|
| 2 |
from transformers import pipeline as hf_pipeline
|
| 3 |
from pathlib import Path
|
| 4 |
import re
|
| 5 |
+
from .app_logger import get_logger
|
| 6 |
|
| 7 |
class NpcBertGPT2():
|
| 8 |
+
logger = get_logger()
|
| 9 |
def __init__(self):
|
| 10 |
self.model = None
|
| 11 |
self.tokenizer = None
|
| 12 |
self.pipeline = None
|
| 13 |
# relative to app.py
|
| 14 |
self.pretrained_model = "./models/npc-bert-gpt2-best"
|
| 15 |
+
self.logger.info(f"Created {__class__.__name__} instance.")
|
| 16 |
|
| 17 |
def load(self):
|
| 18 |
"""Loads the fine-tuned EncoderDecoder model and related components.
|
|
|
|
| 60 |
if self.pipeline is None:
|
| 61 |
msg = "Model was not initialized, have you run load()?"
|
| 62 |
raise BrokenPipeError(msg)
|
| 63 |
+
|
| 64 |
+
logger.info(f"Called with arguments {args = }")
|
| 65 |
+
|
| 66 |
pipe_out, = self.pipeline(*args)
|
|
|
|
| 67 |
pipe_out = pipe_out['generated_text']
|
| 68 |
|
| 69 |
# remove repeated lines by hard coding
|