Spaces:

DSJA
/

headlineindex

Sleeping

File size: 40,779 Bytes

# -*- coding: utf-8 -*-
"""제목주의지수 (4).ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1v2tMK6_NdEthlQJAU-Hipwkprq70y2jt
"""

import os
import re
import sys
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
from sentence_transformers import SentenceTransformer, util

import re, math, json, numpy as np, pandas as pd, torch
from typing import List, Dict, Tuple, Any
from collections import Counter
import argparse

DEVICE = ("cuda" if torch.cuda.is_available()
          else "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available()
          else "cpu")
SIM_DEVICE = "cpu" if DEVICE == "mps" else DEVICE
print(f"[INFO] Gen Device: {DEVICE} | Sim Device: {SIM_DEVICE}")

exag = {'가득': 2, '가세': 2, '가속': 2, '강력': 1, '강하다': 1, '거품': 3, '격돌': 1, '격앙': 1, '격차': 1, '경악': 1, '고비': 2, '고삐': 1, '고조': 2, '고지': 3, '고통': 3, '공세': 1, '공포': 1, '과장': 1, '광폭': 2, '광풍': 3, '괴물': 2, '구원투수': 3, '굴욕': 3, '극적': 2, '극찬': 2, '글쎄': 2, '급감': 2, '급등': 2, '급발진': 2, '급속': 2, '기승': 1, '기적': 2, '깜짝': 1, '껑충': 2, '꼴찌': 3, '꼼수': 1, '꽁꽁': 2, '꽂히다': 1, '꿀꺽': 2, '꿈틀': 1, '끔찍': 1, '난리': 2, '난항': 1, '날다': 1, '날벼락': 3, '냉각': 2, '넘치다': 1, '논란': 1, '놀라다': 1, '눈덩이': 2, '눈물': 2, '당장': 2, '대규모': 2, '대란': 3, '대박': 3, '대반전': 2, '대폭': 2, '대환영': 2, '덕분': 1, '돌파구': 2, '돌풍': 4, '뒷걸음질': 2, '뒷북': 3, '든든한': 2, '들썩': 1, '떡락': 3, '떡상': 3, '뚝딱': 2, '뚝뚝': 2, '뜨겁다': 2, '러브콜': 3, '레전드': 4, '막차': 3, '만능': 1, '매우': 2, '맵다': 2, '멘붕': 2, '몸살': 3, '무더기': 2, '급물살': 1, '뭇매': 2, '뭉칫돈': 2, '밉다': 3, '바람': 2, '박살': 3, '반전': 1, '반짝': 2, '발칵': 1, '방긋': 2, '방점': 2, '배신': 3, '벌써': 1, '벼랑': 3, '봇물': 2, '부담': 1, '분노': 3, '분수령': 2, '불가피': 1, '불과': 2, '불금': 2, '불기둥': 2, '불꽃': 2, '불똥': 2, '불씨': 1, '불안하다': 2, '불투명': 1, '불확실': 1, '붕괴': 1, '비명': 3, '뻥튀기': 2, '사상': 2, '상급': 3, '상승': 1, '선방': 1, '설상가상': 3, '성큼': 2, '소름': 1, '속출': 2, '손절': 2, '솔솔': 1, '쇼크': 3, '수백': 2, '수상한': 2, '수혈': 2, '순항': 1, '승기': 3, '시름': 2, '신기록': 2, '실망': 1, '심각': 1, '싹쓸이': 3, '쏟아지다': 1, '쓰리다': 2, '아비규환': 2, '악몽': 3, '악재': 1, '안간힘': 2, '안갯속': 2, '안도': 1, '알짜': 1, '압도적': 3, '압승': 3, '야심작': 3, '얼어붙다': 2, '역대': 2, '역대 최고': 2, '역대 최다': 2, '역대 최소': 2, '역대 최저 ': 2, '역대최고': 2, '역대최다': 2, '역대최소': 2, '역대최저 ': 2, '열풍': 3, '영광': 2, '영웅': 3, '오락가락': 2, '온기': 2, '와르르': 3, '와우': 3, '완패': 3, '외면': 2, '외환위기 이후': 2, '외환 위기 이후': 2, '요동치다': 2, '우뚝': 2, '우려': 1, '울다': 2, '위기급': 4, '위기': 3, '위축': 1, '위태': 2, '위협': 1, '유력': 2, '육박': 2, '의혹': 1, '잔치': 3, '잘나가다': 2, '재난급': 4, '저격': 3, '전격': 1, '전설': 3, '절대': 2, '절벽': 4, '족쇄': 2, '주의보': 2, '줄줄이': 2, '중증': 3, '증발': 2, '직격탄': 2, '진통': 2, '질타': 3, '쪽박': 2, '참담': 2, '척척': 2, '초대형': 2, '초비상': 2, '초유': 2, '초토화': 2, '촉각': 2, '최대': 2, '최상': 2, '최선': 2, '최악': 2, '최애': 2, '최저': 2, '최적': 1, '최초': 2, '최후': 2, '추락': 4, '출혈': 2, '충격': 1, '코앞': 3, '털썩': 2, '톡톡': 2, '투톱': 3, '특급': 4, '파격': 1, '편법': 1, '폭락': 3, '폭발': 2, '폭주': 2, '폭증': 2, '폭탄': 2, '폭풍': 2, '하락': 1, '한숨': 2, '함박': 3, '함정': 2, '허리띠': 1, '헌정 사상': 2, '헌정사상': 2, '혁명': 2, '호소': 1, '호평일색': 2, '호평 일색': 2, '호황': 3, '혼돈': 2, '홈런': 2, '확대': 1, '활기': 2, '활발': 1, '활짝': 2, '활활': 2, '후끈': 2, '훨훨': 2, '휩쓸다': 2, '흔들다': 2, 'imf 이후': 2, '역대급': 4, '무궁무진': 2, '1보': 1, '2보': 1, '3보': 1, '단독': 1, '속보': 1, '패닉': 3, '불패': 3, '제동': 2, '조짐': 1, '초긴장': 2, '급제동': 2, '뚝': 2, '복병': 2, '아우성': 3, '좌불안석': 3, '빈손': 2, '대세': 3, '생트집': 3, '주춤': 2, '끄덕': 2, '맞불': 2, '장벽': 2, '썰렁': 2, '먹구름': 3, '부메랑': 2, '롤러코스터': 2, '발목': 2, '반토막': 2, '휘청': 2, '곤두박질': 3, '울상': 2, '위풍당당': 3, '싸늘': 2, '주저': 1, '우수수': 2, '골머리': 2, '공화국': 3, '고공행진': 4}
econ_list = ['(?<![가-힣])정부(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])한국은행(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])기준금리(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])인플레이션(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])디스인플레이션(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])환율(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])재정적자(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])국채(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])세제개편(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])복지지출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])복지[\\s-]?지출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])복지[\\s-]?지출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])복지[\\s-]?지출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])복지\\-지출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])긴축(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])확장재정(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])통화정책(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])금통위(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기\\-둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기침체(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경기반등(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])잠재성장률(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])생산성(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])반도체(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])배터리(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전기차(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])데이터센터(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])AI(?![A-Za-z])', '(?<![A-Za-z])AI(?![A-Za-z])', '(?<![가-힣])인공지능(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])로봇(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])플랫폼(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])빅테크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])스타트업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])구조조정(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])M\\&A(?![A-Za-z])', '(?<![A-Za-z])IPO(?![A-Za-z])', '(?<![A-Za-z])IPO(?![A-Za-z])', '(?<![가-힣])상장(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])리콜(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])리쇼어링\\(reshoring\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공급망\\(supply\\ chain\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])중국리스크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])임금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])최저임금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])실업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])고용지표(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])비정규직(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])노동시간(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])노동[\\s-]?시간(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])노동[\\s-]?시간(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주거비(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계부채(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계[\\s-]?부채(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계[\\s-]?부채(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주담대(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])연체율(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])파산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])자영업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])청년실업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])청년[\\s-]?실업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])청년[\\s-]?실업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])여성고용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])여성[\\s-]?고용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])여성[\\s-]?고용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])근로시간제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])증시(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])코스피(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])코스닥(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])채권(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])은행(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])예대금리차(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])예대[\\s-]?금리차(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])예대[\\s-]?금리차(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])예금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])유동성(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])PF(?![A-Za-z])', '(?<![A-Za-z])PF(?![A-Za-z])', '(?<![가-힣])프로젝트파이낸싱(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])증권사(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])자본확충(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])자본[\\s-]?확충(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])자본[\\s-]?확충(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공매도(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])ETF(?![A-Za-z])', '(?<![가-힣])디지털자산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])디지털[\\s-]?자산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])디지털[\\s-]?자산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])암호화폐(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])스테이블코인(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])규제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])부동산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주택공급(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주택[\\s-]?공급(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주택[\\s-]?공급(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])분양가상한제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])분양가[\\s-]?상한제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])분양가[\\s-]?상한제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])재건축(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])재개발(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])용도지역(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])신도시(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])역세권(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공임대(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공[\\s-]?임대(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공[\\s-]?임대(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])토지거래허가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣A-Za-z])건설사\\s*PF(?![가-힣A-Za-z])', '(?<![가-힣A-Za-z])건설사\\s*PF(?![가-힣A-Za-z])', '(?<![가-힣A-Za-z])건설사\\s*PF(?![가-힣A-Za-z])', '(?<![A-Za-z])SOC(?![A-Za-z])', '(?<![가-힣])교통망(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])미분양(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])월세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])외식물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])생활물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])국제유가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])곡물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전기요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전기[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전기[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가스요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가스[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가스[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공[\\s-]?요금(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전력시장(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전력[\\s-]?시장(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전력[\\s-]?시장(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])SMP(?![A-Za-z])', '(?<![A-Za-z])SMP(?![A-Za-z])', '(?<![가-힣])전력도매가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원전(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])태양광(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])풍력(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수소(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])탄소배출권\\(ETS\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])RE100(?![A-Za-z])', '(?<![가-힣])수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])무역수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])무역[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])무역[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])달러인덱스(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])달러[\\s-]?인덱스(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])달러[\\s-]?인덱스(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?약세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?약세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?약세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?강세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?강세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])원화[\\s-]?강세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])통상마찰(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])관세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대미\\(IRA\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대EU\\(CBAM\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대중수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대중[\\s-]?수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])대중[\\s-]?수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])반도체수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])반도체[\\s-]?수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])반도체[\\s-]?수출(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])프렌드쇼어링\\(friend\\-shoring\\)(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])오픈뱅킹(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])오픈[\\s-]?뱅킹(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])오픈[\\s-]?뱅킹(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])핀테크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])마이데이터(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])마이[\\s-]?데이터(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])마이[\\s-]?데이터(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])디지털세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])규제샌드박스(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])클라우드(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])SaaS(?![A-Za-z])', '(?<![가-힣])데이터경제(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])개인정보(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])양극화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])자산격차(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])소득분배(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])청년부담(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])노인빈곤(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])세대갈등(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])지역균형(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])지방소멸(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주거불안(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])시스템리스크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])시스템[\\s-]?리스크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])시스템[\\s-]?리스크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])그림자금융(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])그림자[\\s-]?금융(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])그림자[\\s-]?금융(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])역전세(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])연쇄부도(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])연쇄[\\s-]?부도(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])연쇄[\\s-]?부도(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])디폴트(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])신용스프레드(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])신용[\\s-]?스프레드(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])신용[\\s-]?스프레드(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])CDS프리미엄(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])신용경색(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])고금리[\\s-]?장기화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])고금리[\\s-]?장기화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])고금리[\\s-]?장기화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계부채\\ 관리\\ 강화(?![가-힣])', '(?<![가-힣])부동산\\ PF\\ 부실(?![가-힣])', '(?<![가-힣])공공요금[\\s-]?인상(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공요금[\\s-]?인상(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])공공요금[\\s-]?인상(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?반등(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?반등(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?반등(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출[\\s-]?둔화(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])정책[\\s-]?불확실성(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])정책[\\s-]?불확실성(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])정책[\\s-]?불확실성(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])관치금융(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])밸류업(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])임금\\-물가[\\s-]?악순환(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])임금\\-물가[\\s-]?악순환(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])임금\\-물가[\\s-]?악순환(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])투자[\\s-]?위축(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])투자[\\s-]?위축(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])투자[\\s-]?위축(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])인바운드\\ 관광\\ 회복(?![가-힣])', '(?<![가-힣])기후리스크(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])CPI(?![A-Za-z])', '(?<![A-Za-z])CPI(?![A-Za-z])', '(?<![가-힣])소비자[\\s-]?물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])소비자[\\s-]?물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])소비자[\\s-]?물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])근원물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])근원[\\s-]?물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])근원[\\s-]?물가(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])PPI(?![A-Za-z])', '(?<![A-Za-z])PMI(?![A-Za-z])', '(?<![A-Za-z])GDP(?![A-Za-z])','(?<![A-Za-z])IPI(?![A-Za-z])', '(?<![A-Za-z])IPI(?![A-Za-z])', '(?<![가-힣])광공업[\\s-]?생산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])광공업[\\s-]?생산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])광공업[\\s-]?생산(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])소비자심리지수(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])기대인플레(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])고용동향(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계신용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계[\\s-]?신용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])가계[\\s-]?신용(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])경상[\\s-]?수지(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출입[\\s-]?통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출입[\\s-]?통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])수출입[\\s-]?통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])주택가격지수(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])전세가격지수(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])미분양통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])미분양[\\s-]?통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])미분양[\\s-]?통계(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![A-Za-z])FOMC(?![A-Za-z])', '(?<![A-Za-z])ECB(?![A-Za-z])', '(?<![A-Za-z])BOJ(?![A-Za-z])', '(?<![가-힣])금통위(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])OPEC\\+[\\s-]?회의(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])OPEC\\+[\\s-]?회의(?:은|는|이|가|을|를)?(?![가-힣])', '(?<![가-힣])OPEC\\+[\\s-]?회의(?:은|는|이|가|을|를)?(?![가-힣])']

RE_BULLETS = re.compile(r"[■◆◇]")
RE_GUIDE   = re.compile(r"^\*.*$|^※.*$", flags=re.MULTILINE)
RE_ROLES   = re.compile(r"^(진행|앵커|출연)\s*:\s*.*$", flags=re.MULTILINE)
RE_EMAIL   = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
RE_EXTRA   = re.compile(r"대담 발췌\s*:\s*.*$", flags=re.MULTILINE)
RE_MULTINL = re.compile(r"\n+")
RE_LSTRIP  = re.compile(r"^\s+", flags=re.MULTILINE)

def preprocess_text(text: str) -> str:
    if not isinstance(text, str):
        return ""
    text = RE_BULLETS.sub("", text)
    text = RE_GUIDE.sub("", text)
    text = RE_ROLES.sub("", text)
    text = RE_EMAIL.sub("", text)
    text = RE_EXTRA.sub("", text)
    text = RE_MULTINL.sub("\n", text).strip()
    text = RE_LSTRIP.sub("", text)
    return text

def sentence_split(text: str):
    if not isinstance(text, str):
        text = "" if text is None else str(text)
    text = text.replace("\n", ".")
    text = re.sub(r"\.{2,}", ".", text)
    return [s.strip() for s in text.split("다.") if s.strip()]

def top5_title_body_sim(title: str, body_text: str, sbert) -> float:
    sents = sentence_split(body_text)
    if not sents:
        return float("nan")
    title_emb = sbert.encode(title, convert_to_tensor=True, normalize_embeddings=True)
    sent_embs = sbert.encode(sents, convert_to_tensor=True, normalize_embeddings=True)
    sims = util.pytorch_cos_sim(title_emb, sent_embs)[0].detach().cpu().numpy().tolist()
    sims.sort(reverse=True)
    return float(np.mean(sims[:5])) if sims else float("nan")

# 필요한 모델 불러오기
_tok = _bart = _sbert = None
def load_models():
    global _tok, _bart, _sbert
    if _tok is None or _bart is None:
        _tok = PreTrainedTokenizerFast.from_pretrained("digit82/kobart-summarization")
        if _tok.pad_token is None:
            _tok.pad_token = _tok.eos_token
        _tok.model_max_length = 1024
        _bart = BartForConditionalGeneration.from_pretrained("digit82/kobart-summarization")
        _bart.eval().to(DEVICE)
        if DEVICE == "cuda":
            try:
                _bart.half()
            except Exception:
                pass
    if _sbert is None:
        _sbert = SentenceTransformer("snunlp/KR-SBERT-V40K-klueNLI-augSTS", device=SIM_DEVICE)
    return _tok, _bart, _sbert

@torch.inference_mode()
def summarize(tok, model, text: str, max_new_tokens: int = 160) -> str:
    if not text:
        return ""
    enc = tok(text, return_tensors="pt", truncation=True, max_length=1024, padding=False)
    out = model.generate(
        input_ids=enc["input_ids"].to(DEVICE),
        attention_mask=enc["attention_mask"].to(DEVICE),
        max_new_tokens=max_new_tokens,
        num_beams=4,
        no_repeat_ngram_size=3,
        length_penalty=1.0,
        early_stopping=True,
        use_cache=True
    )
    return tok.decode(out[0], skip_special_tokens=True)

# 과장 표현 정규화 예외 설정
NORM_RULES = [
    (r'외환\s*위기\s*이후', '외환위기이후'),
    (r'IMF\s*이후', 'IMF이후'),
    (r'imf\s*이후', 'imf이후'),
    (r'IMF\s*급', 'IMF급'),
    (r'imf\s*급', 'imf급'),
    (r'호평\s*일색', '호평일색'),
    (r'헌정\s*사상', '헌정사상'),
    (r'역대\s*최고', '역대최고'),
    (r'역대\s*최다', '역대최다'),
    (r'역대\s*최소', '역대최소'),
    (r'역대\s*최저', '역대최저'),
]
USER_TERMS = [
    '대반전',
    '외환위기이후',
    '위기급', '재난급', '급물살',
    'IMF이후', 'imf이후',
    'IMF급', 'imf급',
    '역대최고', '역대최다', '역대최소', '역대최저',
    '역대급',
    '떡상', '떡락',
    '호평일색',
    '헌정사상',
]
def normalize_expressions(text: str) -> str:
    t = text if isinstance(text, str) else ""
    for pat, rep in NORM_RULES:
        t = re.sub(pat, rep, t)
    return t

_score_map: Dict[str, int] = None
_unique_expr: List[str] = None
_lex_pats: List[re.Pattern] = None
_kiwi = None

def _load_label_score_map_from_dict(exag_dict: Dict[str, int]) -> Tuple[Dict[str,int], List[str]]:
    """ exag 딕셔너리에서 점수 맵/표현 리스트 생성 """
    score_map: Dict[str, int] = {}
    for k, v in (exag_dict or {}).items():
        key = re.sub(r"\s+", "", str(k)).strip()
        try:
            val = int(v)
        except Exception:
            val = 0
        if key:
            if key in score_map:
                score_map[key] = max(score_map[key], val)
            else:
                score_map[key] = val
    unique_expr = sorted(score_map.keys())
    return score_map, unique_expr

def _compile_patterns_from_list(regex_list: List[str]) -> List[re.Pattern]:
    """ econ_list 문자열 배열에서 정규식 패턴 컴파일 """
    pats: List[re.Pattern] = []
    for p in (regex_list or []):
        if not isinstance(p, str):
            continue
        pat = p.strip()
        if not pat:
            continue
        try:
            pats.append(re.compile(pat, re.I))
        except re.error:
            # 잘못된 패턴은 무시
            pass
    return pats

def _build_kiwi(unique_expr: List[str]):
    """ Kiwi > Okt > regex 순으로 형태소/토큰 추출기 준비 """
    # 1) kiwipiepy
    try:
        from kiwipiepy import Kiwi
        kiwi = Kiwi()
        for w in USER_TERMS:
            kiwi.add_user_word(w, 'NNG', 10)
        for w in unique_expr:
            if isinstance(w, str) and len(w) >= 2:
                kiwi.add_user_word(w, 'NNG', 9)
        return kiwi, "kiwi"
    except Exception:
        pass
    # 2) konlpy Okt
    try:
        from konlpy.tag import Okt
        _okt = Okt()
        def _okt_extract(text: str):
            norm = normalize_expressions(text)
            # 명사/동사만
            return [w for w, t in _okt.pos(norm, norm=True, stem=True) if t in ("Noun","Verb")]
        return _okt_extract, "okt"
    except Exception:
        # 3) 정규식 토큰 나누기
        def _regex_extract(text: str):
            norm = normalize_expressions(text)
            return re.findall(r"[가-힣A-Za-z0-9]+", norm)
        return _regex_extract, "regex"

def _ensure_resources():
    global _score_map, _unique_expr, _lex_pats, _kiwi
    try:
        exag_dict = exag
    except NameError:
        raise RuntimeError("exag 딕셔너리가 정의되어 있지 않습니다. exag = {'표현': 점수, ...} 형태로 먼저 정의하세요.")
    if _score_map is None or _unique_expr is None:
        _score_map, _unique_expr = _load_label_score_map_from_dict(exag_dict)
    if _lex_pats is None:
        try:
            econ = econ_list
        except NameError:
            econ = []
        _lex_pats = _compile_patterns_from_list(econ)
    if _kiwi is None:
        _kiwi, _ = _build_kiwi(_unique_expr)

# 형태소 추출(명사+동사)
def extract_noun_verb_kiwi(text: str) -> List[str]:
    _ensure_resources()
    norm = normalize_expressions(text)
    try:
        from kiwipiepy import Kiwi
        if isinstance(_kiwi, Kiwi):
            toks = []
            for tok in _kiwi.tokenize(norm):
                tag = tok.tag
                if tag.startswith("NN"):
                    toks.append(tok.form)
                elif tag == "VV":
                    toks.append(tok.lemma if tok.lemma else tok.form)
            return toks
    except Exception:
        pass
    return _kiwi(norm)

# 과장 라벨 점수 계산 및 가중치 산출
def _calc_raw_and_count(tokens: List[str]) -> Tuple[int, int]:
    _ensure_resources()
    if not isinstance(tokens, (list, tuple)):
        return 0, 0
    toks = [str(t).strip() for t in tokens if (t is not None) and str(t).strip() != ""]
    joined = "".join(toks)
    total_count, total_score = 0, 0
    for expr, sc in _score_map.items():
        c = joined.count(expr)  # non-overlapping
        if c:
            total_count += c
            total_score += c * int(sc)
    return int(total_score), int(total_count)

def _bin_label(total_raw: int) -> int:
    # (-inf,0] -> 0, [1,2] -> 1, [3,4] -> 2, [5, inf) -> 3
    if total_raw <= 0: return 0
    if 1 <= total_raw <= 2: return 1
    if 3 <= total_raw <= 4: return 2
    return 3

def _weight_by_count(n: int) -> float:
    if n == 1: return 1.0
    if n == 2: return 1.3
    if n == 3: return 1.5
    if n >= 4: return 1.7
    return 0.0

def _has_keyword_and_matches(text: str) -> Tuple[bool, List[str]]:
    _ensure_resources()
    t = text or ""
    seen, out = set(), []
    has_any = False
    for pat in _lex_pats:
        m = pat.search(t)
        if m:
            has_any = True
            s = m.group(0)
            if s not in seen:
                seen.add(s)
                out.append(s)
    return has_any, out

import math
def title_attention_index(score: float) -> str:
    if score is None or (isinstance(score, float) and math.isnan(score)):
        return "점수 없음. \n다시 제목과 본문을 입력해주세요"
    # 구간: [0,0.95) 양호, [0.95,2.25) 관심, [2.25,3.70) 주의, [3.70,5) 매우 주의
    if score < 0.95:
        return "양호✅ \n본문이 제목에 잘 반영되어 있는 양호한 기사로 그대로 읽기를 권장합니다."
    if score < 2.25:
        return "관심📌 \n경미한 과장 또는 제목-본문 간의 불일치가 있으나 경미한 수준입니다. \n제목 뿐만 아니라 본문 확인을 권장합니다."
    if score < 3.70:
        return "주의⚠️ \n제목에 과장표현의 빈도가 높거나 제목-본문 간의 불일치가 높아 본문을 꼼꼼히 살펴보길 권장합니다."
    return "매우 주의🚨 \n제목 내 심한 과장표현은 물론, 제목-본문 간의 불일치가 우려됩니다. \n보다 유의하여 기사의 본문을 살펴보시길 권장합니다."

# 메인 파이프라인
def run_once(title: str, body: str,
             short_pass_len: int = 50, max_new_tokens: int = 160):
    _ensure_resources()

    # 모델
    tok, bart, sbert = load_models()

    # 본문 전처리
    body_clean = preprocess_text(body)

    # 요약
    if len(body_clean) < short_pass_len:
        summ = body_clean
    else:
        try:
            @torch.inference_mode()
            def _summarize(tok, model, text, max_new_tokens=160):
                enc = tok(text, return_tensors="pt", truncation=True, max_length=1024, padding=False)
                out = model.generate(
                    input_ids=enc["input_ids"].to(DEVICE),
                    attention_mask=enc["attention_mask"].to(DEVICE),
                    max_new_tokens=max_new_tokens,
                    num_beams=4,
                    no_repeat_ngram_size=3,
                    length_penalty=1.0,
                    early_stopping=True,
                    use_cache=True
                )
                return tok.decode(out[0], skip_special_tokens=True)
            summ = _summarize(tok, bart, body_clean, max_new_tokens=max_new_tokens)
        except Exception as e:
            print(f"[WARN] summarization failed: {e}")
            summ = ""

    # 유사도
    try:
        if summ:
            tvec = sbert.encode(title, convert_to_tensor=True, normalize_embeddings=True)
            svec = sbert.encode(summ,  convert_to_tensor=True, normalize_embeddings=True)
            sim_sy = float(util.pytorch_cos_sim(tvec, svec).item())
        else:
            sim_sy = float("nan")
    except Exception as e:
        print(f"[WARN] title-summary sim failed: {e}")
        sim_sy = float("nan")

    try:
        sim_b5 = top5_title_body_sim(title, body_clean, sbert)
    except Exception as e:
        print(f"[WARN] title-body top5 sim failed: {e}")
        sim_b5 = float("nan")

    # 제목 형태소(명사/동사)
    try:
        title_nv = extract_noun_verb_kiwi(title)
    except Exception as e:
        print(f"[WARN] kiwi extract failed: {e}")
        title_nv = re.findall(r"[가-힣A-Za-z0-9]+", normalize_expressions(title or ""))

    # 라벨 원점수/등장횟수 → 라벨점수 → 가중치 최종점수
    raw_score, cnt = _calc_raw_and_count(title_nv)
    label_score = _bin_label(raw_score)              # 0/1/2/3
    weight = _weight_by_count(cnt)                   # 1.0/1.3/1.5/1.7
    label_final = float(label_score) * float(weight) # df['최종점수']

    # 본문 키워드 여부/매칭 → 1.15배
    has_kw, matches = _has_keyword_and_matches(body_clean)
    exag_score = label_final * (1.15 if has_kw else 1.0)   # df["과장점수"]

    # 불일치도 & log10
    summary_mismatch = (1 - sim_sy) if not np.isnan(sim_sy) else np.nan
    body_mismatch    = (1 - sim_b5) if not np.isnan(sim_b5) else np.nan
    exag_log10       = float(np.log10(exag_score + 1.0))

    # 최종 기사 점수
    if not (np.isnan(summary_mismatch) or np.isnan(body_mismatch)):
        final_article_score = round((exag_log10*0.5 + summary_mismatch*0.25 + body_mismatch*0.25) * 5, 2)
    else:
        final_article_score = np.nan

    return {
        "요약": summ,
        "요약유사도": sim_sy,
        "본문 일치도(Top5 평균)": sim_b5,
        "title_nv": title_nv, #형태소 분석된 제목 리스트
        "원점수": raw_score,  #과장 표현 원점수
        "등장횟수": cnt,
        "라벨점수": int(label_score),
        "가중치": float(weight),
        "라벨최종점수": float(label_final),
        "has_keyword": bool(has_kw), #가중 키워드 본문 포함 여부
        "matches": matches, # 가중 키워드로 선정된 키워드 리스트
        "과장점수": float(exag_score),
        "과장점수_log10": exag_log10, #과장 최종 점수
        "요약 불일치도": summary_mismatch,
        "본문 불일치도": body_mismatch,
        "최종 기사 점수": final_article_score
    }


def run_cli():
    print("제목을 입력하세요:")
    title = input().strip()
    print("본문을 입력하세요:")
    body = input().strip()
    r = run_once(title, body)
    print("\n===== 결과 =====")
    # print("본문 요약:\n", r["요약"])
    print("제목과 본문 요약 유사도:", round(r["요약유사도"], 4))
    print("제목과 본문 일치도(Top5 평균):", round(r["본문 일치도(Top5 평균)"], 4))
    print("과장점수(log화):", round(r["과장점수_log10"], 4))
    print("\n최종 제목 주의 점수는", r["최종 기사 점수"], "입니다")

def run_ui():
    import gradio as gr
    # ... (기존 내용 동일)

    def predict(title, body):
        r = run_once(title, body)
        final_score = r["최종 기사 점수"]
        grade = title_attention_index(final_score)
        return (
            r["요약유사도"],
            r["본문 일치도(Top5 평균)"],
            r["과장점수"],
            final_score,
            grade,
        )

    demo = gr.Interface(
        fn=predict,
        inputs=[
            gr.Textbox(label="제목", lines=2),
            gr.Textbox(label="본문", lines=18, placeholder="여기에 기사 본문을 붙여넣으세요"),
        ],
        outputs=[
            # gr.Textbox(label="요약", lines=10),
            gr.Number(label="요약유사도"),
            gr.Number(label="본문 일치도(Top5 평균)"),
            gr.Number(label="과장점수"),
            gr.Number(label="최종 기사 점수"),
            gr.Textbox(label="제목 주의 지수", interactive=False),
        ],
        title="제목 주의 지수",
        description=(
                "제목/본문을 입력하면 제목-본문 유사도, 과장 점수를 바탕으로 '제목 주의 지수'를 계산합니다.\n\n"
                "ℹ️ **자세한 설명이 궁금하다면 [여기를 클릭하세요](https://www.notion.so/25cb058cee088026badfcab340e9966d?source=copy_link)**"
            ),
    )

    demo.launch(server_name="0.0.0.0", server_port=7861, share=True)



if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ui", action="store_true", help="Gradio UI 실행")
    args, _ = parser.parse_known_args()
    if args.ui:
        run_ui()
    else:
        run_cli()