Quintino Fernandes committed on
Commit
33ef834
·
1 Parent(s): 379003a

Simple bug fix and more commenting

Browse files
Files changed (4) hide show
  1. README.md +6 -3
  2. app.py +1 -1
  3. database/query.py +13 -6
  4. models/summarization.py +2 -0
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Test
3
- emoji: 📈
4
  colorFrom: red
5
  colorTo: indigo
6
  sdk: docker
7
- pinned: false
 
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: KairosNews
3
+ emoji: 📲📰
4
  colorFrom: red
5
  colorTo: indigo
6
  sdk: docker
7
+ pinned: true
8
+ preload_from_hub:
9
+ - unicamp-dl/ptt5-base-portuguese-vocab
10
+ - recogna-nlp/ptt5-base-summ
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -11,7 +11,7 @@ from models.embedding import EmbeddingModel
11
  from models.summarization import SummarizationModel
12
  from models.nlp import NLPModel
13
  from database.query import DatabaseService
14
- from KairosNews.main import QueryProcessor
15
 
16
  # Configure logging
17
  logging.basicConfig(
 
11
  from models.summarization import SummarizationModel
12
  from models.nlp import NLPModel
13
  from database.query import DatabaseService
14
+ from main import QueryProcessor
15
 
16
  # Configure logging
17
  logging.basicConfig(
database/query.py CHANGED
@@ -6,7 +6,7 @@ from psycopg2 import sql
6
 
7
  class DatabaseService:
8
  def __init__(self):
9
- # Connection parameters
10
  self.DB_HOST = os.getenv("SUPABASE_HOST", "aws-0-eu-west-3.pooler.supabase.com")
11
  self.DB_PORT = os.getenv("DB_PORT", "6543")
12
  self.DB_NAME = os.getenv("DB_NAME", "postgres")
@@ -14,6 +14,7 @@ class DatabaseService:
14
  self.DB_PASSWORD = os.getenv("DB_PASSWORD")
15
 
16
  async def semantic_search(
 
17
  self,
18
  query_embedding: List[float],
19
  start_date: Optional[datetime] = None,
@@ -23,7 +24,9 @@ class DatabaseService:
23
  limit: int = 10
24
  ) -> List[Dict[str, any]]:
25
 
 
26
  print(f"Extracted entities2: {entities}")
 
27
  try:
28
  with psycopg2.connect(
29
  user=self.DB_USER,
@@ -33,7 +36,7 @@ class DatabaseService:
33
  dbname=self.DB_NAME
34
  ) as conn:
35
  with conn.cursor() as cursor:
36
- # Base query with date range and topic filters
37
  base_query = sql.SQL('''
38
  WITH filtered_articles AS (
39
  SELECT article_id
@@ -74,7 +77,8 @@ class DatabaseService:
74
  sql.Literal(e[1]) # Original entity label (case-sensitive)
75
  ) for e in entities
76
  )
77
-
 
78
  final_query = sql.SQL('''
79
  {base_query},
80
  target_articles AS (
@@ -99,7 +103,10 @@ class DatabaseService:
99
  embedding=sql.Literal(query_embedding),
100
  limit=sql.Literal(limit)
101
  )
 
 
102
  else:
 
103
  final_query = sql.SQL('''
104
  {base_query}
105
  SELECT
@@ -121,9 +128,9 @@ class DatabaseService:
121
  cursor.execute(final_query)
122
  articles = cursor.fetchall()
123
 
124
- # Fallback: Retry with fewer filters if no results
125
  if not articles:
126
- print("No articles found with entities...")
127
  fallback_query = sql.SQL('''
128
  SELECT
129
  content,
@@ -160,5 +167,5 @@ class DatabaseService:
160
  return []
161
 
162
  async def close(self):
163
- # No persistent connection to close in psycopg2
164
  pass
 
6
 
7
  class DatabaseService:
8
  def __init__(self):
9
+ # Supabase Connection Parameters
10
  self.DB_HOST = os.getenv("SUPABASE_HOST", "aws-0-eu-west-3.pooler.supabase.com")
11
  self.DB_PORT = os.getenv("DB_PORT", "6543")
12
  self.DB_NAME = os.getenv("DB_NAME", "postgres")
 
14
  self.DB_PASSWORD = os.getenv("DB_PASSWORD")
15
 
16
  async def semantic_search(
17
+ #Query parameters
18
  self,
19
  query_embedding: List[float],
20
  start_date: Optional[datetime] = None,
 
24
  limit: int = 10
25
  ) -> List[Dict[str, any]]:
26
 
27
+ # Entity log Checking
28
  print(f"Extracted entities2: {entities}")
29
+
30
  try:
31
  with psycopg2.connect(
32
  user=self.DB_USER,
 
36
  dbname=self.DB_NAME
37
  ) as conn:
38
  with conn.cursor() as cursor:
39
+ # Base query
40
  base_query = sql.SQL('''
41
  WITH filtered_articles AS (
42
  SELECT article_id
 
77
  sql.Literal(e[1]) # Original entity label (case-sensitive)
78
  ) for e in entities
79
  )
80
+
81
+ # Final query with entity conditions and all filters
82
  final_query = sql.SQL('''
83
  {base_query},
84
  target_articles AS (
 
103
  embedding=sql.Literal(query_embedding),
104
  limit=sql.Literal(limit)
105
  )
106
+
107
+ # Final query with all filters but no entities
108
  else:
109
+ print("No articles found with entities...")
110
  final_query = sql.SQL('''
111
  {base_query}
112
  SELECT
 
128
  cursor.execute(final_query)
129
  articles = cursor.fetchall()
130
 
131
+ # Fallback: Retry with no filters if no results, only semantic search
132
  if not articles:
133
+ print("No articles found with the filters applied. Trying fallback query...")
134
  fallback_query = sql.SQL('''
135
  SELECT
136
  content,
 
167
  return []
168
 
169
  async def close(self):
170
+ # No explicit close needed with context manager
171
  pass
models/summarization.py CHANGED
@@ -8,6 +8,8 @@ class SummarizationModel:
8
  self.model = T5ForConditionalGeneration.from_pretrained('recogna-nlp/ptt5-base-summ').to(self.device)
9
 
10
  def summarize(self, text: str, max_length: int = 256, min_length: int = 128) -> str:
 
 
11
  inputs = self.tokenizer.encode(
12
  text,
13
  max_length=512,
 
8
  self.model = T5ForConditionalGeneration.from_pretrained('recogna-nlp/ptt5-base-summ').to(self.device)
9
 
10
  def summarize(self, text: str, max_length: int = 256, min_length: int = 128) -> str:
11
+ """Summarize the input text using T5 model"""
12
+ # Model and tokenization parameters
13
  inputs = self.tokenizer.encode(
14
  text,
15
  max_length=512,