My Duong committed on
Commit
a3507d8
·
1 Parent(s): ead6406

embedding ready

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -19,7 +19,6 @@ from huggingface_hub import HfApi
19
  hf_token = os.getenv("HF_TOKEN")
20
  if hf_token is None:
21
  raise ValueError("HF_TOKEN not in the .env file")
22
- print("Loaded HF_TOKEN:", hf_token[:5] + "..." if hf_token else "None")
23
 
24
  # Wrapper for embedding
25
  class SentenceTransformerWrapper:
@@ -72,7 +71,7 @@ def process_html_files(directory, file_pattern="full_*.html"):
72
  documents, metadata = [], []
73
 
74
  html_files = list(directory.glob(file_pattern))
75
- for file_path in tqdm(html_files[:1], desc="Loading and cleaning documents"):
76
  text = load_and_clean_html(file_path)
77
  documents.append(text)
78
  metadata.append({"file_path": str(file_path)})
 
19
  hf_token = os.getenv("HF_TOKEN")
20
  if hf_token is None:
21
  raise ValueError("HF_TOKEN not in the .env file")
 
22
 
23
  # Wrapper for embedding
24
  class SentenceTransformerWrapper:
 
71
  documents, metadata = [], []
72
 
73
  html_files = list(directory.glob(file_pattern))
74
+ for file_path in tqdm(html_files, desc="Loading and cleaning documents"):
75
  text = load_and_clean_html(file_path)
76
  documents.append(text)
77
  metadata.append({"file_path": str(file_path)})