Spaces:
Running
Running
My Duong
committed on
Commit
·
a3507d8
1
Parent(s):
ead6406
embedding ready
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@ from huggingface_hub import HfApi
|
|
19 |
hf_token = os.getenv("HF_TOKEN")
|
20 |
if hf_token is None:
|
21 |
raise ValueError("HF_TOKEN not in the .env file")
|
22 |
-
print("Loaded HF_TOKEN:", hf_token[:5] + "..." if hf_token else "None")
|
23 |
|
24 |
# Wrapper for embedding
|
25 |
class SentenceTransformerWrapper:
|
@@ -72,7 +71,7 @@ def process_html_files(directory, file_pattern="full_*.html"):
|
|
72 |
documents, metadata = [], []
|
73 |
|
74 |
html_files = list(directory.glob(file_pattern))
|
75 |
-
for file_path in tqdm(html_files
|
76 |
text = load_and_clean_html(file_path)
|
77 |
documents.append(text)
|
78 |
metadata.append({"file_path": str(file_path)})
|
|
|
19 |
hf_token = os.getenv("HF_TOKEN")
|
20 |
if hf_token is None:
|
21 |
raise ValueError("HF_TOKEN not in the .env file")
|
|
|
22 |
|
23 |
# Wrapper for embedding
|
24 |
class SentenceTransformerWrapper:
|
|
|
71 |
documents, metadata = [], []
|
72 |
|
73 |
html_files = list(directory.glob(file_pattern))
|
74 |
+
for file_path in tqdm(html_files, desc="Loading and cleaning documents"):
|
75 |
text = load_and_clean_html(file_path)
|
76 |
documents.append(text)
|
77 |
metadata.append({"file_path": str(file_path)})
|