Add LLM Developer source to project
Browse files
data/scraping_scripts/add_context_to_nodes.py
CHANGED
|
@@ -162,13 +162,6 @@ async def process(
|
|
| 162 |
|
| 163 |
results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
|
| 164 |
|
| 165 |
-
# results: List[TextNode] = []
|
| 166 |
-
# # Add tqdm progress bar with semaphore limit
|
| 167 |
-
# for task in tqdm(
|
| 168 |
-
# asyncio.as_completed(tasks), total=len(tasks), desc="Processing chunks"
|
| 169 |
-
# ):
|
| 170 |
-
# result = await task
|
| 171 |
-
# results.append(result)
|
| 172 |
# pdb.set_trace()
|
| 173 |
|
| 174 |
return results
|
|
|
|
| 162 |
|
| 163 |
results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
# pdb.set_trace()
|
| 166 |
|
| 167 |
return results
|
data/scraping_scripts/process_md_files.py
CHANGED
|
@@ -416,6 +416,18 @@ SOURCE_CONFIGS = {
|
|
| 416 |
"included_root_files": [],
|
| 417 |
"url_extension": "",
|
| 418 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
}
|
| 420 |
|
| 421 |
|
|
|
|
| 416 |
"included_root_files": [],
|
| 417 |
"url_extension": "",
|
| 418 |
},
|
| 419 |
+
"llm_developer": {
|
| 420 |
+
"base_url": "",
|
| 421 |
+
"input_directory": "data/llm_developer",
|
| 422 |
+
"output_file": "data/llm_developer_data.jsonl", # From Beginner to Advanced LLM Developer
|
| 423 |
+
"source_name": "llm_developer",
|
| 424 |
+
"use_include_list": False,
|
| 425 |
+
"included_dirs": [],
|
| 426 |
+
"excluded_dirs": [],
|
| 427 |
+
"excluded_root_files": [],
|
| 428 |
+
"included_root_files": [],
|
| 429 |
+
"url_extension": "",
|
| 430 |
+
},
|
| 431 |
}
|
| 432 |
|
| 433 |
|
scripts/main.py
CHANGED
|
@@ -129,6 +129,7 @@ def generate_completion(
|
|
| 129 |
"OpenAI Cookbooks": "openai_cookbooks",
|
| 130 |
"Towards AI Blog": "tai_blog",
|
| 131 |
"8 Hour Primer": "8-hour_primer",
|
|
|
|
| 132 |
}
|
| 133 |
|
| 134 |
for source in sources:
|
|
@@ -245,6 +246,7 @@ sources = gr.CheckboxGroup(
|
|
| 245 |
"OpenAI Cookbooks",
|
| 246 |
"Towards AI Blog",
|
| 247 |
"8 Hour Primer",
|
|
|
|
| 248 |
# "All Sources",
|
| 249 |
],
|
| 250 |
interactive=True,
|
|
|
|
| 129 |
"OpenAI Cookbooks": "openai_cookbooks",
|
| 130 |
"Towards AI Blog": "tai_blog",
|
| 131 |
"8 Hour Primer": "8-hour_primer",
|
| 132 |
+
"Advanced LLM Developer": "llm_developer",
|
| 133 |
}
|
| 134 |
|
| 135 |
for source in sources:
|
|
|
|
| 246 |
"OpenAI Cookbooks",
|
| 247 |
"Towards AI Blog",
|
| 248 |
"8 Hour Primer",
|
| 249 |
+
"Advanced LLM Developer",
|
| 250 |
# "All Sources",
|
| 251 |
],
|
| 252 |
interactive=True,
|
scripts/setup.py
CHANGED
|
@@ -178,6 +178,7 @@ AVAILABLE_SOURCES_UI = [
|
|
| 178 |
"OpenAI Cookbooks",
|
| 179 |
"Towards AI Blog",
|
| 180 |
"8 Hour Primer",
|
|
|
|
| 181 |
# "All Sources",
|
| 182 |
]
|
| 183 |
|
|
@@ -190,6 +191,7 @@ AVAILABLE_SOURCES = [
|
|
| 190 |
"openai_cookbooks",
|
| 191 |
"tai_blog",
|
| 192 |
"8-hour_primer",
|
|
|
|
| 193 |
# "all_sources",
|
| 194 |
]
|
| 195 |
|
|
|
|
| 178 |
"OpenAI Cookbooks",
|
| 179 |
"Towards AI Blog",
|
| 180 |
"8 Hour Primer",
|
| 181 |
+
"Advanced LLM Developer",
|
| 182 |
# "All Sources",
|
| 183 |
]
|
| 184 |
|
|
|
|
| 191 |
"openai_cookbooks",
|
| 192 |
"tai_blog",
|
| 193 |
"8-hour_primer",
|
| 194 |
+
"llm_developer",
|
| 195 |
# "all_sources",
|
| 196 |
]
|
| 197 |
|