# Spaces: Runtime error
processor:
  verbose: True
  output_dir: './output'
  num_processes: 2
  reprocess: False
sources:
  local:
    recursive: True
  confluence:
    api_token: 'your_confluence_api_token'
    user_email: 'your_email@example.com'
    url: 'https://your-confluence-url.atlassian.net'
  github:
    url: 'owner/repo'
    branch: 'main'
  google_drive:
    service_account_key: 'path/to/service_account_key.json'
    recursive: True
    drive_id: 'your_drive_id'
partitioning:
  skip_infer_table_types: []
  strategy: 'auto'
  hi_res_model_name: 'yolox'
  ocr_languages: ['eng']
  encoding: 'utf-8'
  fields_include: ['element_id', 'text', 'type', 'metadata', 'embeddings']
  flatten_metadata: False
  metadata_exclude: []
  metadata_include: []
  partition_endpoint: 'http://localhost'
  unstructured_port: 8005
  partition_by_api: False # set as true if using API server
  default_unstructured_api_key: 123456789abcde
chunking:
  enabled: True
  strategy: 'by_title'
  chunk_max_characters: 1500
  chunk_overlap: 300
  combine_under_n_chars: 1500
embedding:
  enabled: False
  provider: 'langchain-huggingface'
  model_name: 'intfloat/e5-large-v2'
destination_connectors:
  enabled: False
  type: 'chroma'
  batch_size: 80
  chroma:
    host: 'localhost'
    port: 8004
    collection_name: 'snconf'
    tenant: 'default_tenant'
    database: 'default_database'
  qdrant:
    location: 'http://localhost:6333'
    collection_name: 'test'
additional_processing:
  enabled: True
  extend_metadata: True
  replace_table_text: True
  table_text_key: 'text_as_html'
  return_langchain_docs: True
  convert_metadata_keys_to_string: True