# CodeMap Configuration File # ------------------------- # This file configures CodeMap's behavior. Uncomment and modify settings as needed. # LLM Configuration - Controls which model is used for AI operations llm: # Format: "provider:model-name", e.g., "openai:gpt-4o", "anthropic:claude-3-opus" model: "google-gla:gemini-2.0-flash-lite" temperature: 0.5 # Lower for more deterministic outputs, higher for creativity max_input_tokens: 1000000 # Maximum tokens in input max_output_tokens: 10000 # Maximum tokens in responses max_requests: 25 # Maximum number of requests # Embedding Configuration - Controls vector embedding behavior embedding: # Recommended models: "minishlab/potion-base-8M3", Only Model2Vec static models are supported model_name: "minishlab/potion-base-8M" dimension: 256 # dimension_metric: "cosine" # Metric for dimension calculation (e.g., "cosine", "euclidean") # max_retries: 3 # Maximum retries for embedding requests # retry_delay: 5 # Delay in seconds between retries # max_content_length: 5000 # Maximum characters per file chunk # Qdrant (Vector DB) settings # qdrant_batch_size: 100 # Batch size for Qdrant uploads # url: "http://localhost:6333" # Qdrant server URL # timeout: 30 # Qdrant client timeout in seconds # prefer_grpc: true # Prefer gRPC for Qdrant communication # Advanced chunking settings - controls how code is split # chunking: # max_hierarchy_depth: 2 # Maximum depth of code hierarchy to consider # max_file_lines: 1000 # Maximum lines per file before splitting # Clustering settings for embeddings # clustering: # method: "agglomerative" # Clustering method: "agglomerative", "dbscan" # agglomerative: # Settings for Agglomerative Clustering # metric: "precomputed" # Metric: "cosine", "euclidean", "manhattan", "l1", "l2", "precomputed" # distance_threshold: 0.3 # Distance threshold for forming clusters # linkage: "complete" # Linkage criterion: "ward", "complete", "average", "single" # dbscan: # Settings for DBSCAN Clustering # eps: 0.3 # The maximum distance between two samples for one to be considered as in the neighborhood of the other # min_samples: 2 # The number of samples in a neighborhood for a point to be considered as a core point # algorithm: "auto" # Algorithm to compute pointwise distances: "auto", "ball_tree", "kd_tree", "brute" # metric: "precomputed" # Metric for distance computation: "cityblock", "cosine", "euclidean", "l1", "l2", "manhattan", "precomputed" # RAG (Retrieval Augmented Generation) Configuration rag: max_context_length: 8000 # Maximum context length for the LLM max_context_results: 100 # Maximum number of context results to return similarity_threshold: 0.75 # Minimum similarity score (0-1) for relevance # system_prompt: null # Optional system prompt to guide the RAG model (leave commented or set if needed) include_file_content: true # Include file content in context include_metadata: true # Include file metadata in context # Sync Configuration - Controls which files are excluded from processing sync: exclude_patterns: - "^node_modules/" - "^\\.venv/" - "^venv/" - "^env/" - "^__pycache__/" - "^\\.mypy_cache/" - "^\\.pytest_cache/" - "^\\.ruff_cache/" - "^dist/" - "^build/" - "^\\.git/" - "^typings/" - "^\\.pyc$" - "^\\.pyo$" - "^\\.so$" - "^\\.dll$" - "^\\.lib$" - "^\\.a$" - "^\\.o$" - "^\\.class$" - "^\\.jar$" # Generation Configuration - Controls documentation generation gen: max_content_length: 5000 # Maximum content length per file for generation use_gitignore: true # Use .gitignore patterns to exclude files output_dir: "documentation" # Directory to store generated documentation include_tree: true # Include directory tree in output include_entity_graph: true # Include entity relationship graph semantic_analysis: true # Enable semantic analysis lod_level: "skeleton" # Level of detail: "signatures", "structure", "docs", "skeleton", "full" # Mermaid diagram configuration for entity graphs # mermaid_entities: # - "module" # - "class" # - "function" # - "method" # - "constant" # - "variable" # - "import" # mermaid_relationships: # - "declares" # - "imports" # - "calls" mermaid_show_legend: false mermaid_remove_unconnected: true # Show isolated nodes mermaid_styled: false # Style the mermaid diagram # Processor Configuration - Controls code processing behavior processor: enabled: true # Enable the processor max_workers: 4 # Maximum number of parallel workers ignored_patterns: # Patterns to ignore during processing - "**/.git/**" - "**/__pycache__/**" - "**/.venv/**" - "**/node_modules/**" - "**/*.pyc" - "**/dist/**" - "**/build/**" default_lod_level: "signatures" # Default level of detail: "signatures", "structure", "docs", "full" # File watcher configuration # watcher: # enabled: true # Enable file watching # debounce_delay: 1.0 # Delay in seconds before processing changes # Commit Command Configuration commit: strategy: "semantic" # Strategy for splitting diffs: "file", "hunk", "semantic" bypass_hooks: false # Whether to bypass git hooks use_lod_context: true # Use level of detail context is_non_interactive: false # Run in non-interactive mode # Diff splitter configuration # diff_splitter: # similarity_threshold: 0.6 # Similarity threshold for grouping related changes # directory_similarity_threshold: 0.3 # Threshold for considering directories similar (e.g., for renames) # file_move_similarity_threshold: 0.85 # Threshold for detecting file moves/renames based on content # min_chunks_for_consolidation: 2 # Minimum number of small chunks to consider for consolidation # max_chunks_before_consolidation: 20 # Maximum number of chunks before forcing consolidation # max_file_size_for_llm: 50000 # Maximum file size (bytes) for LLM processing of individual files # max_log_diff_size: 1000 # Maximum size (lines) of diff log to pass to LLM for context # default_code_extensions: # File extensions considered as code for semantic splitting # - "js" # - "jsx" # - "ts" # - "tsx" # - "py" # - "java" # - "c" # - "cpp" # - "h" # - "hpp" # - "cc" # - "cs" # - "go" # - "rb" # - "php" # - "rs" # - "swift" # - "scala" # - "kt" # - "sh" # - "pl" # - "pm" # Commit convention configuration (Conventional Commits) convention: types: # Allowed commit types - "feat" - "fix" - "docs" - "style" - "refactor" - "perf" - "test" - "build" - "ci" - "chore" scopes: [] # Add project-specific scopes here, e.g., ["api", "ui", "db"] max_length: 72 # Maximum length of commit message header # Commit linting configuration (based on conventional-changelog-lint rules) # lint: # # Rules are defined as: {level: "ERROR"|"WARNING"|"DISABLED", rule: "always"|"never", value: } # header_max_length: # level: "ERROR" # rule: "always" # value: 100 # header_case: # e.g., 'lower-case', 'upper-case', 'camel-case', etc. # level: "DISABLED" # rule: "always" # value: "lower-case" # header_full_stop: # level: "ERROR" # rule: "never" # value: "." # type_enum: # Types must be from the 'convention.types' list # level: "ERROR" # rule: "always" # type_case: # level: "ERROR" # rule: "always" # value: "lower-case" # type_empty: # level: "ERROR" # rule: "never" # scope_case: # level: "ERROR" # rule: "always" # value: "lower-case" # scope_empty: # Set to "ERROR" if scopes are mandatory # level: "DISABLED" # rule: "never" # scope_enum: # Scopes must be from the 'convention.scopes' list if enabled # level: "DISABLED" # rule: "always" # # value: [] # Add allowed scopes here if rule is "always" and level is not DISABLED # subject_case: # Forbids specific cases in the subject # level: "ERROR" # rule: "never" # value: ["sentence-case", "start-case", "pascal-case", "upper-case"] # subject_empty: # level: "ERROR" # rule: "never" # subject_full_stop: # level: "ERROR" # rule: "never" # value: "." # subject_exclamation_mark: # level: "DISABLED" # rule: "never" # body_leading_blank: # Body must start with a blank line after subject # level: "WARNING" # rule: "always" # body_empty: # level: "DISABLED" # rule: "never" # body_max_line_length: # level: "ERROR" # rule: "always" # value: 100 # footer_leading_blank: # Footer must start with a blank line after body # level: "WARNING" # rule: "always" # footer_empty: # level: "DISABLED" # rule: "never" # footer_max_line_length: # level: "ERROR" # rule: "always" # value: 100 # Pull Request Configuration pr: defaults: base_branch: null # Default base branch (null = auto-detect, e.g., main, master, develop) feature_prefix: "feature/" # Default feature branch prefix strategy: "github-flow" # Git workflow: "github-flow", "gitflow", "trunk-based" # Branch mapping for different PR types (primarily used in gitflow strategy) # branch_mapping: # feature: # base: "develop" # prefix: "feature/" # release: # base: "main" # prefix: "release/" # hotfix: # base: "main" # prefix: "hotfix/" # bugfix: # base: "develop" # prefix: "bugfix/" # PR generation configuration generate: title_strategy: "llm" # Strategy for generating PR titles: "commits" (from commit messages), "llm" (AI generated) description_strategy: "llm" # Strategy for descriptions: "commits", "llm" # description_template: | # Template for PR description when using 'llm' strategy. Placeholders: {changes}, {testing_instructions}, {screenshots} # ## Changes # {changes} # # ## Testing # {testing_instructions} # # ## Screenshots # {screenshots} use_workflow_templates: true # Use workflow-specific templates if available (e.g., for GitHub PR templates) # Ask Command Configuration ask: interactive_chat: false # Enable interactive chat mode for the 'ask' command