codemalt / .codemap.yml
Sarthak
chore: update dependencies and configuration for improved training
7837959
# CodeMap Configuration File
# -------------------------
# This file configures CodeMap's behavior. Uncomment and modify settings as needed.
# LLM settings: select the model used for AI operations and bound its usage.
llm:
  # Model identifier written as "provider:model-name",
  # e.g. "openai:gpt-4o" or "anthropic:claude-3-opus".
  model: "google-gla:gemini-2.0-flash-lite"
  temperature: 0.5  # Lower values give more deterministic output, higher values more creativity
  max_input_tokens: 1000000  # Upper bound on tokens in the input
  max_output_tokens: 10000  # Upper bound on tokens in a response
  max_requests: 25  # Upper bound on the number of requests
# Embedding settings: control how vector embeddings are produced.
embedding:
  # Recommended model: "minishlab/potion-base-8M".
  # Only Model2Vec static models are supported.
  model_name: "minishlab/potion-base-8M"
  dimension: 256
  # dimension_metric: "cosine"  # Metric for dimension calculation (e.g., "cosine", "euclidean")
  # max_retries: 3  # Maximum retries for embedding requests
  # retry_delay: 5  # Delay in seconds between retries
  # max_content_length: 5000  # Maximum characters per file chunk

  # Qdrant (vector DB) settings
  # qdrant_batch_size: 100  # Batch size for Qdrant uploads
  # url: "http://localhost:6333"  # Qdrant server URL
  # timeout: 30  # Qdrant client timeout in seconds
  # prefer_grpc: true  # Prefer gRPC for Qdrant communication

  # Advanced chunking settings: control how code is split
  # chunking:
  #   max_hierarchy_depth: 2  # Maximum depth of code hierarchy to consider
  #   max_file_lines: 1000  # Maximum lines per file before splitting

  # Clustering settings for embeddings
  # clustering:
  #   method: "agglomerative"  # Clustering method: "agglomerative", "dbscan"
  #   agglomerative:  # Settings for agglomerative clustering
  #     metric: "precomputed"  # Metric: "cosine", "euclidean", "manhattan", "l1", "l2", "precomputed"
  #     distance_threshold: 0.3  # Distance threshold for forming clusters
  #     linkage: "complete"  # Linkage criterion: "ward", "complete", "average", "single"
  #   dbscan:  # Settings for DBSCAN clustering
  #     eps: 0.3  # The maximum distance between two samples for one to be considered as in the neighborhood of the other
  #     min_samples: 2  # The number of samples in a neighborhood for a point to be considered as a core point
  #     algorithm: "auto"  # Algorithm to compute pointwise distances: "auto", "ball_tree", "kd_tree", "brute"
  #     metric: "precomputed"  # Metric for distance computation: "cityblock", "cosine", "euclidean", "l1", "l2", "manhattan", "precomputed"
# RAG (Retrieval-Augmented Generation) settings
rag:
  max_context_length: 8000  # Maximum context length for the LLM
  max_context_results: 100  # Maximum number of context results to return
  similarity_threshold: 0.75  # Minimum similarity score (0-1) for a result to count as relevant
  # system_prompt: null  # Optional system prompt to guide the RAG model (leave commented, or set if needed)
  include_file_content: true  # Include file content in the context
  include_metadata: true  # Include file metadata in the context
# Sync settings: regular expressions for paths that are excluded from processing.
# NOTE(review): assumes the patterns are applied as regex searches against
# repository-relative paths - confirm against the consumer.
sync:
  exclude_patterns:
    # Directories: anchored at the start, so the pattern matches paths that
    # begin with the directory name.
    - "^node_modules/"
    - "^\\.venv/"
    - "^venv/"
    - "^env/"
    - "^__pycache__/"
    - "^\\.mypy_cache/"
    - "^\\.pytest_cache/"
    - "^\\.ruff_cache/"
    - "^dist/"
    - "^build/"
    - "^\\.git/"
    - "^typings/"
    # File extensions: anchored only at the end of the path. With a leading
    # "^" as well, a pattern such as "^\\.pyc$" matches only a path that is
    # exactly ".pyc", so ordinary files like "pkg/module.pyc" are never
    # excluded.
    - "\\.pyc$"
    - "\\.pyo$"
    - "\\.so$"
    - "\\.dll$"
    - "\\.lib$"
    - "\\.a$"
    - "\\.o$"
    - "\\.class$"
    - "\\.jar$"
# Generation settings: control documentation generation.
gen:
  max_content_length: 5000  # Maximum content length per file for generation
  use_gitignore: true  # Use .gitignore patterns to exclude files
  output_dir: "documentation"  # Directory where generated documentation is stored
  include_tree: true  # Include the directory tree in the output
  include_entity_graph: true  # Include the entity relationship graph
  semantic_analysis: true  # Enable semantic analysis
  lod_level: "skeleton"  # Level of detail: "signatures", "structure", "docs", "skeleton", "full"

  # Mermaid diagram configuration for entity graphs
  # mermaid_entities:
  #   - "module"
  #   - "class"
  #   - "function"
  #   - "method"
  #   - "constant"
  #   - "variable"
  #   - "import"
  # mermaid_relationships:
  #   - "declares"
  #   - "imports"
  #   - "calls"
  mermaid_show_legend: false
  mermaid_remove_unconnected: true  # Remove isolated (unconnected) nodes; presumably false shows them - confirm
  mermaid_styled: false  # Style the mermaid diagram
# Processor settings: control code processing behavior.
processor:
  enabled: true  # Turn the processor on
  max_workers: 4  # Upper bound on parallel workers
  ignored_patterns:  # Patterns skipped during processing
    - "**/.git/**"
    - "**/__pycache__/**"
    - "**/.venv/**"
    - "**/node_modules/**"
    - "**/*.pyc"
    - "**/dist/**"
    - "**/build/**"
  default_lod_level: "signatures"  # Default level of detail: "signatures", "structure", "docs", "full"

  # File watcher configuration
  # watcher:
  #   enabled: true  # Enable file watching
  #   debounce_delay: 1.0  # Delay in seconds before processing changes
# Commit command settings
commit:
  strategy: "semantic"  # How diffs are split: "file", "hunk", "semantic"
  bypass_hooks: false  # Whether to bypass git hooks
  use_lod_context: true  # Use level-of-detail context
  is_non_interactive: false  # Run in non-interactive mode

  # Diff splitter configuration
  # diff_splitter:
  #   similarity_threshold: 0.6  # Similarity threshold for grouping related changes
  #   directory_similarity_threshold: 0.3  # Threshold for considering directories similar (e.g., for renames)
  #   file_move_similarity_threshold: 0.85  # Threshold for detecting file moves/renames based on content
  #   min_chunks_for_consolidation: 2  # Minimum number of small chunks to consider for consolidation
  #   max_chunks_before_consolidation: 20  # Maximum number of chunks before forcing consolidation
  #   max_file_size_for_llm: 50000  # Maximum file size (bytes) for LLM processing of individual files
  #   max_log_diff_size: 1000  # Maximum size (lines) of diff log to pass to LLM for context
  #   default_code_extensions:  # File extensions considered as code for semantic splitting
  #     - "js"
  #     - "jsx"
  #     - "ts"
  #     - "tsx"
  #     - "py"
  #     - "java"
  #     - "c"
  #     - "cpp"
  #     - "h"
  #     - "hpp"
  #     - "cc"
  #     - "cs"
  #     - "go"
  #     - "rb"
  #     - "php"
  #     - "rs"
  #     - "swift"
  #     - "scala"
  #     - "kt"
  #     - "sh"
  #     - "pl"
  #     - "pm"

  # Commit convention configuration (Conventional Commits)
  convention:
    types:  # Allowed commit types
      - "feat"
      - "fix"
      - "docs"
      - "style"
      - "refactor"
      - "perf"
      - "test"
      - "build"
      - "ci"
      - "chore"
    scopes: []  # Add project-specific scopes here, e.g., ["api", "ui", "db"]
    max_length: 72  # Maximum length of the commit message header

  # Commit linting configuration (based on conventional-changelog-lint rules)
  # lint:
  #   # Rules are defined as: {level: "ERROR"|"WARNING"|"DISABLED", rule: "always"|"never", value: <specific_value_if_any>}
  #   header_max_length:
  #     level: "ERROR"
  #     rule: "always"
  #     value: 100
  #   header_case:  # e.g., 'lower-case', 'upper-case', 'camel-case', etc.
  #     level: "DISABLED"
  #     rule: "always"
  #     value: "lower-case"
  #   header_full_stop:
  #     level: "ERROR"
  #     rule: "never"
  #     value: "."
  #   type_enum:  # Types must be from the 'convention.types' list
  #     level: "ERROR"
  #     rule: "always"
  #   type_case:
  #     level: "ERROR"
  #     rule: "always"
  #     value: "lower-case"
  #   type_empty:
  #     level: "ERROR"
  #     rule: "never"
  #   scope_case:
  #     level: "ERROR"
  #     rule: "always"
  #     value: "lower-case"
  #   scope_empty:  # Set to "ERROR" if scopes are mandatory
  #     level: "DISABLED"
  #     rule: "never"
  #   scope_enum:  # Scopes must be from the 'convention.scopes' list if enabled
  #     level: "DISABLED"
  #     rule: "always"
  #     # value: []  # Add allowed scopes here if rule is "always" and level is not DISABLED
  #   subject_case:  # Forbids specific cases in the subject
  #     level: "ERROR"
  #     rule: "never"
  #     value: ["sentence-case", "start-case", "pascal-case", "upper-case"]
  #   subject_empty:
  #     level: "ERROR"
  #     rule: "never"
  #   subject_full_stop:
  #     level: "ERROR"
  #     rule: "never"
  #     value: "."
  #   subject_exclamation_mark:
  #     level: "DISABLED"
  #     rule: "never"
  #   body_leading_blank:  # Body must start with a blank line after subject
  #     level: "WARNING"
  #     rule: "always"
  #   body_empty:
  #     level: "DISABLED"
  #     rule: "never"
  #   body_max_line_length:
  #     level: "ERROR"
  #     rule: "always"
  #     value: 100
  #   footer_leading_blank:  # Footer must start with a blank line after body
  #     level: "WARNING"
  #     rule: "always"
  #   footer_empty:
  #     level: "DISABLED"
  #     rule: "never"
  #   footer_max_line_length:
  #     level: "ERROR"
  #     rule: "always"
  #     value: 100
# Pull request settings
pr:
  defaults:
    base_branch: null  # Default base branch (null = auto-detect, e.g., main, master, develop)
    feature_prefix: "feature/"  # Default prefix for feature branches
  strategy: "github-flow"  # Git workflow: "github-flow", "gitflow", "trunk-based"

  # Branch mapping for different PR types (primarily used in gitflow strategy)
  # branch_mapping:
  #   feature:
  #     base: "develop"
  #     prefix: "feature/"
  #   release:
  #     base: "main"
  #     prefix: "release/"
  #   hotfix:
  #     base: "main"
  #     prefix: "hotfix/"
  #   bugfix:
  #     base: "develop"
  #     prefix: "bugfix/"

  # PR generation configuration
  generate:
    title_strategy: "llm"  # How PR titles are generated: "commits" (from commit messages), "llm" (AI generated)
    description_strategy: "llm"  # How descriptions are generated: "commits", "llm"
    # description_template: |  # Template for PR description when using 'llm' strategy. Placeholders: {changes}, {testing_instructions}, {screenshots}
    #   ## Changes
    #   {changes}
    #
    #   ## Testing
    #   {testing_instructions}
    #
    #   ## Screenshots
    #   {screenshots}
    use_workflow_templates: true  # Use workflow-specific templates if available (e.g., for GitHub PR templates)
# Ask command settings
ask:
  interactive_chat: false  # Turn on interactive chat mode for the 'ask' command