# codemalt / pyproject.toml
# Sarthak
# chore: update dependencies and configuration for improved training
# 7837959
# Build configuration: the wheel/sdist is produced by the hatchling backend,
# which is the only build-time requirement.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core package metadata.
[project]
name = "gte-qwen2-7b-instruct-m2v"
version = "0.1.0"
description = "Model2Vec distillation pipeline for gte-Qwen2-7B-instruct"
readme = "README.md"
# Keep in sync with `target-version = "py312"` under [tool.ruff].
requires-python = ">=3.12"
# Runtime requirements (PEP 508 strings), one per line in alphabetical order.
dependencies = [
    "accelerate>=1.7.0",
    "beam-client>=0.2.155",
    "boto3>=1.38.23",
    "datasets>=3.6.0",
    # NOTE(review): the `dotenv` import package is usually provided by the
    # `python-dotenv` distribution; confirm this is the intended project.
    "dotenv>=0.9.9",
    # NOTE(review): editables, hatchling and setuptools are packaging tools;
    # confirm they are really needed at runtime and not only at build time.
    "editables>=0.5",
    "einops>=0.8.1",
    "flash-attn>=2.7.4.post1",
    "hatchling>=1.27.0",
    "iso639>=0.1.4",
    "jinja2>=3.0.0",
    "joblib>=1.0.0",
    # Exact pin on a release candidate; revisit once a final 1.0.0 is usable.
    "kaleido==1.0.0rc13",
    "lightning>=2.5.1.post0",
    "matplotlib>=3.10.3",
    "more-itertools>=10.5.0",
    "mteb>=1.14.15",
    "numpy>=1.26.4",
    "plotly>=6.1.1",
    "psutil>=7.0.0",
    "pydantic>=2.11.5",
    "requests>=2.32.3",
    "rich>=10.0.0",
    "safetensors>=0.3.0",
    "scikit-learn>=1.6.1",
    "seaborn>=0.13.2",
    "sentence-transformers>=4.1.0",
    "setuptools>=80.8.0",
    "skops>=0.11.0",
    "smart-open[s3]>=7.1.0",
    "statsmodels>=0.14.4",
    "tokenizers>=0.20",
    "torch>=2.7.0",
    "tqdm>=4.65.0",
    # Upper bound only: releases newer than 4.52.1 are deliberately excluded.
    "transformers<=4.52.1",
    "typer>=0.16.0",
]
# Console entry point: installing the package creates a `distiller` command
# that calls the `app` object exported by `distiller/__main__.py`.
[project.scripts]
distiller = "distiller.__main__:app"
# Development-only tooling (type checker and linter/formatter); these are not
# part of the runtime `dependencies` list.
[dependency-groups]
dev = [
"mypy>=1.15.0",
"ruff>=0.11.6",
]
# The importable package lives under src/, so hatchling is told explicitly
# which directory to ship in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["src/distiller"]
[tool.mypy]
# mypy interprets every `exclude` entry as a regular expression that is
# searched for anywhere in the candidate path; it is not a literal directory
# name. Bare words over-match: "dist" matches everything under src/distiller/,
# "build" matches e.g. builder.py, and ".git" matches any character followed
# by "git". Each pattern is therefore anchored to a whole path component.
# Directories are matched with a trailing slash during recursive discovery.
exclude = [
    '(^|/)\.git/',
    '(^|/)\.ruff_cache/',
    '(^|/)\.venv/',
    '(^|/)venv/',
    '(^|/)__pycache__/',
    '(^|/)build/',
    '(^|/)dist/',
    '(^|/)vendor/',
]
# Analyse imports from installed packages even when they ship without type
# information, instead of silently treating them as `Any`.
follow_untyped_imports = true
[tool.ruff]
# E501 is in the lint ignore list, so in practice this width mainly steers
# the formatter's wrapping.
line-length = 120
target-version = "py312"
# Paths ruff never lints or formats: VCS and tool caches, virtual
# environments, build output, and two sub-packages of `distiller`
# (presumably vendored upstream code; confirm before removing).
exclude = [
    ".git",
    ".ruff_cache",
    ".venv",
    "venv",
    "__pycache__",
    "build",
    "dist",
    "vendor",
    "src/distiller/model2vec",
    "src/distiller/tokenlearn",
]
[tool.ruff.lint]
# Enable all rules by default, then selectively disable
select = ["ALL"]
ignore = [
# Rules that conflict with other tools/preferences
"D203", # one-blank-line-before-class
"D212", # multi-line-summary-first-line
"FBT001", # Boolean positional arg in function definition (required for typer)
"FBT002", # Boolean default value in function definition (required for typer)
"C901", # function too complex
"PLR0911", # too many return statements
"PLR0912", # too many branches
"PLR0913", # too many arguments in function definition
"PLR0915", # too many statements
"TRY300", # Consider moving this statement to an `else` block
"COM812", # Missing trailing comma (conflicts with the ruff formatter)
"TC001", # Move application import into a type-checking block
"ERA001", # Found commented-out code
"G004", # Logging statement uses f-string
"TD003", # Missing link in to-do
"TRY301", # Abstract raise to an inner function
# Disable rules that conflict with tab indentation
"E101", # Indentation contains mixed spaces and tabs
"W191", # indentation contains tabs
"D206", # indent with spaces, not tabs
# Remaining entries are general project preferences, unrelated to indentation
"PD901", # Avoid using the generic variable name `df` for DataFrames
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
"D103", # Missing docstring in public function
"BLE001", # Do not catch blind exception: `Exception`
"T201", # Use `logger.info` instead of `print`
"E501", # Line too long
"PLR2004", # Magic value used in comparison
"RUF001", # String contains ambiguous unicode character
"D100", # Missing docstring in public module
"D101", # Missing docstring in public class
]
# Cyclomatic-complexity limit used by rule C901.
# NOTE(review): C901 is listed under `ignore` in [tool.ruff.lint], so this
# threshold is not enforced unless that rule is re-enabled.
[tool.ruff.lint.mccabe]
max-complexity = 10
# Size limits for the pylint-derived rules: max-args feeds PLR0913,
# max-branches feeds PLR0912, max-statements feeds PLR0915.
# NOTE(review): all three rules are listed under `ignore` in [tool.ruff.lint],
# so these thresholds are not enforced unless the rules are re-enabled.
[tool.ruff.lint.pylint]
max-args = 5
max-branches = 12
max-statements = 50
# Docstring (D-prefixed) rules are checked against the Google docstring style.
[tool.ruff.lint.pydocstyle]
convention = "google"
# Formatter settings. Tab indentation is the deliberate choice here; it is the
# reason E101, W191 and D206 are ignored in [tool.ruff.lint].
[tool.ruff.format]
quote-style = "double"
indent-style = "tab"
# Keep a trailing comma as a signal to leave a collection exploded one item per line.
skip-magic-trailing-comma = false
line-ending = "auto"