VoiceStar / copy_codebase.py
mrfakename's picture
Upload 51 files
82bc972 verified
import os
import shutil
import fnmatch
def parse_gitignore(gitignore_path):
    """Parse a .gitignore file and return its list of patterns.

    Blank lines and comment lines (those whose first non-blank character is
    ``#``) are dropped. Every remaining line is returned with surrounding
    whitespace stripped, in file order. Patterns are not interpreted here.

    Args:
        gitignore_path: Path to the .gitignore file to read.

    Returns:
        A list of pattern strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (non-ASCII path names would otherwise be
    # read differently on different machines).
    with open(gitignore_path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [
            entry
            for entry in stripped_lines
            if entry and not entry.startswith("#")
        ]
def file_matches_patterns(file_path, patterns):
    """Check whether a file path matches any .gitignore-style pattern.

    A pattern matches when any of the following holds:

    * the whole ``file_path`` matches it via ``fnmatch`` (original behavior);
    * it has no interior ``/`` and matches any single path component
      (e.g. ``.DS_Store`` or ``*.pyc``);
    * it ends with ``/`` and matches one of the *directory* components of
      the path (e.g. ``__pycache__/``), but never the file name itself;
    * it has an interior ``/`` (e.g. ``docs/_build``) and matches the path,
      or a directory prefix of it, at any depth.

    This is an approximation of gitignore semantics: the location of the
    .gitignore file is unknown here, so root-anchored patterns are matched
    at any depth. Negation patterns (``!pattern``) are not supported; beyond
    the whole-path check they are skipped rather than treated as ignore
    rules.

    Args:
        file_path: Path of the file to test.
        patterns: Iterable of pattern strings.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    # Use forward slashes throughout so patterns behave the same on every
    # platform, and drop redundant "./" or "//" segments.
    normalized = os.path.normpath(file_path).replace(os.sep, "/")
    components = [part for part in normalized.split("/") if part]
    directories = components[:-1]

    for pattern in patterns:
        # Original check, kept so everything that matched before still does.
        if fnmatch.fnmatch(file_path, pattern):
            return True

        if pattern.startswith("!"):
            continue

        directory_only = pattern.endswith("/")
        core = pattern.strip("/")
        if not core:
            continue

        if "/" in core:
            # Multi-segment pattern: try it as the path itself (unless it
            # names a directory) and as a directory containing the file.
            candidates = [core + "/*", "*/" + core + "/*"]
            if not directory_only:
                candidates += [core, "*/" + core]
            if any(fnmatch.fnmatch(normalized, c) for c in candidates):
                return True
            continue

        names = directories if directory_only else components
        if any(fnmatch.fnmatch(name, core) for name in names):
            return True

    return False
def copy_codebase(src, dst, max_size_mb=5, gitignore_path=None):
    """Copy files from ``src`` to ``dst``, preserving the relative layout.

    Files are skipped when they:

    * live inside a ``.git`` directory (skipped because of permission
      issues),
    * match a pattern from the .gitignore file at ``gitignore_path``, tested
      against both the walked path and the path relative to ``src``, or
    * are larger than ``max_size_mb`` megabytes.

    If ``dst`` is located inside ``src`` it is excluded from the walk so the
    copy never copies itself.

    Args:
        src: Root directory to copy from.
        dst: Destination directory; created if it does not exist.
        max_size_mb: Size limit in megabytes (1 MB = 1024 * 1024 bytes).
        gitignore_path: Optional path to a .gitignore file. Ignored when
            ``None`` or when the file does not exist.
    """
    if gitignore_path and os.path.exists(gitignore_path):
        patterns = parse_gitignore(gitignore_path)
    else:
        patterns = []
    print("patterns to ignore: ", patterns)

    max_size_bytes = max_size_mb * 1024 * 1024
    dst_abs = os.path.abspath(dst)
    os.makedirs(dst, exist_ok=True)

    for root, dirs, files in os.walk(src):
        # Prune in place so os.walk never descends into .git or into the
        # destination itself. Working on directory names rather than a
        # "/.git/" substring keeps this independent of the path separator.
        dirs[:] = [
            name
            for name in dirs
            if name != ".git"
            and os.path.abspath(os.path.join(root, name)) != dst_abs
        ]

        for file in files:
            file_path = os.path.join(root, file)
            relative_path = os.path.relpath(file_path, src)

            # Check .gitignore patterns. The relative path is tested as well
            # because gitignore entries are written relative to the
            # repository, not to wherever ``src`` happens to live.
            if patterns and (
                file_matches_patterns(file_path, patterns)
                or file_matches_patterns(relative_path, patterns)
            ):
                continue

            # Check file size
            if os.path.getsize(file_path) > max_size_bytes:
                print(f"Skipping {file_path} because it's larger than {max_size_mb}MB")
                continue

            # Make sure the destination directory exists
            dst_path = os.path.join(dst, relative_path)
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            shutil.copy(file_path, dst_path)