1
1
from __future__ import annotations
2
2
import argparse , zipfile , hashlib , os
3
3
from pathlib import Path
4
+ from typing import Any
5
+
6
+ try :
7
+ import tomllib as _toml_loader
8
+ except ModuleNotFoundError :
9
+ try :
10
+ import tomli as _toml_loader
11
+ except ModuleNotFoundError :
12
+ _toml_loader = None
13
+
4
14
from .core import Config , generate_markdown_report
5
15
from . import __version__
6
16
7
17
# Load .env file if it exists (for Pro license and configuration)
8
18
def load_env_file ():
9
- # Try current directory first, then parent directories
10
19
current = Path .cwd ()
11
20
for parent in [current ] + list (current .parents ):
12
21
env_file = parent / '.env'
@@ -17,17 +26,44 @@ def load_env_file():
17
26
if line and not line .startswith ('#' ) and '=' in line :
18
27
key , value = line .split ('=' , 1 )
19
28
os .environ [key .strip ()] = value .strip ()
20
- break # Stop after first .env file found
29
+ break
21
30
except Exception :
22
- pass # Silently ignore .env file errors
31
+ pass
23
32
24
- # Load environment configuration on import
25
33
# Import-time side effect: apply the nearest .env file to os.environ before
# anything below reads configuration from the environment.
load_env_file()

# Report filename main() falls back to when no --output is given and the
# target path is not a directory.
DEFAULT_OUTPUT = "PROJECT_BLUEPRINT.md"

# Patterns main() always appends to the user-supplied exclude globs.
DEFAULT_EXCLUDES = [
    ".git",
    "__pycache__",
    "node_modules",
    ".venv",
    "build",
    "dist",
    "*.pyc",
    ".DS_Store",
]

# Option names accepted from the [tool.dir2md] table of pyproject.toml
# (compared after hyphens in the TOML key are replaced by underscores).
_CONFIG_KEYS = {
    # target and destination
    "path", "output",
    # preset and token budgeting
    "preset", "llm_mode", "budget_tokens", "max_file_tokens",
    "dedup", "sample_head", "sample_tail", "explain",
    # filtering and traversal
    "include_glob", "exclude_glob", "omit_glob", "only_ext",
    "respect_gitignore", "follow_symlinks", "max_bytes", "max_lines",
    # output behaviour
    "emit_manifest", "stats", "capsule", "dry_run", "no_timestamp", "masking",
}
66
+
31
67
32
68
def positive_int (v : str ) -> int :
33
69
try :
@@ -38,75 +74,139 @@ def positive_int(v: str) -> int:
38
74
raise argparse .ArgumentTypeError ("Only positive integers are allowed." )
39
75
return iv
40
76
77
+
78
def _load_pyproject_config() -> dict[str, Any]:
    """Return CLI defaults read from the nearest ``pyproject.toml``.

    The current working directory and then each of its parents are searched
    for a ``pyproject.toml``.  The first file found ends the search, whether
    or not it contains a ``[tool.dir2md]`` table.

    Returns:
        A dict mapping argparse destination names (underscored) to sanitized
        values.  It is empty when no TOML parser is importable, no file is
        found, the file cannot be read or parsed, or it has no
        ``[tool.dir2md]`` table.
    """
    if _toml_loader is None:
        return {}

    start = Path.cwd()
    for directory in (start, *start.parents):
        pyproject_path = directory / "pyproject.toml"
        if not pyproject_path.is_file():
            continue

        try:
            with pyproject_path.open("rb") as handle:
                data = _toml_loader.load(handle)
        except (ValueError, OSError):
            # ValueError covers the parser's TOMLDecodeError as well as the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            return {}

        # ``tool`` may legally be a non-table value in TOML; guard before
        # calling .get() on it so a malformed file cannot crash the CLI.
        tool_table = data.get("tool")
        tool_config = tool_table.get("dir2md") if isinstance(tool_table, dict) else None
        if not isinstance(tool_config, dict):
            return {}

        return _sanitize_pyproject_options(tool_config)
    return {}


def _sanitize_pyproject_options(tool_config: dict[str, Any]) -> dict[str, Any]:
    """Normalize a ``[tool.dir2md]`` table into argparse-ready defaults.

    Keys have hyphens replaced by underscores; keys not listed in
    ``_CONFIG_KEYS`` are dropped.  Values that cannot be coerced to the
    expected type are skipped rather than raising, so a bad entry in the
    config file never aborts the command.
    """
    glob_keys = {"include_glob", "exclude_glob", "omit_glob"}
    int_keys = {
        "budget_tokens", "max_file_tokens", "dedup",
        "sample_head", "sample_tail", "max_bytes", "max_lines",
    }
    # The CLI declares these two options with the positive_int type.  main()
    # installs this dict as argparse defaults, and argparse does not run
    # ``type=`` on non-string defaults, so the same rule is enforced here.
    positive_keys = {"max_bytes", "max_lines"}
    bool_keys = {
        "respect_gitignore", "follow_symlinks", "emit_manifest", "stats",
        "capsule", "dry_run", "no_timestamp", "explain",
    }

    sanitized: dict[str, Any] = {}
    for raw_key, value in tool_config.items():
        key = raw_key.replace("-", "_")
        if key not in _CONFIG_KEYS:
            continue

        if key in glob_keys:
            # Always a list of strings; a scalar becomes a one-item list.
            if value is None:
                continue
            if isinstance(value, list):
                sanitized[key] = [str(item) for item in value]
            else:
                sanitized[key] = [str(value)]
        elif key == "only_ext":
            # main() splits this option on commas, so a list is re-joined.
            if value is None:
                continue
            if isinstance(value, list):
                sanitized[key] = ",".join(str(item) for item in value)
            else:
                sanitized[key] = str(value)
        elif key in int_keys:
            # bool is a subclass of int; ``true`` must not silently become 1.
            if isinstance(value, bool):
                continue
            try:
                number = int(value)
            except (TypeError, ValueError, OverflowError):
                # OverflowError: TOML allows ``inf`` as a float value.
                continue
            if key in positive_keys and number <= 0:
                continue
            sanitized[key] = number
        elif key in bool_keys:
            sanitized[key] = bool(value)
        else:
            # path, output, preset, llm_mode, masking: stored as given.
            sanitized[key] = value
    return sanitized
131
+
132
+
41
133
def main (argv : list [str ] | None = None ) -> int :
42
- ap = argparse .ArgumentParser (prog = "dir2md" , description = "Directory → Markdown exporter with LLM optimization" )
134
+ config_from_file = _load_pyproject_config ()
135
+
136
+ ap = argparse .ArgumentParser (prog = "dir2md" , description = "Directory -> Markdown exporter with LLM optimization" )
43
137
ap .add_argument ("path" , nargs = "?" , default = "." )
44
- ap .add_argument ("-o" , "--output" , default = "PROJECT_BLUEPRINT.md" )
45
-
46
- # Preset options
47
- ap .add_argument ("--preset" , default = "raw" , choices = ["iceberg" ,"pro" ,"raw" ], help = "Preset mode: iceberg/pro/raw" )
48
-
49
- # Token and selection control
50
- ap .add_argument ("--llm-mode" , choices = ["off" ,"ref" ,"summary" ,"inline" ], default = None )
51
- ap .add_argument ("--budget-tokens" , type = int , default = 6000 )
52
- ap .add_argument ("--max-file-tokens" , type = int , default = 1200 )
53
- ap .add_argument ("--dedup" , type = int , default = 16 )
54
- ap .add_argument ("--sample-head" , type = int , default = 120 )
55
- ap .add_argument ("--sample-tail" , type = int , default = 40 )
138
+ ap .add_argument ("-o" , "--output" )
139
+
140
+ ap .add_argument ("--preset" , choices = ["iceberg" , "pro" , "raw" ], help = "Preset mode: iceberg/pro/raw" )
141
+
142
+ ap .add_argument ("--llm-mode" , choices = ["off" , "ref" , "summary" , "inline" ])
143
+ ap .add_argument ("--budget-tokens" , type = int )
144
+ ap .add_argument ("--max-file-tokens" , type = int )
145
+ ap .add_argument ("--dedup" , type = int )
146
+ ap .add_argument ("--sample-head" , type = int )
147
+ ap .add_argument ("--sample-tail" , type = int )
56
148
ap .add_argument ("--explain" , action = "store_true" , help = "Include selection rationale and drift_score in capsule comments" )
57
149
58
- # Filtering and safety controls
59
- ap .add_argument ("--include-glob" , action = "append" , default = [])
60
- ap .add_argument ("--exclude-glob" , action = "append" , default = [])
61
- ap .add_argument ("--omit-glob" , action = "append" , default = [])
62
- ap .add_argument ("--only-ext" , default = "" )
150
+ ap .add_argument ("--include-glob" , action = "append" , help = "Gitignore-style include pattern (gitwildmatch syntax)" )
151
+ ap .add_argument ("--exclude-glob" , action = "append" , help = "Gitignore-style exclude pattern" )
152
+ ap .add_argument ("--omit-glob" , action = "append" , help = "Gitignore-style omit pattern (skips content)" )
153
+ ap .add_argument ("--only-ext" , help = "Comma-separated extension list (e.g. py,md)" )
63
154
ap .add_argument ("--respect-gitignore" , action = "store_true" )
64
155
ap .add_argument ("--follow-symlinks" , action = "store_true" )
65
- ap .add_argument ("--max-bytes" , type = positive_int , default = 200_000 )
66
- ap .add_argument ("--max-lines" , type = positive_int , default = 2000 )
156
+ ap .add_argument ("--max-bytes" , type = positive_int )
157
+ ap .add_argument ("--max-lines" , type = positive_int )
67
158
68
- # Output options
69
- ap .add_argument ("--emit-manifest" , action = "store_true" )
159
+ ap .add_argument ("--emit-manifest" , action = "store_true" , help = "Write JSON manifest (raw preset overrides to off)" )
70
160
ap .add_argument ("--stats" , action = "store_true" )
71
161
ap .add_argument ("--capsule" , action = "store_true" , help = "Package md+manifest into zip" )
72
162
ap .add_argument ("--dry-run" , action = "store_true" )
73
163
ap .add_argument ("--no-timestamp" , action = "store_true" , help = "Omit timestamp for reproducible output" )
74
- ap .add_argument ("--masking" , choices = ["off" , "basic" , "advanced" ], default = "off" , help = "Secret masking mode (advanced requires Pro license)" )
164
+ ap .add_argument ("--masking" , choices = ["off" , "basic" , "advanced" ], help = "Secret masking mode (advanced requires Pro license)" )
75
165
76
166
ap .add_argument ("-V" , "--version" , action = "version" , version = f"dir2md { __version__ } " )
77
167
168
+ if config_from_file :
169
+ ap .set_defaults (** config_from_file )
170
+
78
171
ns = ap .parse_args (argv )
79
172
80
173
root = Path (ns .path ).resolve ()
81
- output = Path (ns .output )
82
- only_ext = {e .strip ().lstrip ('.' ) for e in ns .only_ext .split (',' ) if e .strip ()} or None
174
+
175
+ if ns .output :
176
+ output = Path (ns .output )
177
+ else :
178
+ if root .is_dir ():
179
+ output = root / f"{ root .name } .md"
180
+ else :
181
+ output = Path (DEFAULT_OUTPUT ).resolve ()
182
+ only_ext = {e .strip ().lstrip ('.' ) for e in (ns .only_ext or "" ).split (',' ) if e .strip ()} or None
83
183
84
184
cfg = Config (
85
185
root = root ,
86
186
output = output ,
87
- include_globs = list (ns .include_glob ),
88
- exclude_globs = list (ns .exclude_glob ) + DEFAULT_EXCLUDES ,
89
- omit_globs = list (ns .omit_glob ),
90
- respect_gitignore = bool (ns .respect_gitignore ),
91
- follow_symlinks = bool (ns .follow_symlinks ),
92
- max_bytes = int (ns .max_bytes ) if ns .max_bytes else None ,
93
- max_lines = int (ns .max_lines ) if ns .max_lines else None ,
187
+ include_globs = list (ns .include_glob or [] ),
188
+ exclude_globs = list (ns .exclude_glob or [] ) + DEFAULT_EXCLUDES ,
189
+ omit_globs = list (ns .omit_glob or [] ),
190
+ respect_gitignore = bool (ns .respect_gitignore or False ),
191
+ follow_symlinks = bool (ns .follow_symlinks or False ),
192
+ max_bytes = int (ns .max_bytes ) if ns .max_bytes is not None else 200_000 ,
193
+ max_lines = int (ns .max_lines ) if ns .max_lines is not None else 2000 ,
94
194
include_contents = True ,
95
195
only_ext = only_ext ,
96
- add_stats = bool (ns .stats ),
196
+ add_stats = bool (ns .stats or False ),
97
197
add_toc = False ,
98
198
llm_mode = (ns .llm_mode or "ref" ),
99
- budget_tokens = int (ns .budget_tokens ),
100
- max_file_tokens = int (ns .max_file_tokens ),
101
- dedup_bits = int (ns .dedup ),
102
- sample_head = int (ns .sample_head ),
103
- sample_tail = int (ns .sample_tail ),
199
+ budget_tokens = int (ns .budget_tokens ) if ns . budget_tokens is not None else 6000 ,
200
+ max_file_tokens = int (ns .max_file_tokens ) if ns . max_file_tokens is not None else 1200 ,
201
+ dedup_bits = int (ns .dedup ) if ns . dedup is not None else 16 ,
202
+ sample_head = int (ns .sample_head ) if ns . sample_head is not None else 120 ,
203
+ sample_tail = int (ns .sample_tail ) if ns . sample_tail is not None else 40 ,
104
204
strip_comments = False ,
105
- emit_manifest = bool (ns .emit_manifest ),
106
- preset = str (ns .preset ),
107
- explain_capsule = bool (ns .explain ),
108
- no_timestamp = bool (ns .no_timestamp ),
109
- masking_mode = str (ns .masking ),
205
+ emit_manifest = bool (ns .emit_manifest if ns . emit_manifest is not None else True ),
206
+ preset = str (ns .preset or "raw" ),
207
+ explain_capsule = bool (ns .explain or False ),
208
+ no_timestamp = bool (ns .no_timestamp or False ),
209
+ masking_mode = str (ns .masking or "off" ),
110
210
)
111
211
112
212
md = generate_markdown_report (cfg )
@@ -125,8 +225,12 @@ def main(argv: list[str] | None = None) -> int:
125
225
try :
126
226
print (f"[dir2md] Wrote: { output } " )
127
227
except UnicodeEncodeError :
128
- print (f "[dir2md] Wrote: (File path contains unprintable characters, but the file was likely created successfully)" )
228
+ print ("[dir2md] Wrote: (File path contains unprintable characters, but the file was likely created successfully)" )
129
229
return 0
130
230
231
+
131
232
if __name__ == "__main__" :
132
233
raise SystemExit (main ())
234
+
235
+
236
+
0 commit comments