Skip to content

Commit be05802

Browse files
authored
Merge pull request #48 from thiremani/codex/host-cpu-tuning
perf(codegen): tune LLVM for host CPU
2 parents 4ed0fb2 + a2e6a2a commit be05802

File tree

9 files changed

+314
-62
lines changed

9 files changed

+314
-62
lines changed

.github/workflows/ci.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,7 @@ jobs:
5151
- name: Run full test suite with leak detection
5252
run: |
5353
python3 --version
54-
python3 test.py --leak-check
54+
# Valgrind can SIGILL on host-tuned codegen from -mcpu/-march=native.
55+
# Keep native tuning as the default elsewhere, but force portable code
56+
# for the leak-check job so the memory checker can execute the binaries.
57+
PLUTO_TARGET_CPU=portable python3 test.py --leak-check

AGENTS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@
2020
- Clear cache: `./pluto -clean` (or `-c`, clears cache for current version)
2121

2222
Requirements: Go `1.25`, LLVM `21` on PATH (`clang`, `opt`, `llc`, `ld.lld`). macOS Homebrew paths: `/opt/homebrew/opt/llvm/bin` (ARM) or `/usr/local/opt/llvm/bin` (Intel).
23+
`PLUTO_TARGET_CPU` defaults to `native`; set it to a CPU name or `portable` to override host CPU tuning.
2324

2425
## Architecture Overview
2526
- Two phases: CodeCompiler for `.pt` (reusable funcs/consts) → IR; ScriptCompiler for `.spt` (programs) links code IR.
2627
- Pipeline: generate IR → optimize `-O3` (`opt`) → object (`llc`) → link with runtime via `clang`/`lld`.
2728
- Module resolution: walks up to find `pt.mod`; cache key based on module path.
2829
- Cache layout (versioned to isolate different compiler versions):
2930
- `<PTCACHE>/<version>/runtime/<hash>/` for compiled runtime objects
30-
- `<PTCACHE>/<version>/<module-path>/{code,script}` for IR/objects
31+
- Default host CPU builds: `<PTCACHE>/<version>/<module-path>/{code,script}`
32+
- Non-default `PLUTO_TARGET_CPU` builds: `<PTCACHE>/<version>/target_cpu-<setting>/<module-path>/{code,script}`
3133

3234
## Coding Style & Naming Conventions
3335
- Indentation: Use tabs for indentation across the repository; do not convert leading tabs to spaces. Preserve existing indentation when editing.
@@ -80,6 +82,7 @@ When reviewing PRs or preparing code for review, check:
8082
- Linux: `rm -rf "$HOME/.cache/pluto"`
8183
- Windows: `rd /s /q %LocalAppData%\pluto`
8284
- `PTCACHE` overrides cache location; ensure PATH includes LLVM 21 tools.
85+
- `PLUTO_TARGET_CPU` overrides host CPU tuning; set it to `portable` to disable the default `-mcpu=native`.
8386

8487
## Instructions for AI Assistants
8588
- Keep changes minimal and focused; avoid reflowing or reindenting unrelated lines.

CLAUDE.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ go test -race ./lexer ./parser ./compiler
4848
./pluto [directory] # Compiles .pt and .spt files in directory
4949
./pluto -version # Show version information (or -v)
5050
./pluto -clean # Clear cache for current version (or -c)
51+
# Override host CPU tuning (defaults to native)
52+
PLUTO_TARGET_CPU=portable ./pluto .
5153
```
5254
This will compile all `.pt` and `.spt` files in the specified directory and generate executables in the same directory.
5355

@@ -141,8 +143,10 @@ CI: GitHub Actions builds with Go 1.25, installs LLVM 21 + valgrind, and runs `p
141143
- Windows: `%LocalAppData%\pluto`
142144
- Cache layout (versioned to isolate different compiler versions):
143145
- `<PTCACHE>/<version>/runtime/<hash>/` for compiled runtime objects
144-
- `<PTCACHE>/<version>/<module-path>/{code,script}` for IR/objects
146+
- Default host CPU builds: `<PTCACHE>/<version>/<module-path>/{code,script}`
147+
- Non-default `PLUTO_TARGET_CPU` builds: `<PTCACHE>/<version>/target_cpu-<setting>/<module-path>/{code,script}`
145148
- `PTCACHE` overrides cache location; ensure PATH includes LLVM 21 tools
149+
- `PLUTO_TARGET_CPU` overrides host CPU tuning; set it to `portable` to disable the default `-mcpu=native`
146150
- Use `pluto -clean` to clear cache for current version
147151

148152
## Coding Style & Naming Conventions

GEMINI.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ On macOS with Homebrew, you can install LLVM with `brew install llvm` and add it
6868
./pluto [directory] # Compiles .pt and .spt files in directory
6969
./pluto -version # Show version information (or -v)
7070
./pluto -clean # Clear cache for current version (or -c)
71+
PLUTO_TARGET_CPU=portable ./pluto . # Override host CPU tuning (defaults to native)
7172
```
7273
This will compile all `.pt` and `.spt` files in the specified directory and generate executables in the same directory.
7374

@@ -91,7 +92,8 @@ The compilation process consists of two main phases:
9192
- Module resolution: walks up to find `pt.mod`; cache key based on module path.
9293
- Cache layout (versioned to isolate different compiler versions):
9394
- `<PTCACHE>/<version>/runtime/<hash>/` for compiled runtime objects
94-
- `<PTCACHE>/<version>/<module-path>/{code,script}` for IR/objects
95+
- Default host CPU builds: `<PTCACHE>/<version>/<module-path>/{code,script}`
96+
- Non-default `PLUTO_TARGET_CPU` builds: `<PTCACHE>/<version>/target_cpu-<setting>/<module-path>/{code,script}`
9597
9698
## Debugging & Configuration Tips
9799
@@ -107,6 +109,7 @@ To clear the cache for the current version, run `./pluto -clean`. To clear the e
107109
108110
- Quick smoke check: `./pluto tests/` to see compile/link output.
109111
- `PTCACHE` overrides cache location; ensure PATH includes LLVM 21 tools.
112+
- `PLUTO_TARGET_CPU` overrides host CPU tuning; set it to `portable` to disable the default `-mcpu=native`.
110113
- Use `pluto -clean` to clear cache for current version.
111114
112115
## Coding Style & Naming Conventions

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ Pluto walks up from the working directory to find `pt.mod` and treats that direc
280280

281281
- `./pluto -version` (or `-v`) — show version
282282
- `./pluto -clean` (or `-c`) — clear cache for current version
283+
- `PLUTO_TARGET_CPU` defaults to `native`; set it to a CPU name or `portable` to override host CPU tuning
283284

284285
---
285286

@@ -426,7 +427,8 @@ Pluto is under active development.
426427
- **Module resolution:** walks up from CWD to find `pt.mod` and derives module path.
427428
- **Cache layout** (versioned to isolate different compiler versions):
428429
- `<PTCACHE>/<version>/runtime/<hash>/` for compiled runtime objects
429-
- `<PTCACHE>/<version>/<module-path>/{code,script}` for IR/objects
430+
- Default host CPU builds: `<PTCACHE>/<version>/<module-path>/{code,script}`
431+
- Non-default `PLUTO_TARGET_CPU` builds: `<PTCACHE>/<version>/target_cpu-<setting>/<module-path>/{code,script}`
430432

431433
---
432434

@@ -448,6 +450,7 @@ Pluto is under active development.
448450
- Linux: `rm -rf "$HOME/.cache/pluto"`
449451
- Windows: `rd /s /q %LocalAppData%\pluto`
450452
- Override cache location with `PTCACHE` environment variable
453+
- Override host CPU tuning with `PLUTO_TARGET_CPU` (`portable` disables the default `-mcpu=native` path)
451454

452455
**Encoding issues on Windows:**
453456
- Run from the MSYS2 UCRT64 shell; the runner decodes output as UTF-8

main.go

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -65,19 +65,20 @@ type Pluto struct {
6565

6666
PtCache string // Root cache directory (PTCACHE)
6767
CacheDir string // Project-specific cache directory (<PTCACHE>/<version>/[target_cpu-<setting>/]<modulePath>)
68+
Config buildConfig
6869

6970
Ctx llvm.Context // LLVM context and code‐compiler for "code" files
7071
}
7172

72-
// sanitizeVersion returns a filesystem-safe version string.
73-
// Returns error for path traversal attempts or empty versions.
74-
func sanitizeVersion(v string) (string, error) {
73+
// sanitizeCacheComponent returns a filesystem-safe cache path component.
74+
// Returns error for path traversal attempts or empty values.
75+
func sanitizeCacheComponent(v string) (string, error) {
7576
if v == "" {
76-
return "", fmt.Errorf("invalid version: empty string")
77+
return "", fmt.Errorf("invalid cache component: empty string")
7778
}
7879
escaped := url.PathEscape(v)
7980
if escaped == "." || escaped == ".." {
80-
return "", fmt.Errorf("invalid version: %q", v)
81+
return "", fmt.Errorf("invalid cache component: %q", v)
8182
}
8283
return escaped, nil
8384
}
@@ -330,19 +331,13 @@ func (p *Pluto) GenBinary(scriptLL, bin string, rtObjs []string) error {
330331
}
331332

332333
// 1) Optimize IR
333-
if out, err := exec.Command(OPT_BIN, OPT_LEVEL, "-S", scriptLL, "-o", optFile).CombinedOutput(); err != nil {
334+
if out, err := exec.Command(OPT_BIN, optCommandArgs(p.Config, scriptLL, optFile)...).CombinedOutput(); err != nil {
334335
fmt.Printf("optimization failed: %v\n%s\n", err, out)
335336
return err
336337
}
337338

338339
// 2) Lower to object
339-
llcArgs := []string{FILETYPE_OBJ}
340-
// PIC is ELF/Mach-O specific; avoid on Windows COFF
341-
if runtime.GOOS != OS_WINDOWS {
342-
llcArgs = append(llcArgs, RELOC_PIC)
343-
}
344-
llcArgs = append(llcArgs, optFile, "-o", objFile)
345-
if out, err := exec.Command(LLC_BIN, llcArgs...).CombinedOutput(); err != nil {
340+
if out, err := exec.Command(LLC_BIN, llcCommandArgs(p.Config, optFile, objFile)...).CombinedOutput(); err != nil {
346341
fmt.Printf("llc compilation failed: %v\n%s\n", err, out)
347342
return err
348343
}
@@ -377,6 +372,24 @@ func (p *Pluto) GenBinary(scriptLL, bin string, rtObjs []string) error {
377372
return nil
378373
}
379374

375+
// optCommandArgs builds the argument list for the OPT_BIN invocation that
// optimizes scriptLL into optFile. The order is: OPT_LEVEL, then the flags
// returned by cfg.llvmCodegenFlags(), then "-S <scriptLL> -o <optFile>".
func optCommandArgs(cfg buildConfig, scriptLL, optFile string) []string {
376+
args := []string{OPT_LEVEL}
377+
args = append(args, cfg.llvmCodegenFlags()...)
378+
args = append(args, "-S", scriptLL, "-o", optFile)
379+
return args
380+
}
381+
382+
// llcCommandArgs builds the argument list for the LLC_BIN invocation that
// lowers optFile to objFile. The order is: FILETYPE_OBJ, then the flags
// returned by cfg.llvmCodegenFlags(), then RELOC_PIC on every OS except
// Windows, then "<optFile> -o <objFile>".
func llcCommandArgs(cfg buildConfig, optFile, objFile string) []string {
383+
args := []string{FILETYPE_OBJ}
384+
args = append(args, cfg.llvmCodegenFlags()...)
385+
// PIC is ELF/Mach-O specific; avoid on Windows COFF
386+
if runtime.GOOS != OS_WINDOWS {
387+
args = append(args, RELOC_PIC)
388+
}
389+
args = append(args, optFile, "-o", objFile)
390+
return args
391+
}
392+
380393
func (p *Pluto) ScanPlutoFiles(specificScript string) ([]string, []string) {
381394
dirEntries, err := os.ReadDir(p.Cwd)
382395
if err != nil {
@@ -411,8 +424,9 @@ func New(cwd string) *Pluto {
411424
fmt.Println("Current working directory is", cwd)
412425

413426
ptcache := defaultPTCache()
427+
cfg := currentBuildConfig()
414428
// Include version in cache path to isolate different compiler versions
415-
safeVersion, err := sanitizeVersion(Version)
429+
safeVersion, err := sanitizeCacheComponent(Version)
416430
if err != nil {
417431
fmt.Printf("Error: %v\n", err)
418432
os.Exit(1)
@@ -427,6 +441,7 @@ func New(cwd string) *Pluto {
427441
p := &Pluto{
428442
Cwd: cwd,
429443
PtCache: versionedCache,
444+
Config: cfg,
430445
Ctx: llvm.NewContext(),
431446
}
432447

@@ -436,7 +451,15 @@ func New(cwd string) *Pluto {
436451
}
437452

438453
// Use module path (slashes) as unique cache key
454+
targetSegment, err := p.Config.targetCPUCacheSegment()
455+
if err != nil {
456+
fmt.Printf("Error: %v\n", err)
457+
os.Exit(1)
458+
}
439459
p.CacheDir = filepath.Join(p.PtCache, filepath.FromSlash(p.ModPath))
460+
if targetSegment != "" {
461+
p.CacheDir = filepath.Join(p.PtCache, targetSegment, filepath.FromSlash(p.ModPath))
462+
}
440463
fmt.Printf("Cache dir is %s\n", p.CacheDir)
441464
fmt.Println()
442465

@@ -474,7 +497,7 @@ func main() {
474497
// runClean removes the cache directory for the current version.
475498
func runClean() {
476499
ptcache := defaultPTCache()
477-
safeVersion, err := sanitizeVersion(Version)
500+
safeVersion, err := sanitizeCacheComponent(Version)
478501
if err != nil {
479502
fmt.Printf("Error: %v\n", err)
480503
os.Exit(1)
@@ -524,7 +547,7 @@ func runCompile() {
524547
p := New(cwd)
525548

526549
// Prepare runtime once (in PtCache root, shared across all projects)
527-
rtObjs, err := prepareRuntime(p.PtCache)
550+
rtObjs, err := prepareRuntime(p.PtCache, p.Config)
528551
if err != nil {
529552
fmt.Printf("Error preparing runtime: %v\n", err)
530553
os.Exit(1)

runtime.go

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,25 +30,12 @@ func isHashDir(name string) bool {
3030
//go:embed runtime
3131
var runtimeFS embed.FS
3232

33-
const runtimeMarchEnv = "PLUTO_RUNTIME_MARCH"
34-
35-
func runtimeMarchFlag() string {
36-
value := strings.TrimSpace(os.Getenv(runtimeMarchEnv))
37-
if value == "" {
38-
return ""
39-
}
40-
if strings.HasPrefix(value, "-march=") {
41-
return value
42-
}
43-
return "-march=" + value
44-
}
45-
4633
// runtimeCompileFlags returns the compiler flags used for runtime compilation.
4734
// Used by both compileRuntime and metadataHash to keep them in sync.
48-
func runtimeCompileFlags() []string {
35+
func (cfg buildConfig) runtimeCompileFlags() []string {
4936
flags := []string{OPT_LEVEL, C_STD}
50-
if march := runtimeMarchFlag(); march != "" {
51-
flags = append(flags, march)
37+
if target := cfg.clangTargetFlag(runtime.GOARCH); target != "" {
38+
flags = append(flags, target)
5239
}
5340
if runtime.GOOS != OS_WINDOWS {
5441
flags = append(flags, FPIC)
@@ -57,9 +44,9 @@ func runtimeCompileFlags() []string {
5744
}
5845

5946
// metadataHash hashes compiler settings and platform that affect runtime compilation.
60-
func metadataHash(h hash.Hash) {
47+
func metadataHash(h hash.Hash, cfg buildConfig) {
6148
h.Write([]byte(CC))
62-
for _, flag := range runtimeCompileFlags() {
49+
for _, flag := range cfg.runtimeCompileFlags() {
6350
h.Write([]byte(flag))
6451
}
6552
h.Write([]byte(runtime.GOOS))
@@ -70,9 +57,9 @@ func metadataHash(h hash.Hash) {
7057
// Hash includes all files (headers in subdirs matter) but only counts
7158
// top-level .c files since compileRuntime only compiles those.
7259
// Returns short hash (8 chars for directory name) and full hash (for collision check).
73-
func runtimeInfo() (shortHash, fullHash string, srcCount int, err error) {
60+
func runtimeInfo(cfg buildConfig) (shortHash, fullHash string, srcCount int, err error) {
7461
h := sha256.New()
75-
metadataHash(h)
62+
metadataHash(h, cfg)
7663
err = fs.WalkDir(runtimeFS, RUNTIME_DIR, func(path string, d fs.DirEntry, walkErr error) error {
7764
if walkErr != nil {
7865
return walkErr
@@ -123,7 +110,7 @@ func extractRuntime(rtDir string) error {
123110
}
124111

125112
// compileRuntime compiles .c files in rtDir and returns paths to .o files.
126-
func compileRuntime(rtDir string) ([]string, error) {
113+
func compileRuntime(rtDir string, cfg buildConfig) ([]string, error) {
127114
rtSrcs, err := filepath.Glob(filepath.Join(rtDir, "*.c"))
128115
if err != nil {
129116
return nil, fmt.Errorf("glob runtime sources: %w", err)
@@ -135,7 +122,7 @@ func compileRuntime(rtDir string) ([]string, error) {
135122
var rtObjs []string
136123
for _, src := range rtSrcs {
137124
outObj := filepath.Join(rtDir, filepath.Base(src)+OBJ_SUFFIX)
138-
args := append(runtimeCompileFlags(), "-I", rtDir, "-c", src, "-o", outObj)
125+
args := append(cfg.runtimeCompileFlags(), "-I", rtDir, "-c", src, "-o", outObj)
139126
if out, err := exec.Command(CC, args...).CombinedOutput(); err != nil {
140127
return nil, fmt.Errorf("compile %s: %v\n%s", src, err, out)
141128
}
@@ -191,7 +178,7 @@ func cleanupOldRuntimes(runtimeDir string, keep int, minAge int64) {
191178
// prepareRuntime extracts embedded runtime files and compiles them to object files.
192179
// Uses a hash-based directory to cache compiled objects across runs.
193180
// A file lock ensures concurrent processes see either fully compiled runtime or build it.
194-
func prepareRuntime(cacheDir string) ([]string, error) {
181+
func prepareRuntime(cacheDir string, cfg buildConfig) ([]string, error) {
195182
runtimeDir := filepath.Join(cacheDir, RUNTIME_DIR)
196183
if err := os.MkdirAll(runtimeDir, 0755); err != nil {
197184
return nil, fmt.Errorf("create runtime dir: %w", err)
@@ -204,7 +191,7 @@ func prepareRuntime(cacheDir string) ([]string, error) {
204191
}
205192
defer lock.Unlock()
206193

207-
shortHash, fullHash, srcCount, err := runtimeInfo()
194+
shortHash, fullHash, srcCount, err := runtimeInfo(cfg)
208195
if err != nil {
209196
return nil, err
210197
}
@@ -231,7 +218,7 @@ func prepareRuntime(cacheDir string) ([]string, error) {
231218
if err := extractRuntime(rtDir); err != nil {
232219
return nil, err
233220
}
234-
rtObjs, err := compileRuntime(rtDir)
221+
rtObjs, err := compileRuntime(rtDir, cfg)
235222
if err != nil {
236223
return nil, err
237224
}

0 commit comments

Comments
 (0)