diff --git a/.claude/rules.md b/.claude/rules.md index 8c6cf68f..49ad42b2 100644 --- a/.claude/rules.md +++ b/.claude/rules.md @@ -205,6 +205,7 @@ Existing bridges: `regex-bridge.c`, `yyjson-bridge.c`, `os-bridge.c`, `child-pro 5. **Type cast field order must match FULL struct layout** — when the type extends a parent interface, the struct includes ALL parent fields. `as { name, closureInfo }` on a `LiftedFunction extends FunctionNode` (10 fields) reads index 1 instead of index 9. Include every field. 6. **`ret void` not `unreachable`** at end of void functions 7. **Class structs: boolean is `i1`; Interface structs: boolean is `double`** +8. **Set feature flags when emitting gated extern calls** — runtime declarations for C bridges (yyjson, curl, etc.) are conditionally emitted behind flags like `usesJson`, `usesCurl`. Any code path that emits `call @csyyjson_*` must call `ctx.setUsesJson(true)`, etc. Missing this causes "undefined value" errors from `opt` because the `declare` is never emitted. 
## Interface Field Iteration diff --git a/src/codegen/expressions/access/index.ts b/src/codegen/expressions/access/index.ts index 0c01d6b3..29ba6208 100644 --- a/src/codegen/expressions/access/index.ts +++ b/src/codegen/expressions/access/index.ts @@ -35,6 +35,7 @@ export interface IndexAccessGeneratorContext { isStringExpression(expr: Expression): boolean; readonly stringGen: IStringGenerator; ensureDouble(value: string): string; + setUsesJson(value: boolean): void; } /** @@ -427,6 +428,7 @@ export class IndexAccessGenerator { } private generateJSONArrayIndex(expr: IndexAccessNode, params: string[]): string { + this.ctx.setUsesJson(true); // Load JSON array pointer const varName = (expr.object as VariableNode).name; const jsonPtrPtr = this.ctx.getVariableAlloca(varName)!; @@ -514,6 +516,7 @@ export class IndexAccessGenerator { } private generateJSONMemberArrayIndex(expr: IndexAccessNode, params: string[]): string { + this.ctx.setUsesJson(true); const jsonPtr = this.ctx.generateExpression(expr.object, params); const ptrType = this.ctx.getVariableType(jsonPtr); diff --git a/src/codegen/llvm-generator.ts b/src/codegen/llvm-generator.ts index b0cc8f6b..23b698d9 100644 --- a/src/codegen/llvm-generator.ts +++ b/src/codegen/llvm-generator.ts @@ -117,6 +117,8 @@ import type { TypeChecker } from "../typescript/type-checker.js"; import { InterfaceStructGenerator } from "./types/interface-struct-generator.js"; import { JsonObjectMeta } from "./expressions/access/member.js"; import type { TargetInfo } from "../target-types.js"; +import { checkClosureMutations } from "../semantic/closure-mutation-checker.js"; +import { checkUnionTypes } from "../semantic/union-type-checker.js"; export interface SemaSymbolData { names: string[]; @@ -2340,6 +2342,10 @@ export class LLVMGenerator extends BaseGenerator implements IGeneratorContext { } generateParts(): string[] { + // Run semantic checks before emitting any IR. 
+    checkClosureMutations(this.ast);
+    checkUnionTypes(this.ast);
+
     const irParts: string[] = [];
 
     const interfaceStructDefs = this.interfaceStructGen.generateStructTypeDefinitions();
diff --git a/src/semantic/closure-mutation-checker.ts b/src/semantic/closure-mutation-checker.ts
new file mode 100644
index 00000000..3936aa7b
--- /dev/null
+++ b/src/semantic/closure-mutation-checker.ts
@@ -0,0 +1,362 @@
+// Closure mutation checker — semantic pass run before IR generation.
+// ChadScript closures capture by value, so mutations to a variable after it has been
+// captured produce silently incorrect results. This pass detects such mutations and
+// turns them into a compile error with a clear message.
+
+import { ClosureAnalyzer } from "../codegen/infrastructure/closure-analyzer.js";
+import type {
+  AST,
+  Statement,
+  Expression,
+  BlockStatement,
+  VariableDeclaration,
+  AssignmentStatement,
+  IfStatement,
+  WhileStatement,
+  DoWhileStatement,
+  ForStatement,
+  ForOfStatement,
+  TryStatement,
+  SwitchStatement,
+  ReturnStatement,
+  ThrowStatement,
+  ArrowFunctionNode,
+  SourceLocation,
+} from "../ast/types.js";
+
+// Entry point: checks the whole AST; a violation prints a diagnostic and exits the process.
+export function checkClosureMutations(ast: AST): void {
+  const checker = new ClosureMutationChecker();
+  checker.checkAST(ast);
+}
+
+// Walks statements in source order, threading two name lists through the walk:
+//   scopeVarNames — variables visible at the current point; copied with slice() when
+//                   entering a nested block so inner declarations do not escape.
+//   capturedNames — variables captured by a closure so far; shared with nested blocks,
+//                   but started empty for each function, method, and arrow body.
+class ClosureMutationChecker {
+  private analyzer: ClosureAnalyzer;
+
+  constructor() {
+    this.analyzer = new ClosureAnalyzer();
+  }
+
+  checkAST(ast: AST): void {
+    // Walk all top-level items in source order.
+    // topLevelItems is the combined ordered list of declarations + expressions.
+    const items = ast.topLevelItems;
+    if (items && items.length > 0) {
+      this.walkStatements(items as Statement[], [], []);
+    }
+
+    // Walk each standalone function body (fresh scope per function).
+    for (let i = 0; i < ast.functions.length; i++) {
+      const fn = ast.functions[i];
+      // Function params are in scope for the entire body.
+      this.walkBlock(fn.body, fn.params.slice(), []);
+    }
+
+    // Walk each class method body (fresh scope per method).
+    for (let i = 0; i < ast.classes.length; i++) {
+      const cls = ast.classes[i];
+      for (let j = 0; j < cls.methods.length; j++) {
+        const method = cls.methods[j];
+        this.walkBlock(method.body, method.params.slice(), []);
+      }
+    }
+  }
+
+  private walkStatements(
+    stmts: Statement[],
+    scopeVarNames: string[],
+    capturedNames: string[],
+  ): void {
+    for (let i = 0; i < stmts.length; i++) {
+      this.walkStatement(stmts[i], scopeVarNames, capturedNames);
+    }
+  }
+
+  private walkBlock(block: BlockStatement, scopeVarNames: string[], capturedNames: string[]): void {
+    this.walkStatements(block.statements, scopeVarNames, capturedNames);
+  }
+
+  // Dispatch on stmt.type. Declarations extend scopeVarNames in place; a simple-name
+  // assignment to a name already in capturedNames is reported as an error.
+  private walkStatement(stmt: Statement, scopeVarNames: string[], capturedNames: string[]): void {
+    const s = stmt as { type: string };
+    const stype = s.type;
+
+    if (stype === "variable_declaration") {
+      const decl = stmt as VariableDeclaration;
+      // Scan the initializer for arrow functions before adding the var to scope
+      // (the variable is not in scope inside its own initializer).
+      if (decl.value !== null && decl.value !== undefined) {
+        this.scanExprForCaptures(decl.value as Expression, scopeVarNames, capturedNames);
+      }
+      scopeVarNames.push(decl.name);
+    } else if (stype === "assignment") {
+      const assign = stmt as AssignmentStatement;
+      // Simple-name reassignment after capture is the error we're looking for.
+      // Member-access assignments (obj.x = y) don't reassign the binding itself.
+      if (capturedNames.indexOf(assign.name) !== -1) {
+        this.reportError(assign.name, assign.loc);
+      }
+      this.scanExprForCaptures(assign.value, scopeVarNames, capturedNames);
+    } else if (stype === "if") {
+      const ifStmt = stmt as IfStatement;
+      this.scanExprForCaptures(ifStmt.condition, scopeVarNames, capturedNames);
+      // Pass a scope copy into each branch so declarations there don't escape.
+      // capturedNames is shared: captures inside a branch still protect against
+      // mutations that appear later in the outer scope.
+      this.walkBlock(ifStmt.thenBlock, scopeVarNames.slice(), capturedNames);
+      if (ifStmt.elseBlock !== null && ifStmt.elseBlock !== undefined) {
+        this.walkBlock(ifStmt.elseBlock, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "while") {
+      const whileStmt = stmt as WhileStatement;
+      this.scanExprForCaptures(whileStmt.condition, scopeVarNames, capturedNames);
+      this.walkBlock(whileStmt.body, scopeVarNames.slice(), capturedNames);
+    } else if (stype === "do_while") {
+      const doWhileStmt = stmt as DoWhileStatement;
+      this.walkBlock(doWhileStmt.body, scopeVarNames.slice(), capturedNames);
+      this.scanExprForCaptures(doWhileStmt.condition, scopeVarNames, capturedNames);
+    } else if (stype === "for") {
+      const forStmt = stmt as ForStatement;
+      // init can declare a new loop variable; give it a fresh scope copy.
+      const forScope = scopeVarNames.slice();
+      if (forStmt.init !== null && forStmt.init !== undefined) {
+        this.walkStatement(forStmt.init as Statement, forScope, capturedNames);
+      }
+      if (forStmt.condition !== null && forStmt.condition !== undefined) {
+        this.scanExprForCaptures(forStmt.condition, forScope, capturedNames);
+      }
+      this.walkBlock(forStmt.body, forScope.slice(), capturedNames);
+      if (forStmt.update !== null && forStmt.update !== undefined) {
+        const upd = forStmt.update as { type: string };
+        if (upd.type === "assignment") {
+          this.walkStatement(forStmt.update as Statement, forScope, capturedNames);
+        } else {
+          this.scanExprForCaptures(forStmt.update as Expression, forScope, capturedNames);
+        }
+      }
+    } else if (stype === "for_of") {
+      const forOfStmt = stmt as ForOfStatement;
+      this.scanExprForCaptures(forOfStmt.iterable, scopeVarNames, capturedNames);
+      const forOfScope = scopeVarNames.slice();
+      forOfScope.push(forOfStmt.variableName);
+      if (forOfStmt.destructuredNames) {
+        for (let dn = 0; dn < forOfStmt.destructuredNames.length; dn++) {
+          forOfScope.push(forOfStmt.destructuredNames[dn]);
+        }
+      }
+      this.walkBlock(forOfStmt.body, forOfScope, capturedNames);
+    } else if (stype === "try") {
+      const tryStmt = stmt as TryStatement;
+      this.walkBlock(tryStmt.tryBlock, scopeVarNames.slice(), capturedNames);
+      if (tryStmt.catchBody !== null && tryStmt.catchBody !== undefined) {
+        const catchScope = scopeVarNames.slice();
+        if (tryStmt.catchParam !== null && tryStmt.catchParam !== undefined) {
+          catchScope.push(tryStmt.catchParam as string);
+        }
+        this.walkBlock(tryStmt.catchBody, catchScope, capturedNames);
+      }
+      if (tryStmt.finallyBlock !== null && tryStmt.finallyBlock !== undefined) {
+        this.walkBlock(tryStmt.finallyBlock, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "switch") {
+      const switchStmt = stmt as SwitchStatement;
+      this.scanExprForCaptures(switchStmt.discriminant, scopeVarNames, capturedNames);
+      for (let ci = 0; ci < switchStmt.cases.length; ci++) {
+        const c = switchStmt.cases[ci];
+        if (c.test !== null && c.test !== undefined) {
+          this.scanExprForCaptures(c.test as Expression, scopeVarNames, capturedNames);
+        }
+        this.walkStatements(c.consequent, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "return") {
+      const retStmt = stmt as ReturnStatement;
+      if (retStmt.value !== null && retStmt.value !== undefined) {
+        this.scanExprForCaptures(retStmt.value as Expression, scopeVarNames, capturedNames);
+      }
+    } else if (stype === "throw") {
+      const throwStmt = stmt as ThrowStatement;
+      this.scanExprForCaptures(throwStmt.argument, scopeVarNames, capturedNames);
+    } else if (stype === "block") {
+      this.walkBlock(stmt as BlockStatement, scopeVarNames.slice(), capturedNames);
+    } else if (stype !== "break" && stype !== "continue") {
+      // Expressions used as statements (call, method_call, new, await, etc.)
+      this.scanExprForCaptures(stmt as Expression, scopeVarNames, capturedNames);
+    }
+  }
+
+  // Walk an expression searching for arrow function literals. When one is found:
+  //   1. Use ClosureAnalyzer to identify which outer-scope variables it captures.
+  //   2. Add those names to capturedNames so subsequent mutations are caught.
+  //   3. Recurse into the arrow body with a fresh scope (function boundary).
+  private scanExprForCaptures(
+    expr: Expression,
+    scopeVarNames: string[],
+    capturedNames: string[],
+  ): void {
+    const e = expr as { type: string };
+    const etype = e.type;
+
+    if (etype === "arrow_function") {
+      const arrow = expr as ArrowFunctionNode;
+      // Build a dummy-type parallel array — ClosureAnalyzer only uses names for
+      // free-variable detection; llvmType in the result is unused here.
+      const dummyTypes: string[] = [];
+      for (let i = 0; i < scopeVarNames.length; i++) {
+        dummyTypes.push("double");
+      }
+      const info = this.analyzer.analyze(
+        arrow.params,
+        arrow.body,
+        scopeVarNames,
+        dummyTypes,
+        "check",
+      );
+      for (let i = 0; i < info.captures.length; i++) {
+        const capName = info.captures[i].name;
+        if (capturedNames.indexOf(capName) === -1) {
+          capturedNames.push(capName);
+        }
+      }
+      // Recurse into the arrow body as a new function scope.
+      const arrowBodyTyped = arrow.body as { type: string };
+      if (arrowBodyTyped.type === "block") {
+        this.walkBlock(arrow.body as BlockStatement, arrow.params.slice(), []);
+      } else {
+        // Expression body (`x => expr`): it holds no statements itself, but it can contain
+        // nested arrows whose block bodies still need checking.
+        this.scanExprForCaptures(arrow.body as Expression, arrow.params.slice(), []);
+      }
+    } else if (etype === "binary") {
+      const binExpr = expr as { type: string; left: Expression; right: Expression };
+      this.scanExprForCaptures(binExpr.left, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(binExpr.right, scopeVarNames, capturedNames);
+    } else if (etype === "unary") {
+      const unaryExpr = expr as { type: string; operand: Expression };
+      this.scanExprForCaptures(unaryExpr.operand, scopeVarNames, capturedNames);
+    } else if (etype === "call") {
+      const callExpr = expr as { type: string; args: Expression[] };
+      for (let i = 0; i < callExpr.args.length; i++) {
+        this.scanExprForCaptures(callExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "method_call") {
+      const mcExpr = expr as { type: string; object: Expression; args: Expression[] };
+      this.scanExprForCaptures(mcExpr.object, scopeVarNames, capturedNames);
+      for (let i = 0; i < mcExpr.args.length; i++) {
+        this.scanExprForCaptures(mcExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "member_access") {
+      const maExpr = expr as { type: string; object: Expression };
+      this.scanExprForCaptures(maExpr.object, scopeVarNames, capturedNames);
+    } else if (etype === "index_access") {
+      const iaExpr = expr as { type: string; object: Expression; index: Expression };
+      this.scanExprForCaptures(iaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaExpr.index, scopeVarNames, capturedNames);
+    } else if (etype === "array") {
+      const arrExpr = expr as { type: string; elements: Expression[] };
+      for (let i = 0; i < arrExpr.elements.length; i++) {
+        this.scanExprForCaptures(arrExpr.elements[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "object") {
+      const objExpr = expr as {
+        type: string;
+        properties: Array<{ key: string; value: Expression }>;
+      };
+      for (let i = 0; i < objExpr.properties.length; i++) {
+        this.scanExprForCaptures(objExpr.properties[i].value, scopeVarNames, capturedNames);
+      }
+    } else if (etype === "template_literal") {
+      const tlExpr = expr as { type: string; parts: (string | Expression)[] };
+      for (let i = 0; i < tlExpr.parts.length; i++) {
+        const part = tlExpr.parts[i];
+        // Raw string segments have no .type; Expression nodes do.
+        const partTyped = part as { type: string };
+        if (partTyped.type) {
+          this.scanExprForCaptures(part as Expression, scopeVarNames, capturedNames);
+        }
+      }
+    } else if (etype === "conditional") {
+      const condExpr = expr as {
+        type: string;
+        condition: Expression;
+        consequent: Expression;
+        alternate: Expression;
+      };
+      this.scanExprForCaptures(condExpr.condition, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(condExpr.consequent, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(condExpr.alternate, scopeVarNames, capturedNames);
+    } else if (etype === "await") {
+      const awaitExpr = expr as { type: string; argument: Expression };
+      this.scanExprForCaptures(awaitExpr.argument, scopeVarNames, capturedNames);
+    } else if (etype === "new") {
+      const newExpr = expr as { type: string; args: Expression[] };
+      for (let i = 0; i < newExpr.args.length; i++) {
+        this.scanExprForCaptures(newExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "type_assertion") {
+      const taExpr = expr as { type: string; expression: Expression };
+      this.scanExprForCaptures(taExpr.expression, scopeVarNames, capturedNames);
+    } else if (etype === "spread_element") {
+      const seExpr = expr as { type: string; argument: Expression };
+      this.scanExprForCaptures(seExpr.argument, scopeVarNames, capturedNames);
+    } else if (etype === "member_access_assignment") {
+      const maaExpr = expr as { type: string; object: Expression; value: Expression };
+      this.scanExprForCaptures(maaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(maaExpr.value, scopeVarNames, capturedNames);
+    } else if (etype === "index_access_assignment") {
+      const iaaExpr = expr as {
+        type: string;
+        object: Expression;
+        index: Expression;
+        value: Expression;
+      };
+      this.scanExprForCaptures(iaaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaaExpr.index, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaaExpr.value, scopeVarNames, capturedNames);
+    } else if (etype === "map") {
+      const mapExpr = expr as {
+        type: string;
+        entries: Array<{ key: Expression; value: Expression }>;
+      };
+      for (let i = 0; i < mapExpr.entries.length; i++) {
+        this.scanExprForCaptures(mapExpr.entries[i].key, scopeVarNames, capturedNames);
+        this.scanExprForCaptures(mapExpr.entries[i].value, scopeVarNames, capturedNames);
+      }
+    } else if (etype === "set") {
+      const setExpr = expr as { type: string; values: Expression[] };
+      for (let i = 0; i < setExpr.values.length; i++) {
+        this.scanExprForCaptures(setExpr.values[i], scopeVarNames, capturedNames);
+      }
+    }
+    // Leaves: variable, number, string, boolean, null, undefined, regex, this, super — no sub-expressions.
+  }
+
+  // Print the diagnostic (prefixed with file:line:column when loc is available) and
+  // exit the process with status 1.
+  private reportError(varName: string, loc?: SourceLocation): void {
+    let msg = "";
+    if (loc !== null && loc !== undefined) {
+      const file = loc.file || "";
+      msg +=
+        file +
+        ":" +
+        loc.line +
+        ":" +
+        (loc.column + 1) +
+        ": error: variable '" +
+        varName +
+        "' is captured by a closure but reassigned after capture\n";
+    } else {
+      msg +=
+        "error: variable '" + varName + "' is captured by a closure but reassigned after capture\n";
+    }
+    msg += " note: ChadScript closures capture by value; the closure will not see this change\n";
+    console.error(msg);
+    process.exit(1);
+  }
+}
diff --git a/src/semantic/union-type-checker.ts b/src/semantic/union-type-checker.ts
new file mode 100644
index 00000000..8ab0408e
--- /dev/null
+++ b/src/semantic/union-type-checker.ts
@@ -0,0 +1,136 @@
+// Union type checker — semantic pass that rejects unsafe union type aliases
+// used as function/method parameter types.
+//
+// The existing checkUnsafeUnionType (called by SemanticAnalyzer) catches inline
+// unions with different LLVM representations (e.g., `string | number`). But type
+// alias unions like `type Mixed = string | number` bypass that check because the
+// parameter type string is just "Mixed" (no " | " to split on).
+//
+// This pass resolves type aliases and checks whether their members would map to
+// different LLVM types. When they do, the codegen emits the alias name literally
+// as the LLVM param type, which defaults to i8* — causing a segfault if the
+// caller passes a value with a different representation (e.g., double for number).
+
+import type { AST, SourceLocation } from "../ast/types.js";
+import { tsTypeToLlvm } from "../codegen/infrastructure/type-system.js";
+
+export function checkUnionTypes(ast: AST): void {
+  const checker = new UnionTypeChecker(ast);
+  checker.check();
+}
+
+class UnionTypeChecker {
+  private ast: AST;
+  // Names of type aliases whose union members have different LLVM representations
+  private unsafeAliases: string[];
+
+  constructor(ast: AST) {
+    this.ast = ast;
+    this.unsafeAliases = [];
+    this.buildUnsafeAliasIndex();
+  }
+
+  // Pre-compute which type alias names resolve to unions with mixed LLVM types.
+  private buildUnsafeAliasIndex(): void {
+    if (!this.ast.typeAliases) return;
+    for (let i = 0; i < this.ast.typeAliases.length; i++) {
+      const alias = this.ast.typeAliases[i];
+      const members = alias.unionMembers;
+      if (!members || members.length < 2) continue;
+
+      // Collect LLVM types for non-null members
+      const llvmTypes: string[] = [];
+      for (let j = 0; j < members.length; j++) {
+        const m = members[j].trim();
+        if (m === "null" || m === "undefined") continue;
+        llvmTypes.push(tsTypeToLlvm(m));
+      }
+      if (llvmTypes.length < 2) continue;
+
+      // Check if any member has a different LLVM type than the first
+      let hasMixed = false;
+      for (let j = 1; j < llvmTypes.length; j++) {
+        if (llvmTypes[j] !== llvmTypes[0]) {
+          hasMixed = true;
+          break;
+        }
+      }
+      if (hasMixed) {
+        this.unsafeAliases.push(alias.name);
+      }
+    }
+  }
+
+  private isUnsafeAlias(typeName: string): boolean {
+    let name = typeName; // peel every "[]" suffix: Alias[] and Alias[][] both resolve to Alias
+    while (name.endsWith("[]")) {
+      name = name.substring(0, name.length - 2);
+    }
+    return this.unsafeAliases.indexOf(name) !== -1;
+  }
+
+  check(): void {
+    // Check standalone function parameters
+    for (let i = 0; i < this.ast.functions.length; i++) {
+      const fn = this.ast.functions[i];
+      if (fn.declare) continue;
+      this.checkParams(fn.name, fn.paramTypes, fn as { loc?: SourceLocation });
+    }
+
+    // Check class method parameters
+    for (let i = 0; i < this.ast.classes.length; i++) {
+      const cls = this.ast.classes[i];
+      for (let j = 0; j < cls.methods.length; j++) {
+        const method = cls.methods[j];
+        const qualName = cls.name + "." + method.name;
+        this.checkParams(qualName, method.paramTypes, method as { loc?: SourceLocation });
+      }
+    }
+  }
+
+  private checkParams(
+    funcName: string,
+    paramTypes: string[] | undefined,
+    locHolder: { loc?: SourceLocation },
+  ): void {
+    if (!paramTypes) return;
+    for (let i = 0; i < paramTypes.length; i++) {
+      if (this.isUnsafeAlias(paramTypes[i])) {
+        this.reportError(funcName, paramTypes[i], locHolder.loc);
+      }
+    }
+  }
+
+  private reportError(funcName: string, aliasName: string, loc: SourceLocation | undefined): void {
+    let msg = "";
+    if (loc !== null && loc !== undefined) {
+      const file = loc.file || "";
+      msg +=
+        file +
+        ":" +
+        loc.line +
+        ":" +
+        (loc.column + 1) +
+        ": error: in function '" +
+        funcName +
+        "', parameter type '" +
+        aliasName +
+        "' is a union type alias with mixed representations\n";
+    } else {
+      msg +=
+        "error: in function '" +
+        funcName +
+        "', parameter type '" +
+        aliasName +
+        "' is a union type alias with mixed representations\n";
+    }
+    msg +=
+      " note: '" +
+      aliasName +
+      "' is a type alias for a union whose members have different native types (e.g., i8* vs double)\n";
+    msg +=
+      " note: this will be miscompiled and segfault at runtime. Use a common base interface or separate the types.\n";
+    console.error(msg);
+    process.exit(1);
+  }
+}
diff --git a/tests/fixtures/closures/closure-capture-by-value-ok.ts b/tests/fixtures/closures/closure-capture-by-value-ok.ts
new file mode 100644
index 00000000..b83cdbc7
--- /dev/null
+++ b/tests/fixtures/closures/closure-capture-by-value-ok.ts
@@ -0,0 +1,13 @@
+// @test-description: closure captures variable with no post-capture mutation — valid
+// Reassignment is fine as long as it happens BEFORE the closure is created.
+// The checker only errors on reassignment AFTER capture.
+function runTest(): void { + let threshold = 3; + threshold = 5; // reassigned before any closure captures it — no error + const nums = [1, 2, 3, 4, 5, 6]; + const big = nums.filter((x) => x > threshold); // captures threshold (value 5) + if (big.length === 1 && big[0] === 6) { + console.log("TEST_PASSED"); + } +} +runTest(); diff --git a/tests/fixtures/closures/closure-capture-mutation-error.ts b/tests/fixtures/closures/closure-capture-mutation-error.ts new file mode 100644 index 00000000..b1fef8c1 --- /dev/null +++ b/tests/fixtures/closures/closure-capture-mutation-error.ts @@ -0,0 +1,6 @@ +// @test-compile-error: variable 'x' is captured by a closure but reassigned after capture +// @test-description: reassigning a captured variable is a compile error +let x = 1; +const f = () => console.log(x); +x = 2; +f(); diff --git a/tests/fixtures/types/union-non-nullable-error.ts b/tests/fixtures/types/union-non-nullable-error.ts new file mode 100644 index 00000000..353a419d --- /dev/null +++ b/tests/fixtures/types/union-non-nullable-error.ts @@ -0,0 +1,6 @@ +// @test-compile-error: has members with different native representations +// @test-description: inline union types with different representations are a compile error +function process(x: string | number): void { + console.log("unreachable"); +} +process("hello"); diff --git a/tests/fixtures/types/union-type-alias-error.ts b/tests/fixtures/types/union-type-alias-error.ts new file mode 100644 index 00000000..f14aa1b0 --- /dev/null +++ b/tests/fixtures/types/union-type-alias-error.ts @@ -0,0 +1,9 @@ +// @test-compile-error: union type alias with mixed representations +// @test-description: type alias unions with mixed representations are a compile error +type StringOrNumber = string | number; + +function display(val: StringOrNumber): void { + console.log("value"); +} + +display("hello");