From c354d0b434ff80d838a41b1154879fdd883eea3c Mon Sep 17 00:00:00 2001 From: saqut Date: Wed, 27 May 2026 09:39:47 +0300 Subject: [PATCH] feat: implement source location tracking and enhance parser AST nodes --- issues.md | 880 ++++++++++++++++++++++++++++++++++++ src/core/location.hpp | 80 ++++ src/core/sourcefile.hpp | 140 ++++++ src/lexer/lexer.hpp | 37 +- src/parser/ast.hpp | 95 +++- src/parser/parser.hpp | 104 ++++- src/tokenizer/token.hpp | 2 + src/tokenizer/tokenizer.hpp | 10 +- 8 files changed, 1319 insertions(+), 29 deletions(-) create mode 100644 issues.md create mode 100644 src/core/location.hpp create mode 100644 src/core/sourcefile.hpp diff --git a/issues.md b/issues.md new file mode 100644 index 0000000..1c32407 --- /dev/null +++ b/issues.md @@ -0,0 +1,880 @@ +# Complete saQut Compiler Issue List (English) + +Copy each issue's title and body directly into Gitea. +Issues are ordered by stage, from most urgent to long-term. + +--- + +## Aşama 0: Metadata and Location Tracking + +### Issue 0.1 — SourceFile and SourceLocation implementation + +**Title:** Aşama 0.1 — Implement SourceFile and SourceLocation classes + +**Body:** + +**Goal:** Create the foundational metadata system so every token and AST node knows its exact origin (file, line, column, offset). + +**Files to create/modify:** +- `src/core/location.hpp` — new file +- `src/core/sourcefile.hpp` — new file + +**Requirements:** +- `SourceLocation` struct with fields: `filePath` (string), `line` (int), `column` (int), `offset` (int). +- `SourceFile` class that stores the full source text and a precomputed vector of line-start offsets. Provides `offsetToLocation(int offset) -> SourceLocation` using binary search (O(log n)). +- `SourceFile` constructor takes file path and source text; computes line offsets in one pass (O(n)). +- Both classes live in the `src/core/` directory. + +**Success criteria:** +- Given a source string and offset, `offsetToLocation` returns correct line and column. 
+- Binary search is used, not linear scan. + +--- + +### Issue 0.2 — Add location tracking to Lexer + +**Title:** Aşama 0.2 — Add line/column tracking to Lexer + +**Body:** + +**Goal:** Lexer updates current line and column on every `nextChar()` call. + +**Files to modify:** +- `src/lexer/lexer.hpp` + +**Requirements:** +- Add `int currentLine`, `int currentColumn` private fields to `Lexer`. +- On `nextChar()`: if character is `\n`, increment line and reset column; otherwise increment column. +- Add `SourceLocation getLocation()` method returning current position. +- Initialize line=1, column=1 in `setText()`. +- Modify `INumber` struct to include `SourceLocation startLoc` and `SourceLocation endLoc` (or keep start/end offsets and add location separately — prefer using `SourceLocation` fields). + +**Success criteria:** +- After lexing any source, calling `getLocation()` returns correct line and column. +- `INumber` carries source location info. + +--- + +### Issue 0.3 — Add SourceLocation to Token base class + +**Title:** Aşama 0.3 — Add SourceLocation to all Token types + +**Body:** + +**Goal:** Every token produced by the Tokenizer carries its SourceLocation. + +**Files to modify:** +- `src/tokenizer/token.hpp` + +**Requirements:** +- Add `SourceLocation loc` field to the base `Token` class. +- Remove or deprecate `int start, int end` fields (replace with `loc` data). +- Update `StringToken`, `NumberToken`, `IdentifierToken` if they reference start/end directly. +- Ensure all token constructors initialize `loc`. + +**Success criteria:** +- After tokenizing, every token has a valid SourceLocation. +- Old start/end offsets are derivable from SourceLocation if needed (but location is primary). + +--- + +### Issue 0.4 — Add SourceLocation to ASTNode base class + +**Title:** Aşama 0.4 — Add SourceLocation to all AST nodes + +**Body:** + +**Goal:** Every AST node knows its originating source location. 
+ +**Files to modify:** +- `src/parser/ast.hpp` + +**Requirements:** +- Add `SourceLocation loc` field to `ASTNode` base class. +- Optionally add `SourceLocation endLoc` for range. +- Parser must set `loc` when creating AST nodes from tokens. +- Update `toJson()` and `log()` methods to include location info. + +**Success criteria:** +- JSON output includes `"location": {"file": "...", "line": N, "column": M}` for every node. +- Log output shows location when available. + +--- + +## Aşama 1: CLI and REPL Mode + +### Issue 1.1 — Implement REPL mode with readline support + +**Title:** Aşama 1.1 — Implement REPL mode (`saqut` without arguments) + +**Body:** + +**Goal:** Running `saqut` without arguments enters an interactive REPL loop. + +**Files to create/modify:** +- `src/cli/repl.hpp` — new file +- `src/main.cpp` — modify to detect no-args and launch REPL + +**Requirements:** +- REPL prompt: `> ` +- Each line is parsed, evaluated, and the result printed. +- `.ast` command prints the AST of the last expression. +- `.tokens` command prints the token list of the last expression. +- `.symbols` command prints the current symbol table. +- `.exit` or `.quit` exits the REPL. +- Multi-line input support: when a block is started (`{`), keep reading until `}`. + +**Success criteria:** +- `./saqut` launches REPL. +- `.ast`, `.tokens`, `.symbols` work correctly. +- Multi-line input accumulates until balanced braces. + +--- + +### Issue 1.2 — Implement stdin mode (`saqut -`) + +**Title:** Aşama 1.2 — Implement stdin reading mode + +**Body:** + +**Goal:** `saqut -` reads source code from standard input. + +**Files to modify:** +- `src/cli/args.hpp` + +**Requirements:** +- When `-` is passed as positional argument, read all stdin until EOF. +- Works with all commands: `saqut run -`, `saqut tokens -`, etc. +- Remove the current "TODO" stub and implement fully. + +**Success criteria:** +- `echo "int main() { return 42; }" | ./saqut run -` works. 
+- `cat file.sqt | ./saqut tokens -` works. + +--- + +### Issue 1.3 — Implement output file support for all commands + +**Title:** Aşama 1.3 — Support `-o/--output` flag for all commands + +**Body:** + +**Goal:** All CLI commands respect `-o outputfile` to write results to a file instead of stdout. + +**Files to modify:** +- `src/cli/commands/run.hpp` +- `src/cli/commands/tokens.hpp` +- `src/cli/commands/symbols.hpp` + +**Requirements:** +- `cmdRun`, `cmdTokens`, `cmdSymbols` already partially support `-o`; ensure all do. +- If `-o` is provided, write output to the specified file; otherwise stdout. +- Handle file open errors gracefully. + +**Success criteria:** +- `saqut tokens source.sqt -o tokens.txt` writes to file. +- `saqut symbols source.sqt --output=symbols.json` writes JSON to file. + +--- + +## Aşama 2: AST — Memory Monster + +### Issue 2.1 — Migrate to unique_ptr for AST-owned tokens + +**Title:** Aşama 2.1 — Use std::unique_ptr for token ownership in AST + +**Body:** + +**Goal:** AST nodes own their tokens via `std::unique_ptr`, eliminating memory leaks. + +**Files to modify:** +- `src/parser/ast.hpp` +- `src/parser/parser.hpp` +- `src/parser/token.hpp` + +**Requirements:** +- `ParserToken::token` changes from `Token*` to `std::unique_ptr<Token>`. +- All AST nodes that store a `ParserToken` or `Token*` must use `std::unique_ptr`. +- Parser transfers ownership when creating nodes. +- Remove manual `delete` calls on tokens (they are now owned by AST nodes). + +**Success criteria:** +- No memory leaks when parsing and deleting AST. +- Valgrind/ASan reports zero leaks on test cases. + +--- + +### Issue 2.2 — Implement ASTNode::getSourceText() + +**Title:** Aşama 2.2 — Add getSourceText() and getSourceRange() to ASTNode + +**Body:** + +**Goal:** Given any AST node, retrieve the exact source code substring it represents. 
+ +**Files to modify:** +- `src/parser/ast.hpp` +- `src/core/sourcefile.hpp` + +**Requirements:** +- `ASTNode::getSourceText()` returns `std::string` — the original source code for this node. +- `ASTNode::getSourceRange()` returns `std::pair` (start and end). +- Requires AST nodes to store a reference to the `SourceFile` (or the full source text). +- This powers future rich error messages with `^^^^` source highlighting. + +**Success criteria:** +- For a BinaryExpression node representing `a + b`, `getSourceText()` returns `"a + b"`. +- For an IfStatement, returns the entire `if (...) { ... }` block text. + +--- + +### Issue 2.3 — Implement Graphviz DOT format output + +**Title:** Aşama 2.3 — Add --format=dot for AST visualization + +**Body:** + +**Goal:** Export the AST as a Graphviz DOT file for graphical visualization. + +**Files to create/modify:** +- `src/format/dot.hpp` — new file +- `src/cli/commands/ast.hpp` — modify to support `--format=dot` + +**Requirements:** +- Implement `astToDot(ASTNode*) -> std::string` function. +- Each node becomes a labeled box. +- Parent-child relationships become directed edges. +- Node labels show kind and name (e.g., `BinaryExpression +`). +- Output is valid DOT format, renderable by `dot -Tpng -o ast.png ast.dot`. + +**Success criteria:** +- `saqut ast source.sqt --format=dot -o ast.dot` produces a valid DOT file. +- The resulting image shows a readable tree. + +--- + +## Aşama 3: Symbol Table + +### Issue 3.1 — Implement Symbol and SymbolTable classes + +**Title:** Aşama 3.1 — Implement Symbol struct and SymbolTable class with nested scopes + +**Body:** + +**Goal:** Build a full symbol table with nested scope support. 
+ +**Files to create:** +- `src/symbol/symbol.hpp` — new file +- `src/symbol/symbol_table.hpp` — new file + +**Requirements:** + +`Symbol` struct: +- `name` (string) +- `kind` (enum: Variable, Function, Parameter, Type, Struct) +- `type` (Type* or string for now) +- `definitionLoc` (SourceLocation) +- `references` (vector of SourceLocation) +- `scope` (pointer to parent scope or scope level) +- `metadata` (optional map) + +`SymbolTable` class: +- Nested scope stack: `enterScope()`, `exitScope()`. +- `define(Symbol) -> bool` (returns false on duplicate in same scope). +- `resolve(name) -> Symbol*` (searches innermost to outermost). +- `addReference(name, location)` (appends to symbol's reference list). +- `getAllSymbols() -> vector` (flat list of all symbols in all scopes). +- `toJson() -> string` for serialization. + +**Success criteria:** +- Nested scopes work: variable in inner scope shadows outer. +- Duplicate definition in same scope returns false. +- resolve finds symbols across scope boundaries. + +--- + +### Issue 3.2 — Implement SymbolCollector AST walker + +**Title:** Aşama 3.2 — Implement SymbolCollector that populates SymbolTable from AST + +**Body:** + +**Goal:** Walk the AST and populate the SymbolTable with all definitions and references. + +**Files to create/modify:** +- `src/symbol/symbol_collector.hpp` — new file +- Replace or refactor the simple `collectSymbolsRecursive` in `src/json.hpp` + +**Requirements:** +- `SymbolCollector` class with method `collect(ASTNode* root, SymbolTable* table)`. +- Walks all AST node types (Program, FunctionDecl, VariableDecl, Block, etc.). +- Calls `table->define()` for declarations. +- Calls `table->addReference()` for identifier usages. +- Handles all AST node types currently in `ast.hpp` (19 types). +- Replaces the ad-hoc `SymbolEntry` vector with proper SymbolTable population. + +**Success criteria:** +- After parsing `source.sqt`, SymbolTable contains all functions and variables. 
+- References are collected for each symbol. +- `saqut symbols source.sqt` shows the enriched data. + +--- + +### Issue 3.3 — Semantic error: undefined variable + +**Title:** Aşama 3.3 — Report "undefined variable" errors using SymbolTable + +**Body:** + +**Goal:** When a variable is used before definition, report a clear error with location. + +**Files to modify:** +- `src/symbol/symbol_collector.hpp` +- `src/core/diagnostic.hpp` — new file (or extend existing error reporting) + +**Requirements:** +- During symbol collection, when an identifier reference has no matching `resolve()`, emit a diagnostic. +- Diagnostic includes: error level, SourceLocation, message, optional hint. +- `"Variable 'x' is not defined. Did you mean 'xy'?"` if a close match exists (Levenshtein distance < 3). +- Diagnostic system supports multiple errors (don't stop at first). + +**Success criteria:** +- `int main() { return x; }` reports: `Error: 'x' is not defined at line 1 column 19`. +- Typos suggest close matches. + +--- + +### Issue 3.4 — Semantic error: duplicate definition + +**Title:** Aşama 3.4 — Report "duplicate definition" errors + +**Body:** + +**Goal:** When a symbol is defined twice in the same scope, report an error. + +**Files to modify:** +- `src/symbol/symbol_table.hpp` +- `src/symbol/symbol_collector.hpp` + +**Requirements:** +- `SymbolTable::define()` returns false on duplicate; collector emits diagnostic. +- Error message: `"Function 'main' is already defined. Previous definition at line X."` +- Works for variables, functions, structs. + +**Success criteria:** +- Two `int main()` definitions produce an error. +- Two `int x` in the same block produce an error. +- Shadowing in nested scopes is allowed (not an error). 
+ +--- + +## Aşama 4: Feature Toggle System + +### Issue 4.1 — Implement CompilerConfig struct and flag parsing + +**Title:** Aşama 4.1 — Implement CompilerConfig struct and --disable-* flags + +**Body:** + +**Goal:** Create a configuration system that controls language features at compile time. + +**Files to create/modify:** +- `src/core/config.hpp` — new file +- `src/cli/args.hpp` — extend to parse feature flags + +**Requirements:** + +`CompilerConfig` struct with boolean fields: +- `enableWhile`, `enableFor`, `enableDoWhile`, `enableSwitch` +- `enableClass`, `enableInterface`, `enableEnum` +- `enableTernary`, `enablePostfix`, `enableUnary` +- `optConstantFolding`, `optDeadCodeElim` +- `outputFormat` (text/json/dot) +- `mode` (run/tokens/ast/symbols/compile/transpile) + +CLI flags: +- `--disable-while` sets `enableWhile = false` +- `--disable-for` sets `enableFor = false` +- `--opt-all` enables all optimizations +- `--opt-none` disables all optimizations + +**Success criteria:** +- `--disable-while` flag is parsed into CompilerConfig. +- Config is passed through to Tokenizer and Parser. + +--- + +### Issue 4.2 — Implement keyword toggling in Tokenizer + +**Title:** Aşama 4.2 — Disable keywords based on CompilerConfig + +**Body:** + +**Goal:** When a keyword is disabled in config, the Tokenizer treats it as an identifier. + +**Files to modify:** +- `src/tokenizer/tokenizer.hpp` + +**Requirements:** +- Tokenizer receives a `CompilerConfig` reference (or copy). +- Before matching keywords, check config flags. +- Disabled keywords are skipped in keyword matching; they fall through to identifier. +- Example: `--disable-while` means `while` becomes a regular identifier. + +**Success criteria:** +- With `--disable-while`, `while (true) {}` tokenizes `while` as identifier. +- Parser then does not parse it as a while statement (falls through to expression). 
+ +--- + +### Issue 4.3 — Implement optimization pass interface + +**Title:** Aşama 4.3 — Implement OptimizationPass interface and OptimizationManager + +**Body:** + +**Goal:** Create a framework for pluggable optimization passes. + +**Files to create:** +- `src/opt/optimization_pass.hpp` — new file +- `src/opt/optimization_manager.hpp` — new file + +**Requirements:** +- `OptimizationPass` abstract class with `run(ASTNode* root, SymbolTable* table) -> bool` method. +- `OptimizationManager` holds a list of passes, runs them in order based on CompilerConfig. +- Initially, two passes: `ConstantFoldingPass` and `DeadCodeEliminationPass` (empty implementations for now, will be filled in Aşama 6). +- `--skip-constant-folding` flag skips that pass. + +**Success criteria:** +- OptimizationManager runs (even if passes are no-ops for now). +- Feature flags control which passes execute. + +--- + +## Aşama 5: Backend — Execution + +### Issue 5.1 — Strengthen IR with control flow and function opcodes + +**Title:** Aşama 5.1 — Extend IR with control flow, function, and memory opcodes + +**Body:** + +**Goal:** IR must support control flow (branch, jump, compare), function calls, and memory operations before any backend can work. + +**Files to modify:** +- `src/ir/ir.hpp` + +**Requirements:** + +New opcodes: +- Control flow: `cmp`, `br`, `br_eq`, `br_lt`, `br_gt`, `jmp` +- Function: `call`, `ret`, `param` +- Memory: `load`, `store`, `alloca` + +Update `IROpData` if needed to support new parameter types (labels for jump targets, function indices). +Add a `label` field or a separate `IRLabel` structure for branch targets. + +**Success criteria:** +- All opcodes are defined and documented. +- IR can represent a simple if-else and a while loop. +- IR can represent a function definition with parameters and return. 
+ +--- + +### Issue 5.2 — Implement C Transpile Backend + +**Title:** Aşama 5.2 — Implement C transpile backend (`saqut transpile`) + +**Body:** + +**Goal:** Convert saQut IR (or AST) to compilable C source code. + +**Files to create:** +- `src/backend/c_transpile.hpp` — new file + +**Requirements:** +- Reads IR (or AST) and generates equivalent C code. +- Handles: variable declarations, binary expressions, if/for/while/do-while, function definitions, return. +- Generates readable C with reasonable indentation. +- Embed `#line` directives so GCC/Clang error messages point to original `.sqt` files. +- `saqut transpile source.sqt -o output.c` command. + +**Success criteria:** +- Given `int main() { return 42; }`, generates compilable C code that returns 42. +- Generated C compiles with `gcc -Wall -Werror` without warnings. +- `saqut compile source.sqt output:prog` compiles and runs correctly. + +--- + +### Issue 5.3 — Implement Interpreter (Tree-walk VM) + +**Title:** Aşama 5.3 — Implement interpreter VM (`saqut run` execution) + +**Body:** + +**Goal:** Execute saQut programs by walking the AST or interpreting IR directly. + +**Files to create:** +- `src/backend/interpreter.hpp` — new file + +**Requirements:** +- `Interpreter` class that walks AST and executes. +- Supports: variable declaration and assignment, binary/unary expressions, if/else, while/for/do-while, break/continue, return, function calls (after function parameters are implemented). +- Stack-based or register-based value storage. +- `saqut run source.sqt` executes and prints program result. +- REPL mode (from Issue 1.1) uses the interpreter for evaluation. + +**Success criteria:** +- `saqut run source.sqt` executes correctly for arithmetic and control flow. +- Variables hold values across statements. +- Return value propagates correctly. 
+ +--- + +## Aşama 6: Optimization + +### Issue 6.1 — Implement Constant Folding pass + +**Title:** Aşama 6.1 — Implement Constant Folding optimization + +**Body:** + +**Goal:** Evaluate constant expressions at compile time. + +**Files to create:** +- `src/opt/constant_folding.hpp` — new file + +**Requirements:** +- Walk AST, find `BinaryExpression` nodes where both operands are Literals. +- Compute the result, replace the subtree with a single Literal node. +- Handles: `+`, `-`, `*`, `/`, `%` for integers and floats. +- Handles unary `-` on literals. +- Guard against division by zero (emit warning, skip folding). + +**Success criteria:** +- `4 + 5` becomes `9` in the optimized AST. +- `x + 0` is NOT folded (x is not constant). +- `1 / 0` emits warning, AST unchanged. + +--- + +### Issue 6.2 — Implement Dead Code Elimination pass + +**Title:** Aşama 6.2 — Implement Dead Code Elimination + +**Body:** + +**Goal:** Remove code that is provably unreachable. + +**Files to create:** +- `src/opt/dead_code_elim.hpp` — new file + +**Requirements:** +- Remove statements after `return`, `break`, `continue` within the same block. +- Remove `if (false)` branches. +- Remove `while (false)` bodies. +- Remove unused variable declarations (requires SymbolTable reference counting). + +**Success criteria:** +- `return; x = 5;` — assignment is removed. +- `if (false) { ... }` — entire block removed. +- Unused variable `int y = 10;` removed when y has zero references. + +--- + +### Issue 6.3 — Implement Null Check and Type Check Elimination + +**Title:** Aşama 6.3 — Implement Null/Type Check Elimination + +**Body:** + +**Goal:** Remove redundant null checks and type checks when the compiler can prove they are unnecessary. + +**Files to create:** +- `src/opt/null_check_elim.hpp` — new file +- `src/opt/type_check_elim.hpp` — new file + +**Requirements:** +- Track which variables have been checked for null in the current path. 
+- If a variable was already null-checked (and not reassigned), skip subsequent checks. +- Similarly for type checks (`is` expressions). +- Requires dataflow analysis within a function body. + +**Success criteria:** +- Two consecutive `if (x != null)` — second check eliminated. +- `if (x is int) { ... if (x is int) { ... } }` — inner check eliminated. + +--- + +## Aşama 7: Test and Performance + +### Issue 7.1 — Set up unit test framework (Google Test) + +**Title:** Aşama 7.1 — Set up Google Test framework and write initial tests + +**Body:** + +**Goal:** Create a proper testing infrastructure. + +**Files to create/modify:** +- `tests/` directory +- `tests/lexer_test.cpp` +- `tests/tokenizer_test.cpp` +- `tests/parser_test.cpp` +- `tests/symbol_test.cpp` +- `CMakeLists.txt` or `Makefile` with test target + +**Requirements:** +- Use Google Test (download during build or include as submodule). +- Initial tests: lexing numbers, tokenizing keywords, parsing simple expressions, symbol collection. +- `make test` or `cmake --build . --target test` runs all tests. + +**Success criteria:** +- At least 10 passing tests. +- CI-ready: tests can be run from command line. +- Failures show expected vs actual. + +--- + +### Issue 7.2 — Write snapshot tests for AST output + +**Title:** Aşama 7.2 — Snapshot testing for AST/IR/symbol output + +**Body:** + +**Goal:** Ensure compiler output is stable across changes. + +**Files to create:** +- `tests/snapshots/` directory +- Script or C++ test that runs `saqut ast` and compares to stored JSON. + +**Requirements:** +- Store known-good JSON output for `source.sqt`, `Final.sqt`. +- Test compares current output to snapshot; fails on difference. +- Snapshot update mode to regenerate expected files. + +**Success criteria:** +- Changes to parser that affect AST structure are caught. +- False positives (formatting changes) are manageable. 
+ +--- + +### Issue 7.3 — Implement benchmark suite + +**Title:** Aşama 7.3 — Implement benchmark infrastructure (`saqut bench`) + +**Body:** + +**Goal:** Measure compiler performance on large inputs. + +**Files to create/modify:** +- `src/cli/commands/bench.hpp` — new file +- `benchmarks/` directory with test files + +**Requirements:** +- `saqut bench` runs a set of benchmark files and reports parse time, token throughput, memory usage. +- Warm-up phase to reduce noise. +- Output in machine-readable format (JSON) for tracking over time. + +**Success criteria:** +- Parse a 10K-line file and report tokens/second. +- Memory usage reported in KB/MB. + +--- + +## Aşama 8: Advanced Type System + +### Issue 8.1 — Implement Struct type (user-defined types) + +**Title:** Aşama 8.1 — Full struct support: definition, instantiation, field access + +**Body:** + +**Goal:** Users can define and use `struct` types. + +**Files to modify:** +- `src/parser/parser.hpp` +- `src/parser/ast.hpp` +- `src/symbol/` +- `src/backend/` + +**Requirements:** +- `struct Point { int x; int y; }` defines a type. +- `Point p;` declares a variable of that type. +- `p.x` accesses a field (already partially supported via MemberAccess). +- Struct type checking: field must exist, type must match on assignment. +- C transpile and interpreter both support structs. + +**Success criteria:** +- Struct definition, instantiation, and field access work end-to-end. +- Accessing nonexistent field produces clear error. + +--- + +### Issue 8.2 — Implement Array and Pointer types + +**Title:** Aşama 8.2 — Implement array and pointer type support + +**Body:** + +**Goal:** Support `int[]`, `int*`, array indexing, and pointer arithmetic. + +**Files to modify:** +- `src/parser/parser.hpp` +- `src/parser/ast.hpp` +- `src/symbol/` +- `src/backend/` + +**Requirements:** +- `int arr[10];` array declaration. +- `arr[i]` indexing (already partially supported). +- `int* p;` pointer declaration. 
+- `*p` dereference (unary `*` operator). +- `&x` address-of operator. +- Type checking for pointer/array operations. + +**Success criteria:** +- Array declaration and indexing work. +- Pointer declaration, assignment, dereference work. +- Pointer arithmetic (`p + 1`) works. + +--- + +### Issue 8.3 — Implement standard library foundation (lib/std.sqt) + +**Title:** Aşama 8.3 — Create standard library with basic data structures + +**Body:** + +**Goal:** Provide built-in data structures: List, Map, Set, Buffer, String utilities. + +**Files to create:** +- `lib/std.sqt` — standard library source file +- `lib/collections.sqt`, `lib/io.sqt`, `lib/encoding.sqt` — optional modules + +**Requirements:** +- `List` — dynamic array with `add`, `get`, `remove`, `size`. +- `Map` — hash map with `put`, `get`, `contains`, `remove`. +- `Set` — hash set. +- `Buffer` — byte buffer for binary data. +- `String` methods: `split`, `replace`, `substring`, `toUpper`, `toLower`. +- All implemented in saQut itself (or native functions exposed to saQut). + +**Success criteria:** +- `import std;` makes these types available. +- Basic operations work without crashes. + +--- + +## Aşama 9: Ecosystem + +### Issue 9.1 — Implement project initialization (`saqut init`) + +**Title:** Aşama 9.1 — Implement `saqut init` project scaffolding + +**Body:** + +**Goal:** `saqut init my-project` creates a standard project directory. + +**Files to create/modify:** +- `src/cli/commands/init.hpp` — new file + +**Requirements:** +- Creates directory with: + - `project.saqut` manifest file (TOML or JSON format). + - `src/` directory with `main.sqt`. + - `.gitignore` file. +- Manifest contains: project name, version, description, author, dependencies (empty initially). + +**Success criteria:** +- `saqut init testproj` creates the expected structure. +- `saqut run` inside the project directory finds and runs `src/main.sqt`. 
+ +--- + +### Issue 9.2 — Implement built-in test framework (`saqut test`) + +**Title:** Aşama 9.2 — Implement built-in test framework + +**Body:** + +**Goal:** `saqut test` discovers and runs test functions in the project. + +**Files to create/modify:** +- `src/cli/commands/test.hpp` — new file +- `lib/test.sqt` — test framework library + +**Requirements:** +- Functions annotated with `#[test]` or named `test_*` are test functions. +- `saqut test` runs all tests, reports pass/fail. +- `assert(condition)` and `assert_eq(a, b)` built-in functions. +- Output in TAP or JUnit XML format for CI integration. + +**Success criteria:** +- `saqut test` in a project with test functions runs them. +- Failures report file and line number. +- Exit code 0 for all pass, non-zero for any failure. + +--- + +## Aşama 10: Package Manager + +### Issue 10.1 — Implement package manager foundation (`saqut add`) + +**Title:** Aşama 10.1 — Implement `saqut add` package manager + +**Body:** + +**Goal:** `saqut add <package>` downloads and installs a package dependency. + +**Files to create/modify:** +- `src/cli/commands/add.hpp` — new file +- `src/package/registry.hpp` — new file +- `src/package/resolver.hpp` — new file + +**Requirements:** +- Package registry: a central Git repository or simple HTTP server listing available packages. +- `saqut add json` adds the `json` package to `project.saqut` dependencies. +- Downloads package source into `packages/` directory (or a cache). +- `import json;` in source code finds the installed package. +- Semantic versioning support (major.minor.patch). + +**Success criteria:** +- `saqut add` adds dependency to manifest. +- `import` of installed package works. +- Version constraints are enforced. + +--- + +## Aşama 11: Language Specification + +### Issue 11.1 — Write language specification document + +**Title:** Aşama 11.1 — Write comprehensive language specification + +**Body:** + +**Goal:** Create `docs/lang_spec.md` that fully defines the saQut language. 
+ +**Files to create:** +- `docs/lang_spec.md` + +**Requirements:** +- Syntax: full grammar in EBNF or similar notation. +- Type system: all built-in types, conversion rules, type inference. +- Control flow: semantics of if/else, for, while, do-while, break, continue, return. +- Memory model: stack vs heap, pointer rules, array layout. +- Standard library: function signatures and contracts for all `lib/std.sqt` functions. +- Error handling: exception-like or error-return semantics. + +**Success criteria:** +- A developer can implement a saQut compiler from only this document. +- All implemented features are documented. +- Unimplemented features are clearly marked as "planned" or "future." + +--- + +> **Total issues: 33** +> +> **Order:** Start from 0.1 and work sequentially. Each issue is a milestone toward +> the next. Dependencies are explicit: later stages require earlier stages complete. \ No newline at end of file diff --git a/src/core/location.hpp b/src/core/location.hpp new file mode 100644 index 0000000..e8b4ff0 --- /dev/null +++ b/src/core/location.hpp @@ -0,0 +1,80 @@ +// ============================================================================ +// saQut Compiler — Kaynak Kod Konum Yapısı +// ============================================================================ +// +// DİZİN: src/core/location.hpp +// KATMAN: Katman 0 — Tüm katmanlar tarafından kullanılır +// BAĞIMLI: Yok (sadece <string>) +// +// AMAÇ: +// Her token ve AST düğümünün kaynak koddaki tam konumunu tutar. +// "Hata nerede?" ve "Kullanıcı imleci nerede?" sorularına cevap verir. 
#ifndef SAQUT_CORE_LOCATION
#define SAQUT_CORE_LOCATION

#include <string>
#include <utility>

// ============================================================================
// SourceLocation — a single point in the source code
// ============================================================================
//
// FIELDS:
//   filePath : path of the source file (empty string when unknown)
//   line     : 1-based line number, 0 = invalid
//   column   : 1-based column number, 0 = invalid
//   offset   : 0-based character offset from the start of the file,
//              -1 = invalid
//
// USAGE:
//   SourceLocation loc{"test.sqt", 5, 10, 134};
//   std::cout << loc.toString();     // "test.sqt:5:10"
//   std::cout << loc.shortString();  // "5:10"
//
// The default constructor produces an invalid location
// (line=0, column=0, offset=-1); check with isValid().
//
// ============================================================================

struct SourceLocation {
    std::string filePath;
    int line = 0;      // 1-based, 0 = invalid
    int column = 0;    // 1-based, 0 = invalid
    int offset = -1;   // 0-based, -1 = invalid

    SourceLocation() = default;

    // Builds a location from a file path, 1-based line/column and a 0-based
    // offset. The path is taken by value and moved into place.
    SourceLocation(std::string file, int line, int col, int off)
        : filePath(std::move(file)), line(line), column(col), offset(off) {}

    // True when line and column are positive and offset is non-negative.
    bool isValid() const {
        return line > 0 && column > 0 && offset >= 0;
    }

    // Full form "file.sqt:5:10"; empty string for an invalid location.
    std::string toString() const {
        if (!isValid()) return "";
        return filePath + ":" + std::to_string(line) + ":" + std::to_string(column);
    }

    // Short form "5:10"; "?:?" for an invalid location.
    std::string shortString() const {
        if (!isValid()) return "?:?";
        return std::to_string(line) + ":" + std::to_string(column);
    }

    // JSON form: {"file":"...","line":5,"column":10,"offset":134}
    // Returns the JSON literal null for an invalid location.
    // The file path is escaped so that backslashes (Windows paths), double
    // quotes and control characters cannot produce malformed JSON.
    std::string toJson() const {
        if (!isValid()) return "null";
        std::string json = "{\"file\":\"";
        json += escapeJson(filePath);
        json += "\",\"line\":";
        json += std::to_string(line);
        json += ",\"column\":";
        json += std::to_string(column);
        json += ",\"offset\":";
        json += std::to_string(offset);
        json += "}";
        return json;
    }

private:
    // Escapes `raw` for use inside a JSON string literal: quote, backslash,
    // the short escapes \b \f \n \r \t, and \u00XX for any other byte below
    // 0x20. Bytes >= 0x20 (including multi-byte UTF-8 sequences) are copied
    // through unchanged.
    static std::string escapeJson(const std::string& raw) {
        static const char kHex[] = "0123456789abcdef";
        std::string out;
        out.reserve(raw.size() + 2);
        for (const char ch : raw) {
            switch (ch) {
                case '"':  out += "\\\""; break;
                case '\\': out += "\\\\"; break;
                case '\b': out += "\\b";  break;
                case '\f': out += "\\f";  break;
                case '\n': out += "\\n";  break;
                case '\r': out += "\\r";  break;
                case '\t': out += "\\t";  break;
                default: {
                    const unsigned char byte = static_cast<unsigned char>(ch);
                    if (byte < 0x20) {
                        out += "\\u00";
                        out += kHex[byte >> 4];
                        out += kHex[byte & 0x0F];
                    } else {
                        out += ch;
                    }
                    break;
                }
            }
        }
        return out;
    }
};

#endif // SAQUT_CORE_LOCATION
+//
+// PERFORMANS:
+//   setText()          : O(n)     — line-start dizisi bir kere kurulur
+//   offsetToLocation() : O(log n) — binary search
+//   Bellek             : O(n)     — lineStarts (en fazla n eleman, her satır için bir int)
+//
+// ============================================================================
+
+#ifndef SAQUT_CORE_SOURCEFILE
+#define SAQUT_CORE_SOURCEFILE
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include "core/location.hpp"
+
+// ============================================================================
+// SourceFile — Source Text Manager
+// ============================================================================
+//
+// USAGE:
+//   SourceFile sf;
+//   sf.setText("deneme.sqt", "int x = 5;\nreturn x;\n");
+//   SourceLocation loc = sf.offsetToLocation(10);  // 1:11 (the '\n' that ends line 1)
+//
+// ============================================================================
+
+class SourceFile {
+public:
+    std::string filePath;         // Path of the source file
+    std::string text;             // The complete source text
+    std::vector<int> lineStarts;  // Start offset of every line
+
+    SourceFile() = default;
+
+    // Store the path and the text, then rebuild the line-start table.
+    void setText(const std::string& path, const std::string& source) {
+        filePath = path;
+        text = source;
+        computeLineStarts();
+    }
+
+    // Number of entries in lineStarts (0 until setText() has been called).
+    int lineCount() const {
+        return static_cast<int>(lineStarts.size());
+    }
+
+    // Text of the 1-based line `line` without its terminator; "" when `line` is out of range.
+    std::string getLine(int line) const {
+        if (line < 1 || line > lineCount()) return "";
+        int start = lineStarts[line - 1];
+        // One past the line's last character: the next line's start, or the end of the text.
+        int end = (line < lineCount()) ? lineStarts[line] : static_cast<int>(text.length());
+        // Strip "\n" or "\r\n". The last line can carry one too: computeLineStarts()
+        // adds no entry for a '\n' that is the final character of the text.
+        if (end > start && text[end - 1] == '\n') {
+            end--;
+            if (end > start && text[end - 1] == '\r') end--;
+        }
+        return text.substr(start, end - start);
+    }
+
+    // Convert an offset to (line, column).
+    // Uses binary search over lineStarts: O(log n).
+    SourceLocation offsetToLocation(int offset) const {
+        // Invalid when no text has been loaded yet or the offset is out of range
+        if (lineStarts.empty() || offset < 0 || offset > static_cast<int>(text.length())) {
+            return SourceLocation{filePath, 0, 0, -1};
+        }
+
+        // Binary search: find the line the offset belongs to,
+        // i.e. the entry just before the first lineStarts element greater than offset
+        auto it = std::upper_bound(lineStarts.begin(), lineStarts.end(), offset);
+        int lineIndex = static_cast<int>(it - lineStarts.begin()) - 1;
+
+        // Clamp lineIndex if it falls outside lineStarts
+        if (lineIndex < 0) {
+            lineIndex = 0;
+        } else if (lineIndex >= static_cast<int>(lineStarts.size())) {
+            lineIndex = static_cast<int>(lineStarts.size()) - 1;
+        }
+
+        int lineStart = lineStarts[lineIndex];
+        int line = lineIndex + 1;             // 1-based
+        int column = offset - lineStart + 1;  // 1-based
+
+        return SourceLocation{filePath, line, column, offset};
+    }
+
+    // Start and end locations of an offset range
+    struct LocationRange {
+        SourceLocation start;
+        SourceLocation end;
+    };
+
+    LocationRange rangeFromOffsets(int startOffset, int endOffset) const {
+        return {offsetToLocation(startOffset), offsetToLocation(endOffset)};
+    }
+
+private:
+    // Build the lineStarts vector.
+    // The offset right after each '\n' is the start of the next line.
+    // The first line always starts at offset 0.
+    void computeLineStarts() {
+        lineStarts.clear();
+        lineStarts.push_back(0);  // line 1 starts at offset 0
+
+        for (int i = 0; i < static_cast<int>(text.length()); i++) {
+            if (text[i] == '\n') {
+                // Only '\n' is examined here; a '\n' that is the last character adds no new line
+                int nextStart = i + 1;
+                if (nextStart < static_cast<int>(text.length())) {
+                    lineStarts.push_back(nextStart);
+                }
+            }
+        }
+    }
+};
+
+#endif // SAQUT_CORE_SOURCEFILE
diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp index b8f4b5b..e12f898 100644 --- a/src/lexer/lexer.hpp +++ b/src/lexer/lexer.hpp @@ -63,6 +63,8 @@ #include #include #include +#include "core/location.hpp" +#include "core/sourcefile.hpp" // ============================================================================ // INumber — Ara Sayısal Veri Yapısı @@ -89,6 +91,8 @@ struct INumber { int start = 0; // Kaynak koddaki başlangıç offset'i int end = 0; // Kaynak koddaki bitiş offset'i + SourceLocation startLoc; // Kaynak koddaki başlangıç konumu (line, column) + SourceLocation endLoc; // Kaynak koddaki bitiş konumu std::string token; // Sayının ham metni (örn: "42", "0xFF", "3.14e-2") bool isFloat = false; // true ise float/double literal bool hasEpsilon = false; // true ise bilimsel gösterim (örn: 1e10) @@ -160,6 +164,11 @@ public: void toChar(int n); // offset'i n kadar ilerlet // --- Üst Seviye İşlemler --- + // --- Konum Bilgisi (SourceFile üzerinden) --- + SourceFile sourceFile; // Kaynak kod ve satır başı offset'leri + SourceLocation getLocation(); // Mevcut offset'in SourceLocation'ını döndür + void setSourceText(const std::string& path, const std::string& text); + void setText(std::string input); // Yeni kaynak kodu yükle void skipWhiteSpace(); // Boşluk/sekme/satırsonu karakterlerini atla bool isNumeric(); // Mevcut karakter 0-9 aralığında mı? @@ -337,6 +346,23 @@ inline void Lexer::toChar(int n) { setOffset(getOffset() + n); } +// -------------------------------------------------------------------------- +// getLocation: Mevcut offset'in SourceLocation'ını döndür.
+// The location is resolved through sourceFile. NOTE(review): sourceFile is filled by setSourceText(); confirm that callers using plain setText() never depend on this. +// -------------------------------------------------------------------------- +inline SourceLocation Lexer::getLocation() { + return sourceFile.offsetToLocation(getOffset()); +} + +// -------------------------------------------------------------------------- +// setSourceText: load new source text and update the SourceFile. +// Prepares the Lexer and the SourceFile in one call. +// -------------------------------------------------------------------------- +inline void Lexer::setSourceText(const std::string& path, const std::string& text) { + sourceFile.setText(path, text); + setText(text); +} + // -------------------------------------------------------------------------- // setText: Yeni kaynak kodu yükle. input ve size'ı günceller. // -------------------------------------------------------------------------- @@ -402,6 +428,7 @@ inline bool Lexer::isNumeric() { inline INumber Lexer::readNumeric() { INumber num; num.start = getLastPosition(); + num.startLoc = getLocation(); // --- Adım 1: İsteğe bağlı işaret --- if (getchar() == '-') { @@ -450,6 +477,7 @@ inline INumber Lexer::readNumeric() { // token'a ekleniyor ve base=8 yapılıyordu. Bu, "0;" durumunda // ';' karakterinin sayıya eklenmesine neden oluyordu.
num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } } else { @@ -478,6 +506,7 @@ inline INumber Lexer::readNumeric() { num.token.push_back(c); else { num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } break; @@ -486,6 +515,7 @@ inline INumber Lexer::readNumeric() { num.token.push_back(c); else { num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } break; @@ -496,6 +526,7 @@ inline INumber Lexer::readNumeric() { num.token.push_back(c); else { num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } break; @@ -512,6 +543,7 @@ inline INumber Lexer::readNumeric() { } else { // İkinci nokta → sayı bitti num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } break; @@ -541,12 +573,14 @@ inline INumber Lexer::readNumeric() { nextChar(); } else { num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } } break; } num.end = getLastPosition(); + num.endLoc = getLocation(); return num; default: // Tanınmayan karakter → sayı bitti @@ -556,7 +590,8 @@ inline INumber Lexer::readNumeric() { nextChar(); } num.end = getLastPosition(); + num.endLoc = getLocation(); return num; } -#endif // SAQUT_LEXER +#endif // SAQUT_LEXER \ No newline at end of file diff --git a/src/parser/ast.hpp b/src/parser/ast.hpp index 42db235..bed7cee 100644 --- a/src/parser/ast.hpp +++ b/src/parser/ast.hpp @@ -51,6 +51,7 @@ #include #include #include +#include "core/location.hpp" #include "parser/token.hpp" #include "tools.hpp" @@ -90,6 +91,7 @@ class ASTNode { public: ASTKind kind; ASTNode* parent = nullptr; + SourceLocation loc; // Bu düğümün kaynak koddaki konumu virtual void log(int indent = 0) { (void)indent; @@ -179,6 +181,7 @@ public: << in << " \"kind\": \"FunctionDecl\",\n" << in << " \"name\": \"" << jsonEscape(name) << "\",\n" << in << " \"returnType\": \"" << jsonEscape(returnType) << "\",\n" + << in << " \"location\": " << loc.toJson() << ",\n" << in << " \"children\": [\n" << 
childrenToJson(this, depth + 3) << in << " ]\n" @@ -225,6 +228,32 @@ public: VariableDeclNode() { kind = ASTKind::VariableDecl; } + std::string toJson(int depth = 0) override { + std::string in = jsonIndent(depth); + std::ostringstream ss; + ss << in << "{\n" + << in << " \"kind\": \"VariableDecl\",\n" + << in << " \"name\": \"" << jsonEscape(name) << "\",\n" + << in << " \"varType\": \"" << jsonEscape(varType) << "\",\n" + << in << " \"location\": " << loc.toJson() << ""; + if (initExpr) { + ss << ",\n" << in << " \"initExpr\":\n" + << initExpr->toJson(depth + 2); + } + // Çoklu değişken bildirimindeki kardeşler (int a, b, c;) + if (!getChildren().empty()) { + ss << ",\n" << in << " \"declarators\": [\n"; + for (size_t i = 0; i < getChildren().size(); i++) { + ss << ((VariableDeclNode*)getChildren()[i])->toJson(depth + 2); + if (i + 1 < getChildren().size()) ss << ","; + ss << "\n"; + } + ss << in << " ]"; + } + ss << "\n" << in << "}"; + return ss.str(); + } + void log(int indent = 0) override { std::cout << padRight("", indent) << "VariableDecl " << varType << " " << name; @@ -234,21 +263,10 @@ public: } else { std::cout << "\n"; } - } - - std::string toJson(int depth = 0) override { - std::string in = jsonIndent(depth); - std::ostringstream ss; - ss << in << "{\n" - << in << " \"kind\": \"VariableDecl\",\n" - << in << " \"name\": \"" << jsonEscape(name) << "\",\n" - << in << " \"varType\": \"" << jsonEscape(varType) << "\""; - if (initExpr) { - ss << ",\n" << in << " \"initExpr\":\n" - << initExpr->toJson(depth + 2); + // Kardeş değişkenleri de logla + for (auto* child : getChildren()) { + child->log(indent); } - ss << "\n" << in << "}"; - return ss.str(); } }; @@ -286,7 +304,8 @@ public: std::ostringstream ss; ss << in << "{\n" << in << " \"kind\": \"BinaryExpression\",\n" - << in << " \"operator\": \"" << jsonEscape(opSym) << "\""; + << in << " \"operator\": \"" << jsonEscape(opSym) << "\",\n" + << in << " \"location\": " << loc.toJson() << ""; if (Left) { ss 
<< ",\n" << in << " \"left\":\n" << Left->toJson(depth + 2); @@ -304,16 +323,44 @@ public: // LiteralNode — Sabit Değer // ============================================================================ +// Literal tipleri +enum class LiteralType : uint8_t { + INTEGER, // Tamsayı (decimal, hex, octal, binary) + FLOAT, // Ondalıklı sayı (3.14, 1e-5) + STRING, // Metin ("hello") + BOOLEAN, // true / false + BOŞ // null +}; + +inline const char* literalTypeToString(LiteralType t) { + switch (t) { + case LiteralType::INTEGER: return "integer"; + case LiteralType::FLOAT: return "float"; + case LiteralType::STRING: return "string"; + case LiteralType::BOOLEAN: return "boolean"; + case LiteralType::BOŞ: return "null"; + } + return "?"; +} + class LiteralNode : public ASTNode { public: Token* lexerToken = nullptr; ParserToken parserToken; + LiteralType literalType = LiteralType::INTEGER; + int literalBase = 10; // 10, 16, 8, 2 (sadece INTEGER/FLOAT için) + bool isFloatValue = false; // Ondalıklı mı? 
(sadece INTEGER/FLOAT için) + LiteralNode() { kind = ASTKind::Literal; } void log(int indent = 0) override { std::cout << padRight("", indent) - << "Literal {" << parserToken.token->token << "}\n"; + << "Literal {" << parserToken.token->token << "} " + << literalTypeToString(literalType); + if (literalType == LiteralType::INTEGER && literalBase != 10) + std::cout << " (base " << literalBase << ")"; + std::cout << "\n"; } std::string toJson(int depth = 0) override { @@ -322,7 +369,15 @@ public: std::ostringstream ss; ss << in << "{\n" << in << " \"kind\": \"Literal\",\n" - << in << " \"value\": \"" << jsonEscape(val) << "\"\n" + << in << " \"literalType\": \"" << literalTypeToString(literalType) << "\",\n" + << in << " \"value\": \"" << jsonEscape(val) << "\""; + if (literalType == LiteralType::INTEGER && literalBase != 10) { + ss << ",\n" << in << " \"base\": " << literalBase; + } + if (literalType == LiteralType::FLOAT) { + ss << ",\n" << in << " \"isFloat\": true"; + } + ss << ",\n" << in << " \"location\": " << loc.toJson() << "\n" << in << "}"; return ss.str(); } @@ -350,7 +405,8 @@ public: std::ostringstream ss; ss << in << "{\n" << in << " \"kind\": \"Identifier\",\n" - << in << " \"name\": \"" << jsonEscape(name) << "\"\n" + << in << " \"name\": \"" << jsonEscape(name) << "\",\n" + << in << " \"location\": " << loc.toJson() << "\n" << in << "}"; return ss.str(); } @@ -663,7 +719,8 @@ public: std::string in = jsonIndent(depth); std::ostringstream ss; ss << in << "{\n" - << in << " \"kind\": \"ExpressionStatement\""; + << in << " \"kind\": \"ExpressionStatement\",\n" + << in << " \"location\": " << loc.toJson() << ""; if (expression) { ss << ",\n" << in << " \"expression\":\n" << expression->toJson(depth + 2); diff --git a/src/parser/parser.hpp b/src/parser/parser.hpp index 73ac58b..2d93afa 100644 --- a/src/parser/parser.hpp +++ b/src/parser/parser.hpp @@ -287,6 +287,7 @@ inline ASTNode* Parser::parseDeclaration() { // 
-------------------------------------------------------------------------- inline ASTNode* Parser::parseFunctionDecl() { FunctionDeclNode* fn = new FunctionDeclNode(); + fn->loc = currentToken().token->loc; fn->returnType = currentToken().token->token; // "int", "void", ... nextToken(); // Dönüş tipini tüket @@ -318,6 +319,7 @@ inline ASTNode* Parser::parseFunctionDecl() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseStructDecl() { StructDeclNode* st = new StructDeclNode(); + st->loc = currentToken().token->loc; nextToken(); if (currentToken().type == TokenType::IDENTIFIER) { st->name = currentToken().token->token; @@ -340,31 +342,84 @@ inline ASTNode* Parser::parseStructDecl() { // -------------------------------------------------------------------------- // parseVariableDecl: Değişken tanımı. // -// Sözdizimi: Type Identifier [= Expression] ; +// Sözdizimi: Type Identifier [= Expression] {, Identifier [= Expression]} ; // Örnek: int x = 10; -// float y; (initExpr = nullptr) +// float y; (initExpr = nullptr) +// int first = 0, second = 1, next; // -// TODO: Çoklu değişken: int x = 1, y = 2; +// Çoklu değişken: +// İlk değişken ana düğüm olur. Virgülle ayrılmış ek değişkenler +// ana düğümün children vektörüne eklenir. JSON çıktısında "declarators" +// dizisi olarak görünür. // -------------------------------------------------------------------------- inline ASTNode* Parser::parseVariableDecl() { + // --- Tip ve ilk değişken adı --- VariableDeclNode* vd = new VariableDeclNode(); + vd->loc = currentToken().token->loc; vd->varType = currentToken().token->token; // "int", "float", ... nextToken(); // Tipi tüket if (currentToken().type != TokenType::IDENTIFIER) { std::cerr << "Parser hatası: değişken ismi bekleniyor\n"; - return vd; // Hatalı düğüm, çağıran kontrol etmeli + return vd; } - vd->name = currentToken().token->token; // "x", "counter", ... 
+ vd->name = currentToken().token->token; nextToken(); // İsmi tüket - // Opsiyonel başlangıç değeri: = expression + // Opsiyonel array boyutu: [expr] + if (currentToken().type == TokenType::LBRACKET) { + nextToken(); // '[' + while (currentToken().type != TokenType::RBRACKET && + currentToken().type != TokenType::SEMICOLON && + currentToken().type != TokenType::SVR_VOID) + nextToken(); + if (currentToken().type == TokenType::RBRACKET) + nextToken(); // ']' + } + + // İlk değişkenin başlangıç değeri if (currentToken().type == TokenType::EQUAL) { nextToken(); // '=' tüket vd->initExpr = parseExpression(); } + // --- Çoklu değişken: , identifier [= expr] --- + while (currentToken().type == TokenType::COMMA) { + nextToken(); // ',' tüket + + if (currentToken().type != TokenType::IDENTIFIER) { + std::cerr << "Parser hatası: virgülden sonra değişken ismi bekleniyor\n"; + break; + } + + VariableDeclNode* sibling = new VariableDeclNode(); + sibling->loc = currentToken().token->loc; + sibling->varType = vd->varType; // Aynı tip + sibling->name = currentToken().token->token; + nextToken(); // İsmi tüket + + // Opsiyonel array boyutu: [expr] + if (currentToken().type == TokenType::LBRACKET) { + nextToken(); // '[' + while (currentToken().type != TokenType::RBRACKET && + currentToken().type != TokenType::SEMICOLON && + currentToken().type != TokenType::SVR_VOID) + nextToken(); + if (currentToken().type == TokenType::RBRACKET) + nextToken(); // ']' + } + + // Başlangıç değeri + if (currentToken().type == TokenType::EQUAL) { + nextToken(); // '=' tüket + sibling->initExpr = parseExpression(); + } + + // Kardeş düğümü ana düğüme ekle + vd->addChild(sibling); + } + // Noktalı virgül (opsiyonel — parser hoşgörülü) if (currentToken().type == TokenType::SEMICOLON) nextToken(); @@ -418,6 +473,10 @@ inline ASTNode* Parser::parseStatement() { return parseVariableDecl(); } + // struct tanımı: struct Name { ... 
} + if (ct.type == TokenType::KW_STRUCT) + return parseStructDecl(); + // Hiçbiri değilse → ifade statement'ı (atama, fonksiyon çağrısı, ...) return parseExpressionStatement(); } @@ -427,6 +486,7 @@ inline ASTNode* Parser::parseStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseBlock() { BlockNode* block = new BlockNode(); + block->loc = currentToken().token ? currentToken().token->loc : SourceLocation{}; if (currentToken().type == TokenType::LBRACE) nextToken(); // '{' tüket @@ -459,6 +519,7 @@ inline ASTNode* Parser::parseBlock() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseIfStatement() { IfStatementNode* ifNode = new IfStatementNode(); + ifNode->loc = currentToken().token->loc; nextToken(); // 'if' tüket // Koşul: ( expression ) @@ -486,6 +547,7 @@ inline ASTNode* Parser::parseIfStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseWhileStatement() { WhileStatementNode* ws = new WhileStatementNode(); + ws->loc = currentToken().token->loc; nextToken(); // 'while' tüket if (currentToken().type == TokenType::LPAREN) { @@ -513,6 +575,7 @@ inline ASTNode* Parser::parseWhileStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseForStatement() { ForStatementNode* fs = new ForStatementNode(); + fs->loc = currentToken().token->loc; nextToken(); // 'for' tüket if (currentToken().type == TokenType::LPAREN) @@ -547,6 +610,7 @@ inline ASTNode* Parser::parseForStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseDoWhileStatement() { DoWhileStatementNode* dw = new DoWhileStatementNode(); + dw->loc = currentToken().token->loc; nextToken(); // 'do' tüket // Gövde @@ -576,6 +640,7 @@ inline ASTNode* Parser::parseDoWhileStatement() { // 
-------------------------------------------------------------------------- inline ASTNode* Parser::parseReturnStatement() { ReturnStatementNode* rs = new ReturnStatementNode(); + rs->loc = currentToken().token->loc; nextToken(); // 'return' tüket // Opsiyonel dönüş değeri @@ -596,6 +661,7 @@ inline ASTNode* Parser::parseReturnStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseBreakStatement() { BreakStatementNode* bs = new BreakStatementNode(); + bs->loc = currentToken().token->loc; nextToken(); // 'break' tüket if (currentToken().type == TokenType::SEMICOLON) nextToken(); @@ -604,6 +670,7 @@ inline ASTNode* Parser::parseBreakStatement() { inline ASTNode* Parser::parseContinueStatement() { ContinueStatementNode* cs = new ContinueStatementNode(); + cs->loc = currentToken().token->loc; nextToken(); // 'continue' tüket if (currentToken().type == TokenType::SEMICOLON) nextToken(); @@ -625,6 +692,7 @@ inline ASTNode* Parser::parseContinueStatement() { // -------------------------------------------------------------------------- inline ASTNode* Parser::parseExpressionStatement() { ExpressionStatementNode* es = new ExpressionStatementNode(); + es->loc = currentToken().token ? currentToken().token->loc : SourceLocation{}; es->expression = parseExpression(); if (!es->expression) { // Hata kurtarma: sonraki güvenli noktaya atla @@ -769,6 +837,7 @@ inline ASTNode* Parser::parseNullDenotation() { // Sağ operand'ı ayrıştır. Unary prefix sağdan sola bağlanır. ASTNode* right = parseExpression(ct.getPowerOperator()); BinaryExpressionNode* bin = new BinaryExpressionNode(); + bin->loc = ct.token ? ct.token->loc : SourceLocation{}; bin->Right = right; bin->Left = nullptr; // Unary işaretçisi bin->Operator = ct.type; @@ -780,8 +849,15 @@ inline ASTNode* Parser::parseNullDenotation() { if (ct.type == TokenType::NUMBER) { nextToken(); // Token'ı tüket LiteralNode* lit = new LiteralNode(); + lit->loc = ct.token ? 
ct.token->loc : SourceLocation{}; lit->lexerToken = ct.token; lit->parserToken = ct; + // NumberToken'a cast edip base/isFloat bilgisini al + if (auto* nt = dynamic_cast(ct.token)) { + lit->literalBase = nt->base; + lit->isFloatValue = nt->isFloat; + lit->literalType = nt->isFloat ? LiteralType::FLOAT : LiteralType::INTEGER; + } return lit; } @@ -789,6 +865,8 @@ inline ASTNode* Parser::parseNullDenotation() { if (ct.type == TokenType::STRING) { nextToken(); LiteralNode* lit = new LiteralNode(); + lit->literalType = LiteralType::STRING; + lit->loc = ct.token ? ct.token->loc : SourceLocation{}; lit->lexerToken = ct.token; lit->parserToken = ct; return lit; @@ -798,6 +876,12 @@ inline ASTNode* Parser::parseNullDenotation() { if (ct.is({TokenType::KW_TRUE, TokenType::KW_FALSE, TokenType::KW_NULL})) { nextToken(); LiteralNode* lit = new LiteralNode(); + // Token içeriğine göre boolean/null ayrımı + if (ct.is({TokenType::KW_TRUE, TokenType::KW_FALSE})) + lit->literalType = LiteralType::BOOLEAN; + else + lit->literalType = LiteralType::BOŞ; + lit->loc = ct.token ? ct.token->loc : SourceLocation{}; lit->lexerToken = ct.token; lit->parserToken = ct; return lit; @@ -807,6 +891,7 @@ inline ASTNode* Parser::parseNullDenotation() { if (ct.type == TokenType::IDENTIFIER) { nextToken(); IdentifierNode* id = new IdentifierNode(); + id->loc = ct.token ? ct.token->loc : SourceLocation{}; id->lexerToken = ct.token; id->parserToken = ct; return id; @@ -836,6 +921,7 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { if (ct.is({TokenType::PLUS_PLUS, TokenType::MINUS_MINUS})) { nextToken(); PostfixNode* pf = new PostfixNode(); + pf->loc = ct.token ? ct.token->loc : SourceLocation{}; pf->operand = left; pf->Operator = ct.type; left->parent = pf; @@ -846,6 +932,7 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { if (ct.type == TokenType::LPAREN) { nextToken(); CallExpressionNode* call = new CallExpressionNode(); + call->loc = ct.token ? 
ct.token->loc : SourceLocation{}; call->callee = left; left->parent = call; @@ -865,6 +952,7 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { if (ct.type == TokenType::LBRACKET) { nextToken(); IndexExpressionNode* idx = new IndexExpressionNode(); + idx->loc = ct.token ? ct.token->loc : SourceLocation{}; idx->object = left; left->parent = idx; idx->index = parseExpression(0); @@ -884,6 +972,7 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { } MemberAccessNode* ma = new MemberAccessNode(); + ma->loc = ct.token ? ct.token->loc : SourceLocation{}; ma->object = left; ma->member = currentToken().token->token; ma->arrow = arrow; @@ -899,6 +988,7 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { ASTNode* right = parseExpression(prec); BinaryExpressionNode* bin = new BinaryExpressionNode(); + bin->loc = ct.token ? ct.token->loc : SourceLocation{}; bin->Left = left; bin->Right = right; bin->Operator = ct.type; @@ -907,4 +997,4 @@ inline ASTNode* Parser::parseLeftDenotation(ASTNode* left) { return bin; } -#endif // SAQUT_PARSER +#endif // SAQUT_PARSER \ No newline at end of file diff --git a/src/tokenizer/token.hpp b/src/tokenizer/token.hpp index c99af99..1a3da61 100644 --- a/src/tokenizer/token.hpp +++ b/src/tokenizer/token.hpp @@ -17,6 +17,7 @@ #define SAQUT_TOKENIZER_TOKEN #include +#include "core/location.hpp" class Token { protected: @@ -24,6 +25,7 @@ protected: public: int start = 0; int end = 0; + SourceLocation loc; // Token'ın kaynak koddaki konumu std::string token; std::string gettype() { return type; } virtual ~Token() = default; diff --git a/src/tokenizer/tokenizer.hpp b/src/tokenizer/tokenizer.hpp index bd44607..07ddec8 100644 --- a/src/tokenizer/tokenizer.hpp +++ b/src/tokenizer/tokenizer.hpp @@ -207,7 +207,7 @@ private: // -------------------------------------------------------------------------- inline std::vector Tokenizer::scan(std::string input) { std::vector tokens; - hmx.setText(input); + hmx.setSourceText("", 
input); // Kaynak kodu hem Lexer'a yükle hem SourceFile'ı hazırla while (true) { Token* token = scope(); if (token->token == "EOL") break; // Dosya sonu sinyali @@ -262,6 +262,7 @@ inline Token* Tokenizer::scope() { if (hmx.isNumeric()) { INumber lem = hmx.readNumeric(); NumberToken* nt = new NumberToken(); + nt->loc = lem.startLoc; nt->base = lem.base; nt->start = lem.start; nt->end = lem.end; @@ -284,6 +285,7 @@ inline Token* Tokenizer::scope() { } KeywordToken* kt = new KeywordToken(); kt->start = hmx.getOffset(); + kt->loc = hmx.getLocation(); hmx.toChar(static_cast(kw.size())); kt->end = hmx.getOffset(); kt->token = kw; @@ -296,6 +298,7 @@ inline Token* Tokenizer::scope() { if (hmx.include(std::string(del), false)) { DelimiterToken* dt = new DelimiterToken(); dt->start = hmx.getOffset(); + dt->loc = hmx.getLocation(); hmx.toChar(static_cast(del.size())); dt->end = hmx.getOffset(); dt->token = del; @@ -308,6 +311,7 @@ inline Token* Tokenizer::scope() { if (hmx.include(std::string(op), false)) { OperatorToken* ot = new OperatorToken(); ot->start = hmx.getOffset(); + ot->loc = hmx.getLocation(); hmx.toChar(static_cast(op.size())); ot->end = hmx.getOffset(); ot->token = op; @@ -360,6 +364,7 @@ inline IdentifierToken* Tokenizer::readIdentifier() { it->end = hmx.getOffset(); it->size = static_cast(it->context.size()); + it->loc = hmx.sourceFile.offsetToLocation(it->start); hmx.acceptPosition(); // Başarılı okuma → konumu kalıcı yap return it; } @@ -425,6 +430,7 @@ inline StringToken* Tokenizer::readString() { st->end = hmx.getOffset(); st->size = static_cast(st->context.size()); + st->loc = hmx.sourceFile.offsetToLocation(st->start); hmx.acceptPosition(); return st; } @@ -457,4 +463,4 @@ inline void Tokenizer::skipMultiLineComment() { } } -#endif // SAQUT_TOKENIZER +#endif // SAQUT_TOKENIZER \ No newline at end of file