From b0ff0ef841c0e447baccc1224c426a21642bf71f Mon Sep 17 00:00:00 2001 From: abdussamedulutas Date: Sat, 10 Jan 2026 20:19:49 +0300 Subject: [PATCH] =?UTF-8?q?refactor:=20AST=20ve=20Token=20yap=C4=B1s=C4=B1?= =?UTF-8?q?nda=20pointer=20semanti=C4=9Fine=20ge=C3=A7i=C5=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Bellekte 'object slicing' (nesne dilimlenmesi) sorununu önlemek için Token ve ASTNode yapıları pointer (yıldız) kullanımına geçirildi. - Bellek sızıntılarını önlemek için temel sınıflara (Token, ASTNode) 'virtual destructor' eklendi. - AST düğümlerine 'parent pointer' bağlantısı eklenerek ağaç üzerinde yukarı yönlü navigasyon sağlandı. - IR (Ara Temsil) ve CodeGenerator katmanları projeye dahil edildi. - Tokenizer'da EOL (satır sonu) kontrolü ve dinamik bellek tahsisi (new) optimizasyonları yapıldı. --- Parsing.cpp | 7 +- core/IR/IR.cpp | 133 ++++++++++++++++++++++++++++++++++++ core/Parser/ASTNode.cpp | 19 ++++-- core/Parser/Parser.cpp | 45 ++++++------ core/Parser/ParserToken.cpp | 2 +- core/Tokenizer.cpp | 101 ++++++++++++++------------- 6 files changed, 229 insertions(+), 78 deletions(-) create mode 100644 core/IR/IR.cpp diff --git a/Parsing.cpp b/Parsing.cpp index 7944b8f..298fccd 100644 --- a/Parsing.cpp +++ b/Parsing.cpp @@ -5,6 +5,7 @@ #include #include "./core/Tokenizer.cpp" #include "./core/Parser/Parser.cpp" +#include "./core/IR/IR.cpp" int main() { @@ -20,10 +21,14 @@ int main() Tokenizer tokenizer; Parser parser; + CodeGenerator codeGenerator; auto tokens = tokenizer.scan(icerik); - parser.parse(tokens); + ASTNode * ast = parser.parse(tokens); + + codeGenerator.parse(ast); + codeGenerator.IROpDatas; return 0; } \ No newline at end of file diff --git a/core/IR/IR.cpp b/core/IR/IR.cpp new file mode 100644 index 0000000..e2d9729 --- /dev/null +++ b/core/IR/IR.cpp @@ -0,0 +1,133 @@ +#include "../Parser/ASTNode.cpp" +#include "../Tokenizer.cpp" +#include +#include + +#ifndef IR +#define IR + +enum 
class OPCode { + // İşlem + mathadd, + mathsub, + mathdiv, + mathmul, + // Tanımalama + declare +}; + +struct Param { + bool isRegister; + std::variant value; +}; + +struct IROpData { + OPCode op; + int targetReg; + Param arg1; + Param arg2; + Param arg3; +}; + +struct Identifier +{ + int last = 0; +}; + + +class CodeGenerator +{ + private: + void * processNumber(NumberToken * num, const std::string& rawStr) { + if (num->isFloat || num->hasEpsilon) { + return new float(std::strtof(rawStr.c_str(), nullptr)); + } + else { + return new int(std::strtol(rawStr.c_str(), nullptr, num->base)); + } + } + public: + CodeGenerator() + { + + } + Identifier identifier; + std::vector IROpDatas; + int parse(ASTNode * ast) + { + switch (ast->kind) + { + case ASTKind::BinaryExpression:{ + return this->parseBinaryExpression((BinaryExpressionNode *) ast); + } + case ASTKind::Literal:{ + return this->parseLiteral((LiteralNode *) ast); + } + default: return 0; + } + }; + int parseBinaryExpression(BinaryExpressionNode * binaryAST) + { + OPCode op; + switch (binaryAST->Operator) + { + case TokenType::STAR:{ + op = OPCode::mathmul; + break; + } + case TokenType::PLUS:{ + op = OPCode::mathadd; + break; + } + case TokenType::MINUS:{ + op = OPCode::mathsub; + break; + } + case TokenType::SLASH:{ + op = OPCode::mathdiv; + break; + } + } + int left = this->parse(binaryAST->Left); + int right = this->parse(binaryAST->Right); + + IROpDatas.push_back({ + op, + ++identifier.last, + {true, left}, + {true, right}, + {false, 0} + }); + return identifier.last; + } + int parseLiteral(LiteralNode * binaryAST) + { + LiteralNode literal = *binaryAST; + NumberToken * num = (NumberToken *) &literal.parserToken.token; + + + if(num->isFloat) + { + float * _value = (float *) this->processNumber(num, num->token); + IROpDatas.push_back({ + OPCode::declare, + ++identifier.last, + {false, *_value}, + {false, 0}, + {false, 0} + }); + }else{ + int * _value = (int *) this->processNumber(num, num->token); + 
IROpDatas.push_back({ + OPCode::declare, + ++identifier.last, + {false, *_value}, + {false, 0}, + {false, 0} + }); + } + return identifier.last; + } +}; + +#endif \ No newline at end of file diff --git a/core/Parser/ASTNode.cpp b/core/Parser/ASTNode.cpp index 58637d9..26a5c48 100644 --- a/core/Parser/ASTNode.cpp +++ b/core/Parser/ASTNode.cpp @@ -34,15 +34,18 @@ class ASTNode { this->parent = children; } + virtual ~ASTNode() = default; }; class BinaryExpressionNode : public ASTNode { - protected: - ASTKind Kind = ASTKind::BinaryExpression; public: + BinaryExpressionNode(){ + this->kind = ASTKind::BinaryExpression; + } void log(int indent) override { + auto korku = OPERATOR_MAP_STRREV.find(this->Operator); std::cout << padRight(" ",indent) << "BinaryExpressionNode " @@ -62,14 +65,16 @@ class BinaryExpressionNode : public ASTNode class LiteralNode : public ASTNode { protected: - ASTKind kind = ASTKind::Literal; void log(int indent) { - std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken.token << "}\n"; + std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken->token << "}\n"; } public: - Token lexerToken; + Token * lexerToken; ParserToken parserToken; + LiteralNode(){ + this->kind = ASTKind::Literal; + } }; class IdentifierNode : public ASTNode @@ -78,10 +83,10 @@ class IdentifierNode : public ASTNode ASTKind kind = ASTKind::Literal; void log(int indent) { - std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken.token << "}\n"; + std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken->token << "}\n"; } public: - Token lexerToken; + Token * lexerToken; ParserToken parserToken; }; class PostfixNode : public ASTNode diff --git a/core/Parser/Parser.cpp b/core/Parser/Parser.cpp index 15d859d..a09f652 100644 --- a/core/Parser/Parser.cpp +++ b/core/Parser/Parser.cpp @@ -20,45 +20,45 @@ class Parser { ASTNode astroot; public: TokenList tokens; - void parse(TokenList tokens); + ASTNode * 
parse(TokenList tokens); int current = 0; ParserToken currentToken(); void nextToken(); ParserToken lookehead(uint32_t); - ParserToken parseToken(Token); + ParserToken parseToken(Token *); ParserToken getToken(int); - void primaryExpression(); + ASTNode * primaryExpression(); ASTNode * volumeExpression(uint16_t precedence); ASTNode * volumeNullDominatorExpression(); ASTNode * volumeLeftDominatorExpression(ASTNode * left); }; -ParserToken Parser::parseToken(Token token){ +ParserToken Parser::parseToken(Token * token){ ParserToken pToken; - pToken.token = token; + pToken.token = *token; - if(token.gettype() == "string") + if(token->gettype() == "string") { pToken.type = TokenType::STRING; } - else if(token.gettype() == "number") + else if(token->gettype() == "number") { pToken.type = TokenType::NUMBER; } - else if(token.gettype() == "operator") + else if(token->gettype() == "operator") { - pToken.type = OPERATOR_MAP.find(token.token)->second; + pToken.type = OPERATOR_MAP.find(token->token)->second; } - else if(token.gettype() == "delimiter") + else if(token->gettype() == "delimiter") { - pToken.type = OPERATOR_MAP.find(token.token)->second; + pToken.type = OPERATOR_MAP.find(token->token)->second; } - else if(token.gettype() == "keyword") + else if(token->gettype() == "keyword") { - pToken.type = KEYWORD_MAP.find(token.token)->second; + pToken.type = KEYWORD_MAP.find(token->token)->second; } - else if(token.gettype() == "identifier") + else if(token->gettype() == "identifier") { pToken.type = TokenType::IDENTIFIER; } @@ -92,15 +92,16 @@ ParserToken Parser::currentToken(){ return this->getToken(0); } -void Parser::parse(TokenList tokens){ +ASTNode * Parser::parse(TokenList tokens){ this->tokens = tokens; - this->primaryExpression(); + return this->primaryExpression(); } -void Parser::primaryExpression() +ASTNode * Parser::primaryExpression() { auto currentToken = this->currentToken(); + ASTNode * tree = nullptr; if( currentToken.is({ TokenType::NUMBER, @@ -113,9 +114,9 
@@ void Parser::primaryExpression() }) ) { - ASTNode * tree = this->volumeExpression(0); - tree->log(0); + tree = this->volumeExpression(0); } + return tree; } // Expresssionu tamamen okuyup bitiren kısım burası @@ -197,14 +198,14 @@ ASTNode * Parser::volumeNullDominatorExpression() if(currentToken.is(TokenType::NUMBER)) { LiteralNode * lNode = new LiteralNode(); - lNode->lexerToken = currentToken.token; + lNode->lexerToken = &currentToken.token; lNode->parserToken = currentToken; return lNode; } if(currentToken.is(TokenType::IDENTIFIER)) { IdentifierNode * iNode = new IdentifierNode(); - iNode->lexerToken = currentToken.token; + iNode->lexerToken = &currentToken.token; iNode->parserToken = currentToken; return iNode; } @@ -237,6 +238,8 @@ ASTNode * Parser::volumeLeftDominatorExpression(ASTNode * left) binNode->Right = right; binNode->Left = left; binNode->Operator = currentToken.type; + binNode->Right->parent = binNode; + binNode->Left->parent = binNode; return binNode; } diff --git a/core/Parser/ParserToken.cpp b/core/Parser/ParserToken.cpp index fa318b5..5b35369 100644 --- a/core/Parser/ParserToken.cpp +++ b/core/Parser/ParserToken.cpp @@ -7,7 +7,7 @@ #ifndef PARSER_TOKEN #define PARSER_TOKEN -typedef std::vector<Token> TokenList; +typedef std::vector<Token *> TokenList; enum class TokenType : uint16_t { diff --git a/core/Tokenizer.cpp b/core/Tokenizer.cpp index 2a929f0..bbf3c07 100644 --- a/core/Tokenizer.cpp +++ b/core/Tokenizer.cpp @@ -17,6 +17,7 @@ class Token { std::string gettype(){ return this->type; } + virtual ~Token() = default; }; class StringToken : public Token { @@ -139,22 +140,26 @@ const constexpr std::string_view keywords[] = { class Tokenizer { public: Lexer hmx; - std::vector<Token> scan(std::string input); - Token scope(); - IdentifierToken readIndetifier(); - StringToken readString(); + std::vector<Token *> scan(std::string input); + Token * scope(); + IdentifierToken * readIndetifier(); + StringToken * readString(); void skipOneLineComment(); void skipMultiLineComment(); };
-std::vector<Token> Tokenizer::scan(std::string input) +std::vector<Token *> Tokenizer::scan(std::string input) { - std::vector<Token> tokens; + std::vector<Token *> tokens; this->hmx.setText(input); while(1) { - Token token = this->scope(); + Token * token = this->scope(); + if(token->token == "EOL") + { + break; + } tokens.push_back(token); if(this->hmx.isEnd()) { @@ -163,7 +168,7 @@ std::vector<Token> Tokenizer::scan(std::string input) } return tokens; } -Token Tokenizer::scope() +Token * Tokenizer::scope() { this->hmx.skipWhiteSpace(); @@ -178,8 +183,8 @@ Token Tokenizer::scope() } if(this->hmx.isEnd()){ - Token token; - token.token = "EOL"; + Token * token = new Token(); + token->token = "EOL"; return token; }; @@ -193,24 +198,24 @@ Token Tokenizer::scope() if(this->hmx.isNumeric()) { INumber lem = this->hmx.readNumeric(); - NumberToken numberToken; - numberToken.base = lem.base; - numberToken.start = lem.start; - numberToken.end = lem.end; - numberToken.hasEpsilon = lem.hasEpsilon; - numberToken.isFloat = lem.isFloat; - numberToken.token = lem.token; + NumberToken * numberToken = new NumberToken(); + numberToken->base = lem.base; + numberToken->start = lem.start; + numberToken->end = lem.end; + numberToken->hasEpsilon = lem.hasEpsilon; + numberToken->isFloat = lem.isFloat; + numberToken->token = lem.token; return numberToken; } for (const std::string_view& keys : keywords) { if(this->hmx.include(std::string(keys),false)) { - KeywordToken keytoken; - keytoken.start = this->hmx.getOffset(); + KeywordToken * keytoken = new KeywordToken(); + keytoken->start = this->hmx.getOffset(); this->hmx.toChar(+keys.size()); - keytoken.end = this->hmx.getOffset(); - keytoken.token = keys; + keytoken->end = this->hmx.getOffset(); + keytoken->token = keys; return keytoken; } } @@ -218,11 +223,11 @@ Token Tokenizer::scope() for (const std::string_view& del : delimiters) { if(this->hmx.include(std::string(del),false)) { - DelimiterToken dtoken; - dtoken.start = this->hmx.getOffset(); + DelimiterToken * dtoken = new
DelimiterToken();; + dtoken->start = this->hmx.getOffset(); this->hmx.toChar(+del.size()); - dtoken.end = this->hmx.getOffset(); - dtoken.token = del; + dtoken->end = this->hmx.getOffset(); + dtoken->token = del; return dtoken; } } @@ -230,22 +235,22 @@ Token Tokenizer::scope() for (const std::string_view& op : operators) { if(this->hmx.include(std::string(op),false)) { - OperatorToken optoken; - optoken.start = this->hmx.getOffset(); + OperatorToken* optoken = new OperatorToken(); + optoken->start = this->hmx.getOffset(); this->hmx.toChar(+op.size()); - optoken.end = this->hmx.getOffset(); - optoken.token = op; + optoken->end = this->hmx.getOffset(); + optoken->token = op; return optoken; } } return this->readIndetifier(); } -IdentifierToken Tokenizer::readIndetifier() +IdentifierToken * Tokenizer::readIndetifier() { this->hmx.beginPosition(); - IdentifierToken idenditifierToken; - idenditifierToken.start = this->hmx.getOffset(); + IdentifierToken * idenditifierToken = new IdentifierToken(); + idenditifierToken->start = this->hmx.getOffset(); while(this->hmx.isEnd() == false) { @@ -255,7 +260,7 @@ IdentifierToken Tokenizer::readIndetifier() if(c >= 'a' && c <= 'z') { readed = true; - idenditifierToken.token.push_back(c); + idenditifierToken->token.push_back(c); this->hmx.nextChar(); continue; } @@ -263,7 +268,7 @@ IdentifierToken Tokenizer::readIndetifier() if(c >= 'A' && c <= 'Z') { readed = true; - idenditifierToken.token.push_back(c); + idenditifierToken->token.push_back(c); this->hmx.nextChar(); continue; } @@ -272,7 +277,7 @@ IdentifierToken Tokenizer::readIndetifier() if(c >= '0' && c <= '9') { readed = true; - idenditifierToken.token.push_back(c); + idenditifierToken->token.push_back(c); this->hmx.nextChar(); continue; } @@ -281,13 +286,13 @@ IdentifierToken Tokenizer::readIndetifier() { case '_':{ readed = true; - idenditifierToken.token.push_back(c); + idenditifierToken->token.push_back(c); this->hmx.nextChar(); break; } case '$':{ readed = true; - 
idenditifierToken.token.push_back(c); + idenditifierToken->token.push_back(c); this->hmx.nextChar(); break; } @@ -297,23 +302,23 @@ IdentifierToken Tokenizer::readIndetifier() break; } } - idenditifierToken.end = this->hmx.getOffset(); - idenditifierToken.size = idenditifierToken.context.size(); + idenditifierToken->end = this->hmx.getOffset(); + idenditifierToken->size = idenditifierToken->context.size(); this->hmx.acceptPosition(); return idenditifierToken; } -StringToken Tokenizer::readString() +StringToken * Tokenizer::readString() { this->hmx.beginPosition(); - StringToken stringToken; + StringToken * stringToken = new StringToken(); bool started = false; bool isended = false; - stringToken.start = this->hmx.getOffset(); + stringToken->start = this->hmx.getOffset(); while(this->hmx.isEnd() == false) { char c = this->hmx.getchar(); - stringToken.token.push_back(c); + stringToken->token.push_back(c); switch(c) { case '"':{ @@ -329,12 +334,12 @@ StringToken Tokenizer::readString() case '\\':{ this->hmx.nextChar(); c = this->hmx.getchar(); - stringToken.token.push_back(c); - stringToken.context.push_back(c); + stringToken->token.push_back(c); + stringToken->context.push_back(c); break; } default:{ - stringToken.context.push_back(c); + stringToken->context.push_back(c); } } this->hmx.nextChar(); @@ -343,8 +348,8 @@ StringToken Tokenizer::readString() break; } } - stringToken.end = this->hmx.getOffset(); - stringToken.size = stringToken.context.size(); + stringToken->end = this->hmx.getOffset(); + stringToken->size = stringToken->context.size(); this->hmx.acceptPosition(); return stringToken; }