From c1467048579d6387e3561a4b5e6f8f9a11f19b88 Mon Sep 17 00:00:00 2001 From: abdussamedulutas Date: Sun, 28 Dec 2025 21:58:15 +0300 Subject: [PATCH] =?UTF-8?q?Pratt=20Parsing=20Algoritmas=C4=B1=20eklendi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Parsing.cpp | 2 +- core/Parser/ASTNode.cpp | 55 ++++ core/Parser/Parser.cpp | 272 ++++++++++++++++++++ core/{Parser.cpp => Parser/ParserToken.cpp} | 175 ++++--------- core/Tokenizer.cpp | 2 - source.sqt | 3 +- 6 files changed, 377 insertions(+), 132 deletions(-) create mode 100644 core/Parser/ASTNode.cpp create mode 100644 core/Parser/Parser.cpp rename core/{Parser.cpp => Parser/ParserToken.cpp} (78%) diff --git a/Parsing.cpp b/Parsing.cpp index 32e6742..7944b8f 100644 --- a/Parsing.cpp +++ b/Parsing.cpp @@ -4,7 +4,7 @@ #include #include #include "./core/Tokenizer.cpp" -#include "./core/Parser.cpp" +#include "./core/Parser/Parser.cpp" int main() { diff --git a/core/Parser/ASTNode.cpp b/core/Parser/ASTNode.cpp new file mode 100644 index 0000000..af33058 --- /dev/null +++ b/core/Parser/ASTNode.cpp @@ -0,0 +1,55 @@ +#include +#include + +#include "../Tokenizer.cpp" +#include "./ParserToken.cpp" + +#ifndef AST +#define AST + + +enum class ASTKind +{ + BinaryExpression, + Literal +}; + +class ASTNode +{ + private: + std::vector childrens; + public: + ASTKind kind; + ASTNode * parent; + public: + void addChild(ASTNode * children) + { + this->childrens.push_back(children); + } + void setParent(ASTNode * children) + { + this->parent = children; + } +}; + +class BinaryExpressionNode : public ASTNode +{ + protected: + ASTKind Kind = ASTKind::BinaryExpression; + public: + ASTNode* Right; + TokenType Operator; + ASTNode* Left; +}; + + +class LiteralNode : public ASTNode +{ + protected: + ASTKind kind = ASTKind::Literal; + public: + Token lexerToken; + ParserToken parserToken; +}; + +#endif \ No newline at end of file diff --git a/core/Parser/Parser.cpp b/core/Parser/Parser.cpp new file mode 100644 index 0000000..24e456f --- /dev/null +++ b/core/Parser/Parser.cpp @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include +#include "../Tokenizer.cpp" +#include "../Tools.cpp" +#include +#include +#include "./ParserToken.cpp" +#include "./ASTNode.cpp" + + +#ifndef PARSER +#define PARSER + + +class Parser { + private: + ASTNode astroot; + public: + TokenList tokens; + void parse(TokenList tokens); + int current = 0; + ParserToken currentToken(); + void nextToken(); + ParserToken lookehead(uint32_t); + ParserToken parseToken(Token); + ParserToken getToken(int); + void primaryExpression(); + ASTNode * volumeExpression(uint16_t precedence); + ASTNode * volumeNullDominatorExpression(); + ASTNode * volumeLeftDominatorExpression(ASTNode * left); +}; + + +ParserToken Parser::parseToken(Token token){ + ParserToken pToken; + pToken.token = token; + + if(token.gettype() == "string") + { + pToken.type = TokenType::STRING; + } + else if(token.gettype() == "number") + { + pToken.type = TokenType::NUMBER; + } + else if(token.gettype() == "operator") + { + pToken.type = OPERATOR_MAP.find(token.token)->second; + } + else if(token.gettype() == "delimiter") + { + pToken.type = OPERATOR_MAP.find(token.token)->second; + } + else if(token.gettype() == "keyword") + { + pToken.type = KEYWORD_MAP.find(token.token)->second; + } + else if(token.gettype() == "identifier") + { + pToken.type = KEYWORD_MAP.find(token.token)->second; + } + + return pToken; +} + + +ParserToken Parser::getToken(int offset){ + if(this->tokens.size() - 1 < this->current + offset) + { + ParserToken pToken; + pToken.type = TokenType::SVR_VOID; + return pToken; + } + return this->parseToken(this->tokens[this->current + offset]); +} + +void Parser::nextToken(){ + if(this->tokens.size() <= this->current + 1) + { + this->current++; + } +} + +ParserToken Parser::lookehead(uint32_t forward){ + return this->getToken(this->current + forward); +} + +ParserToken Parser::currentToken(){ + return this->getToken(this->current); +} + +void Parser::parse(TokenList tokens){ + this->tokens = tokens; + this->primaryExpression(); +} + +void Parser::primaryExpression() +{ + auto currentToken = this->currentToken(); + + if( + currentToken.is({ + TokenType::NUMBER, + TokenType::PLUS_PLUS, + TokenType::MINUS_MINUS, + TokenType::PLUS, + TokenType::MINUS, + TokenType::BANG, + TokenType::TILDE, + }) + ) + { + this->volumeExpression(0); + } +} + +ASTNode * Parser::volumeExpression(uint16_t precedence) +{ + if (this->currentToken().type == TokenType::SVR_VOID) + { + return nullptr; + } + + ASTNode* left = this->volumeNullDominatorExpression(); + + while(1) + { + auto nextToken = this->lookehead(+1); + if(precedence < nextToken.getPowerOperator()) + { + this->nextToken(); + left = this->volumeLeftDominatorExpression(left); + }else{ + break; + } + } + return left; +} + + + +ASTNode * Parser::volumeNullDominatorExpression() +{ + auto currentToken = this->currentToken(); + + if (currentToken.type == TokenType::SVR_VOID) { + // Hata: "Beklenmedik dosya sonu, bir değer bekleniyordu!" + return nullptr; + } + + + if(currentToken.is({ + TokenType::PLUS_PLUS, + TokenType::MINUS_MINUS, + TokenType::PLUS, + TokenType::MINUS, + TokenType::BANG, + TokenType::TILDE, + })) { + this->nextToken(); + ASTNode * right = this->volumeExpression(currentToken.getPowerOperator()); + BinaryExpressionNode * binNode = new BinaryExpressionNode(); + binNode->Right = right; + binNode->Left = nullptr; + binNode->Operator = currentToken.type; + return binNode; + }; + + if(currentToken.is({ + TokenType::NUMBER + })) { + LiteralNode * lNode = new LiteralNode(); + lNode->lexerToken = currentToken.token; + lNode->parserToken = currentToken; + return lNode; + } + + return nullptr; +} + + + +ASTNode * Parser::volumeLeftDominatorExpression(ASTNode * left) +{ + auto currentToken = this->currentToken(); + uint16_t precedence = currentToken.getPowerOperator(); + this->nextToken(); + auto right = this->volumeExpression(precedence); + + BinaryExpressionNode * binNode = new BinaryExpressionNode(); + binNode->Right = right; + binNode->Left = left; + binNode->Operator = currentToken.type; + return binNode; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#endif \ No newline at end of file diff --git a/core/Parser.cpp b/core/Parser/ParserToken.cpp similarity index 78% rename from core/Parser.cpp rename to core/Parser/ParserToken.cpp index c669cbb..588af04 100644 --- a/core/Parser.cpp +++ b/core/Parser/ParserToken.cpp @@ -1,27 +1,21 @@ -#include -#include -#include +#include "../Tokenizer.cpp" +#include "../Tools.cpp" +#include #include -#include -#include "./Tokenizer.cpp" -#include "./Tools.cpp" +#include - -#ifndef PARSER -#define PARSER +#ifndef PARSER_TOKEN +#define PARSER_TOKEN typedef std::vector TokenList; -// token.h -#pragma once -#include -#include enum class TokenType : uint16_t { // --- Değerler ve Tanımlayıcılar --- IDENTIFIER, // değişken/fonksiyon isimleri NUMBER, // 42, 0xFF, 0b1010 STRING, // "merhaba" + SVR_VOID, // --- KEYWORD'ler (Alfabetik) --- KW_IF, // if @@ -310,77 +304,75 @@ uint16_t TokenPrecedence(TokenType type) { case TokenType::ARROW: case TokenType::LBRACKET: case TokenType::LPAREN: - return 170; + return 18; // Seviye 16: Postfix case TokenType::PLUS_PLUS: case TokenType::MINUS_MINUS: - return 160; + return 17; // Seviye 15: Unary/Prefix - case TokenType::PLUS: // unary + - case TokenType::MINUS: // unary - case TokenType::BANG: // ! case TokenType::TILDE: // ~ - return 150; + return 16; // Seviye 14: Üs alma case TokenType::STAR_STAR: // ** - case TokenType::CARET: // ^ (üs olarak) - return 140; + case TokenType::CARET: // ^ + return 15; // Seviye 13: Çarpma/bölme case TokenType::STAR: // * case TokenType::SLASH: // / case TokenType::PERCENT: // % - return 130; + return 14; - // Seviye 12: Toplama/çıkarma (binary) - // PLUS ve MINUS'un binary kullanımı - return 120; + case TokenType::PLUS: // + + case TokenType::MINUS: // - + return 13; // Seviye 11: Bitsel kaydırma case TokenType::LSHIFT: // << case TokenType::RSHIFT: // >> - return 110; + return 12; // Seviye 10: İlişkisel case TokenType::LESS: // < case TokenType::LESS_EQUAL:// <= case TokenType::GREATER: // > case TokenType::GREATER_EQUAL: // >= - return 100; + return 11; // Seviye 9: Eşitlik case TokenType::EQUAL_EQUAL: // == case TokenType::BANG_EQUAL: // != - return 90; + return 10; // Seviye 8: Bitsel VE case TokenType::AMPERSAND: // & - return 80; + return 9; // Seviye 7: Bitsel XOR // CARET burada binary XOR olarak - return 70; + return 8; // Seviye 6: Bitsel VEYA case TokenType::PIPE: // | - return 60; + return 7; // Seviye 5: Mantıksal VE case TokenType::AMPERSAND_AMPERSAND: // && - return 50; + return 6; // Seviye 4: Mantıksal VEYA case TokenType::PIPE_PIPE: // || - return 40; + return 5; // Seviye 3: Ternary (özel işlem) case TokenType::TERNARY: // ? - return 30; + return 4; case TokenType::COLON: // : (ternary için) - return 35; // özel değer + return 3; // özel değer // Seviye 2: Atama case TokenType::EQUAL: // = @@ -394,18 +386,18 @@ uint16_t TokenPrecedence(TokenType type) { case TokenType::CARET_EQUAL:// ^= case TokenType::LSHIFT_EQUAL:// <<= case TokenType::RSHIFT_EQUAL:// >>= - return 20; + return 2; // Seviye 1: Virgül case TokenType::COMMA: // , - return 10; + return 1; default: return 0; // Önceliksiz } } -bool Token_is_right_associative(TokenType type) +bool RightAssociative(TokenType type) { switch (type) { // Sağdan sola işleyen operatörler: @@ -424,7 +416,6 @@ bool Token_is_right_associative(TokenType type) case TokenType::RSHIFT_EQUAL:// >>= case TokenType::TERNARY: // ? (ternary) return true; - // Soldan sağa işleyenler: default: return false; @@ -435,95 +426,25 @@ struct ParserToken { Token token; TokenType type; + bool is(TokenType type){ + return this->type == type; + } + bool is(std::initializer_list types){ + for (TokenType t : types) { + if (this->type == t) { + return true; + } + } + return false; + } + uint16_t getPowerOperator() + { + return TokenPrecedence(this->type); + } + bool isRightAssociative() + { + return RightAssociative(this->type); + } }; -class Parser { - public: - TokenList tokens; - void parse(TokenList tokens); - int current = 0; - ParserToken currentToken(); - void nextToken(); - bool lookehead(TokenType,uint32_t); - ParserToken parseToken(Token); - ParserToken getToken(int); -}; - - -ParserToken Parser::parseToken(Token token){ - ParserToken pToken; - pToken.token = token; - - if(token.gettype() == "string") - { - pToken.type = TokenType::STRING; - } - else if(token.gettype() == "number") - { - pToken.type = TokenType::NUMBER; - } - else if(token.gettype() == "operator") - { - pToken.type = OPERATOR_MAP.find(token.token)->second; - } - else if(token.gettype() == "delimiter") - { - pToken.type = OPERATOR_MAP.find(token.token)->second; - } - else if(token.gettype() == "keyword") - { - pToken.type = KEYWORD_MAP.find(token.token)->second; - } - else if(token.gettype() == "identifier") - { - pToken.type = KEYWORD_MAP.find(token.token)->second; - } - - return pToken; -} - - -ParserToken Parser::getToken(int offset){ - return this->parseToken(this->tokens[this->current + offset]); -} - -void Parser::nextToken(){ - this->current++; -} - -bool Parser::lookehead(TokenType type, uint32_t forward){ - ParserToken token = this->getToken(forward); - return token.type == type; -} - -void Parser::parse(TokenList tokens){ - this->tokens = tokens; - for(Token token : tokens) - { - std::cout << padRight(token.token,20) << token.gettype() << "\n"; - } -} - - - -/* -1. () [] . -> // Gruplama, üye erişimi -2. ++ -- (postfix) // Sonra artır/azalt -3. ++ -- + - ! ~ (type) // Önce artır/azalt, unary -4. ** ^ // Üs alma -5. * / % // Çarpma/bölme -6. + - // Toplama/çıkarma -7. << >> // Bitsel kaydırma -8. < <= > >= // Karşılaştırma -9. == != // Eşitlik -10. & // Bitsel VE -11. ^ // Bitsel XOR -12. | // Bitsel VEYA -13. && // Mantıksal VE -14. || // Mantıksal VEYA -15. ?: // Ternary (koşul) -16. = += -= *= /= %= ... // Atama -17. , // Virgül -*/ - #endif \ No newline at end of file diff --git a/core/Tokenizer.cpp b/core/Tokenizer.cpp index 022313b..2a929f0 100644 --- a/core/Tokenizer.cpp +++ b/core/Tokenizer.cpp @@ -350,7 +350,6 @@ StringToken Tokenizer::readString() } void Tokenizer::skipOneLineComment() { - std::cout << "SkipLineComment\n"; while(this->hmx.isEnd() == false) { if(this->hmx.getchar() == '\n') @@ -365,7 +364,6 @@ void Tokenizer::skipOneLineComment() } void Tokenizer::skipMultiLineComment() { - std::cout << "SkipBlockComment\n"; while(this->hmx.isEnd() == false) { if(this->hmx.include("*/",true)) diff --git a/source.sqt b/source.sqt index 5a4ff33..9c0719e 100644 --- a/source.sqt +++ b/source.sqt @@ -1,2 +1 @@ -// Basit -25 + 53 + 75 \ No newline at end of file +75.37 \ No newline at end of file