Pratt Parsing Algoritması eklendi

2025-12-28 21:58:15 +03:00 · 2025-12-28 21:58:15 +03:00 · c146704857
parent ea25e07e3f
commit c146704857
6 changed files with 377 additions and 132 deletions
--- a/Parsing.cpp
+++ b/Parsing.cpp
@ -4,7 +4,7 @@
 #include <string>
 #include <stdlib.h>
 #include "./core/Tokenizer.cpp"
-#include "./core/Parser.cpp"
+#include "./core/Parser/Parser.cpp"

 int main()
 {
--- a/core/Parser/ASTNode.cpp
+++ b/core/Parser/ASTNode.cpp
@ -0,0 +1,55 @@
+#include <unordered_map>
+#include <string_view>
+
+#include "../Tokenizer.cpp"
+#include "./ParserToken.cpp"
+
+#ifndef AST
+#define AST
+
+
+// Tag stored in ASTNode::kind to tell the concrete node types apart.
+enum class ASTKind 
+{
+    BinaryExpression, // used by BinaryExpressionNode
+    Literal           // used by LiteralNode
+};
+
+/**
+ * Base type of every syntax-tree node.
+ *
+ * Nodes are linked through raw pointers: a list of children plus a single
+ * parent link. This class stores the pointers as given and never deletes them.
+ */
+class ASTNode
+{
+    private:
+        // Child links in insertion order.
+        std::vector<ASTNode *> childrens;
+    public:
+        // Value-initialised (first enumerator) so it is never read while
+        // indeterminate; concrete node types are expected to overwrite it.
+        ASTKind kind{};
+        // Null until setParent() is called.
+        ASTNode * parent = nullptr;
+    public:
+        ASTNode() = default;
+        ASTNode(const ASTNode &) = default;
+        ASTNode & operator=(const ASTNode &) = default;
+        ASTNode(ASTNode &&) = default;
+        ASTNode & operator=(ASTNode &&) = default;
+        // Derived nodes are created with `new` and handed around as ASTNode*,
+        // so destruction through the base pointer must be well defined.
+        virtual ~ASTNode() = default;
+
+        /// Appends `children` to this node's child list (the pointer is stored as-is).
+        void addChild(ASTNode * children)
+        {
+            this->childrens.push_back(children);
+        }
+        /// Records `children` as this node's parent (the pointer is stored as-is).
+        void setParent(ASTNode * children)
+        {
+            this->parent = children;
+        }
+};
+
+/**
+ * Operator node with a left and a right operand.
+ *
+ * The parser also uses it for prefix operators, in which case Left stays null.
+ */
+class BinaryExpressionNode : public ASTNode
+{
+    public:
+        BinaryExpressionNode()
+        {
+            // Tag the inherited ASTNode::kind; a separate member in this class
+            // would leave the base field (the one visible through ASTNode*) unset.
+            this->kind = ASTKind::BinaryExpression;
+        }
+        ASTNode* Right = nullptr;
+        // Value-initialised; the parser assigns the real operator type.
+        TokenType Operator{};
+        ASTNode* Left = nullptr;
+};
+
+
+/**
+ * Leaf node holding a literal value, kept both as the lexer token and as the
+ * classified parser token.
+ */
+class LiteralNode : public ASTNode
+{
+    public:
+        LiteralNode()
+        {
+            // Tag the inherited ASTNode::kind. Re-declaring `kind` here would hide
+            // the public base field and leave it unset.
+            this->kind = ASTKind::Literal;
+        }
+        Token lexerToken;
+        ParserToken parserToken;
+};
+
+#endif
--- a/core/Parser/Parser.cpp
+++ b/core/Parser/Parser.cpp
@ -0,0 +1,272 @@
+#include <iostream>
+#include <string>
+#include <stdlib.h>
+#include <unordered_map>
+#include <string_view>
+#include "../Tokenizer.cpp"
+#include "../Tools.cpp"
+#include <cstdint>
+#include <string_view>
+#include "./ParserToken.cpp"
+#include "./ASTNode.cpp"
+
+
+#ifndef PARSER
+#define PARSER
+
+
+/**
+ * Expression parser working on a TokenList.
+ *
+ * The `volume*` members implement the precedence-driven (Pratt style)
+ * expression routines; the remaining members manage the token cursor.
+ */
+class Parser {
+    private:
+        // Root node of the syntax tree.
+        ASTNode astroot;
+
+    public:
+        // --- State ---
+        TokenList tokens;   // token stream handed to parse()
+        int current = 0;    // cursor into `tokens`
+
+        // --- Entry point ---
+        void parse(TokenList tokens);
+
+        // --- Token cursor helpers ---
+        ParserToken currentToken();
+        void nextToken();
+        ParserToken lookehead(uint32_t);
+        ParserToken getToken(int);
+        ParserToken parseToken(Token);
+
+        // --- Expression routines ---
+        void primaryExpression();
+        ASTNode * volumeExpression(uint16_t precedence);
+        ASTNode * volumeNullDominatorExpression();
+        ASTNode * volumeLeftDominatorExpression(ASTNode * left);
+};
+
+
+/**
+ * Classifies a lexer token.
+ *
+ * The lexer category string ("string", "number", "operator", "delimiter",
+ * "keyword", "identifier") selects how the TokenType is derived. Operators and
+ * delimiters are resolved through OPERATOR_MAP, keywords through KEYWORD_MAP.
+ * An identifier is resolved through KEYWORD_MAP as well and falls back to
+ * TokenType::IDENTIFIER when its text is not listed there.
+ *
+ * @param token lexer token to classify
+ * @return ParserToken carrying a copy of `token` and its type. The type is
+ *         TokenType::SVR_VOID when the category is unknown or the text is
+ *         missing from the lookup table.
+ */
+ParserToken Parser::parseToken(Token token){
+    ParserToken pToken;
+    pToken.token = token;
+    // Defined fallback so `type` is never left uninitialised.
+    pToken.type = TokenType::SVR_VOID;
+
+    const auto category = token.gettype();
+
+    if(category == "string")
+    {
+        pToken.type = TokenType::STRING;
+    }
+    else if(category == "number")
+    {
+        pToken.type = TokenType::NUMBER;
+    }
+    else if(category == "operator" || category == "delimiter")
+    {
+        // find() may return end(); dereferencing it is undefined behaviour.
+        const auto entry = OPERATOR_MAP.find(token.token);
+        if(entry != OPERATOR_MAP.end())
+        {
+            pToken.type = entry->second;
+        }
+    }
+    else if(category == "keyword")
+    {
+        const auto entry = KEYWORD_MAP.find(token.token);
+        if(entry != KEYWORD_MAP.end())
+        {
+            pToken.type = entry->second;
+        }
+    }
+    else if(category == "identifier")
+    {
+        // A plain identifier is normally absent from the keyword table.
+        const auto entry = KEYWORD_MAP.find(token.token);
+        pToken.type = (entry != KEYWORD_MAP.end()) ? entry->second : TokenType::IDENTIFIER;
+    }
+
+    return pToken;
+}
+
+
+/**
+ * Returns the token located `offset` positions away from the cursor.
+ *
+ * @param offset signed distance from `current` (0 = token under the cursor)
+ * @return the classified token, or a token of type TokenType::SVR_VOID when the
+ *         resulting index lies outside the token list (including an empty list)
+ */
+ParserToken Parser::getToken(int offset){
+    // Signed arithmetic: `size() - 1` would wrap around on an empty list and a
+    // negative index must be rejected as well.
+    const long long index = static_cast<long long>(this->current) + offset;
+    const long long count = static_cast<long long>(this->tokens.size());
+
+    if(index < 0 || index >= count)
+    {
+        ParserToken pToken;
+        pToken.type = TokenType::SVR_VOID;
+        return pToken;
+    }
+    return this->parseToken(this->tokens[static_cast<TokenList::size_type>(index)]);
+}
+
+/**
+ * Moves the cursor one token forward.
+ *
+ * The cursor may reach one position past the last token, where getToken()
+ * reports TokenType::SVR_VOID, but never goes further.
+ */
+void Parser::nextToken(){
+    // The previous condition was inverted and only advanced at the end of input.
+    if(static_cast<long long>(this->current) < static_cast<long long>(this->tokens.size()))
+    {
+        this->current++;
+    }
+}
+
+/**
+ * Peeks at the token `forward` positions after the cursor without moving it.
+ *
+ * @param forward distance from the cursor (1 = the next token)
+ */
+ParserToken Parser::lookehead(uint32_t forward){
+    // getToken() already adds `current`; adding it here too doubled the offset.
+    return this->getToken(static_cast<int>(forward));
+}
+
+/**
+ * Returns the token under the cursor.
+ */
+ParserToken Parser::currentToken(){
+    // getToken() takes an offset relative to `current`, so the cursor is offset 0.
+    return this->getToken(0);
+}
+
+/**
+ * Parses a token stream.
+ *
+ * @param tokens token list to parse; it is stored in `this->tokens`
+ */
+void Parser::parse(TokenList tokens){
+    this->tokens = tokens;
+    // Restart from the first token so the parser can be reused for another input.
+    this->current = 0;
+    this->primaryExpression();
+}
+
+/**
+ * Parses one expression when the current token can start one (a number or a
+ * prefix operator) and hooks the resulting tree under the root node.
+ *
+ * Any other token is left untouched.
+ */
+void Parser::primaryExpression()
+{
+    auto currentToken = this->currentToken();
+
+    if(
+        currentToken.is({
+            TokenType::NUMBER,
+            TokenType::PLUS_PLUS,
+            TokenType::MINUS_MINUS,
+            TokenType::PLUS,
+            TokenType::MINUS,
+            TokenType::BANG,
+            TokenType::TILDE,
+        })
+    )
+    {
+        ASTNode * expression = this->volumeExpression(0);
+        // Keep the parsed tree reachable instead of dropping (and leaking) it.
+        if(expression != nullptr)
+        {
+            expression->setParent(&this->astroot);
+            this->astroot.addChild(expression);
+        }
+    }
+}
+
+/**
+ * Core precedence loop.
+ *
+ * Parses a prefix/primary part first, then keeps extending it for as long as
+ * the token returned by lookehead(+1) has a power greater than `precedence`.
+ *
+ * @param precedence minimum power an operator must exceed to be consumed here
+ * @return the expression built so far, or nullptr when the current token is
+ *         TokenType::SVR_VOID
+ */
+ASTNode * Parser::volumeExpression(uint16_t precedence)
+{
+    const bool atEnd = (this->currentToken().type == TokenType::SVR_VOID);
+    if (atEnd)
+    {
+        return nullptr;
+    }
+
+    ASTNode * expression = this->volumeNullDominatorExpression();
+
+    // Fold in every following operator that binds tighter than the caller's level.
+    while (precedence < this->lookehead(+1).getPowerOperator())
+    {
+        this->nextToken();
+        expression = this->volumeLeftDominatorExpression(expression);
+    }
+
+    return expression;
+}
+
+
+
+/**
+ * Parses a token that starts an expression: a prefix operator followed by its
+ * operand, or a number literal.
+ *
+ * A prefix operator yields a BinaryExpressionNode whose Left is null and whose
+ * Right is the operand. A number yields a LiteralNode.
+ *
+ * @return the new node, or nullptr when the current token is TokenType::SVR_VOID
+ *         or cannot start an expression
+ */
+ASTNode * Parser::volumeNullDominatorExpression()
+{
+    auto currentToken = this->currentToken();
+    
+    if (currentToken.type == TokenType::SVR_VOID) {
+        // Error: unexpected end of input, a value was expected.
+        return nullptr; 
+    }
+    
+
+    if(currentToken.is({
+        TokenType::PLUS_PLUS,
+        TokenType::MINUS_MINUS,
+        TokenType::PLUS,
+        TokenType::MINUS,
+        TokenType::BANG,
+        TokenType::TILDE,
+    })) {
+        // TokenPrecedence() reports the binary (additive) power for + and -.
+        // As prefix operators they must bind like the other unary operators,
+        // otherwise `-2 * 3` is parsed as `-(2 * 3)`.
+        const bool isSignPrefix = currentToken.is({ TokenType::PLUS, TokenType::MINUS });
+        const uint16_t bindingPower = isSignPrefix
+            ? TokenPrecedence(TokenType::BANG)
+            : currentToken.getPowerOperator();
+
+        this->nextToken();
+        ASTNode * right = this->volumeExpression(bindingPower);
+        BinaryExpressionNode * binNode = new BinaryExpressionNode();
+        binNode->Right = right;
+        binNode->Left = nullptr;
+        binNode->Operator = currentToken.type;
+        return binNode;
+    }
+
+    if(currentToken.is({
+        TokenType::NUMBER
+    })) {
+        LiteralNode * lNode = new LiteralNode();
+        lNode->lexerToken = currentToken.token;
+        lNode->parserToken = currentToken;
+        return lNode;
+    }
+
+    return nullptr;
+}
+
+
+
+/**
+ * Parses an infix operator and its right operand.
+ *
+ * Expects the cursor to be on the operator token.
+ *
+ * @param left already parsed left operand
+ * @return a new BinaryExpressionNode combining `left`, the operator and the
+ *         parsed right operand (Right may be null when no operand follows)
+ */
+ASTNode * Parser::volumeLeftDominatorExpression(ASTNode * left)
+{
+    auto currentToken = this->currentToken();
+    uint16_t precedence = currentToken.getPowerOperator();
+
+    // For a right-associative operator the operand is parsed one level lower,
+    // so an operator of the same level nests into the right operand.
+    uint16_t rightBindingPower = precedence;
+    if(currentToken.isRightAssociative() && precedence > 0)
+    {
+        rightBindingPower = static_cast<uint16_t>(precedence - 1);
+    }
+
+    this->nextToken();
+    auto right = this->volumeExpression(rightBindingPower);
+
+    BinaryExpressionNode * binNode = new BinaryExpressionNode();
+    binNode->Right = right;
+    binNode->Left = left;
+    binNode->Operator = currentToken.type;
+    return binNode;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif
--- a/core/Parser/ParserToken.cpp
+++ b/core/Parser/ParserToken.cpp
@ -1,27 +1,21 @@
-#include <iostream>
-#include <string>
-#include <stdlib.h>
+#include "../Tokenizer.cpp"
+#include "../Tools.cpp"
+#include <cstdint>
 #include <unordered_map>
-#include <string_view>
-#include "./Tokenizer.cpp"
-#include "./Tools.cpp"
+#include <initializer_list>

-
-#ifndef PARSER
-#define PARSER
+#ifndef PARSER_TOKEN
+#define PARSER_TOKEN

 typedef std::vector<Token> TokenList;

-// token.h
-#pragma once
-#include <cstdint>
-#include <string_view>
 enum class TokenType : uint16_t
 {
    // --- Değerler ve Tanımlayıcılar ---
    IDENTIFIER,     // değişken/fonksiyon isimleri
    NUMBER,        // 42, 0xFF, 0b1010
    STRING,         // "merhaba"
+    SVR_VOID,
    
    // --- KEYWORD'ler (Alfabetik) ---
    KW_IF,          // if
@ -310,77 +304,75 @@ uint16_t TokenPrecedence(TokenType type) {
        case TokenType::ARROW:
        case TokenType::LBRACKET:
        case TokenType::LPAREN:
-            return 170;
+            return 18;
            
        // Seviye 16: Postfix
        case TokenType::PLUS_PLUS:
        case TokenType::MINUS_MINUS:
-            return 160;
+            return 17;
            
        // Seviye 15: Unary/Prefix
-        case TokenType::PLUS:      // unary +
-        case TokenType::MINUS:     // unary -
        case TokenType::BANG:      // !
        case TokenType::TILDE:     // ~
-            return 150;
+            return 16;
            
        // Seviye 14: Üs alma
        case TokenType::STAR_STAR: // **
-        case TokenType::CARET:     // ^ (üs olarak)
-            return 140;
+        case TokenType::CARET:     // ^
+            return 15;
            
        // Seviye 13: Çarpma/bölme
        case TokenType::STAR:      // *
        case TokenType::SLASH:     // /
        case TokenType::PERCENT:   // %
-            return 130;
+            return 14;
            
-        // Seviye 12: Toplama/çıkarma (binary)
-        // PLUS ve MINUS'un binary kullanımı
-            return 120;
+        case TokenType::PLUS:      // +
+        case TokenType::MINUS:     // -
+            return 13;
            
        // Seviye 11: Bitsel kaydırma
        case TokenType::LSHIFT:    // <<
        case TokenType::RSHIFT:    // >>
-            return 110;
+            return 12;
            
        // Seviye 10: İlişkisel
        case TokenType::LESS:      // <
        case TokenType::LESS_EQUAL:// <=
        case TokenType::GREATER:   // >
        case TokenType::GREATER_EQUAL: // >=
-            return 100;
+            return 11;
            
        // Seviye 9: Eşitlik
        case TokenType::EQUAL_EQUAL:   // ==
        case TokenType::BANG_EQUAL:    // !=
-            return 90;
+            return 10;
            
        // Seviye 8: Bitsel VE
        case TokenType::AMPERSAND: // &
-            return 80;
+            return 9;
            
        // Seviye 7: Bitsel XOR
        // CARET burada binary XOR olarak
-            return 70;
+            return 8;
            
        // Seviye 6: Bitsel VEYA
        case TokenType::PIPE:      // |
-            return 60;
+            return 7;
            
        // Seviye 5: Mantıksal VE
        case TokenType::AMPERSAND_AMPERSAND: // &&
-            return 50;
+            return 6;
            
        // Seviye 4: Mantıksal VEYA
        case TokenType::PIPE_PIPE: // ||
-            return 40;
+            return 5;
            
        // Seviye 3: Ternary (özel işlem)
        case TokenType::TERNARY:  // ?
-            return 30;
+            return 4;
        case TokenType::COLON:     // : (ternary için)
-            return 35;             // özel değer
+            return 3;             // özel değer
            
        // Seviye 2: Atama
        case TokenType::EQUAL:     // =
@ -394,18 +386,18 @@ uint16_t TokenPrecedence(TokenType type) {
        case TokenType::CARET_EQUAL:// ^=
        case TokenType::LSHIFT_EQUAL:// <<=
        case TokenType::RSHIFT_EQUAL:// >>=
-            return 20;
+            return 2;
            
        // Seviye 1: Virgül
        case TokenType::COMMA:     // ,
-            return 10;
+            return 1;
            
        default:
            return 0;  // Önceliksiz
    }
 }

-bool Token_is_right_associative(TokenType type)
+bool RightAssociative(TokenType type)
 {
    switch (type) {
        // Sağdan sola işleyen operatörler:
@ -424,7 +416,6 @@ bool Token_is_right_associative(TokenType type)
        case TokenType::RSHIFT_EQUAL:// >>=
        case TokenType::TERNARY:   // ? (ternary)
            return true;
-            
        // Soldan sağa işleyenler:
        default:
            return false;
@ -435,95 +426,25 @@ struct ParserToken
 {
    Token token;
    TokenType type;
+    /// True when this token's type equals `type`. Const so it can be used on const tokens.
+    bool is(TokenType type) const {
+        return this->type == type;
+    }
+    /// True when this token's type equals any entry of `types` (false for an empty list).
+    /// Const so it can be used on const tokens.
+    bool is(std::initializer_list<TokenType> types) const {
+        for (const TokenType candidate : types) {
+            if (this->type == candidate) {
+                return true;
+            }
+        }
+        return false;
+    }
+    /// Power of this token as reported by TokenPrecedence().
+    /// Const so it can be used on const tokens.
+    uint16_t getPowerOperator() const
+    {
+        return TokenPrecedence(this->type);
+    }
+    /// Whether RightAssociative() reports this token's type as right-associative.
+    /// Const so it can be used on const tokens.
+    bool isRightAssociative() const
+    {
+        return RightAssociative(this->type);
+    }
 };

-class Parser {
-    public:
-        TokenList tokens;
-        void parse(TokenList tokens);
-        int current = 0;
-        ParserToken currentToken();
-        void nextToken();
-        bool lookehead(TokenType,uint32_t);
-        ParserToken parseToken(Token);
-        ParserToken getToken(int);
-};
-
-
-ParserToken Parser::parseToken(Token token){
-    ParserToken pToken;
-    pToken.token = token;
-    
-    if(token.gettype() == "string")
-    {
-        pToken.type = TokenType::STRING;
-    }
-    else if(token.gettype() == "number")
-    {
-        pToken.type = TokenType::NUMBER;
-    }
-    else if(token.gettype() == "operator")
-    {
-        pToken.type = OPERATOR_MAP.find(token.token)->second;
-    }
-    else if(token.gettype() == "delimiter")
-    {
-        pToken.type = OPERATOR_MAP.find(token.token)->second;
-    }
-    else if(token.gettype() == "keyword")
-    {
-        pToken.type = KEYWORD_MAP.find(token.token)->second;
-    }
-    else if(token.gettype() == "identifier")
-    {
-        pToken.type = KEYWORD_MAP.find(token.token)->second;
-    }
-
-    return pToken;
-}
-
-
-ParserToken Parser::getToken(int offset){
-    return this->parseToken(this->tokens[this->current + offset]);
-}
-
-void Parser::nextToken(){
-    this->current++;
-}
-
-bool Parser::lookehead(TokenType type, uint32_t forward){
-    ParserToken token = this->getToken(forward);
-    return token.type == type;
-}
-
-void Parser::parse(TokenList tokens){
-    this->tokens = tokens;
-    for(Token token : tokens)
-    {
-        std::cout << padRight(token.token,20) << token.gettype() << "\n";
-    }
-}
-
-
-
-/*
-1.  () [] . ->           // Gruplama, üye erişimi
-2.  ++ -- (postfix)      // Sonra artır/azalt
-3.  ++ -- + - ! ~ (type) // Önce artır/azalt, unary
-4.  ** ^                  // Üs alma
-5.  * / %                // Çarpma/bölme
-6.  + -                  // Toplama/çıkarma  
-7.  << >>                // Bitsel kaydırma
-8.  < <= > >=            // Karşılaştırma
-9.  == !=                // Eşitlik
-10. &                    // Bitsel VE
-11. ^                    // Bitsel XOR
-12. |                    // Bitsel VEYA
-13. &&                   // Mantıksal VE
-14. ||                   // Mantıksal VEYA
-15. ?:                   // Ternary (koşul)
-16. = += -= *= /= %= ... // Atama
-17. ,                    // Virgül
-*/
-
 #endif
--- a/core/Tokenizer.cpp
+++ b/core/Tokenizer.cpp
@ -350,7 +350,6 @@ StringToken Tokenizer::readString()
 }
 void Tokenizer::skipOneLineComment()
 {
-    std::cout << "SkipLineComment\n";
    while(this->hmx.isEnd() == false)
    {
        if(this->hmx.getchar() == '\n')
@ -365,7 +364,6 @@ void Tokenizer::skipOneLineComment()
 }
 void Tokenizer::skipMultiLineComment()
 {
-    std::cout << "SkipBlockComment\n";
    while(this->hmx.isEnd() == false)
    {
        if(this->hmx.include("*/",true))
--- a/source.sqt
+++ b/source.sqt
@ -1,2 +1 @@
-// Basit
-25 + 53 + 75
+75.37