refactor: move the AST and Token structures to pointer semantics

- The Token and ASTNode structures were switched to pointer usage to prevent object slicing in memory (see the sketch directly after this list).
- Virtual destructors were added to the base classes (Token, ASTNode) to prevent memory leaks when derived objects are destroyed through base pointers.
- A parent pointer link was added to AST nodes, enabling upward navigation of the tree.
- The IR (intermediate representation) and CodeGenerator layers were added to the project.
- The Tokenizer gained an EOL (end-of-line) check, and its tokens are now allocated dynamically with new.
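
A minimal, self-contained sketch (not taken from this repository; Base and Derived are illustrative names) of the two problems the first two bullets address: storing polymorphic objects by value slices off the derived part, and deleting a derived object through a base pointer only runs the derived destructor if the base destructor is virtual.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Base {
    std::string token;
    virtual ~Base() = default;                 // without this, destroying a Derived
                                               // through a Base* never runs ~Derived()
    virtual const char* type() const { return "base"; }
};

struct Derived : Base {
    int extra = 42;                            // data that slicing discards
    const char* type() const override { return "derived"; }
};

int main() {
    // By value: only the Base subobject is copied in, so the dynamic type is lost.
    std::vector<Base> byValue;
    byValue.push_back(Derived{});
    std::cout << byValue[0].type() << "\n";    // prints "base"

    // By pointer: the dynamic type survives, and the virtual destructor
    // guarantees the whole Derived object is destroyed with the vector.
    std::vector<std::unique_ptr<Base>> byPointer;
    byPointer.push_back(std::make_unique<Derived>());
    std::cout << byPointer[0]->type() << "\n"; // prints "derived"
    return 0;
}
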
abdussamedulutas 2026-01-10 20:19:49 +03:00
parent 817d36bc4a
commit b0ff0ef841
6 changed files with 229 additions and 78 deletions


@ -5,6 +5,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "./core/Tokenizer.cpp" #include "./core/Tokenizer.cpp"
#include "./core/Parser/Parser.cpp" #include "./core/Parser/Parser.cpp"
#include "./core/IR/IR.cpp"
int main() int main()
{ {
@ -20,10 +21,14 @@ int main()
Tokenizer tokenizer; Tokenizer tokenizer;
Parser parser; Parser parser;
CodeGenerator codeGenerator;
auto tokens = tokenizer.scan(icerik); auto tokens = tokenizer.scan(icerik);
parser.parse(tokens); ASTNode * ast = parser.parse(tokens);
codeGenerator.parse(ast);
codeGenerator.IROpDatas;
return 0; return 0;
} }

core/IR/IR.cpp (new file, 133 lines)

@ -0,0 +1,133 @@
#include "../Parser/ASTNode.cpp"
#include "../Tokenizer.cpp"
#include <vector>
#include <variant>
#ifndef IR
#define IR
enum class OPCode {
// Arithmetic operations
mathadd,
mathsub,
mathdiv,
mathmul,
// Declaration
declare
};
struct Param {
bool isRegister;
std::variant<int,float> value;
};
struct IROpData {
OPCode op;
int targetReg;
Param arg1;
Param arg2;
Param arg3;
};
struct Identifier
{
int last = 0;
};
class CodeGenerator
{
private:
void * processNumber(NumberToken * num, const std::string& rawStr) {
if (num->isFloat || num->hasEpsilon) {
return new float(std::strtof(rawStr.c_str(), nullptr));
}
else {
return new int(std::strtol(rawStr.c_str(), nullptr, num->base));
}
}
public:
CodeGenerator()
{
}
Identifier identifier;
std::vector<IROpData> IROpDatas;
int parse(ASTNode * ast)
{
switch (ast->kind)
{
case ASTKind::BinaryExpression:{
return this->parseBinaryExpression((BinaryExpressionNode *) ast);
}
case ASTKind::Literal:{
return this->parseLiteral((LiteralNode *) ast);
}
default: return 0;
}
};
int parseBinaryExpression(BinaryExpressionNode * binaryAST)
{
OPCode op;
switch (binaryAST->Operator)
{
case TokenType::STAR:{
op = OPCode::mathmul;
break;
}
case TokenType::PLUS:{
op = OPCode::mathadd;
break;
}
case TokenType::MINUS:{
op = OPCode::mathsub;
break;
}
case TokenType::SLASH:{
op = OPCode::mathdiv;
break;
}
}
int left = this->parse(binaryAST->Left);
int right = this->parse(binaryAST->Right);
IROpDatas.push_back({
op,
++identifier.last,
{true, left},
{true, right},
{false, 0}
});
return identifier.last;
}
int parseLiteral(LiteralNode * binaryAST)
{
LiteralNode literal = *binaryAST;
NumberToken * num = (NumberToken *) &literal.parserToken.token;
if(num->isFloat)
{
float * _value = (float *) this->processNumber(num, num->token);
IROpDatas.push_back({
OPCode::declare,
++identifier.last,
{false, *_value},
{false, 0},
{false, 0}
});
}else{
int * _value = (int *) this->processNumber(num, num->token);
IROpDatas.push_back({
OPCode::declare,
++identifier.last,
{false, *_value},
{false, 0},
{false, 0}
});
}
return identifier.last;
}
};
#endif
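
For orientation, a rough end-to-end sketch of how this new IR layer can be driven, modeled on the main() change above and assuming the same relative include paths; the input string and the printing loop are illustrative and not part of the commit.

#include <iostream>
#include "./core/Tokenizer.cpp"
#include "./core/Parser/Parser.cpp"
#include "./core/IR/IR.cpp"

int main()
{
    Tokenizer tokenizer;
    Parser parser;
    CodeGenerator codeGenerator;

    // Illustrative input; the real main() reads the source text from a file.
    auto tokens = tokenizer.scan("1 + 2 * 3");
    ASTNode * ast = parser.parse(tokens);
    codeGenerator.parse(ast);                  // fills codeGenerator.IROpDatas

    for (const IROpData & op : codeGenerator.IROpDatas)
    {
        // Each record carries an opcode, a target register and up to three params.
        std::cout << static_cast<int>(op.op) << " -> r" << op.targetReg << "\n";
    }
    return 0;
}
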


@ -34,15 +34,18 @@ class ASTNode
{ {
this->parent = children; this->parent = children;
} }
virtual ~ASTNode() = default;
}; };
class BinaryExpressionNode : public ASTNode class BinaryExpressionNode : public ASTNode
{ {
protected:
ASTKind Kind = ASTKind::BinaryExpression;
public: public:
BinaryExpressionNode(){
this->kind = ASTKind::BinaryExpression;
}
void log(int indent) override void log(int indent) override
{ {
auto korku = OPERATOR_MAP_STRREV.find(this->Operator);
std::cout std::cout
<< padRight(" ",indent) << padRight(" ",indent)
<< "BinaryExpressionNode " << "BinaryExpressionNode "
@ -62,14 +65,16 @@ class BinaryExpressionNode : public ASTNode
class LiteralNode : public ASTNode class LiteralNode : public ASTNode
{ {
protected: protected:
ASTKind kind = ASTKind::Literal;
void log(int indent) void log(int indent)
{ {
std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken.token << "}\n"; std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken->token << "}\n";
} }
public: public:
Token lexerToken; Token * lexerToken;
ParserToken parserToken; ParserToken parserToken;
LiteralNode(){
this->kind = ASTKind::Literal;
}
}; };
class IdentifierNode : public ASTNode class IdentifierNode : public ASTNode
@ -78,10 +83,10 @@ class IdentifierNode : public ASTNode
ASTKind kind = ASTKind::Literal; ASTKind kind = ASTKind::Literal;
void log(int indent) void log(int indent)
{ {
std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken.token << "}\n"; std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken->token << "}\n";
} }
public: public:
Token lexerToken; Token * lexerToken;
ParserToken parserToken; ParserToken parserToken;
}; };
class PostfixNode : public ASTNode class PostfixNode : public ASTNode


@ -20,45 +20,45 @@ class Parser {
ASTNode astroot; ASTNode astroot;
public: public:
TokenList tokens; TokenList tokens;
void parse(TokenList tokens); ASTNode * parse(TokenList tokens);
int current = 0; int current = 0;
ParserToken currentToken(); ParserToken currentToken();
void nextToken(); void nextToken();
ParserToken lookehead(uint32_t); ParserToken lookehead(uint32_t);
ParserToken parseToken(Token); ParserToken parseToken(Token *);
ParserToken getToken(int); ParserToken getToken(int);
void primaryExpression(); ASTNode * primaryExpression();
ASTNode * volumeExpression(uint16_t precedence); ASTNode * volumeExpression(uint16_t precedence);
ASTNode * volumeNullDominatorExpression(); ASTNode * volumeNullDominatorExpression();
ASTNode * volumeLeftDominatorExpression(ASTNode * left); ASTNode * volumeLeftDominatorExpression(ASTNode * left);
}; };
ParserToken Parser::parseToken(Token token){ ParserToken Parser::parseToken(Token * token){
ParserToken pToken; ParserToken pToken;
pToken.token = token; pToken.token = *token;
if(token.gettype() == "string") if(token->gettype() == "string")
{ {
pToken.type = TokenType::STRING; pToken.type = TokenType::STRING;
} }
else if(token.gettype() == "number") else if(token->gettype() == "number")
{ {
pToken.type = TokenType::NUMBER; pToken.type = TokenType::NUMBER;
} }
else if(token.gettype() == "operator") else if(token->gettype() == "operator")
{ {
pToken.type = OPERATOR_MAP.find(token.token)->second; pToken.type = OPERATOR_MAP.find(token->token)->second;
} }
else if(token.gettype() == "delimiter") else if(token->gettype() == "delimiter")
{ {
pToken.type = OPERATOR_MAP.find(token.token)->second; pToken.type = OPERATOR_MAP.find(token->token)->second;
} }
else if(token.gettype() == "keyword") else if(token->gettype() == "keyword")
{ {
pToken.type = KEYWORD_MAP.find(token.token)->second; pToken.type = KEYWORD_MAP.find(token->token)->second;
} }
else if(token.gettype() == "identifier") else if(token->gettype() == "identifier")
{ {
pToken.type = TokenType::IDENTIFIER; pToken.type = TokenType::IDENTIFIER;
} }
@ -92,15 +92,16 @@ ParserToken Parser::currentToken(){
return this->getToken(0); return this->getToken(0);
} }
void Parser::parse(TokenList tokens){ ASTNode * Parser::parse(TokenList tokens){
this->tokens = tokens; this->tokens = tokens;
this->primaryExpression(); return this->primaryExpression();
} }
void Parser::primaryExpression() ASTNode * Parser::primaryExpression()
{ {
auto currentToken = this->currentToken(); auto currentToken = this->currentToken();
ASTNode * tree = nullptr;
if( if(
currentToken.is({ currentToken.is({
TokenType::NUMBER, TokenType::NUMBER,
@ -113,9 +114,9 @@ void Parser::primaryExpression()
}) })
) )
{ {
ASTNode * tree = this->volumeExpression(0); tree = this->volumeExpression(0);
tree->log(0);
} }
return tree;
} }
// This is the part that reads the whole expression through to the end
@ -197,14 +198,14 @@ ASTNode * Parser::volumeNullDominatorExpression()
if(currentToken.is(TokenType::NUMBER)) { if(currentToken.is(TokenType::NUMBER)) {
LiteralNode * lNode = new LiteralNode(); LiteralNode * lNode = new LiteralNode();
lNode->lexerToken = currentToken.token; lNode->lexerToken = &currentToken.token;
lNode->parserToken = currentToken; lNode->parserToken = currentToken;
return lNode; return lNode;
} }
if(currentToken.is(TokenType::IDENTIFIER)) { if(currentToken.is(TokenType::IDENTIFIER)) {
IdentifierNode * iNode = new IdentifierNode(); IdentifierNode * iNode = new IdentifierNode();
iNode->lexerToken = currentToken.token; iNode->lexerToken = &currentToken.token;
iNode->parserToken = currentToken; iNode->parserToken = currentToken;
return iNode; return iNode;
} }
@ -237,6 +238,8 @@ ASTNode * Parser::volumeLeftDominatorExpression(ASTNode * left)
binNode->Right = right; binNode->Right = right;
binNode->Left = left; binNode->Left = left;
binNode->Operator = currentToken.type; binNode->Operator = currentToken.type;
binNode->Right->parent = binNode;
binNode->Left->parent = binNode;
return binNode; return binNode;
} }


@ -7,7 +7,7 @@
#ifndef PARSER_TOKEN #ifndef PARSER_TOKEN
#define PARSER_TOKEN #define PARSER_TOKEN
typedef std::vector<Token> TokenList; typedef std::vector<Token *> TokenList;
enum class TokenType : uint16_t enum class TokenType : uint16_t
{ {


@ -17,6 +17,7 @@ class Token {
std::string gettype(){ std::string gettype(){
return this->type; return this->type;
} }
virtual ~Token() = default;
}; };
class StringToken : public Token { class StringToken : public Token {
@ -139,22 +140,26 @@ const constexpr std::string_view keywords[] = {
class Tokenizer { class Tokenizer {
public: public:
Lexer hmx; Lexer hmx;
std::vector<Token> scan(std::string input); std::vector<Token *> scan(std::string input);
Token scope(); Token * scope();
IdentifierToken readIndetifier(); IdentifierToken * readIndetifier();
StringToken readString(); StringToken * readString();
void skipOneLineComment(); void skipOneLineComment();
void skipMultiLineComment(); void skipMultiLineComment();
}; };
std::vector<Token> Tokenizer::scan(std::string input) std::vector<Token *> Tokenizer::scan(std::string input)
{ {
std::vector<Token> tokens; std::vector<Token *> tokens;
this->hmx.setText(input); this->hmx.setText(input);
while(1) while(1)
{ {
Token token = this->scope(); Token * token = this->scope();
if(token->token == "EOL")
{
break;
}
tokens.push_back(token); tokens.push_back(token);
if(this->hmx.isEnd()) if(this->hmx.isEnd())
{ {
@ -163,7 +168,7 @@ std::vector<Token> Tokenizer::scan(std::string input)
} }
return tokens; return tokens;
} }
Token Tokenizer::scope() Token * Tokenizer::scope()
{ {
this->hmx.skipWhiteSpace(); this->hmx.skipWhiteSpace();
@ -178,8 +183,8 @@ Token Tokenizer::scope()
} }
if(this->hmx.isEnd()){ if(this->hmx.isEnd()){
Token token; Token * token = new Token();
token.token = "EOL"; token->token = "EOL";
return token; return token;
}; };
@ -193,24 +198,24 @@ Token Tokenizer::scope()
if(this->hmx.isNumeric()) if(this->hmx.isNumeric())
{ {
INumber lem = this->hmx.readNumeric(); INumber lem = this->hmx.readNumeric();
NumberToken numberToken; NumberToken * numberToken = new NumberToken();
numberToken.base = lem.base; numberToken->base = lem.base;
numberToken.start = lem.start; numberToken->start = lem.start;
numberToken.end = lem.end; numberToken->end = lem.end;
numberToken.hasEpsilon = lem.hasEpsilon; numberToken->hasEpsilon = lem.hasEpsilon;
numberToken.isFloat = lem.isFloat; numberToken->isFloat = lem.isFloat;
numberToken.token = lem.token; numberToken->token = lem.token;
return numberToken; return numberToken;
} }
for (const std::string_view& keys : keywords) { for (const std::string_view& keys : keywords) {
if(this->hmx.include(std::string(keys),false)) if(this->hmx.include(std::string(keys),false))
{ {
KeywordToken keytoken; KeywordToken * keytoken = new KeywordToken();
keytoken.start = this->hmx.getOffset(); keytoken->start = this->hmx.getOffset();
this->hmx.toChar(+keys.size()); this->hmx.toChar(+keys.size());
keytoken.end = this->hmx.getOffset(); keytoken->end = this->hmx.getOffset();
keytoken.token = keys; keytoken->token = keys;
return keytoken; return keytoken;
} }
} }
@ -218,11 +223,11 @@ Token Tokenizer::scope()
for (const std::string_view& del : delimiters) { for (const std::string_view& del : delimiters) {
if(this->hmx.include(std::string(del),false)) if(this->hmx.include(std::string(del),false))
{ {
DelimiterToken dtoken; DelimiterToken * dtoken = new DelimiterToken();;
dtoken.start = this->hmx.getOffset(); dtoken->start = this->hmx.getOffset();
this->hmx.toChar(+del.size()); this->hmx.toChar(+del.size());
dtoken.end = this->hmx.getOffset(); dtoken->end = this->hmx.getOffset();
dtoken.token = del; dtoken->token = del;
return dtoken; return dtoken;
} }
} }
@ -230,22 +235,22 @@ Token Tokenizer::scope()
for (const std::string_view& op : operators) { for (const std::string_view& op : operators) {
if(this->hmx.include(std::string(op),false)) if(this->hmx.include(std::string(op),false))
{ {
OperatorToken optoken; OperatorToken* optoken = new OperatorToken();
optoken.start = this->hmx.getOffset(); optoken->start = this->hmx.getOffset();
this->hmx.toChar(+op.size()); this->hmx.toChar(+op.size());
optoken.end = this->hmx.getOffset(); optoken->end = this->hmx.getOffset();
optoken.token = op; optoken->token = op;
return optoken; return optoken;
} }
} }
return this->readIndetifier(); return this->readIndetifier();
} }
IdentifierToken Tokenizer::readIndetifier() IdentifierToken * Tokenizer::readIndetifier()
{ {
this->hmx.beginPosition(); this->hmx.beginPosition();
IdentifierToken idenditifierToken; IdentifierToken * idenditifierToken = new IdentifierToken();
idenditifierToken.start = this->hmx.getOffset(); idenditifierToken->start = this->hmx.getOffset();
while(this->hmx.isEnd() == false) while(this->hmx.isEnd() == false)
{ {
@ -255,7 +260,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= 'a' && c <= 'z') if(c >= 'a' && c <= 'z')
{ {
readed = true; readed = true;
idenditifierToken.token.push_back(c); idenditifierToken->token.push_back(c);
this->hmx.nextChar(); this->hmx.nextChar();
continue; continue;
} }
@ -263,7 +268,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= 'A' && c <= 'Z') if(c >= 'A' && c <= 'Z')
{ {
readed = true; readed = true;
idenditifierToken.token.push_back(c); idenditifierToken->token.push_back(c);
this->hmx.nextChar(); this->hmx.nextChar();
continue; continue;
} }
@ -272,7 +277,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= '0' && c <= '9') if(c >= '0' && c <= '9')
{ {
readed = true; readed = true;
idenditifierToken.token.push_back(c); idenditifierToken->token.push_back(c);
this->hmx.nextChar(); this->hmx.nextChar();
continue; continue;
} }
@ -281,13 +286,13 @@ IdentifierToken Tokenizer::readIndetifier()
{ {
case '_':{ case '_':{
readed = true; readed = true;
idenditifierToken.token.push_back(c); idenditifierToken->token.push_back(c);
this->hmx.nextChar(); this->hmx.nextChar();
break; break;
} }
case '$':{ case '$':{
readed = true; readed = true;
idenditifierToken.token.push_back(c); idenditifierToken->token.push_back(c);
this->hmx.nextChar(); this->hmx.nextChar();
break; break;
} }
@ -297,23 +302,23 @@ IdentifierToken Tokenizer::readIndetifier()
break; break;
} }
} }
idenditifierToken.end = this->hmx.getOffset(); idenditifierToken->end = this->hmx.getOffset();
idenditifierToken.size = idenditifierToken.context.size(); idenditifierToken->size = idenditifierToken->context.size();
this->hmx.acceptPosition(); this->hmx.acceptPosition();
return idenditifierToken; return idenditifierToken;
} }
StringToken Tokenizer::readString() StringToken * Tokenizer::readString()
{ {
this->hmx.beginPosition(); this->hmx.beginPosition();
StringToken stringToken; StringToken * stringToken = new StringToken();
bool started = false; bool started = false;
bool isended = false; bool isended = false;
stringToken.start = this->hmx.getOffset(); stringToken->start = this->hmx.getOffset();
while(this->hmx.isEnd() == false) while(this->hmx.isEnd() == false)
{ {
char c = this->hmx.getchar(); char c = this->hmx.getchar();
stringToken.token.push_back(c); stringToken->token.push_back(c);
switch(c) switch(c)
{ {
case '"':{ case '"':{
@ -329,12 +334,12 @@ StringToken Tokenizer::readString()
case '\\':{ case '\\':{
this->hmx.nextChar(); this->hmx.nextChar();
c = this->hmx.getchar(); c = this->hmx.getchar();
stringToken.token.push_back(c); stringToken->token.push_back(c);
stringToken.context.push_back(c); stringToken->context.push_back(c);
break; break;
} }
default:{ default:{
stringToken.context.push_back(c); stringToken->context.push_back(c);
} }
} }
this->hmx.nextChar(); this->hmx.nextChar();
@ -343,8 +348,8 @@ StringToken Tokenizer::readString()
break; break;
} }
} }
stringToken.end = this->hmx.getOffset(); stringToken->end = this->hmx.getOffset();
stringToken.size = stringToken.context.size(); stringToken->size = stringToken->context.size();
this->hmx.acceptPosition(); this->hmx.acceptPosition();
return stringToken; return stringToken;
} }