refactor: AST ve Token yapısında pointer semantiğine geçiş

- Bellekte 'object slicing' (nesne dilimlenmesi) sorununu önlemek için Token ve ASTNode yapıları pointer (yıldız) kullanımına geçirildi.
- Bellek sızıntılarını önlemek için temel sınıflara (Token, ASTNode) 'virtual destructor' eklendi.
- AST düğümlerine 'parent pointer' bağlantısı eklenerek ağaç üzerinde yukarı yönlü navigasyon sağlandı.
- IR (Ara Temsil) ve CodeGenerator katmanları projeye dahil edildi.
- Tokenizer'da EOL (satır sonu) kontrolü ve dinamik bellek tahsisi (new) optimizasyonları yapıldı.
This commit is contained in:
abdussamedulutas 2026-01-10 20:19:49 +03:00
parent 817d36bc4a
commit b0ff0ef841
6 changed files with 229 additions and 78 deletions

View File

@ -5,6 +5,7 @@
#include <stdlib.h>
#include "./core/Tokenizer.cpp"
#include "./core/Parser/Parser.cpp"
#include "./core/IR/IR.cpp"
int main()
{
@ -20,10 +21,14 @@ int main()
Tokenizer tokenizer;
Parser parser;
CodeGenerator codeGenerator;
auto tokens = tokenizer.scan(icerik);
parser.parse(tokens);
ASTNode * ast = parser.parse(tokens);
codeGenerator.parse(ast);
codeGenerator.IROpDatas;
return 0;
}

133
core/IR/IR.cpp Normal file
View File

@ -0,0 +1,133 @@
#include "../Parser/ASTNode.cpp"
#include "../Tokenizer.cpp"
#include <vector>
#include <variant>
#ifndef IR
#define IR
/// Operation codes carried by an IROpData instruction.
enum class OPCode {
    // Arithmetic operations
    mathadd,
    mathsub,
    mathdiv,
    mathmul,

    // Declaration
    declare,
};
/// One operand slot of an IR instruction.
///
/// CodeGenerator fills a slot either with `{true, <register number>}` (the
/// value returned by a nested parse call) or with `{false, <immediate>}`.
struct Param {
    /// True when `value` is a register number rather than an immediate.
    /// Initialised so that a default-constructed Param is never indeterminate.
    bool isRegister = false;
    /// Register number or immediate value; a default-constructed variant
    /// holds int 0.
    std::variant<int, float> value;
};
/// A single IR instruction: an opcode, the register that receives the result
/// and three operand slots.
///
/// Every member carries an initialiser so that default-initialisation yields
/// the same state as value-initialisation instead of indeterminate values.
/// Aggregate initialisation (`IROpData{op, reg, a, b, c}`) is unaffected.
struct IROpData {
    /// Operation to perform; zero-valued by default, i.e. the first enumerator.
    OPCode op{};
    /// Register number that receives the result. CodeGenerator numbers
    /// registers by pre-incrementing a counter that starts at 0.
    int targetReg = 0;
    /// Operand slots; CodeGenerator fills unused ones with `{false, 0}`.
    Param arg1{};
    Param arg2{};
    Param arg3{};
};
/// Counter used to hand out numbers for IR results.
struct Identifier
{
    /// Most recently issued number; stays 0 until the first one is issued.
    int last{0};
};
class CodeGenerator
{
private:
void * processNumber(NumberToken * num, const std::string& rawStr) {
if (num->isFloat || num->hasEpsilon) {
return new float(std::strtof(rawStr.c_str(), nullptr));
}
else {
return new int(std::strtol(rawStr.c_str(), nullptr, num->base));
}
}
public:
CodeGenerator()
{
}
Identifier identifier;
std::vector<IROpData> IROpDatas;
int parse(ASTNode * ast)
{
switch (ast->kind)
{
case ASTKind::BinaryExpression:{
return this->parseBinaryExpression((BinaryExpressionNode *) ast);
}
case ASTKind::Literal:{
return this->parseLiteral((LiteralNode *) ast);
}
default: return 0;
}
};
int parseBinaryExpression(BinaryExpressionNode * binaryAST)
{
OPCode op;
switch (binaryAST->Operator)
{
case TokenType::STAR:{
op = OPCode::mathmul;
break;
}
case TokenType::PLUS:{
op = OPCode::mathadd;
break;
}
case TokenType::MINUS:{
op = OPCode::mathsub;
break;
}
case TokenType::SLASH:{
op = OPCode::mathdiv;
break;
}
}
int left = this->parse(binaryAST->Left);
int right = this->parse(binaryAST->Right);
IROpDatas.push_back({
op,
++identifier.last,
{true, left},
{true, right},
{false, 0}
});
return identifier.last;
}
int parseLiteral(LiteralNode * binaryAST)
{
LiteralNode literal = *binaryAST;
NumberToken * num = (NumberToken *) &literal.parserToken.token;
if(num->isFloat)
{
float * _value = (float *) this->processNumber(num, num->token);
IROpDatas.push_back({
OPCode::declare,
++identifier.last,
{false, *_value},
{false, 0},
{false, 0}
});
}else{
int * _value = (int *) this->processNumber(num, num->token);
IROpDatas.push_back({
OPCode::declare,
++identifier.last,
{false, *_value},
{false, 0},
{false, 0}
});
}
return identifier.last;
}
};
#endif

View File

@ -34,15 +34,18 @@ class ASTNode
{
this->parent = children;
}
virtual ~ASTNode() = default;
};
class BinaryExpressionNode : public ASTNode
{
protected:
ASTKind Kind = ASTKind::BinaryExpression;
public:
BinaryExpressionNode(){
this->kind = ASTKind::BinaryExpression;
}
void log(int indent) override
{
auto korku = OPERATOR_MAP_STRREV.find(this->Operator);
std::cout
<< padRight(" ",indent)
<< "BinaryExpressionNode "
@ -62,14 +65,16 @@ class BinaryExpressionNode : public ASTNode
class LiteralNode : public ASTNode
{
protected:
ASTKind kind = ASTKind::Literal;
void log(int indent)
{
std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken.token << "}\n";
std::cout << padRight(" ",indent) << "LiteralNode {" << this->lexerToken->token << "}\n";
}
public:
Token lexerToken;
Token * lexerToken;
ParserToken parserToken;
LiteralNode(){
this->kind = ASTKind::Literal;
}
};
class IdentifierNode : public ASTNode
@ -78,10 +83,10 @@ class IdentifierNode : public ASTNode
ASTKind kind = ASTKind::Literal;
void log(int indent)
{
std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken.token << "}\n";
std::cout << padRight(" ",indent) << "IdentifierNode {" << this->lexerToken->token << "}\n";
}
public:
Token lexerToken;
Token * lexerToken;
ParserToken parserToken;
};
class PostfixNode : public ASTNode

View File

@ -20,45 +20,45 @@ class Parser {
ASTNode astroot;
public:
TokenList tokens;
void parse(TokenList tokens);
ASTNode * parse(TokenList tokens);
int current = 0;
ParserToken currentToken();
void nextToken();
ParserToken lookehead(uint32_t);
ParserToken parseToken(Token);
ParserToken parseToken(Token *);
ParserToken getToken(int);
void primaryExpression();
ASTNode * primaryExpression();
ASTNode * volumeExpression(uint16_t precedence);
ASTNode * volumeNullDominatorExpression();
ASTNode * volumeLeftDominatorExpression(ASTNode * left);
};
ParserToken Parser::parseToken(Token token){
ParserToken Parser::parseToken(Token * token){
ParserToken pToken;
pToken.token = token;
pToken.token = *token;
if(token.gettype() == "string")
if(token->gettype() == "string")
{
pToken.type = TokenType::STRING;
}
else if(token.gettype() == "number")
else if(token->gettype() == "number")
{
pToken.type = TokenType::NUMBER;
}
else if(token.gettype() == "operator")
else if(token->gettype() == "operator")
{
pToken.type = OPERATOR_MAP.find(token.token)->second;
pToken.type = OPERATOR_MAP.find(token->token)->second;
}
else if(token.gettype() == "delimiter")
else if(token->gettype() == "delimiter")
{
pToken.type = OPERATOR_MAP.find(token.token)->second;
pToken.type = OPERATOR_MAP.find(token->token)->second;
}
else if(token.gettype() == "keyword")
else if(token->gettype() == "keyword")
{
pToken.type = KEYWORD_MAP.find(token.token)->second;
pToken.type = KEYWORD_MAP.find(token->token)->second;
}
else if(token.gettype() == "identifier")
else if(token->gettype() == "identifier")
{
pToken.type = TokenType::IDENTIFIER;
}
@ -92,15 +92,16 @@ ParserToken Parser::currentToken(){
return this->getToken(0);
}
void Parser::parse(TokenList tokens){
ASTNode * Parser::parse(TokenList tokens){
this->tokens = tokens;
this->primaryExpression();
return this->primaryExpression();
}
void Parser::primaryExpression()
ASTNode * Parser::primaryExpression()
{
auto currentToken = this->currentToken();
ASTNode * tree = nullptr;
if(
currentToken.is({
TokenType::NUMBER,
@ -113,9 +114,9 @@ void Parser::primaryExpression()
})
)
{
ASTNode * tree = this->volumeExpression(0);
tree->log(0);
tree = this->volumeExpression(0);
}
return tree;
}
// Expresssionu tamamen okuyup bitiren kısım burası
@ -197,14 +198,14 @@ ASTNode * Parser::volumeNullDominatorExpression()
if(currentToken.is(TokenType::NUMBER)) {
LiteralNode * lNode = new LiteralNode();
lNode->lexerToken = currentToken.token;
lNode->lexerToken = &currentToken.token;
lNode->parserToken = currentToken;
return lNode;
}
if(currentToken.is(TokenType::IDENTIFIER)) {
IdentifierNode * iNode = new IdentifierNode();
iNode->lexerToken = currentToken.token;
iNode->lexerToken = &currentToken.token;
iNode->parserToken = currentToken;
return iNode;
}
@ -237,6 +238,8 @@ ASTNode * Parser::volumeLeftDominatorExpression(ASTNode * left)
binNode->Right = right;
binNode->Left = left;
binNode->Operator = currentToken.type;
binNode->Right->parent = binNode;
binNode->Left->parent = binNode;
return binNode;
}

View File

@ -7,7 +7,7 @@
#ifndef PARSER_TOKEN
#define PARSER_TOKEN
typedef std::vector<Token> TokenList;
typedef std::vector<Token *> TokenList;
enum class TokenType : uint16_t
{

View File

@ -17,6 +17,7 @@ class Token {
std::string gettype(){
return this->type;
}
virtual ~Token() = default;
};
class StringToken : public Token {
@ -139,22 +140,26 @@ const constexpr std::string_view keywords[] = {
class Tokenizer {
public:
Lexer hmx;
std::vector<Token> scan(std::string input);
Token scope();
IdentifierToken readIndetifier();
StringToken readString();
std::vector<Token *> scan(std::string input);
Token * scope();
IdentifierToken * readIndetifier();
StringToken * readString();
void skipOneLineComment();
void skipMultiLineComment();
};
std::vector<Token> Tokenizer::scan(std::string input)
std::vector<Token *> Tokenizer::scan(std::string input)
{
std::vector<Token> tokens;
std::vector<Token *> tokens;
this->hmx.setText(input);
while(1)
{
Token token = this->scope();
Token * token = this->scope();
if(token->token == "EOL")
{
break;
}
tokens.push_back(token);
if(this->hmx.isEnd())
{
@ -163,7 +168,7 @@ std::vector<Token> Tokenizer::scan(std::string input)
}
return tokens;
}
Token Tokenizer::scope()
Token * Tokenizer::scope()
{
this->hmx.skipWhiteSpace();
@ -178,8 +183,8 @@ Token Tokenizer::scope()
}
if(this->hmx.isEnd()){
Token token;
token.token = "EOL";
Token * token = new Token();
token->token = "EOL";
return token;
};
@ -193,24 +198,24 @@ Token Tokenizer::scope()
if(this->hmx.isNumeric())
{
INumber lem = this->hmx.readNumeric();
NumberToken numberToken;
numberToken.base = lem.base;
numberToken.start = lem.start;
numberToken.end = lem.end;
numberToken.hasEpsilon = lem.hasEpsilon;
numberToken.isFloat = lem.isFloat;
numberToken.token = lem.token;
NumberToken * numberToken = new NumberToken();
numberToken->base = lem.base;
numberToken->start = lem.start;
numberToken->end = lem.end;
numberToken->hasEpsilon = lem.hasEpsilon;
numberToken->isFloat = lem.isFloat;
numberToken->token = lem.token;
return numberToken;
}
for (const std::string_view& keys : keywords) {
if(this->hmx.include(std::string(keys),false))
{
KeywordToken keytoken;
keytoken.start = this->hmx.getOffset();
KeywordToken * keytoken = new KeywordToken();
keytoken->start = this->hmx.getOffset();
this->hmx.toChar(+keys.size());
keytoken.end = this->hmx.getOffset();
keytoken.token = keys;
keytoken->end = this->hmx.getOffset();
keytoken->token = keys;
return keytoken;
}
}
@ -218,11 +223,11 @@ Token Tokenizer::scope()
for (const std::string_view& del : delimiters) {
if(this->hmx.include(std::string(del),false))
{
DelimiterToken dtoken;
dtoken.start = this->hmx.getOffset();
DelimiterToken * dtoken = new DelimiterToken();;
dtoken->start = this->hmx.getOffset();
this->hmx.toChar(+del.size());
dtoken.end = this->hmx.getOffset();
dtoken.token = del;
dtoken->end = this->hmx.getOffset();
dtoken->token = del;
return dtoken;
}
}
@ -230,22 +235,22 @@ Token Tokenizer::scope()
for (const std::string_view& op : operators) {
if(this->hmx.include(std::string(op),false))
{
OperatorToken optoken;
optoken.start = this->hmx.getOffset();
OperatorToken* optoken = new OperatorToken();
optoken->start = this->hmx.getOffset();
this->hmx.toChar(+op.size());
optoken.end = this->hmx.getOffset();
optoken.token = op;
optoken->end = this->hmx.getOffset();
optoken->token = op;
return optoken;
}
}
return this->readIndetifier();
}
IdentifierToken Tokenizer::readIndetifier()
IdentifierToken * Tokenizer::readIndetifier()
{
this->hmx.beginPosition();
IdentifierToken idenditifierToken;
idenditifierToken.start = this->hmx.getOffset();
IdentifierToken * idenditifierToken = new IdentifierToken();
idenditifierToken->start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
@ -255,7 +260,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= 'a' && c <= 'z')
{
readed = true;
idenditifierToken.token.push_back(c);
idenditifierToken->token.push_back(c);
this->hmx.nextChar();
continue;
}
@ -263,7 +268,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= 'A' && c <= 'Z')
{
readed = true;
idenditifierToken.token.push_back(c);
idenditifierToken->token.push_back(c);
this->hmx.nextChar();
continue;
}
@ -272,7 +277,7 @@ IdentifierToken Tokenizer::readIndetifier()
if(c >= '0' && c <= '9')
{
readed = true;
idenditifierToken.token.push_back(c);
idenditifierToken->token.push_back(c);
this->hmx.nextChar();
continue;
}
@ -281,13 +286,13 @@ IdentifierToken Tokenizer::readIndetifier()
{
case '_':{
readed = true;
idenditifierToken.token.push_back(c);
idenditifierToken->token.push_back(c);
this->hmx.nextChar();
break;
}
case '$':{
readed = true;
idenditifierToken.token.push_back(c);
idenditifierToken->token.push_back(c);
this->hmx.nextChar();
break;
}
@ -297,23 +302,23 @@ IdentifierToken Tokenizer::readIndetifier()
break;
}
}
idenditifierToken.end = this->hmx.getOffset();
idenditifierToken.size = idenditifierToken.context.size();
idenditifierToken->end = this->hmx.getOffset();
idenditifierToken->size = idenditifierToken->context.size();
this->hmx.acceptPosition();
return idenditifierToken;
}
StringToken Tokenizer::readString()
StringToken * Tokenizer::readString()
{
this->hmx.beginPosition();
StringToken stringToken;
StringToken * stringToken = new StringToken();
bool started = false;
bool isended = false;
stringToken.start = this->hmx.getOffset();
stringToken->start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
char c = this->hmx.getchar();
stringToken.token.push_back(c);
stringToken->token.push_back(c);
switch(c)
{
case '"':{
@ -329,12 +334,12 @@ StringToken Tokenizer::readString()
case '\\':{
this->hmx.nextChar();
c = this->hmx.getchar();
stringToken.token.push_back(c);
stringToken.context.push_back(c);
stringToken->token.push_back(c);
stringToken->context.push_back(c);
break;
}
default:{
stringToken.context.push_back(c);
stringToken->context.push_back(c);
}
}
this->hmx.nextChar();
@ -343,8 +348,8 @@ StringToken Tokenizer::readString()
break;
}
}
stringToken.end = this->hmx.getOffset();
stringToken.size = stringToken.context.size();
stringToken->end = this->hmx.getOffset();
stringToken->size = stringToken->context.size();
this->hmx.acceptPosition();
return stringToken;
}