Tokenizer Okuması kolaylaştırıldı

This commit is contained in:
abdussamedulutas 2025-12-28 15:40:47 +03:00
parent 0f33740a07
commit cb6bb6fc2e
2 changed files with 266 additions and 264 deletions

View File

@ -21,66 +21,46 @@ class Token {
class StringToken : public Token { class StringToken : public Token {
public: public:
StringToken() { this->type = "string"; } StringToken(){
this->type = "string";
};
std::string context; std::string context;
int size = 0; int size = 0;
void log()
{
std::cout << "Token String{" << this->token<<"} Start=" << this->start << " End=" << this->end << " Context{"<< this->context << "} Size="<< this->context.size() <<"\n";
}
}; };
class NumberToken : public Token { class NumberToken : public Token {
public: public:
NumberToken() { this->type = "number"; } NumberToken(){
this->type = "number";
}
bool isFloat = false; bool isFloat = false;
bool hasEpsilon = false; bool hasEpsilon = false;
int base = 10; int base = 10;
void log()
{
std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n";
}
}; };
// class BoolToken : public Token {
// public:
// BoolToken() { this->type = "boolean"; }
// void log()
// {
// std::cout << "BoolToken Value{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
// }
// };
class OperatorToken : public Token { class OperatorToken : public Token {
public: public:
OperatorToken() { this->type = "operator"; } OperatorToken(){
void log() this->type = "operator";
{
std::cout << "OperatorToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
} }
}; };
class DelimiterToken : public Token { class DelimiterToken : public Token {
public: public:
DelimiterToken() { this->type = "delimiter"; } DelimiterToken(){
void log() this->type = "delimiter";
{
std::cout << "DelimiterToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
} }
}; };
class KeywordToken : public Token { class KeywordToken : public Token {
public: public:
KeywordToken() { this->type = "keyword"; } KeywordToken(){
void log() this->type = "keyword";
{
std::cout << "KeywordToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
} }
}; };
class IdentifierToken : public Token { class IdentifierToken : public Token {
public: public:
IdentifierToken() { this->type = "identifier"; } IdentifierToken(){
this->type = "identifier";
}
std::string context; std::string context;
int size = 0; int size = 0;
void log()
{
std::cout << "IdentifierToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
}; };
@ -159,235 +139,243 @@ const constexpr std::string_view keywords[] = {
class Tokenizer { class Tokenizer {
public: public:
Lexer hmx; Lexer hmx;
std::vector<Token> scan(std::string input) std::vector<Token> scan(std::string input);
{ Token scope();
std::vector<Token> tokens; IdentifierToken readIndetifier();
this->hmx.setText(input); StringToken readString();
while(1) void skipOneLineComment();
{ void skipMultiLineComment();
Token token = this->scope();
tokens.push_back(token);
if(this->hmx.isEnd())
{
break;
}
}
return tokens;
}
Token scope()
{
this->hmx.skipWhiteSpace();
// Yorum satırları
if(this->hmx.include("//", true))
{
this->skipOneLineComment();
}
if(this->hmx.include("/*", true))
{
this->skipMultiLineComment();
}
if(this->hmx.isEnd()){
Token token;
token.token = "EOL";
return token;
};
// Stringler
if(this->hmx.getchar() == '"')
{
return this->readString();
}
// Sayılar
if(this->hmx.isNumeric())
{
INumber lem = this->hmx.readNumeric();
NumberToken numberToken;
numberToken.base = lem.base;
numberToken.start = lem.start;
numberToken.end = lem.end;
numberToken.hasEpsilon = lem.hasEpsilon;
numberToken.isFloat = lem.isFloat;
numberToken.token = lem.token;
return numberToken;
}
for (const std::string_view& keys : keywords) {
if(this->hmx.include(std::string(keys),false))
{
KeywordToken keytoken;
keytoken.start = this->hmx.getOffset();
this->hmx.toChar(+keys.size());
keytoken.end = this->hmx.getOffset();
keytoken.token = keys;
return keytoken;
}
}
for (const std::string_view& del : delimiters) {
if(this->hmx.include(std::string(del),false))
{
DelimiterToken dtoken;
dtoken.start = this->hmx.getOffset();
this->hmx.toChar(+del.size());
dtoken.end = this->hmx.getOffset();
dtoken.token = del;
return dtoken;
}
}
for (const std::string_view& op : operators) {
if(this->hmx.include(std::string(op),false))
{
OperatorToken optoken;
optoken.start = this->hmx.getOffset();
this->hmx.toChar(+op.size());
optoken.end = this->hmx.getOffset();
optoken.token = op;
return optoken;
}
}
return this->readIndetifier();
}
IdentifierToken readIndetifier()
{
this->hmx.beginPosition();
IdentifierToken idenditifierToken;
idenditifierToken.start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
bool readed = false;
char c = this->hmx.getchar();
if(c >= 'a' && c <= 'z')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
if(c >= 'A' && c <= 'Z')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
if(c >= '0' && c <= '9')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
switch(c)
{
case '_':{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
break;
}
case '$':{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
break;
}
}
if(readed == false)
{
break;
}
}
idenditifierToken.end = this->hmx.getOffset();
idenditifierToken.size = idenditifierToken.context.size();
this->hmx.acceptPosition();
return idenditifierToken;
}
StringToken readString()
{
this->hmx.beginPosition();
StringToken stringToken;
bool started = false;
bool isended = false;
stringToken.start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
char c = this->hmx.getchar();
stringToken.token.push_back(c);
switch(c)
{
case '"':{
if(started == false)
{
started = true;
break;
}else{
isended = true;
break;
}
}
case '\\':{
this->hmx.nextChar();
c = this->hmx.getchar();
stringToken.token.push_back(c);
stringToken.context.push_back(c);
break;
}
default:{
stringToken.context.push_back(c);
}
}
this->hmx.nextChar();
if(isended)
{
break;
}
}
stringToken.end = this->hmx.getOffset();
stringToken.size = stringToken.context.size();
this->hmx.acceptPosition();
return stringToken;
}
void skipOneLineComment()
{
std::cout << "SkipLineComment\n";
while(this->hmx.isEnd() == false)
{
if(this->hmx.getchar() == '\n')
{
this->hmx.nextChar();
this->hmx.skipWhiteSpace();
return;
}else{
this->hmx.nextChar();
}
}
}
void skipMultiLineComment()
{
std::cout << "SkipBlockComment\n";
while(this->hmx.isEnd() == false)
{
if(this->hmx.include("*/",true))
{
this->hmx.skipWhiteSpace();
return;
}else{
this->hmx.nextChar();
}
}
}
}; };
std::vector<Token> Tokenizer::scan(std::string input)
{
std::vector<Token> tokens;
this->hmx.setText(input);
while(1)
{
Token token = this->scope();
tokens.push_back(token);
if(this->hmx.isEnd())
{
break;
}
}
return tokens;
}
/**
 * Reads the next token starting at the lexer's current position.
 *
 * Matching order: skip whitespace and comments, then end of input (a plain
 * token whose text is "EOL"), string literal, number, keyword, delimiter,
 * operator, and finally identifier.
 *
 * Behavior notes:
 *  - Any number of consecutive line/block comments is skipped, not just one
 *    of each kind.
 *  - If no rule matches the current character, that single character is
 *    consumed and returned as a plain `Token`, so every call at a non-end
 *    position advances the lexer and scan() cannot spin forever.
 *
 * NOTE(review): the result is returned by value as `Token`, so derived-class
 * fields (number base, string context, ...) are sliced off.
 * NOTE(review): keywords are matched as plain prefixes; an identifier that
 * merely starts with a keyword appears to be split — confirm against
 * Lexer::include.
 *
 * @return The token that was read.
 */
Token Tokenizer::scope()
{
    this->hmx.skipWhiteSpace();

    // Comments: keep skipping while another comment opener follows.
    // include(text, true) is presumed to consume the opener on a match.
    while (!this->hmx.isEnd()) {
        if (this->hmx.include("//", true)) {
            this->skipOneLineComment();
        } else if (this->hmx.include("/*", true)) {
            this->skipMultiLineComment();
        } else {
            break;
        }
        this->hmx.skipWhiteSpace();
    }

    if (this->hmx.isEnd()) {
        Token endToken;
        endToken.token = "EOL";
        return endToken;
    }

    // Strings
    if (this->hmx.getchar() == '"') {
        return this->readString();
    }

    // Numbers
    if (this->hmx.isNumeric()) {
        INumber lem = this->hmx.readNumeric();
        NumberToken numberToken;
        numberToken.base = lem.base;
        numberToken.start = lem.start;
        numberToken.end = lem.end;
        numberToken.hasEpsilon = lem.hasEpsilon;
        numberToken.isFloat = lem.isFloat;
        numberToken.token = lem.token;
        return numberToken;
    }

    // Shared matcher for fixed-text tokens: on a match, records the start
    // offset, advances past the text, records the end offset and the text.
    const auto consumeFixed = [this](std::string_view text, Token& out) -> bool {
        if (!this->hmx.include(std::string(text), false)) {
            return false;
        }
        out.start = this->hmx.getOffset();
        this->hmx.toChar(+text.size());
        out.end = this->hmx.getOffset();
        out.token = text;
        return true;
    };

    KeywordToken keywordToken;
    for (const std::string_view& keys : keywords) {
        if (consumeFixed(keys, keywordToken)) {
            return keywordToken;
        }
    }

    DelimiterToken delimiterToken;
    for (const std::string_view& del : delimiters) {
        if (consumeFixed(del, delimiterToken)) {
            return delimiterToken;
        }
    }

    OperatorToken operatorToken;
    for (const std::string_view& op : operators) {
        if (consumeFixed(op, operatorToken)) {
            return operatorToken;
        }
    }

    IdentifierToken identifier = this->readIndetifier();
    if (!identifier.token.empty() || this->hmx.isEnd()) {
        return identifier;
    }

    // Nothing matched and nothing was consumed: swallow the unknown character
    // so the caller's loop always makes progress.
    Token unknown;
    unknown.start = this->hmx.getOffset();
    unknown.token.push_back(this->hmx.getchar());
    this->hmx.nextChar();
    unknown.end = this->hmx.getOffset();
    return unknown;
}
/**
 * Reads an identifier starting at the lexer's current position.
 *
 * Accepted characters are ASCII letters, ASCII digits, '_' and '$'. Reading
 * stops at the first other character or at end of input; that character is
 * not consumed. If the current character is not accepted, the returned token
 * has empty text and the lexer position is unchanged.
 *
 * `context` now mirrors the identifier text and `size` is its length.
 * Previously `size` was taken from `context`, which was never filled, so it
 * was always 0.
 *
 * @return The identifier token with `start`, `end`, `token`, `context` and
 *         `size` filled in.
 */
IdentifierToken Tokenizer::readIndetifier()
{
    this->hmx.beginPosition();
    IdentifierToken identifier;
    identifier.start = this->hmx.getOffset();

    while (!this->hmx.isEnd()) {
        const char c = this->hmx.getchar();
        // Explicit ASCII ranges (not <cctype>) keep this locale-independent.
        const bool isIdentifierChar =
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9') ||
            c == '_' || c == '$';
        if (!isIdentifierChar) {
            break;
        }
        identifier.token.push_back(c);
        this->hmx.nextChar();
    }

    identifier.end = this->hmx.getOffset();
    identifier.context = identifier.token;
    identifier.size = static_cast<int>(identifier.context.size());
    this->hmx.acceptPosition();
    return identifier;
}
/**
 * Reads a double-quoted string literal starting at the lexer's current
 * position.
 *
 * `token` receives the raw text, including both quotes and any backslashes.
 * `context` receives the content: every character except the quotes, with a
 * backslash dropped and the character after it copied verbatim (so `\"`
 * yields `"`; escapes such as `\n` are not translated to control characters).
 * `size` is the length of `context`.
 *
 * Reading stops after the closing quote or at end of input. A backslash that
 * is the last character of the input now ends the read, instead of fetching a
 * character past the end of the text.
 *
 * @return The string token with `start`, `end`, `token`, `context` and `size`
 *         filled in.
 */
StringToken Tokenizer::readString()
{
    this->hmx.beginPosition();
    StringToken stringToken;
    bool opened = false;
    bool closed = false;
    stringToken.start = this->hmx.getOffset();

    while (!this->hmx.isEnd()) {
        char c = this->hmx.getchar();
        stringToken.token.push_back(c);

        if (c == '"') {
            // First quote opens the literal, second one closes it.
            if (opened) {
                closed = true;
            } else {
                opened = true;
            }
        } else if (c == '\\') {
            this->hmx.nextChar();
            if (this->hmx.isEnd()) {
                // Dangling backslash: there is no escaped character to read.
                break;
            }
            c = this->hmx.getchar();
            stringToken.token.push_back(c);
            stringToken.context.push_back(c);
        } else {
            stringToken.context.push_back(c);
        }

        this->hmx.nextChar();
        if (closed) {
            break;
        }
    }

    stringToken.end = this->hmx.getOffset();
    stringToken.size = static_cast<int>(stringToken.context.size());
    this->hmx.acceptPosition();
    return stringToken;
}
/**
 * Skips the remainder of a line comment.
 *
 * Advances character by character up to and including the next '\n', then
 * skips the whitespace that follows it. If no newline is found, the lexer
 * ends up at end of input. Prints a trace line to stdout on entry.
 */
void Tokenizer::skipOneLineComment()
{
    std::cout << "SkipLineComment\n";
    while (!this->hmx.isEnd()) {
        // Test the current character before stepping past it.
        const bool atLineBreak = (this->hmx.getchar() == '\n');
        this->hmx.nextChar();
        if (atLineBreak) {
            this->hmx.skipWhiteSpace();
            return;
        }
    }
}
void Tokenizer::skipMultiLineComment()
{
std::cout << "SkipBlockComment\n";
while(this->hmx.isEnd() == false)
{
if(this->hmx.include("*/",true))
{
this->hmx.skipWhiteSpace();
return;
}else{
this->hmx.nextChar();
}
}
}
#endif #endif

14
core/Tools.cpp Normal file
View File

@ -0,0 +1,14 @@
#include <string>
#ifndef Tools
#define Tools
/// Returns `str` padded on the right with spaces to `totalLen` characters.
/// A string that is already `totalLen` or longer is returned unchanged
/// (it is never truncated).
std::string padRight(std::string str, size_t totalLen) {
    const bool needsPadding = str.size() < totalLen;
    if (needsPadding) {
        // Growing with a fill character appends exactly the missing spaces.
        str.resize(totalLen, ' ');
    }
    return str;
}
#endif