Lexer and Tokenizer Completed

This commit is contained in:
abdussamedulutas 2025-12-23 21:05:36 +03:00
parent 78621d990a
commit 2bf52f5f21
1 changed file with 153 additions and 49 deletions

View File

@ -6,18 +6,20 @@
#include "./Lexer.cpp" #include "./Lexer.cpp"
class Token { class Token {
private: protected:
std::string type = ""; std::string type = "";
public: public:
int start = 0; int start = 0;
int end = 0; int end = 0;
std::string token; std::string token;
std::string gettype(){
return this->type;
}
}; };
class StringToken : public Token { class StringToken : public Token {
private:
std::string type = "string";
public: public:
StringToken() { this->type = "string"; }
std::string context; std::string context;
int size = 0; int size = 0;
void log() void log()
@ -26,9 +28,8 @@ class StringToken : public Token {
} }
}; };
class NumberToken : public Token { class NumberToken : public Token {
private:
std::string type = "number";
public: public:
NumberToken() { this->type = "number"; }
bool isFloat = false; bool isFloat = false;
bool hasEpsilon = false; bool hasEpsilon = false;
int base = 10; int base = 10;
@ -37,33 +38,48 @@ class NumberToken : public Token {
std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n"; std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n";
} }
}; };
class BoolToken : public Token { // class BoolToken : public Token {
private: // public:
std::string type = "boolean"; // BoolToken() { this->type = "boolean"; }
public: // void log()
void log() // {
{ // std::cout << "BoolToken Value{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
std::cout << "BoolToken Value{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n"; // }
} // };
};
class OperatorToken : public Token { class OperatorToken : public Token {
private:
std::string type = "operator";
public: public:
OperatorToken() { this->type = "operator"; }
void log() void log()
{ {
std::cout << "OperatorToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n"; std::cout << "OperatorToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
} }
}; };
class DelimiterToken : public Token { class DelimiterToken : public Token {
private:
std::string type = "delimiter";
public: public:
DelimiterToken() { this->type = "delimiter"; }
void log() void log()
{ {
std::cout << "DelimiterToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n"; std::cout << "DelimiterToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
} }
}; };
class KeywordToken : public Token {
public:
KeywordToken() { this->type = "keyword"; }
void log()
{
std::cout << "KeywordToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
class IdentifierToken : public Token {
public:
IdentifierToken() { this->type = "identifier"; }
std::string context;
int size = 0;
void log()
{
std::cout << "IdentifierToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
const constexpr std::string_view operators[] = { const constexpr std::string_view operators[] = {
@ -105,6 +121,38 @@ const constexpr std::string_view delimiters[] = {
".", ".",
}; };
// Reserved words recognised by the tokenizer.
// The entries are ordered from longest to shortest and that order must be
// kept: the table is scanned front to back, so a longer word such as
// "throws" has to be tried before a shorter one it starts with ("throw").
// NOTE(review): presumably the lookup is a prefix match at the current
// offset - confirm against the lexer's include().
constexpr std::string_view keywords[] = {
    // 10 characters
    "implements",
    // 9 characters
    "protected", "interface",
    // 8 characters
    "continue",
    // 7 characters
    "private", "finally", "extends", "default",
    // 6 characters
    "throws", "switch", "return", "public", "assert",
    // 5 characters
    "false", "while", "throw", "class", "catch", "break",
    // 4 characters
    "null", "true", "enum", "else", "case",
    // 3 characters
    "new", "try", "for",
    // 2 characters
    "if", "do",
};
class Tokenizer { class Tokenizer {
public: public:
@ -112,17 +160,23 @@ public:
void parse(std::string input) void parse(std::string input)
{ {
this->hmx.setText(input); this->hmx.setText(input);
this->scope(); while(1)
{
Token token = this->scope();
std::cout << token.gettype() << " -> " << token.token << "\n";
if(this->hmx.isEnd())
{
break;
}
}
} }
void scope() Token scope()
{ {
this->hmx.skipWhiteSpace(); this->hmx.skipWhiteSpace();
// Stringler // Stringler
if(this->hmx.getchar() == '"') if(this->hmx.getchar() == '"')
{ {
StringToken t = this->readString(); return this->readString();
t.log();
return;
} }
// Sayılar // Sayılar
@ -136,30 +190,19 @@ public:
numberToken.hasEpsilon = lem.hasEpsilon; numberToken.hasEpsilon = lem.hasEpsilon;
numberToken.isFloat = lem.isFloat; numberToken.isFloat = lem.isFloat;
numberToken.token = lem.token; numberToken.token = lem.token;
numberToken.log(); return numberToken;
return;
} }
// Boolean for (const std::string_view& keys : keywords) {
if(this->hmx.include("true",false)) if(this->hmx.include(std::string(keys),false))
{ {
BoolToken BoolToken; KeywordToken keytoken;
BoolToken.token = "true"; keytoken.start = this->hmx.getOffset();
BoolToken.start = this->hmx.getOffset(); this->hmx.toChar(+keys.size());
this->hmx.toChar(+4); keytoken.end = this->hmx.getOffset();
BoolToken.end = this->hmx.getOffset(); keytoken.token = keys;
BoolToken.log(); return keytoken;
return; }
}
if(this->hmx.include("false",false))
{
BoolToken BoolToken;
BoolToken.token = "false";
BoolToken.start = this->hmx.getOffset();
this->hmx.toChar(+5);
BoolToken.end = this->hmx.getOffset();
BoolToken.log();
return;
} }
for (const std::string_view& del : delimiters) { for (const std::string_view& del : delimiters) {
@ -170,8 +213,7 @@ public:
this->hmx.toChar(+del.size()); this->hmx.toChar(+del.size());
dtoken.end = this->hmx.getOffset(); dtoken.end = this->hmx.getOffset();
dtoken.token = del; dtoken.token = del;
dtoken.log(); return dtoken;
return;
} }
} }
@ -183,10 +225,72 @@ public:
this->hmx.toChar(+op.size()); this->hmx.toChar(+op.size());
optoken.end = this->hmx.getOffset(); optoken.end = this->hmx.getOffset();
optoken.token = op; optoken.token = op;
optoken.log(); return optoken;
return;
} }
} }
return this->readIndetifier();
}
/**
 * Reads an identifier starting at the lexer's current offset.
 *
 * Consumes characters while they are ASCII letters, ASCII digits, '_' or '$'
 * and appends each one to the returned token's `token` string. Reading stops
 * at the first other character or at the end of the input.
 *
 * @return An IdentifierToken with `start` / `end` set to the lexer offsets
 *         before and after the consumed characters, `token` holding the
 *         consumed text and `size` holding its length. `context` is left
 *         empty.
 *
 * NOTE(review): if the current character is not an identifier character the
 * result is an empty token and the lexer does not advance; a caller that
 * loops until the end of input has to handle that case itself.
 */
IdentifierToken readIndetifier()
{
    // Single source of truth for what may appear in an identifier.
    // Explicit ASCII ranges are used on purpose so the result does not
    // depend on the current locale.
    const auto isIdentifierChar = [](char c) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || c == '_'
            || c == '$';
    };

    // NOTE(review): beginPosition()/acceptPosition() presumably bracket a
    // position checkpoint in the lexer - confirm in Lexer.cpp.
    this->hmx.beginPosition();
    IdentifierToken idenditifierToken;
    idenditifierToken.start = this->hmx.getOffset();
    while(this->hmx.isEnd() == false)
    {
        const char c = this->hmx.getchar();
        if(!isIdentifierChar(c))
        {
            break;
        }
        idenditifierToken.token.push_back(c);
        this->hmx.nextChar();
    }
    idenditifierToken.end = this->hmx.getOffset();
    // The length must come from `token`: `context` is never written here, so
    // measuring it always reported a size of 0.
    idenditifierToken.size = static_cast<int>(idenditifierToken.token.size());
    this->hmx.acceptPosition();
    return idenditifierToken;
}
StringToken readString() StringToken readString()
{ {