saqut-compiler/core/Tokenizer.cpp

341 lines
8.9 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdlib.h>

#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

#include "./Lexer.cpp"
// Base record shared by every token kind produced by the tokenizer.
// Tokenizer::scope() returns tokens as `Token` by value, so only the members
// declared here survive that return; the kind tag is kept in `type`.
class Token {
protected:
    // Kind tag ("string", "number", ...); each derived class assigns it in its
    // constructor. Stays empty for a plain Token.
    std::string type = "";
public:
    // Lexer offset recorded when the token began.
    int start = 0;
    // Lexer offset recorded after the token was consumed.
    int end = 0;
    // Source text of the token.
    std::string token;
    // Returns the kind tag. Declared const so it can be called through a
    // const Token or a const reference.
    std::string gettype() const {
        return this->type;
    }
};
class StringToken : public Token {
public:
StringToken() { this->type = "string"; }
std::string context;
int size = 0;
void log()
{
std::cout << "Token String{" << this->token<<"} Start=" << this->start << " End=" << this->end << " Context{"<< this->context << "} Size="<< this->context.size() <<"\n";
}
};
class NumberToken : public Token {
public:
NumberToken() { this->type = "number"; }
bool isFloat = false;
bool hasEpsilon = false;
int base = 10;
void log()
{
std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n";
}
};
// class BoolToken : public Token {
// public:
// BoolToken() { this->type = "boolean"; }
// void log()
// {
// std::cout << "BoolToken Value{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
// }
// };
class OperatorToken : public Token {
public:
OperatorToken() { this->type = "operator"; }
void log()
{
std::cout << "OperatorToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
class DelimiterToken : public Token {
public:
DelimiterToken() { this->type = "delimiter"; }
void log()
{
std::cout << "DelimiterToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
class KeywordToken : public Token {
public:
KeywordToken() { this->type = "keyword"; }
void log()
{
std::cout << "KeywordToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
class IdentifierToken : public Token {
public:
IdentifierToken() { this->type = "identifier"; }
std::string context;
int size = 0;
void log()
{
std::cout << "IdentifierToken Context{"<<this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
}
};
// Operator spellings recognised by the tokenizer, tried in array order.
// Every two-character operator is listed before the one-character operators
// it starts with, so the longer spelling is matched first.
constexpr std::string_view operators[] = {
    // Comparison and logical (two characters)
    "==", "!=", "<=", ">=", "&&", "||",

    // Increment, decrement and shifts (two characters)
    "++", "--", "<<", ">>",

    // Compound assignment
    "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",

    // Arithmetic and relational (one character)
    "+", "-", "*", "/", "%", "<", ">",

    // Bitwise and logical (one character)
    "^", "!", "~", "&", "|",

    // Plain assignment
    "="
};
// Delimiter spellings recognised by the tokenizer, tried in array order.
// The two-character forms come first so "::" is matched before ":".
constexpr std::string_view delimiters[] = {
    // Two-character connectors
    "->", "::",

    // Brackets
    "[", "]",
    "(", ")",
    "{", "}",

    // Separators
    ";", ",", ":",

    // Member access
    ".",
};
// Reserved words recognised by the tokenizer. Listed from longest to
// shortest, so "throws" comes before its prefix "throw".
constexpr std::string_view keywords[] = {
    // 10 and 9 characters
    "implements", "protected", "interface",
    // 8 and 7 characters
    "continue", "private", "finally", "extends", "default",
    // 6 characters
    "throws", "switch", "return", "public", "assert",
    // 5 characters
    "false", "while", "throw", "class", "catch", "break",
    // 4 characters
    "null", "true", "enum", "else", "case",
    // 3 characters
    "new", "try", "for",
    // 2 characters
    "if", "do"
};
class Tokenizer {
public:
Lexer hmx;
void parse(std::string input)
{
this->hmx.setText(input);
while(1)
{
Token token = this->scope();
std::cout << token.gettype() << " -> " << token.token << "\n";
if(this->hmx.isEnd())
{
break;
}
}
}
Token scope()
{
this->hmx.skipWhiteSpace();
// Stringler
if(this->hmx.getchar() == '"')
{
return this->readString();
}
// Sayılar
if(this->hmx.isNumeric())
{
INumber lem = this->hmx.readNumeric();
NumberToken numberToken;
numberToken.base = lem.base;
numberToken.start = lem.start;
numberToken.end = lem.end;
numberToken.hasEpsilon = lem.hasEpsilon;
numberToken.isFloat = lem.isFloat;
numberToken.token = lem.token;
return numberToken;
}
for (const std::string_view& keys : keywords) {
if(this->hmx.include(std::string(keys),false))
{
KeywordToken keytoken;
keytoken.start = this->hmx.getOffset();
this->hmx.toChar(+keys.size());
keytoken.end = this->hmx.getOffset();
keytoken.token = keys;
return keytoken;
}
}
for (const std::string_view& del : delimiters) {
if(this->hmx.include(std::string(del),false))
{
DelimiterToken dtoken;
dtoken.start = this->hmx.getOffset();
this->hmx.toChar(+del.size());
dtoken.end = this->hmx.getOffset();
dtoken.token = del;
return dtoken;
}
}
for (const std::string_view& op : operators) {
if(this->hmx.include(std::string(op),false))
{
OperatorToken optoken;
optoken.start = this->hmx.getOffset();
this->hmx.toChar(+op.size());
optoken.end = this->hmx.getOffset();
optoken.token = op;
return optoken;
}
}
return this->readIndetifier();
}
IdentifierToken readIndetifier()
{
this->hmx.beginPosition();
IdentifierToken idenditifierToken;
idenditifierToken.start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
bool readed = false;
char c = this->hmx.getchar();
if(c >= 'a' && c <= 'z')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
if(c >= 'A' && c <= 'Z')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
if(c >= '0' && c <= '9')
{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
continue;
}
switch(c)
{
case '_':{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
break;
}
case '$':{
readed = true;
idenditifierToken.token.push_back(c);
this->hmx.nextChar();
break;
}
}
if(readed == false)
{
break;
}
}
idenditifierToken.end = this->hmx.getOffset();
idenditifierToken.size = idenditifierToken.context.size();
this->hmx.acceptPosition();
return idenditifierToken;
}
StringToken readString()
{
this->hmx.beginPosition();
StringToken stringToken;
bool started = false;
bool isended = false;
stringToken.start = this->hmx.getOffset();
while(this->hmx.isEnd() == false)
{
char c = this->hmx.getchar();
stringToken.token.push_back(c);
switch(c)
{
case '"':{
if(started == false)
{
started = true;
break;
}else{
isended = true;
break;
}
}
case '\\':{
this->hmx.nextChar();
c = this->hmx.getchar();
stringToken.token.push_back(c);
stringToken.context.push_back(c);
break;
}
default:{
stringToken.context.push_back(c);
}
}
this->hmx.nextChar();
if(isended)
{
break;
}
}
stringToken.end = this->hmx.getOffset();
stringToken.size = stringToken.context.size();
this->hmx.acceptPosition();
return stringToken;
}
};