diff --git a/core/Tokenizer.cpp b/core/Tokenizer.cpp index a1b806c..022313b 100644 --- a/core/Tokenizer.cpp +++ b/core/Tokenizer.cpp @@ -21,66 +21,46 @@ class Token { class StringToken : public Token { public: - StringToken() { this->type = "string"; } + StringToken(){ + this->type = "string"; + }; std::string context; int size = 0; - void log() - { - std::cout << "Token String{" << this->token<<"} Start=" << this->start << " End=" << this->end << " Context{"<< this->context << "} Size="<< this->context.size() <<"\n"; - } }; class NumberToken : public Token { public: - NumberToken() { this->type = "number"; } + NumberToken(){ + this->type = "number"; + } bool isFloat = false; bool hasEpsilon = false; int base = 10; - void log() - { - std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n"; - } }; -// class BoolToken : public Token { -// public: -// BoolToken() { this->type = "boolean"; } -// void log() -// { -// std::cout << "BoolToken Value{"<token<<"} Start=" << this->start << " End=" << this->end << " \n"; -// } -// }; class OperatorToken : public Token { public: - OperatorToken() { this->type = "operator"; } - void log() - { - std::cout << "OperatorToken Context{"<token<<"} Start=" << this->start << " End=" << this->end << " \n"; + OperatorToken(){ + this->type = "operator"; } }; class DelimiterToken : public Token { public: - DelimiterToken() { this->type = "delimiter"; } - void log() - { - std::cout << "DelimiterToken Context{"<token<<"} Start=" << this->start << " End=" << this->end << " \n"; + DelimiterToken(){ + this->type = "delimiter"; } }; class KeywordToken : public Token { public: - KeywordToken() { this->type = "keyword"; } - void log() - { - std::cout << "KeywordToken Context{"<token<<"} Start=" << this->start << " End=" << this->end << " \n"; + KeywordToken(){ + this->type = "keyword"; } }; class IdentifierToken : public Token { public: - IdentifierToken() { this->type = "identifier"; } + IdentifierToken(){ + this->type = "identifier"; + } std::string context; int size = 0; - void log() - { - std::cout << "IdentifierToken Context{"<token<<"} Start=" << this->start << " End=" << this->end << " \n"; - } }; @@ -159,235 +139,243 @@ const constexpr std::string_view keywords[] = { class Tokenizer { public: Lexer hmx; - std::vector scan(std::string input) - { - std::vector tokens; - this->hmx.setText(input); - while(1) - { - Token token = this->scope(); - tokens.push_back(token); - if(this->hmx.isEnd()) - { - break; - } - } - return tokens; - } - Token scope() - { - this->hmx.skipWhiteSpace(); - - // Yorum satırları - if(this->hmx.include("//", true)) - { - this->skipOneLineComment(); - } - if(this->hmx.include("/*", true)) - { - this->skipMultiLineComment(); - } - - if(this->hmx.isEnd()){ - Token token; - token.token = "EOL"; - return token; - }; - - // Stringler - if(this->hmx.getchar() == '"') - { - return this->readString(); - } - - // Sayılar - if(this->hmx.isNumeric()) - { - INumber lem = this->hmx.readNumeric(); - NumberToken numberToken; - numberToken.base = lem.base; - numberToken.start = lem.start; - numberToken.end = lem.end; - numberToken.hasEpsilon = lem.hasEpsilon; - numberToken.isFloat = lem.isFloat; - numberToken.token = lem.token; - return numberToken; - } - - for (const std::string_view& keys : keywords) { - if(this->hmx.include(std::string(keys),false)) - { - KeywordToken keytoken; - keytoken.start = this->hmx.getOffset(); - this->hmx.toChar(+keys.size()); - keytoken.end = this->hmx.getOffset(); - keytoken.token = keys; - return keytoken; - } - } - - for (const std::string_view& del : delimiters) { - if(this->hmx.include(std::string(del),false)) - { - DelimiterToken dtoken; - dtoken.start = this->hmx.getOffset(); - this->hmx.toChar(+del.size()); - dtoken.end = this->hmx.getOffset(); - dtoken.token = del; - return dtoken; - } - } - - for (const std::string_view& op : operators) { - if(this->hmx.include(std::string(op),false)) - { - OperatorToken optoken; - optoken.start = this->hmx.getOffset(); - this->hmx.toChar(+op.size()); - optoken.end = this->hmx.getOffset(); - optoken.token = op; - return optoken; - } - } - - return this->readIndetifier(); - } - IdentifierToken readIndetifier() - { - this->hmx.beginPosition(); - IdentifierToken idenditifierToken; - idenditifierToken.start = this->hmx.getOffset(); - - while(this->hmx.isEnd() == false) - { - bool readed = false; - char c = this->hmx.getchar(); - - if(c >= 'a' && c <= 'z') - { - readed = true; - idenditifierToken.token.push_back(c); - this->hmx.nextChar(); - continue; - } - - if(c >= 'A' && c <= 'Z') - { - readed = true; - idenditifierToken.token.push_back(c); - this->hmx.nextChar(); - continue; - } - - - if(c >= '0' && c <= '9') - { - readed = true; - idenditifierToken.token.push_back(c); - this->hmx.nextChar(); - continue; - } - - switch(c) - { - case '_':{ - readed = true; - idenditifierToken.token.push_back(c); - this->hmx.nextChar(); - break; - } - case '$':{ - readed = true; - idenditifierToken.token.push_back(c); - this->hmx.nextChar(); - break; - } - } - if(readed == false) - { - break; - } - } - idenditifierToken.end = this->hmx.getOffset(); - idenditifierToken.size = idenditifierToken.context.size(); - this->hmx.acceptPosition(); - return idenditifierToken; - } - StringToken readString() - { - this->hmx.beginPosition(); - StringToken stringToken; - bool started = false; - bool isended = false; - stringToken.start = this->hmx.getOffset(); - - while(this->hmx.isEnd() == false) - { - char c = this->hmx.getchar(); - stringToken.token.push_back(c); - switch(c) - { - case '"':{ - if(started == false) - { - started = true; - break; - }else{ - isended = true; - break; - } - } - case '\\':{ - this->hmx.nextChar(); - c = this->hmx.getchar(); - stringToken.token.push_back(c); - stringToken.context.push_back(c); - break; - } - default:{ - stringToken.context.push_back(c); - } - } - this->hmx.nextChar(); - if(isended) - { - break; - } - } - stringToken.end = this->hmx.getOffset(); - stringToken.size = stringToken.context.size(); - this->hmx.acceptPosition(); - return stringToken; - } - void skipOneLineComment() - { - std::cout << "SkipLineComment\n"; - while(this->hmx.isEnd() == false) - { - if(this->hmx.getchar() == '\n') - { - this->hmx.nextChar(); - this->hmx.skipWhiteSpace(); - return; - }else{ - this->hmx.nextChar(); - } - } - } - void skipMultiLineComment() - { - std::cout << "SkipBlockComment\n"; - while(this->hmx.isEnd() == false) - { - if(this->hmx.include("*/",true)) - { - this->hmx.skipWhiteSpace(); - return; - }else{ - this->hmx.nextChar(); - } - } - } + std::vector scan(std::string input); + Token scope(); + IdentifierToken readIndetifier(); + StringToken readString(); + void skipOneLineComment(); + void skipMultiLineComment(); }; + +std::vector Tokenizer::scan(std::string input) +{ + std::vector tokens; + this->hmx.setText(input); + while(1) + { + Token token = this->scope(); + tokens.push_back(token); + if(this->hmx.isEnd()) + { + break; + } + } + return tokens; +} +Token Tokenizer::scope() +{ + this->hmx.skipWhiteSpace(); + + // Yorum satırları + if(this->hmx.include("//", true)) + { + this->skipOneLineComment(); + } + if(this->hmx.include("/*", true)) + { + this->skipMultiLineComment(); + } + + if(this->hmx.isEnd()){ + Token token; + token.token = "EOL"; + return token; + }; + + // Stringler + if(this->hmx.getchar() == '"') + { + return this->readString(); + } + + // Sayılar + if(this->hmx.isNumeric()) + { + INumber lem = this->hmx.readNumeric(); + NumberToken numberToken; + numberToken.base = lem.base; + numberToken.start = lem.start; + numberToken.end = lem.end; + numberToken.hasEpsilon = lem.hasEpsilon; + numberToken.isFloat = lem.isFloat; + numberToken.token = lem.token; + return numberToken; + } + + for (const std::string_view& keys : keywords) { + if(this->hmx.include(std::string(keys),false)) + { + KeywordToken keytoken; + keytoken.start = this->hmx.getOffset(); + this->hmx.toChar(+keys.size()); + keytoken.end = this->hmx.getOffset(); + keytoken.token = keys; + return keytoken; + } + } + + for (const std::string_view& del : delimiters) { + if(this->hmx.include(std::string(del),false)) + { + DelimiterToken dtoken; + dtoken.start = this->hmx.getOffset(); + this->hmx.toChar(+del.size()); + dtoken.end = this->hmx.getOffset(); + dtoken.token = del; + return dtoken; + } + } + + for (const std::string_view& op : operators) { + if(this->hmx.include(std::string(op),false)) + { + OperatorToken optoken; + optoken.start = this->hmx.getOffset(); + this->hmx.toChar(+op.size()); + optoken.end = this->hmx.getOffset(); + optoken.token = op; + return optoken; + } + } + + return this->readIndetifier(); +} +IdentifierToken Tokenizer::readIndetifier() +{ + this->hmx.beginPosition(); + IdentifierToken idenditifierToken; + idenditifierToken.start = this->hmx.getOffset(); + + while(this->hmx.isEnd() == false) + { + bool readed = false; + char c = this->hmx.getchar(); + + if(c >= 'a' && c <= 'z') + { + readed = true; + idenditifierToken.token.push_back(c); + this->hmx.nextChar(); + continue; + } + + if(c >= 'A' && c <= 'Z') + { + readed = true; + idenditifierToken.token.push_back(c); + this->hmx.nextChar(); + continue; + } + + + if(c >= '0' && c <= '9') + { + readed = true; + idenditifierToken.token.push_back(c); + this->hmx.nextChar(); + continue; + } + + switch(c) + { + case '_':{ + readed = true; + idenditifierToken.token.push_back(c); + this->hmx.nextChar(); + break; + } + case '$':{ + readed = true; + idenditifierToken.token.push_back(c); + this->hmx.nextChar(); + break; + } + } + if(readed == false) + { + break; + } + } + idenditifierToken.end = this->hmx.getOffset(); + idenditifierToken.size = idenditifierToken.context.size(); + this->hmx.acceptPosition(); + return idenditifierToken; +} +StringToken Tokenizer::readString() +{ + this->hmx.beginPosition(); + StringToken stringToken; + bool started = false; + bool isended = false; + stringToken.start = this->hmx.getOffset(); + + while(this->hmx.isEnd() == false) + { + char c = this->hmx.getchar(); + stringToken.token.push_back(c); + switch(c) + { + case '"':{ + if(started == false) + { + started = true; + break; + }else{ + isended = true; + break; + } + } + case '\\':{ + this->hmx.nextChar(); + c = this->hmx.getchar(); + stringToken.token.push_back(c); + stringToken.context.push_back(c); + break; + } + default:{ + stringToken.context.push_back(c); + } + } + this->hmx.nextChar(); + if(isended) + { + break; + } + } + stringToken.end = this->hmx.getOffset(); + stringToken.size = stringToken.context.size(); + this->hmx.acceptPosition(); + return stringToken; +} +void Tokenizer::skipOneLineComment() +{ + std::cout << "SkipLineComment\n"; + while(this->hmx.isEnd() == false) + { + if(this->hmx.getchar() == '\n') + { + this->hmx.nextChar(); + this->hmx.skipWhiteSpace(); + return; + }else{ + this->hmx.nextChar(); + } + } +} +void Tokenizer::skipMultiLineComment() +{ + std::cout << "SkipBlockComment\n"; + while(this->hmx.isEnd() == false) + { + if(this->hmx.include("*/",true)) + { + this->hmx.skipWhiteSpace(); + return; + }else{ + this->hmx.nextChar(); + } + } +} + #endif \ No newline at end of file diff --git a/core/Tools.cpp b/core/Tools.cpp new file mode 100644 index 0000000..1471223 --- /dev/null +++ b/core/Tools.cpp @@ -0,0 +1,14 @@ +#include +#ifndef Tools +#define Tools + + +std::string padRight(std::string str, size_t totalLen) { + if (str.size() < totalLen) { + str.append(totalLen - str.size(), ' '); + } + return str; +} + + +#endif \ No newline at end of file