commit 41aec36d196e5900e6b79bbf3eecd8c5eadc8edf
Author: abdussamedulutas
Date:   Tue Dec 23 13:50:09 2025 +0300

    delimiters ve operators eklendi

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..95e020d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.vscode
+saqut
\ No newline at end of file
diff --git a/Parsing.cpp b/Parsing.cpp
new file mode 100644
index 0000000..b244272
--- /dev/null
+++ b/Parsing.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <string>
+#include <vector>
+#include "./core/Tokenizer.cpp"
+
+/**
+ * Interactive entry point: reads one line at a time from standard input and
+ * feeds it to a fresh Tokenizer, which logs the token it recognises.
+ * The loop ends on the ".exit" command or when standard input is exhausted.
+ */
+int main()
+{
+    std::string girdi;
+
+    std::cout << "\nsaQut Compiler\n\n";
+
+    while(true)
+    {
+        std::cout << ">> ";
+        // Stop on EOF or a failed read; otherwise the loop would spin forever
+        // re-tokenizing the stale contents of `girdi`.
+        if(!std::getline(std::cin, girdi))
+        {
+            break;
+        }
+
+        // Handle the REPL command before tokenizing so that ".exit" is not
+        // lexed (and logged) as a "." delimiter first.
+        if (girdi == ".exit")
+        {
+            break;
+        }
+
+        Tokenizer token;
+        token.parse(girdi);
+
+        std::cout << "\n";
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/core/Lexer.cpp b/core/Lexer.cpp
new file mode 100644
index 0000000..459c453
--- /dev/null
+++ b/core/Lexer.cpp
@@ -0,0 +1,389 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+/** Result of Lexer::readNumeric(): the literal text plus what was detected about it. */
+struct INumber {
+    int start = 0;           // position at which reading started
+    int end = 0;             // position just past the last consumed character
+    std::string token;       // consumed literal text, base prefix included, sign excluded
+    bool isFloat = false;    // a '.' was consumed
+    bool hasEpsilon = false; // a base-10 exponent marker ('e'/'E') was consumed
+    int base = 10;           // 2, 8, 10 or 16
+    bool positive = true;    // false when the literal started with '-'
+};
+
+/**
+ * Character cursor over an input string with nested speculative positions.
+ *
+ * `offset` is the committed read position. beginPosition() pushes a working
+ * copy of the current position onto `offsetMap`; reads and advances then act
+ * on that copy until it is committed (acceptPosition) or dropped
+ * (rejectPosition).
+ */
+class Lexer {
+public:
+    std::string input = "";
+    int size = 0;
+    int offset = 0;
+    std::vector<int> offsetMap;
+
+    /** Opens a speculative position starting at the current position. */
+    void beginPosition()
+    {
+        this->offsetMap.push_back(this->getLastPosition());
+    }
+
+    /** Innermost speculative position, or `offset` when none is open. */
+    int getLastPosition()
+    {
+        return this->offsetMap.empty() ? this->offset : this->offsetMap.back();
+    }
+
+    /**
+     * Commits the innermost speculative position into its parent (the enclosing
+     * speculative position, or `offset`) and closes it. No-op when none is open.
+     */
+    void acceptPosition()
+    {
+        if(this->offsetMap.empty())
+        {
+            return;
+        }
+        // Pop before writing: setLastPosition() targets the innermost open
+        // position, so writing first would only store the value onto itself
+        // and leave the frame on the stack.
+        const int T = this->offsetMap.back();
+        this->offsetMap.pop_back();
+        this->setLastPosition(T);
+    }
+
+    /** Overwrites the innermost speculative position, or `offset` when none is open. */
+    void setLastPosition(int n)
+    {
+        if(this->offsetMap.empty())
+        {
+            this->offset = n;
+        }
+        else
+        {
+            this->offsetMap.back() = n;
+        }
+    }
+
+    /** True when the current position is at or past the end of the input. */
+    bool isEnd()
+    {
+        return this->size <= this->getOffset();
+    }
+
+    /** Drops the innermost speculative position. No-op when none is open. */
+    void rejectPosition()
+    {
+        // pop_back() on an empty vector is undefined behaviour.
+        if(!this->offsetMap.empty())
+        {
+            this->offsetMap.pop_back();
+        }
+    }
+
+    /**
+     * Returns a heap-allocated {from, to} pair: {0, offset} when no speculative
+     * position is open, otherwise the parent position and the innermost one.
+     * The caller owns the array and must release it with delete[].
+     */
+    int * positionRange()
+    {
+        const int len = static_cast<int>(this->offsetMap.size());
+        if(len == 0)
+        {
+            return new int[2]{0, this->offset};
+        }
+        if(len == 1)
+        {
+            return new int[2]{this->offset, this->offsetMap[0]};
+        }
+        return new int[2]{this->offsetMap[len - 2], this->offsetMap[len - 1]};
+    }
+
+    /** Returns the input text covered by positionRange(). */
+    std::string getPositionRange()
+    {
+        int *A = this->positionRange();
+        const int from = A[0];
+        const int to = A[1];
+        // positionRange() hands over ownership; release it before anything can throw.
+        delete[] A;
+
+        std::string mem;
+        for (int i = from; i < to; i++)
+        {
+            mem.push_back(this->input.at(i));
+        }
+        return mem;
+    }
+
+    /**
+     * Tests whether `word` occurs at the current position.
+     * @param word   text to match character by character
+     * @param accept when true a successful match is consumed; when false the
+     *               position is left untouched
+     * @return true when every character of `word` matched
+     */
+    bool include(std::string word,bool accept = true)
+    {
+        this->beginPosition();
+        for (std::string::size_type i = 0; i < word.size(); i++)
+        {
+            // Running out of input, or a differing character, is a mismatch.
+            if(this->isEnd() || word.at(i) != this->getchar())
+            {
+                this->rejectPosition();
+                return false;
+            }
+            this->nextChar();
+        }
+        if(accept)
+        {
+            this->acceptPosition();
+        }
+        else
+        {
+            this->rejectPosition();
+        }
+        return true;
+    }
+
+    /** Current read position (same as getLastPosition()). */
+    int getOffset()
+    {
+        return this->getLastPosition();
+    }
+
+    /** Moves the current read position to `n` and returns it. */
+    int setOffset(int n)
+    {
+        this->setLastPosition(n);
+        return this->getLastPosition();
+    }
+
+    /**
+     * Returns the character at the current position plus `additionalOffset`.
+     * Out-of-range access is reported on stderr and yields '\0'.
+     */
+    char getchar(int additionalOffset = 0)
+    {
+        int target = this->getOffset() + additionalOffset;
+        // A negative target would otherwise reach at() and throw.
+        if(target < 0 || this->size - 1 < target)
+        {
+            std::cerr << "Hata yanlış erişim\n";
+            return '\0';
+        }else{
+            return this->input.at(target);
+        }
+    }
+
+    /** Advances by one character; does nothing at end of input. */
+    void nextChar()
+    {
+        if(this->isEnd() == true)
+        {
+            return;
+        }
+        this->setOffset(this->getOffset() + 1);
+    }
+
+    /** Advances by `n` characters; does nothing at end of input. */
+    void toChar(int n)
+    {
+        if(this->isEnd() == true)
+        {
+            return;
+        }
+        this->setOffset(this->getOffset() + n);
+    }
+
+    /** Loads a new input text and rewinds the cursor to its start. */
+    void setText(std::string input) {
+        this->input = input;
+        this->size = static_cast<int>(this->input.length());
+        // Positions belonging to a previous text are meaningless for the new one.
+        this->offset = 0;
+        this->offsetMap.clear();
+    }
+
+    /** Skips '\r', '\n', '\b', '\t' and ' ' starting at the current position. */
+    void skipWhiteSpace()
+    {
+        // NOTE(review): '\b' (backspace) is skipped but '\v' and '\f' are not - confirm intent.
+        while(this->isEnd() == false)
+        {
+            switch(this->getchar())
+            {
+                case '\r':
+                case '\n':
+                case '\b':
+                case '\t':
+                case ' ':{
+                    this->nextChar();
+                    break;
+                }
+                default:{
+                    return;
+                }
+            }
+        }
+    }
+
+    /** True when the character at the current position is a decimal digit. */
+    bool isNumeric()
+    {
+        // Nothing to inspect at end of input; asking getchar() would log an error.
+        if(this->isEnd())
+        {
+            return false;
+        }
+        const char c = this->getchar();
+        return c >= '0' && c <= '9';
+    }
+
+    /**
+     * Reads a numeric literal starting at the current position.
+     *
+     * Accepts an optional sign, the prefixes "0x" (base 16), "0b" (base 2) and
+     * a leading '0' followed by an octal digit (base 8), a single '.', and in
+     * base 10 an exponent ('e'/'E', optional sign, decimal digits). Reading
+     * stops at the first character that cannot belong to the literal; that
+     * character is left unconsumed.
+     */
+    INumber readNumeric(){
+        INumber numberToken;
+        numberToken.start = this->getLastPosition();
+
+        if(!this->isEnd())
+        {
+            const char sign = this->getchar();
+            if(sign == '-' || sign == '+')
+            {
+                numberToken.positive = (sign == '+');
+                this->nextChar();
+            }
+        }
+
+        if(!this->isEnd() && this->getchar() == '0')
+        {
+            numberToken.token.push_back('0');
+            this->nextChar();
+            // Consume a second character only when it really is a base prefix
+            // or an octal digit. A bare "0", "0;" or "0 " must not swallow the
+            // following character (or a '\0' at end of input) into the token.
+            if(!this->isEnd())
+            {
+                const char c = this->getchar();
+                int base = 0;
+                if(c == 'x')
+                {
+                    base = 16;
+                }
+                else if(c == 'b')
+                {
+                    base = 2;
+                }
+                else if(c >= '0' && c <= '7')
+                {
+                    base = 8;
+                }
+                if(base != 0)
+                {
+                    numberToken.base = base;
+                    numberToken.token.push_back(c);
+                    this->nextChar();
+                }
+            }
+        }
+
+        bool nextDot = false;
+        while(this->isEnd() == false)
+        {
+            const char c = this->getchar();
+
+            if(c == '.')
+            {
+                // Only the first dot belongs to the literal.
+                if(nextDot)
+                {
+                    break;
+                }
+                if(numberToken.token.empty())
+                {
+                    numberToken.token.push_back('0');
+                }
+                numberToken.token.push_back('.');
+                nextDot = true;
+                numberToken.isFloat = true;
+                this->nextChar();
+                continue;
+            }
+
+            if((c == 'e' || c == 'E') && numberToken.base == 10)
+            {
+                numberToken.hasEpsilon = true;
+                numberToken.token.push_back(c);
+                this->nextChar();
+                this->readExponent(numberToken);
+                break;
+            }
+
+            if(!Lexer::isDigitInBase(c, numberToken.base))
+            {
+                break;
+            }
+            numberToken.token.push_back(c);
+            this->nextChar();
+        }
+
+        numberToken.end = this->getLastPosition();
+        return numberToken;
+    }
+
+private:
+    /** True when `c` is a digit of the given base (2, 8, 10 or 16). */
+    static bool isDigitInBase(char c, int base)
+    {
+        if(c == '0' || c == '1')
+        {
+            return true;
+        }
+        if(c >= '2' && c <= '7')
+        {
+            return base >= 8;
+        }
+        if(c == '8' || c == '9')
+        {
+            return base >= 10;
+        }
+        const bool hexLetter = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+        return hexLetter && base >= 16;
+    }
+
+    /** Reads the optional sign and the decimal digits that follow an exponent marker. */
+    void readExponent(INumber &numberToken)
+    {
+        if(this->isEnd())
+        {
+            return;
+        }
+        const char sign = this->getchar();
+        if(sign == '+' || sign == '-')
+        {
+            numberToken.token.push_back(sign);
+            this->nextChar();
+        }
+        while(this->isEnd() == false && Lexer::isDigitInBase(this->getchar(), 10))
+        {
+            numberToken.token.push_back(this->getchar());
+            this->nextChar();
+        }
+    }
+};
\ No newline at end of file
diff --git a/core/Tokenizer.cpp b/core/Tokenizer.cpp
new file mode 100644
index 0000000..e535e5c
--- /dev/null
+++ b/core/Tokenizer.cpp
@@ -0,0 +1,283 @@
+#include <cctype>
+#include <iostream>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "./Lexer.cpp"
+
+/** Data shared by every token: its kind, its span in the input and its text. */
+class Token {
+    protected:
+        // Kind name returned by getType(); each concrete token class sets its own.
+        std::string type = "";
+    public:
+        int start = 0;
+        int end = 0;
+        std::string token;
+        const std::string &getType() const
+        {
+            return this->type;
+        }
+};
+
+/** String literal; `context` is the literal's content without the quotes. */
+class StringToken : public Token {
+    public:
+        StringToken() { this->type = "string"; }
+        std::string context;
+        int size = 0;
+        void log() const
+        {
+            std::cout << "Token String{" << this->token<<"} Start=" << this->start << " End=" << this->end << " Context{"<< this->context << "} Size="<< this->context.size() <<"\n";
+        }
+};
+
+/** Numeric literal together with the properties the lexer detected. */
+class NumberToken : public Token {
+    public:
+        NumberToken() { this->type = "number"; }
+        bool isFloat = false;
+        bool hasEpsilon = false;
+        int base = 10;
+        void log() const
+        {
+            std::cout << "NumberToken "<< (this->isFloat ? "Float" : "Integer") <<"{" << this->token << "} HasExponent="<< (this->hasEpsilon ? "Yes" : "No") << " Base=" << this->base << " Start=" << this->start << " End=" << this->end << "\n";
+        }
+};
+
+/** Boolean keyword ("true" or "false"). */
+class BoolToken : public Token {
+    public:
+        BoolToken() { this->type = "boolean"; }
+        void log() const
+        {
+            std::cout << "BoolToken Value{"<< this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
+        }
+};
+
+/** Operator taken from the `operators` table. */
+class OperatorToken : public Token {
+    public:
+        OperatorToken() { this->type = "operator"; }
+        void log() const
+        {
+            std::cout << "OperatorToken Context{"<< this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
+        }
+};
+
+/** Delimiter taken from the `delimiters` table. */
+class DelimiterToken : public Token {
+    public:
+        DelimiterToken() { this->type = "delimiter"; }
+        void log() const
+        {
+            std::cout << "DelimiterToken Context{"<< this->token<<"} Start=" << this->start << " End=" << this->end << " \n";
+        }
+};
+
+
+// Operator spellings. Matching is first-hit, so every two-character operator
+// must precede the single-character operators it starts with.
+constexpr std::string_view operators[] = {
+    // --- Logical comparison ---
+    "==", "!=", "<=", ">=", "&&", "||",
+
+    // --- Arithmetic (two characters) ---
+    "++", "--", "<<", ">>",
+
+    // --- Assignment operators ---
+    "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=",
+
+    // --- Arithmetic (single character) ---
+    "+", "-", "*", "/", "%", "<", ">",
+
+    // --- Bitwise and logical (single character) ---
+    "^", "!", "~", "&", "|",
+
+    // --- Plain assignment ---
+    "="
+};
+
+// Delimiter spellings, tried before the operators; two-character entries first.
+constexpr std::string_view delimiters[] = {
+    // Connectors
+    "->",
+    "::",
+    // Brackets
+    "[",
+    "]",
+    "(",
+    ")",
+    "{",
+    "}",
+    // Separators
+    ";",
+    ",",
+    ":",
+    // Connectors
+    ".",
+};
+
+
+/** Recognises the token at the start of an input line and logs it to stdout. */
+class Tokenizer {
+public:
+    Lexer hmx;
+
+    /** Loads `input` into the lexer and reads one token from it. */
+    void parse(std::string input)
+    {
+        this->hmx.setText(input);
+        this->scope();
+    }
+
+    /**
+     * Recognises and logs the single token at the current position, trying in
+     * order: string, number, boolean keyword, delimiter, operator. Logs nothing
+     * when the remaining input is empty or matches none of them.
+     */
+    void scope()
+    {
+        this->hmx.skipWhiteSpace();
+        // Nothing left to read: probing the lexer would only log access errors.
+        if(this->hmx.isEnd())
+        {
+            return;
+        }
+
+        // Strings
+        if(this->hmx.getchar() == '"')
+        {
+            StringToken t = this->readString();
+            t.log();
+            return;
+        }
+
+        // Numbers
+        if(this->hmx.isNumeric())
+        {
+            INumber lem = this->hmx.readNumeric();
+            NumberToken numberToken;
+            numberToken.base = lem.base;
+            numberToken.start = lem.start;
+            numberToken.end = lem.end;
+            numberToken.hasEpsilon = lem.hasEpsilon;
+            numberToken.isFloat = lem.isFloat;
+            numberToken.token = lem.token;
+            numberToken.log();
+            return;
+        }
+
+        // Boolean
+        static constexpr std::string_view booleans[] = {"true", "false"};
+        for (const std::string_view& word : booleans) {
+            if(this->isKeyword(word))
+            {
+                BoolToken boolToken;
+                boolToken.token = std::string(word);
+                boolToken.start = this->hmx.getOffset();
+                this->hmx.toChar(static_cast<int>(word.size()));
+                boolToken.end = this->hmx.getOffset();
+                boolToken.log();
+                return;
+            }
+        }
+
+        for (const std::string_view& del : delimiters) {
+            if(this->hmx.include(std::string(del),false))
+            {
+                DelimiterToken dtoken;
+                dtoken.start = this->hmx.getOffset();
+                this->hmx.toChar(static_cast<int>(del.size()));
+                dtoken.end = this->hmx.getOffset();
+                dtoken.token = std::string(del);
+                dtoken.log();
+                return;
+            }
+        }
+
+        for (const std::string_view& op : operators) {
+            if(this->hmx.include(std::string(op),false))
+            {
+                OperatorToken optoken;
+                optoken.start = this->hmx.getOffset();
+                this->hmx.toChar(static_cast<int>(op.size()));
+                optoken.end = this->hmx.getOffset();
+                optoken.token = std::string(op);
+                optoken.log();
+                return;
+            }
+        }
+    }
+
+    /**
+     * Reads a double-quoted string literal starting at the current position.
+     * A backslash adds the character after it to the content as-is (no escape
+     * translation). Reading stops after the closing quote or at end of input.
+     */
+    StringToken readString()
+    {
+        this->hmx.beginPosition();
+        StringToken stringToken;
+        bool started = false;
+        bool isended = false;
+        stringToken.start = this->hmx.getOffset();
+
+        while(this->hmx.isEnd() == false && isended == false)
+        {
+            char c = this->hmx.getchar();
+            stringToken.token.push_back(c);
+            if(c == '"')
+            {
+                // The first quote opens the literal, the next one closes it.
+                if(started)
+                {
+                    isended = true;
+                }
+                started = true;
+            }
+            else if(c == '\\')
+            {
+                this->hmx.nextChar();
+                // A backslash that ends the input has nothing to escape.
+                if(this->hmx.isEnd())
+                {
+                    break;
+                }
+                c = this->hmx.getchar();
+                stringToken.token.push_back(c);
+                stringToken.context.push_back(c);
+            }
+            else
+            {
+                stringToken.context.push_back(c);
+            }
+            this->hmx.nextChar();
+        }
+        stringToken.end = this->hmx.getOffset();
+        stringToken.size = static_cast<int>(stringToken.context.size());
+        this->hmx.acceptPosition();
+        return stringToken;
+    }
+
+private:
+    /**
+     * True when `word` starts at the current position and is not merely the
+     * prefix of a longer identifier (e.g. "true" in "trueValue").
+     */
+    bool isKeyword(std::string_view word)
+    {
+        if(!this->hmx.include(std::string(word), false))
+        {
+            return false;
+        }
+        const int next = this->hmx.getOffset() + static_cast<int>(word.size());
+        if(next >= this->hmx.size)
+        {
+            return true;
+        }
+        const unsigned char c = static_cast<unsigned char>(this->hmx.input.at(next));
+        return !(std::isalnum(c) || c == '_');
+    }
+};
\ No newline at end of file