From 0b0c588c6478b44410b3653f02fe821605022dbe Mon Sep 17 00:00:00 2001 From: Marcus Huderle Date: Sat, 2 Jul 2022 12:40:04 -0500 Subject: [PATCH] Read object event gfx info by parsing its C file, rather than relying on regex --- include/lib/fex/LICENSE | 21 ++ include/lib/fex/README | 2 + include/lib/fex/array.h | 56 +++ include/lib/fex/array_value.h | 151 ++++++++ include/lib/fex/define_statement.h | 22 ++ include/lib/fex/lexer.h | 121 +++++++ include/lib/fex/parser.h | 48 +++ include/lib/fex/parser_util.h | 19 + include/project.h | 1 + porymap.pro | 9 + src/lib/fex/LICENSE | 21 ++ src/lib/fex/README | 2 + src/lib/fex/lexer.cpp | 505 +++++++++++++++++++++++++++ src/lib/fex/parser.cpp | 538 +++++++++++++++++++++++++++++ src/lib/fex/parser_util.cpp | 50 +++ src/project.cpp | 38 +- 16 files changed, 1599 insertions(+), 5 deletions(-) create mode 100644 include/lib/fex/LICENSE create mode 100644 include/lib/fex/README create mode 100644 include/lib/fex/array.h create mode 100644 include/lib/fex/array_value.h create mode 100644 include/lib/fex/define_statement.h create mode 100644 include/lib/fex/lexer.h create mode 100644 include/lib/fex/parser.h create mode 100644 include/lib/fex/parser_util.h create mode 100644 src/lib/fex/LICENSE create mode 100644 src/lib/fex/README create mode 100644 src/lib/fex/lexer.cpp create mode 100644 src/lib/fex/parser.cpp create mode 100644 src/lib/fex/parser_util.cpp diff --git a/include/lib/fex/LICENSE b/include/lib/fex/LICENSE new file mode 100644 index 00000000..68d1c4a9 --- /dev/null +++ b/include/lib/fex/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Ashley Coleman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to 
whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/include/lib/fex/README b/include/lib/fex/README new file mode 100644 index 00000000..564d1ae1 --- /dev/null +++ b/include/lib/fex/README @@ -0,0 +1,2 @@ +This is a slightly-modified library for parsing C code originally written by Ashley Coleman +(https://github.com/V-FEXrt). diff --git a/include/lib/fex/array.h b/include/lib/fex/array.h new file mode 100644 index 00000000..60482947 --- /dev/null +++ b/include/lib/fex/array.h @@ -0,0 +1,56 @@ +#ifndef INCLUDE_CORE_ARRAY_H +#define INCLUDE_CORE_ARRAY_H + +#include +#include + +#include "array_value.h" + +namespace fex +{ + class Array + { + public: + Array(std::string type, std::string name) : type_(type), name_(name) {} + + void Add(ArrayValue value) + { + values_.push_back(std::move(value)); + } + + const std::string &type() const + { + return type_; + } + const std::string &name() const + { + return name_; + } + const std::vector &values() const + { + return values_; + } + + std::vector release_values() + { + return std::move(values_); + } + + std::string ToString() const + { + std::string out = name_ + ":\n"; + for (const ArrayValue &v : values_) + { + out += v.ToString() + "\n"; + } + return out; + } + + private: + std::string type_; + std::string name_; + std::vector values_; + }; +} // namespace fex + 
// ---- include/lib/fex/array_value.h ----
namespace fex
{
    /// One node of a parsed C initializer: a literal, an identifier, a list of
    /// nested values, or a `name = value` pair.
    class ArrayValue
    {
    public:
        enum class Type
        {
            kNumber,     // Number literal
            kString,     // String literal
            kIdentifier, // Identifier

            kValueList, // Value, Value, Value, Value
            kValuePair, // Identifier = ArrayValue

            kEmpty,
        };

        Type type() const
        {
            return type_;
        }

        const std::vector<ArrayValue> &values() const
        {
            return values_;
        }

        /// Moves the nested values out to the caller; the internal list is left
        /// in a moved-from state.
        std::vector<ArrayValue> release_values()
        {
            return std::move(values_);
        }

        /// Name and owned value of a kValuePair. The pointer is null for every
        /// value that was not built through ValuePair().
        const std::pair<std::string, std::unique_ptr<ArrayValue>> &pair() const
        {
            return pair_;
        }

        const std::string &string_value() const
        {
            return string_value_;
        }

        int int_value() const
        {
            return int_value_;
        }

        void set_type(const Type &type)
        {
            type_ = type;
        }

        void set_string_value(const std::string &value)
        {
            string_value_ = value;
        }

        void set_int_value(int value)
        {
            int_value_ = value;
        }

        void set_values(std::vector<ArrayValue> values)
        {
            values_ = std::move(values);
        }

        /// Debug rendering of this value and, recursively, of its children.
        std::string ToString() const
        {
            switch (type_)
            {
            case Type::kEmpty:
                return "kEmpty: {}";
            case Type::kNumber:
                return "kNumber: " + std::to_string(int_value_);
            case Type::kString:
                return "kString: \"" + string_value_ + "\"";
            case Type::kIdentifier:
                return "kIdentifier: " + string_value_;
            case Type::kValueList:
            {
                std::string out = "kValueList: {\n";
                for (const ArrayValue &v : values_)
                {
                    out += "\t" + v.ToString() + ",\n";
                }
                return out + "}\n";
            }
            case Type::kValuePair:
            {
                // set_type() can tag a value as a pair without supplying one, so
                // the pointer must be checked before it is dereferenced.
                const std::string rhs = pair_.second ? pair_.second->ToString() : "(null)";
                return "kValuePair: " + pair_.first + " = " + rhs + "\n";
            }
            }
            // Only reachable if type_ holds a value outside the enum; returning here
            // keeps the function from running off its end.
            return "kUnknown";
        }

        static ArrayValue Empty()
        {
            return ArrayValue(ArrayValue::Type::kEmpty);
        }

        static ArrayValue Number(int value)
        {
            return ArrayValue(ArrayValue::Type::kNumber, value);
        }

        static ArrayValue String(std::string value)
        {
            return ArrayValue(ArrayValue::Type::kString, std::move(value));
        }

        static ArrayValue Identifier(std::string value)
        {
            return ArrayValue(ArrayValue::Type::kIdentifier, std::move(value));
        }

        static ArrayValue ValueList(std::vector<ArrayValue> values)
        {
            return ArrayValue(ArrayValue::Type::kValueList, std::move(values));
        }

        static ArrayValue ValuePair(std::pair<std::string, std::unique_ptr<ArrayValue>> value)
        {
            return ArrayValue(ArrayValue::Type::kValuePair, std::move(value));
        }

        ArrayValue(Type type) : type_(type) {}
        ArrayValue(Type type, int value) : type_(type), int_value_(value) {}
        ArrayValue(Type type, std::string value) : type_(type), string_value_(std::move(value)) {}
        ArrayValue(Type type, std::vector<ArrayValue> values) : type_(type), values_(std::move(values)) {}
        ArrayValue(Type type, std::pair<std::string, std::unique_ptr<ArrayValue>> pair) : type_(type), pair_(std::move(pair)) {}

        /// A default-constructed value is a well-defined kEmpty (previously its
        /// type and number were left indeterminate).
        ArrayValue() = default;

    private:
        Type type_ = Type::kEmpty;

        // Number
        int int_value_ = 0;
        // String, Identifier
        std::string string_value_;
        // ValueList
        std::vector<ArrayValue> values_;
        // ValuePair (empty name, null value unless built through ValuePair())
        std::pair<std::string, std::unique_ptr<ArrayValue>> pair_;
    };
} // namespace fex

// ---- include/lib/fex/define_statement.h ----
namespace fex
{
    /// A `#define NAME value` statement reduced to its name and integer value.
    class DefineStatement
    {
    public:
        DefineStatement(std::string name, int value) : name_(std::move(name)), value_(value) {}

        const std::string &name() const { return name_; }
        int value() const { return value_; }

    private:
        std::string name_;
        int value_;
    };
} // namespace fex
namespace fex
{
    /// A lexical token together with the file name and line it was read from.
    class Token
    {
    public:
        enum class Type
        {
            // Macros
            kIfDef,
            kIfNDef,
            kDefine,
            kEndIf,
            kInclude,

            // Identifiers
            kIdentifier,

            // Keywords
            kExtern,
            kConst,
            kStruct,

            // Literals
            kNumber,
            kString,

            // Symbols
            kOpenParen,
            kCloseParen,
            kLessThan,
            kGreaterThan,
            kLessThanEqual,
            kGreaterThanEqual,
            kEqual,
            kLeftShift,
            kRightShift,
            kPlus,
            kMinus,
            kTimes,
            kDivide,
            kBitXor,
            kBitAnd,
            kBitOr,
            kLogicalAnd,
            kLogicalOr,
            kQuote,
            kComma,
            kSemicolon,
            kOpenSquare,
            kCloseSquare,
            kOpenCurly,
            kCloseCurly,
            kPeriod,
            kUnderscore,
        };

        /// Token without a payload (symbols, keywords, macro markers).
        Token(Type type, std::string filename, int line_number) : type_(type), filename_(filename), line_number_(line_number) {}
        /// Token carrying text (identifiers, string literals).
        Token(Type type, std::string filename, int line_number, std::string string_value) : type_(type), string_value_(string_value), filename_(filename), line_number_(line_number) {}
        /// Token carrying an integer (number literals).
        Token(Type type, std::string filename, int line_number, int int_value) : type_(type), int_value_(int_value), filename_(filename), line_number_(line_number) {}

        Type type() const { return type_; }
        const std::string &string_value() const { return string_value_; }
        /// 0 for tokens that were not constructed with an integer payload.
        int int_value() const { return int_value_; }

        const std::string &filename() const { return filename_; }
        int line_number() const { return line_number_; }

        std::string ToString() const;

    private:
        Type type_;
        std::string string_value_;
        // Initialized so int_value() is well defined for every constructor; only
        // the integer constructor used to set it.
        int int_value_ = 0;

        std::string filename_ = "";
        int line_number_ = 0;
    };

    /// Turns C source text into a flat list of Tokens.
    class Lexer
    {
    public:
        Lexer() = default;
        ~Lexer() = default;

        /// Reads the file at `path` and lexes its contents.
        std::vector<Token> LexFile(const std::string &path);
        /// Lexes `data` directly; tokens report "string literal" as their file name.
        std::vector<Token> LexString(const std::string &data);
        /// Lexes `path` and writes one Token::ToString() line per token to `out`.
        void LexFileDumpTokens(const std::string &path, const std::string &out);

    private:
        std::vector<Token> Lex();
        char Peek();
        char Next();
        bool IsNumber();
        bool IsAlpha();
        bool IsHexAlpha();
        bool IsAlphaNumber();
        bool IsWhitespace();

        Token ConsumeIdentifier();
        Token ConsumeKeyword(Token identifier);
        Token ConsumeNumber();
        Token ConsumeString();
        Token ConsumeMacro();

        std::string ReadIdentifier();

        // Text being lexed and the read cursor into it.
        std::string data_ = "";
        uint32_t index_ = 0;

        // Source name and current line, stamped onto every token produced.
        std::string filename_ = "";
        int line_number_ = 1;
    };
} // namespace fex
IsHexAlpha(); + bool IsAlphaNumber(); + bool IsWhitespace(); + + Token ConsumeIdentifier(); + Token ConsumeKeyword(Token identifier); + Token ConsumeNumber(); + Token ConsumeString(); + Token ConsumeMacro(); + + std::string ReadIdentifier(); + + std::string data_ = ""; + uint32_t index_ = 0; + + std::string filename_ = ""; + int line_number_ = 1; + }; +} // namespace fex + +#endif // INCLUDE_CORE_LEXER_H diff --git a/include/lib/fex/parser.h b/include/lib/fex/parser.h new file mode 100644 index 00000000..6a6b9e43 --- /dev/null +++ b/include/lib/fex/parser.h @@ -0,0 +1,48 @@ +#ifndef INCLUDE_CORE_PARSER_H +#define INCLUDE_CORE_PARSER_H + +#include +#include +#include + +#include "array.h" +#include "array_value.h" +#include "define_statement.h" +#include "lexer.h" + +namespace fex +{ + class Parser + { + public: + Parser() = default; + + std::vector Parse(std::vector tokens); + std::vector ParseTopLevelArrays(std::vector tokens); + std::map ParseTopLevelObjects(std::vector tokens); + + std::map ReadDefines(const std::string &filename, std::vector matching); + + private: + int EvaluateExpression(std::vector tokens); + int ResolveIdentifier(); + int ResolveIdentifier(const Token &token); + int GetPrecedence(const Token &token); + bool IsOperator(const Token &token); + bool IsParamMacro(); + std::vector ToPostfix(); + DefineStatement ParseDefine(); + + ArrayValue ParseObject(); + + Token Peek(); + Token Next(); + + unsigned long index_; + std::vector tokens_; + + std::map top_level_; + }; +} // namespace fex + +#endif // INCLUDE_CORE_PARSER_H diff --git a/include/lib/fex/parser_util.h b/include/lib/fex/parser_util.h new file mode 100644 index 00000000..58ff89fc --- /dev/null +++ b/include/lib/fex/parser_util.h @@ -0,0 +1,19 @@ +#ifndef PARSER_UTIL_H +#define PARSER_UTIL_H + +#include +#include + +class ParserUtil +{ +public: + ParserUtil(QString root); + QStringList ReadDefines(QString filename, QString prefix); + QStringList ReadDefinesValueSort(QString filename, 
QString prefix); + +private: + QString root_; +}; + + +#endif // PARSER_UTIL_H diff --git a/include/project.h b/include/project.h index 97730481..9cca1936 100644 --- a/include/project.h +++ b/include/project.h @@ -187,6 +187,7 @@ public: bool readObjEventGfxConstants(); bool readSongNames(); bool readEventGraphics(); + QMap> readObjEventGfxInfo(); void setEventPixmap(Event * event, bool forceLoad = false); diff --git a/porymap.pro b/porymap.pro index 89808d2c..1b78ebcc 100644 --- a/porymap.pro +++ b/porymap.pro @@ -32,6 +32,9 @@ SOURCES += src/core/block.cpp \ src/core/regionmap.cpp \ src/core/wildmoninfo.cpp \ src/core/editcommands.cpp \ + src/lib/fex/lexer.cpp \ + src/lib/fex/parser.cpp \ + src/lib/fex/parser_util.cpp \ src/lib/orderedjson.cpp \ src/core/regionmapeditcommands.cpp \ src/mainwindow_scriptapi.cpp \ @@ -108,6 +111,12 @@ HEADERS += include/core/block.h \ include/core/wildmoninfo.h \ include/core/editcommands.h \ include/core/regionmapeditcommands.h \ + include/lib/fex/array.h \ + include/lib/fex/array_value.h \ + include/lib/fex/define_statement.h \ + include/lib/fex/lexer.h \ + include/lib/fex/parser.h \ + include/lib/fex/parser_util.h \ include/lib/orderedmap.h \ include/lib/orderedjson.h \ include/ui/aboutporymap.h \ diff --git a/src/lib/fex/LICENSE b/src/lib/fex/LICENSE new file mode 100644 index 00000000..68d1c4a9 --- /dev/null +++ b/src/lib/fex/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Ashley Coleman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or 
substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/lib/fex/README b/src/lib/fex/README new file mode 100644 index 00000000..564d1ae1 --- /dev/null +++ b/src/lib/fex/README @@ -0,0 +1,2 @@ +This is a slightly-modified library for parsing C code originally written by Ashley Coleman +(https://github.com/V-FEXrt). diff --git a/src/lib/fex/lexer.cpp b/src/lib/fex/lexer.cpp new file mode 100644 index 00000000..2dd4b249 --- /dev/null +++ b/src/lib/fex/lexer.cpp @@ -0,0 +1,505 @@ +#include "lib/fex/lexer.h" + +#include +#include +#include + +namespace fex +{ + + bool Lexer::IsNumber() + { + char c = Peek(); + return (c >= '0' && c <= '9'); + } + + bool Lexer::IsWhitespace() + { + char c = Peek(); + return (c == ' ' || c == '\t' || c == '\r' || c == '\n'); + } + + bool Lexer::IsHexAlpha() + { + char c = Peek(); + return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); + } + + bool Lexer::IsAlpha() + { + char c = Peek(); + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); + } + + bool Lexer::IsAlphaNumber() + { + return IsAlpha() || IsNumber(); + }; + + char Lexer::Peek() + { + return data_[index_]; + } + + char Lexer::Next() + { + char c = Peek(); + index_++; + return c; + } + + Token Lexer::ConsumeKeyword(Token identifier) + { + const std::string &value = identifier.string_value(); + + if (value == "extern") + { + return Token(Token::Type::kExtern, identifier.filename(), identifier.line_number()); + } + if (value == "const") + { + return Token(Token::Type::kConst, 
identifier.filename(), identifier.line_number()); + } + if (value == "struct") + { + return Token(Token::Type::kStruct, identifier.filename(), identifier.line_number()); + } + + return identifier; + } + + Token Lexer::ConsumeIdentifier() + { + std::string identifer = ""; + + while (IsAlphaNumber() || Peek() == '_') + { + identifer += Next(); + } + + return ConsumeKeyword(Token(Token::Type::kIdentifier, filename_, line_number_, identifer)); + } + + Token Lexer::ConsumeNumber() + { + std::string identifer = ""; + + if (Peek() == '0') + { + identifer += Next(); + if (Peek() == 'x') + { + identifer += Next(); + } + + while (IsNumber() || IsHexAlpha()) + { + identifer += Next(); + } + + return Token(Token::Type::kNumber, filename_, line_number_, std::stoi(identifer, nullptr, 16)); + } + + while (IsNumber()) + { + identifer += Next(); + } + + return Token(Token::Type::kNumber, filename_, line_number_, std::stoi(identifer)); + } + + // TODO: Doesn't currently support escape characters + Token Lexer::ConsumeString() + { + std::string value = ""; + if (Next() != '\"') + { + // Error + } + + // TODO: error if we never see a quote + while (Peek() != '\"') + { + value += Next(); + } + Next(); // Consume final quote + return Token(Token::Type::kString, filename_, line_number_, value); + } + + Token Lexer::ConsumeMacro() + { + Token id = ConsumeIdentifier(); + + if (id.string_value() == "ifdef") + { + return Token(Token::Type::kIfDef, filename_, line_number_); + } + if (id.string_value() == "ifndef") + { + return Token(Token::Type::kIfNDef, filename_, line_number_); + } + if (id.string_value() == "define") + { + return Token(Token::Type::kDefine, filename_, line_number_); + } + if (id.string_value() == "endif") + { + return Token(Token::Type::kEndIf, filename_, line_number_); + } + + if (id.string_value() == "include") + { + return Token(Token::Type::kInclude, filename_, line_number_); + } + + return Token(Token::Type::kDefine, filename_, line_number_); + } + + std::vector 
Lexer::LexString(const std::string &data) + { + filename_ = "string literal"; + line_number_ = 1; + index_ = 0; + data_ = data; + + return Lex(); + } + + std::vector Lexer::LexFile(const std::string &path) + { + filename_ = path; + line_number_ = 1; + + std::ifstream file; + file.open(path); + + std::stringstream stream; + stream << file.rdbuf(); + + index_ = 0; + data_ = stream.str(); + + file.close(); + + return Lex(); + } + + void Lexer::LexFileDumpTokens(const std::string &path, const std::string &out) + { + std::ofstream file; + file.open(out); + + for (Token token : LexFile(path)) + { + file << token.ToString() << std::endl; + } + + file.close(); + } + + std::vector Lexer::Lex() + { + std::vector tokens; + + while (index_ < data_.length()) + { + while (IsWhitespace()) + { + if (Peek() == '\n') + { + line_number_++; + } + Next(); + } + + if (IsAlpha()) + { + tokens.push_back(ConsumeIdentifier()); + continue; + } + + if (IsNumber()) + { + tokens.push_back(ConsumeNumber()); + continue; + } + + switch (Peek()) + { + case '*': + Next(); + tokens.push_back(Token(Token::Type::kTimes, filename_, line_number_)); + break; + case '-': + Next(); + tokens.push_back(Token(Token::Type::kMinus, filename_, line_number_)); + break; + case '+': + Next(); + tokens.push_back(Token(Token::Type::kPlus, filename_, line_number_)); + break; + case '(': + Next(); + tokens.push_back(Token(Token::Type::kOpenParen, filename_, line_number_)); + break; + case ')': + Next(); + tokens.push_back(Token(Token::Type::kCloseParen, filename_, line_number_)); + break; + case '&': + Next(); + if (Peek() == '&') + { + Next(); + tokens.push_back(Token(Token::Type::kLogicalAnd, filename_, line_number_)); + break; + } + tokens.push_back(Token(Token::Type::kBitAnd, filename_, line_number_)); + break; + case '|': + Next(); + if (Peek() == '|') + { + Next(); + tokens.push_back(Token(Token::Type::kLogicalOr, filename_, line_number_)); + break; + } + tokens.push_back(Token(Token::Type::kBitOr, filename_, 
line_number_)); + break; + case '^': + Next(); + tokens.push_back(Token(Token::Type::kBitXor, filename_, line_number_)); + break; + case ',': + Next(); + tokens.push_back(Token(Token::Type::kComma, filename_, line_number_)); + break; + case '=': + Next(); + tokens.push_back(Token(Token::Type::kEqual, filename_, line_number_)); + break; + case ';': + Next(); + tokens.push_back(Token(Token::Type::kSemicolon, filename_, line_number_)); + break; + case '[': + Next(); + tokens.push_back(Token(Token::Type::kOpenSquare, filename_, line_number_)); + break; + case ']': + Next(); + tokens.push_back(Token(Token::Type::kCloseSquare, filename_, line_number_)); + break; + case '{': + Next(); + tokens.push_back(Token(Token::Type::kOpenCurly, filename_, line_number_)); + break; + case '}': + Next(); + tokens.push_back(Token(Token::Type::kCloseCurly, filename_, line_number_)); + break; + case '.': + Next(); + tokens.push_back(Token(Token::Type::kPeriod, filename_, line_number_)); + break; + case '_': + Next(); + tokens.push_back(Token(Token::Type::kUnderscore, filename_, line_number_)); + break; + case '#': + Next(); + tokens.push_back(ConsumeMacro()); + break; + case '\"': + tokens.push_back(ConsumeString()); + break; + case '<': + Next(); + if (Peek() == '<') + { + Next(); + tokens.push_back(Token(Token::Type::kLeftShift, filename_, line_number_)); + break; + } + if (Peek() == '=') + { + Next(); + tokens.push_back(Token(Token::Type::kLessThanEqual, filename_, line_number_)); + break; + } + tokens.push_back(Token(Token::Type::kLessThan, filename_, line_number_)); + break; + case '>': + Next(); + if (Peek() == '>') + { + Next(); + tokens.push_back(Token(Token::Type::kRightShift, filename_, line_number_)); + break; + } + if (Peek() == '=') + { + Next(); + tokens.push_back(Token(Token::Type::kGreaterThanEqual, filename_, line_number_)); + break; + } + tokens.push_back(Token(Token::Type::kGreaterThan, filename_, line_number_)); + break; + + case '/': + Next(); + switch (Peek()) + { + 
case '/': + while (Next() != '\n') + ; + continue; + case '*': + while (Next() != '*') + ; + Next(); // last / + continue; + default: + tokens.push_back(Token(Token::Type::kDivide, filename_, line_number_)); + continue; + } + + case '\0': + Next(); + break; + + default: + char c = Next(); + std::cout << "[WARNING] Unable to lex unknown char: '" << c << "' (0x" << std::hex << (int)c << ")" << std::endl; + break; + } + } + + return tokens; + } + + std::string Token::ToString() const + { + std::string out = filename() + ":" + std::to_string(line_number()) + " - "; + switch (type()) + { + case Token::Type::kIfDef: + out += "Macro: IfDef"; + break; + case Token::Type::kIfNDef: + out += "Macro: IfNDef"; + break; + case Token::Type::kDefine: + out += "Macro: Define"; + break; + case Token::Type::kEndIf: + out += "Macro: EndIf"; + break; + case Token::Type::kInclude: + out += "Macro: Include"; + break; + case Token::Type::kNumber: + out += "Number: " + std::to_string(int_value()); + break; + case Token::Type::kString: + out += "String: " + string_value(); + break; + case Token::Type::kIdentifier: + out += "Identifier: " + string_value(); + break; + case Token::Type::kOpenParen: + out += "Symbol: ("; + break; + case Token::Type::kCloseParen: + out += "Symbol: )"; + break; + case Token::Type::kLessThan: + out += "Symbol: <"; + break; + case Token::Type::kGreaterThan: + out += "Symbol: >"; + break; + case Token::Type::kLeftShift: + out += "Symbol: <<"; + break; + case Token::Type::kRightShift: + out += "Symbol: >>"; + break; + case Token::Type::kPlus: + out += "Symbol: +"; + break; + case Token::Type::kMinus: + out += "Symbol: -"; + break; + case Token::Type::kTimes: + out += "Symbol: *"; + break; + case Token::Type::kDivide: + out += "Symbol: /"; + break; + case Token::Type::kBitXor: + out += "Symbol: ^"; + break; + case Token::Type::kBitAnd: + out += "Symbol: &"; + break; + case Token::Type::kBitOr: + out += "Symbol: |"; + break; + case Token::Type::kQuote: + out += 
"Symbol: \""; + break; + case Token::Type::kComma: + out += "Symbol: ,"; + break; + case Token::Type::kLessThanEqual: + out += "Symbol: <="; + break; + case Token::Type::kGreaterThanEqual: + out += "Symbol: >="; + break; + case Token::Type::kEqual: + out += "Symbol: ="; + break; + case Token::Type::kLogicalAnd: + out += "Symbol: &&"; + break; + case Token::Type::kLogicalOr: + out += "Symbol: ||"; + break; + case Token::Type::kSemicolon: + out += "Symbol: ;"; + break; + case Token::Type::kExtern: + out += "Keyword: extern"; + break; + case Token::Type::kConst: + out += "Keyword: const"; + break; + case Token::Type::kStruct: + out += "Keyword: struct"; + break; + case Token::Type::kOpenSquare: + out += "Symbol: ["; + break; + case Token::Type::kCloseSquare: + out += "Symbol: ]"; + break; + case Token::Type::kOpenCurly: + out += "Symbol: {"; + break; + case Token::Type::kCloseCurly: + out += "Symbol: }"; + break; + case Token::Type::kPeriod: + out += "Symbol: ."; + break; + case Token::Type::kUnderscore: + out += "Symbol: _"; + break; + } + + return out; + } + +} // namespace fex diff --git a/src/lib/fex/parser.cpp b/src/lib/fex/parser.cpp new file mode 100644 index 00000000..437ab1aa --- /dev/null +++ b/src/lib/fex/parser.cpp @@ -0,0 +1,538 @@ +#include "lib/fex/parser.h" + +#include +#include +#include + +namespace fex +{ + int Parser::GetPrecedence(const Token &token) + { + switch (token.type()) + { + case Token::Type::kTimes: + return 3; + case Token::Type::kDivide: + return 3; + case Token::Type::kPlus: + return 4; + case Token::Type::kMinus: + return 4; + case Token::Type::kLeftShift: + return 5; + case Token::Type::kRightShift: + return 5; + case Token::Type::kBitAnd: + return 8; + case Token::Type::kBitXor: + return 9; + case Token::Type::kBitOr: + return 10; + + default: + { + std::cout << "Asked for precedence of unmapped token: " << token.ToString() << std::endl; + return 0; + } + } + } + + std::vector Parser::ToPostfix() + { + std::vector types = { + 
Token::Type::kNumber, + Token::Type::kIdentifier, + Token::Type::kOpenParen, + Token::Type::kCloseParen, + Token::Type::kLeftShift, + Token::Type::kRightShift, + Token::Type::kPlus, + Token::Type::kMinus, + Token::Type::kTimes, + Token::Type::kDivide, + Token::Type::kBitXor, + Token::Type::kBitAnd, + Token::Type::kBitOr, + }; + + std::vector output; + std::vector stack; + + while (std::find(types.begin(), types.end(), Peek().type()) != types.end()) + { + Token token = Next(); + if (token.type() == Token::Type::kNumber || token.type() == Token::Type::kIdentifier) + { + output.push_back(token); + } + else if (token.type() == Token::Type::kOpenParen) + { + stack.push_back(token); + } + else if (token.type() == Token::Type::kCloseParen) + { + while (!stack.empty() && stack.back().type() != Token::Type::kOpenParen) + { + Token back = stack.back(); + stack.pop_back(); + output.push_back(back); + } + + // Next(); + + if (!stack.empty()) + { + // pop the left parenthesis token + stack.pop_back(); + } + else + { + std::cout << "Mismatched parentheses detected in expression!" << std::endl; + } + } + else + { + // token is an operator + while (!stack.empty() && stack.back().type() != Token::Type::kOpenParen && GetPrecedence(stack.back()) <= GetPrecedence(token)) + { + Token back = stack.back(); + stack.pop_back(); + output.push_back(back); + } + stack.push_back(token); + } + } + + while (!stack.empty()) + { + if (stack.back().type() == Token::Type::kOpenParen || stack.back().type() == Token::Type::kCloseParen) + { + std::cout << "Mismatched parentheses detected in expression!" 
<< std::endl; + } + else + { + Token back = stack.back(); + stack.pop_back(); + output.push_back(back); + } + } + + return output; + } + + Token Parser::Peek() { return tokens_[index_]; } + + Token Parser::Next() + { + Token t = Peek(); + index_++; + return t; + } + + int Parser::ResolveIdentifier(const Token &token) + { + std::string iden_val = token.string_value(); + + if (top_level_.find(iden_val) == top_level_.end()) + { + std::cout << "[WARNING] Unknown identifier " << iden_val << std::endl; + return 0; + } + + return top_level_[iden_val]; + } + + bool Parser::IsOperator(const Token &token) + { + std::vector types = { + Token::Type::kLeftShift, + Token::Type::kRightShift, + Token::Type::kPlus, + Token::Type::kMinus, + Token::Type::kTimes, + Token::Type::kDivide, + Token::Type::kBitXor, + Token::Type::kBitAnd, + Token::Type::kBitOr, + }; + return std::find(types.begin(), types.end(), token.type()) != types.end(); + } + + int Parser::EvaluateExpression(std::vector tokens) + { + std::vector stack; + for (Token token : tokens) + { + if (IsOperator(token) && stack.size() > 1) + { + int op2 = stack.back().int_value(); + stack.pop_back(); + int op1 = stack.back().int_value(); + stack.pop_back(); + int result = 0; + if (token.type() == Token::Type::kTimes) + { + result = op1 * op2; + } + if (token.type() == Token::Type::kDivide) + { + result = op1 / op2; + } + if (token.type() == Token::Type::kPlus) + { + result = op1 + op2; + } + if (token.type() == Token::Type::kMinus) + { + result = op1 - op2; + } + if (token.type() == Token::Type::kLeftShift) + { + result = op1 << op2; + } + if (token.type() == Token::Type::kRightShift) + { + result = op1 >> op2; + } + if (token.type() == Token::Type::kBitAnd) + { + result = op1 & op2; + } + if (token.type() == Token::Type::kBitXor) + { + result = op1 ^ op2; + } + if (token.type() == Token::Type::kBitOr) + { + result = op1 | op2; + } + + stack.push_back(Token(Token::Type::kNumber, token.filename(), token.line_number(), result)); + 
} + + if (token.type() == Token::Type::kNumber) + { + stack.push_back(token); + } + + if (token.type() == Token::Type::kIdentifier) + { + stack.push_back(Token(Token::Type::kNumber, token.filename(), token.line_number(), ResolveIdentifier(token))); + } + } + return stack.size() ? stack.back().int_value() : 0; + } + + bool Parser::IsParamMacro() + { + int save_index = index_; + + if (Peek().type() != Token::Type::kOpenParen) + { + return false; + } + + Next(); // Consume open so next if doesn't see it + + while (Peek().type() != Token::Type::kCloseParen) + { + // Nested parens aren't allowed in param list + if (Peek().type() == Token::Type::kOpenParen) + { + index_ = save_index; + return false; + } + + Next(); + } + // Consume closing + Next(); + + std::vector types = { + Token::Type::kNumber, + Token::Type::kIdentifier, + Token::Type::kOpenParen, + Token::Type::kCloseParen, + Token::Type::kLeftShift, + Token::Type::kRightShift, + Token::Type::kPlus, + Token::Type::kMinus, + Token::Type::kTimes, + Token::Type::kDivide, + Token::Type::kBitXor, + Token::Type::kBitAnd, + Token::Type::kBitOr, + }; + + // read value before resetting. + bool out = std::find(types.begin(), types.end(), Peek().type()) != types.end(); + index_ = save_index; + return out; + } + + DefineStatement Parser::ParseDefine() + { + if (Next().type() != Token::Type::kDefine) + { + // error + } + + if (Peek().type() != Token::Type::kIdentifier) + { + // error + } + + std::string identifer = Next().string_value(); + int value = 0; + + if (IsParamMacro()) + { + std::cout << "[WARNING] Macro:" << identifer << " has parameters which is not currently supported. Returning a dummy value instead." 
<< std::endl; + value = 0xDEAD; + // Parameters (x, y, x) Expression + Next(); + + while (Peek().type() != Token::Type::kCloseParen) + { + auto formal = Next().string_value(); + if (Peek().type() == Token::Type::kComma) + { + Next(); + } + } + + Next(); + + // In all current use cases, the macro is #define MACRO(a, b, c) (( something )) + // we have consumed through the parameter list at this point. Consume all remaing + // contents inside of parens + if (Peek().type() != Token::Type::kOpenParen) + { + std::cout << "[FATAL] Must seen open parenthesis to continue processing." << std::endl; + abort(); + } + + Next(); + int paren_count = 1; + while (paren_count > 0) + { + if (Peek().type() == Token::Type::kOpenParen) + { + paren_count++; + } + if (Peek().type() == Token::Type::kCloseParen) + { + paren_count--; + } + + Next(); + } + } + else + { + value = EvaluateExpression(ToPostfix()); + } + + top_level_[identifer] = value; + return DefineStatement(identifer, value); + } + + std::map Parser::ReadDefines(const std::string &filename, std::vector matching) + { + std::map out; + + Lexer lexer; + auto tokens = lexer.LexFile(filename); + auto defines = Parse(tokens); + + for (const auto &define : defines) + { + for (const std::string &match : matching) + { + if (std::regex_match(define.name(), std::regex(match))) + { + out[define.name()] = define.value(); + } + } + } + + return out; + } + + ArrayValue Parser::ParseObject() + { + if (Peek().type() == Token::Type::kOpenSquare) + { + Next(); // [ + std::string identifier = Next().string_value(); + Next(); // ] + Next(); // = + std::unique_ptr value = std::unique_ptr(new ArrayValue(ParseObject())); + + std::pair> pair(identifier, std::move(value)); + return ArrayValue::ValuePair(std::move(pair)); + } + + if (Peek().type() == Token::Type::kOpenCurly) + { + std::vector values; + Next(); // { + values.push_back(ParseObject()); + while(Peek().type() == Token::Type::kComma) { + Next(); + values.push_back(ParseObject()); + } + 
Next(); // } + /* A brace list holding exactly one element is returned as that element, not as a one-entry ValueList. */ + if (values.size() == 1) { + return std::move(values[0]); + } + + return ArrayValue::ValueList(std::move(values)); + } + + if (Peek().type() == Token::Type::kNumber) + { + int value = Next().int_value(); + return ArrayValue::Number(value); + } + + if (Peek().type() == Token::Type::kBitAnd) + { + // Just skip past any reference indicators before identifiers. + // This is not the right way to handle this, but it's good enough + // for our parsing needs. + Next(); // & + } + + if (Peek().type() == Token::Type::kIdentifier) + { + std::vector idens = {}; + idens.push_back(ArrayValue::Identifier(Next().string_value())); + + // NELEMS(...) + if (Peek().type() == Token::Type::kOpenParen) + { + while (Peek().type() != Token::Type::kCloseParen) + { + std::string out = Next().ToString(); /* Tokens up to the close paren are consumed and dropped; out is never read. */ + } + Next(); // ) + } + + // ABC | DEF | GHI + while (Peek().type() == Token::Type::kBitOr) { + Next(); + idens.push_back(ArrayValue::Identifier(Next().string_value())); + } + + if (idens.size() == 1) + { + return std::move(idens[0]); + } + + return ArrayValue::ValueList(std::move(idens)); + } + /* Form _( value ): the token between the parentheses becomes a String value. */ + if (Peek().type() == Token::Type::kUnderscore) + { + Next(); // _ + Next(); // ( + std::string value = Next().string_value(); + Next(); // ) + return ArrayValue::String(value); + } + /* Designated field form: period, field name, equals sign, then a nested value. */ + if (Peek().type() == Token::Type::kPeriod) + { + Next(); // .
+ std::string identifier = Next().string_value(); + Next(); // = + + std::unique_ptr value = std::unique_ptr(new ArrayValue(ParseObject())); + + std::pair> pair(identifier, std::move(value)); + return ArrayValue::ValuePair(std::move(pair)); + } + /* No recognised leading token: report an empty value. */ + return ArrayValue::Empty(); + } + /* Resets the parser onto tokens and collects one Array per top-level definition. Each round consumes tokens until a const token has been consumed, consumes the next token (annotated struct), reads the type and name, consumes the three tokens annotated as open square, close square and equals, parses the initializer with ParseObject, and consumes the token annotated as the semicolon. NOTE(review): the inner skip loop has no end-of-input check of its own, so how it ends when no further const token exists depends on Next, which is defined outside this chunk. */ + std::vector Parser::ParseTopLevelArrays(std::vector tokens) + { + index_ = 0; + tokens_ = std::move(tokens); + + std::vector items; + + while (index_ < tokens_.size()) + { + while (Next().type() != Token::Type::kConst) + ; + Next(); // struct + + std::string type = Next().string_value(); + std::string name = Next().string_value(); + + Array value(type, name); + + Next(); // [ + Next(); // ] + Next(); // = + value.Add(ParseObject()); + Next(); // ; + + items.push_back(std::move(value)); + } + + return items; + } + /* Resets the parser onto tokens and returns a map from object name to its parsed initializer. Each round consumes tokens until a const token has been consumed, consumes the two tokens annotated struct and type, reads the name, consumes the token annotated as equals, stores the result of ParseObject under the name, and consumes the token annotated as the semicolon. NOTE(review): as in ParseTopLevelArrays, the inner skip loop has no end-of-input check of its own; its behaviour when no further const token exists depends on Next, which is defined outside this chunk. */ + std::map Parser::ParseTopLevelObjects(std::vector tokens) + { + index_ = 0; + tokens_ = std::move(tokens); + + std::map items; + + while (index_ < tokens_.size()) + { + while (Next().type() != Token::Type::kConst) + ; + Next(); // struct + + Next(); // type + std::string name = Next().string_value(); + + Next(); // = + items[name] = ParseObject(); + Next(); // ; + } + + return items; + } + /* Resets the parser onto tokens and returns the DefineStatement produced by ParseDefine for every define token found; every other token is skipped one at a time. */ + std::vector Parser::Parse(std::vector tokens) + { + index_ = 0; + tokens_ = std::move(tokens); + std::vector statements; + + while (index_ < tokens_.size()) + { + switch (Peek().type()) + { + case Token::Type::kDefine: + statements.push_back(ParseDefine()); + break; + + default: + Next(); + break; + } + } + + return statements; + } + +} // namespace fex diff --git a/src/lib/fex/parser_util.cpp b/src/lib/fex/parser_util.cpp new file mode 100644 index 00000000..0f375b81 --- /dev/null +++ b/src/lib/fex/parser_util.cpp @@ -0,0 +1,50 @@ +#include "lib/fex/parser_util.h" + +#include + +#include "lib/fex/parser.h" + +ParserUtil::ParserUtil(QString root): root_(root) {} + /* Returns the names of the defines in the file at root_ joined with filename whose name matches prefix followed by a match-anything suffix. Returns an empty list when filename is empty. Names are appended while iterating the std::map returned by the parser, so they come out in that map key order. NOTE(review): prefix is concatenated into the regular expression without escaping, so regex metacharacters in prefix are interpreted as pattern syntax. */ +QStringList ParserUtil::ReadDefines(QString filename, QString prefix) +{ + if (filename.isEmpty()) { + return QStringList();
+ } + + QString filepath = root_ + "/" + filename; + + fex::Parser parser; + + std::vector match_list = { prefix.toStdString() + ".*" }; + std::map defines = parser.ReadDefines(filepath.toStdString(), match_list); + + QStringList out; + for(auto const& define : defines) { + out.append(QString::fromStdString(define.first)); + } + + return out; +} + /* Performs the same lookup as ReadDefines but returns the matching names ordered by define value: each name is inserted into a QMultiMap keyed by its value and the result is that map values() list, which Qt documents as being in ascending key order. Returns an empty list when filename is empty. */ +QStringList ParserUtil::ReadDefinesValueSort(QString filename, QString prefix) +{ + + if (filename.isEmpty()) { + return QStringList(); + } + + QString filepath = root_ + "/" + filename; + + fex::Parser parser; + + std::vector match_list = { prefix.toStdString() + ".*" }; + std::map defines = parser.ReadDefines(filepath.toStdString(), match_list); + + QMultiMap defines_keyed_by_value; + for (const auto& pair : defines) { + defines_keyed_by_value.insert(pair.second, QString::fromStdString(pair.first)); + } + + return defines_keyed_by_value.values(); +} diff --git a/src/project.cpp b/src/project.cpp index 797ca4f1..39e55150 100644 --- a/src/project.cpp +++ b/src/project.cpp @@ -10,6 +10,8 @@ #include "map.h" #include "orderedjson.h" +#include "lib/fex/lexer.h" +#include "lib/fex/parser.h" #include #include @@ -2471,16 +2473,20 @@ bool Project::readEventGraphics() { qDeleteAll(eventGraphicsMap); eventGraphicsMap.clear(); QStringList gfxNames = gfxDefines.keys(); + QMap> gfxInfos = readObjEventGfxInfo(); for (QString gfxName : gfxNames) { EventGraphics * eventGraphics = new EventGraphics; QString info_label = pointerHash[gfxName].replace("&", ""); - QStringList gfx_info = parser.readCArray("src/data/object_events/object_event_graphics_info.h", info_label); + if (!gfxInfos.contains(info_label)) + continue; /* NOTE(review): eventGraphics was allocated with new just above; this early continue skips it with no delete visible in this hunk, so confirm the object is not leaked. */ - eventGraphics->inanimate = (gfx_info.value(8) == "TRUE"); - QString pic_label = gfx_info.value(14); - QString dimensions_label = gfx_info.value(11); - QString subsprites_label = gfx_info.value(12); + QMapgfxInfoAttributes = gfxInfos[info_label]; + + eventGraphics->inanimate = gfxInfoAttributes.value("inanimate") ==
"TRUE"; + QString pic_label = gfxInfoAttributes.value("images"); + QString dimensions_label = gfxInfoAttributes.value("oam"); + QString subsprites_label = gfxInfoAttributes.value("subspriteTables"); QString gfx_label = parser.readCArray("src/data/object_events/object_event_pic_tables.h", pic_label).value(0); gfx_label = gfx_label.section(QRegularExpression("[\\(\\)]"), 1, 1); @@ -2515,6 +2521,28 @@ bool Project::readEventGraphics() { return true; } +QMap> Project::readObjEventGfxInfo() { /* Lexes object_event_graphics_info.h under the project root, parses its top-level const objects with fex, and returns a map from each object name to a map of its field names to the string_value of each field. Entries of an object that are not value pairs are skipped. */ + // TODO: refactor this to be more general if we end up directly parsing C + // for more use cases in the future. + auto cParser = fex::Parser(); + auto tokens = fex::Lexer().LexFile((root + "/src/data/object_events/object_event_graphics_info.h").toStdString()); + auto gfxInfoObjects = cParser.ParseTopLevelObjects(tokens); + QMap> gfxInfos; + for (auto it = gfxInfoObjects.begin(); it != gfxInfoObjects.end(); it++) { + QMap values; + for (const fex::ArrayValue &v : it->second.values()) { + if (v.type() != fex::ArrayValue::Type::kValuePair) + continue; + QString key = QString::fromStdString(v.pair().first); + QString value = QString::fromStdString(v.pair().second->string_value()); + values.insert(key, value); + } + gfxInfos.insert(QString::fromStdString(it->first), values); + } + + return gfxInfos; +} + bool Project::readSpeciesIconPaths() { speciesToIconPath.clear(); QString srcfilename = "src/pokemon_icon.c";