mirror of
https://github.com/MaSzyna-EU07/maszyna.git
synced 2026-03-22 15:05:03 +01:00
Merge pull request #43 from jerrrrycho/parser-enhancements
split parser with time-cutting enhancements
This commit is contained in:
335
parser.cpp
335
parser.cpp
@@ -23,9 +23,35 @@ http://mozilla.org/MPL/2.0/.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// cParser -- generic class for parsing text data.
|
||||
|
||||
namespace
{
// Builds a 256-entry lookup table in which every byte occurring in `brk` is flagged as a
// token separator. A null `brk` is treated as an empty separator list.
inline std::array<bool, 256> makeBreakTable(const char *brk)
{
    std::array<bool, 256> arr{};
    // strchr(), the lookup this table stands in for, also matches the terminating NUL of the
    // separator string; flag NUL so that input bytes of value 0 keep acting as separators.
    arr[0] = true;
    for (unsigned char c : std::string_view(brk ? brk : ""))
    {
        arr[c] = true;
    }
    return arr;
}

// Lower-cases a single char. The argument goes through unsigned char because passing a
// negative char value to std::tolower is undefined behaviour.
inline char toLowerChar(char c)
{
    return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}

// Returns true if `s` begins with the three-byte UTF-8 byte order mark (EF BB BF).
inline bool startsWithBOM(const std::string &s)
{
    return s.size() >= 3
        && static_cast<unsigned char>(s[0]) == 0xEF
        && static_cast<unsigned char>(s[1]) == 0xBB
        && static_cast<unsigned char>(s[2]) == 0xBF;
}
} // namespace
|
||||
|
||||
// constructors
|
||||
cParser::cParser(std::string const &Stream, buffertype const Type, std::string Path, bool const Loadtraction, std::vector<std::string> Parameters, bool allowRandom)
|
||||
: mPath(Path), LoadTraction(Loadtraction), allowRandomIncludes(allowRandom)
|
||||
: allowRandomIncludes(allowRandom), LoadTraction(Loadtraction), mPath(Path)
|
||||
{
|
||||
// store to calculate sub-sequent includes from relative path
|
||||
if (Type == buffertype::buffer_FILE)
|
||||
@@ -192,181 +218,196 @@ bool cParser::getTokens(unsigned int Count, bool ToLower, const char *Break)
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string cParser::readToken(bool ToLower, const char *Break)
|
||||
std::string cParser::readTokenFromDelegate(bool ToLower, const char *Break)
|
||||
{
|
||||
|
||||
std::string token;
|
||||
if (mIncludeParser)
|
||||
{
|
||||
// see if there's include parsing going on. clean up when it's done.
|
||||
token = mIncludeParser->readToken(ToLower, Break);
|
||||
if (true == token.empty())
|
||||
if (!mIncludeParser)
|
||||
return {};
|
||||
std::string token = mIncludeParser->readToken(ToLower, Break);
|
||||
if (token.empty())
|
||||
{
|
||||
mIncludeParser = nullptr;
|
||||
}
|
||||
}
|
||||
if (true == token.empty())
|
||||
{
|
||||
return token;
|
||||
}
|
||||
|
||||
std::string cParser::readTokenFromStream(bool ToLower, const char *Break)
|
||||
{
|
||||
std::string token;
|
||||
// get the token yourself if the delegation attempt failed
|
||||
char c{0};
|
||||
do
|
||||
{
|
||||
while (mStream->peek() != EOF && strchr(Break, c = mStream->get()) == NULL)
|
||||
{
|
||||
if (ToLower)
|
||||
c = tolower(c);
|
||||
token += c;
|
||||
if (findQuotes(token)) // do glue together words enclosed in quotes
|
||||
continue;
|
||||
if (skipComments && trimComments(token)) // don't glue together words separated with comment
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
{
|
||||
// update line counter
|
||||
|
||||
const auto breakTable = makeBreakTable(Break);
|
||||
char c = 0;
|
||||
|
||||
|
||||
while (token.empty() && mStream->peek() != EOF) {
|
||||
while (mStream->peek() != EOF) {
|
||||
c = static_cast<char>(mStream->get());
|
||||
|
||||
if (c == '\n') {
|
||||
++mLine;
|
||||
}
|
||||
} while (token == "" && mStream->peek() != EOF); // double check in case of consecutive separators
|
||||
|
||||
const unsigned char uc = static_cast<unsigned char>(c);
|
||||
if (breakTable[uc]) {
|
||||
// separator ends token (or continues skipping if token empty)
|
||||
if (!token.empty())
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
// check the first token for potential presence of utf bom
|
||||
if (mFirstToken)
|
||||
{
|
||||
|
||||
if (ToLower) c = toLowerChar(c);
|
||||
token.push_back(c);
|
||||
|
||||
if (findQuotes(token)) {
|
||||
continue; // glue quoted content
|
||||
}
|
||||
if (skipComments && trimComments(token)) {
|
||||
break; // don't glue tokens separated by comment
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void cParser::stripFirstTokenBOM(std::string& token, bool ToLower, const char* Break) {
|
||||
if (!mFirstToken) return;
|
||||
mFirstToken = false;
|
||||
if (token.rfind("\xef\xbb\xbf", 0) == 0)
|
||||
{
|
||||
|
||||
if (startsWithBOM(token)) {
|
||||
token.erase(0, 3);
|
||||
}
|
||||
if (true == token.empty())
|
||||
{
|
||||
// potentially possible if our first token was standalone utf bom
|
||||
|
||||
// if first "token" was standalone BOM, read the next real token (avoid recursion)
|
||||
while (token.empty() && mStream->peek() != EOF) {
|
||||
token = readToken(ToLower, Break);
|
||||
// readToken will not re-enter BOM stripping because mFirstToken is now false
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void cParser::substituteParameters(std::string& token, bool ToLower) {
|
||||
if (parameters.empty()) return;
|
||||
|
||||
// Replace occurrences of "(pN)" anywhere in token.
|
||||
// Keep behavior: if missing parameter -> "none".
|
||||
size_t pos = 0;
|
||||
while ((pos = token.find("(p", pos)) != std::string::npos) {
|
||||
const size_t close = token.find(')', pos);
|
||||
if (close == std::string::npos) break; // malformed -> stop like old behavior (it would substr weirdly)
|
||||
|
||||
const std::string idxStr = token.substr(pos + 2, close - (pos + 2));
|
||||
token.erase(pos, (close - pos) + 1);
|
||||
|
||||
const size_t nr = static_cast<size_t>(std::atoi(idxStr.c_str()));
|
||||
const std::string repl = (nr >= 1 && (nr - 1) < parameters.size())
|
||||
? parameters[nr - 1]
|
||||
: std::string("none");
|
||||
|
||||
const size_t insertPos = pos;
|
||||
token.insert(insertPos, repl);
|
||||
|
||||
if (ToLower) {
|
||||
// Lowercase only what we inserted (same intent as original)
|
||||
for (size_t i = insertPos; i < insertPos + repl.size(); ++i) {
|
||||
token[i] = toLowerChar(token[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (false == parameters.empty())
|
||||
{
|
||||
// if there's parameter list, check the token for potential parameters to replace
|
||||
size_t pos; // początek podmienianego ciągu
|
||||
while ((pos = token.find("(p")) != std::string::npos)
|
||||
{
|
||||
// check if the token is a parameter which should be replaced with stored true value
|
||||
auto const parameter{token.substr(pos + 2, token.find(")", pos) - (pos + 2))}; // numer parametru
|
||||
token.erase(pos, token.find(")", pos) - pos + 1); // najpierw usunięcie "(pN)"
|
||||
size_t nr = atoi(parameter.c_str()) - 1;
|
||||
if (nr < parameters.size())
|
||||
{
|
||||
token.insert(pos, parameters.at(nr)); // wklejenie wartości parametru
|
||||
if (ToLower)
|
||||
for (; pos < parameters.at(nr).size(); ++pos)
|
||||
token[pos] = tolower(token[pos]);
|
||||
}
|
||||
else
|
||||
token.insert(pos, "none"); // zabezpieczenie przed brakiem parametru
|
||||
}
|
||||
pos = insertPos + repl.size(); // continue after inserted text
|
||||
}
|
||||
}
|
||||
|
||||
// launch child parser if include directive found.
|
||||
// NOTE: parameter collecting uses default set of token separators.
|
||||
if (expandIncludes && token == "include")
|
||||
{
|
||||
std::string includefile = allowRandomIncludes ? deserialize_random_set(*this) : readToken(ToLower); // nazwa pliku
|
||||
void cParser::skipIncludeBlock() {
|
||||
// mimic original: while token != "end" readToken(true)
|
||||
std::string t;
|
||||
do {
|
||||
t = readToken(true);
|
||||
} while (t != "end" && !t.empty());
|
||||
}
|
||||
|
||||
// Launches a child parser for an include directive.
// srcParser:   parser the rest of the directive (its parameter list) is read from; this is
//              either *this or a temporary parser holding a single directive line.
// ToLower:     currently unused by this function.
// includefile: name of the file to include; slashes are normalized before use.
void cParser::startIncludeFromParser(cParser &srcParser, bool ToLower, std::string includefile)
{
    replace_slashes(includefile);

    const bool allowTraction =
        (true == LoadTraction) ||
        ((false == contains(includefile, "tr/")) && (false == contains(includefile, "tra/")));

    if (!allowTraction)
    {
        // Skip the rest of the directive up to "end". This has to consume from the parser that
        // holds the directive: reading from *this for a line-mode include would swallow
        // unrelated tokens of the main stream up to the next "end".
        srcParser.skipIncludeBlock();
        return;
    }

    const bool isTerrain = contains(includefile, "_ter.scm");
    if (isTerrain && true == Global.file_binary_terrain_state)
    {
        WriteLog("SBT found, ignoring: " + includefile);
        readParameters(srcParser); // the include is ignored, but its parameters still get consumed
        return;
    }

    if (Global.ParserLogIncludes)
    {
        if (isTerrain)
            WriteLog("including terrain: " + includefile);
        else
        {
            // WriteLog("including: " + includefile);
        }
    }

    mIncludeParser = std::make_shared<cParser>(includefile, buffer_FILE, mPath, LoadTraction, readParameters(srcParser));
    mIncludeParser->allowRandomIncludes = allowRandomIncludes;
    mIncludeParser->autoclear(m_autoclear);

    if (mIncludeParser->mSize <= 0)
    {
        ErrorLog("Bad include: can't open file \"" + includefile + "\"");
    }
}
|
||||
else
|
||||
{
|
||||
if (true == Global.file_binary_terrain_state)
|
||||
{
|
||||
WriteLog("SBT found, ignoring: " + includefile);
|
||||
readParameters(*this);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (Global.ParserLogIncludes)
|
||||
{
|
||||
WriteLog("including terrain: " + includefile);
|
||||
}
|
||||
mIncludeParser = std::make_shared<cParser>(includefile, buffer_FILE, mPath, LoadTraction, readParameters(*this));
|
||||
mIncludeParser->allowRandomIncludes = allowRandomIncludes;
|
||||
mIncludeParser->autoclear(m_autoclear);
|
||||
if (mIncludeParser->mSize <= 0)
|
||||
{
|
||||
ErrorLog("Bad include: can't open file \"" + includefile + "\"");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (token != "end")
|
||||
{
|
||||
token = readToken(true); // minimize risk of case mismatch on comparison
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool cParser::handleIncludeIfPresent(std::string& token, bool ToLower, const char* Break) {
|
||||
// token-mode include: token == "include"
|
||||
if (expandIncludes && token == "include") {
|
||||
std::string includefile =
|
||||
allowRandomIncludes ? deserialize_random_set(*this) : readToken(ToLower);
|
||||
|
||||
startIncludeFromParser(*this, ToLower, std::move(includefile));
|
||||
|
||||
// after processing include, return next token from current parser
|
||||
token = readToken(ToLower, Break);
|
||||
return true;
|
||||
}
|
||||
else if ((std::strcmp(Break, "\n\r") == 0) && (token.compare(0, 7, "include") == 0))
|
||||
{
|
||||
// HACK: if the parser reads full lines we expect this line to contain entire include directive, to make parsing easier
|
||||
|
||||
// line-mode HACK: Break == "\n\r" and line begins with "include"
|
||||
if ((std::strcmp(Break, "\n\r") == 0) && token.compare(0, 7, "include") == 0) {
|
||||
cParser includeparser(token.substr(7));
|
||||
std::string includefile = allowRandomIncludes ? deserialize_random_set(includeparser) : includeparser.readToken(ToLower); // nazwa pliku
|
||||
replace_slashes(includefile);
|
||||
if ((true == LoadTraction) || ((false == contains(includefile, "tr/")) && (false == contains(includefile, "tra/"))))
|
||||
{
|
||||
if (false == contains(includefile, "_ter.scm"))
|
||||
{
|
||||
if (Global.ParserLogIncludes)
|
||||
{
|
||||
// WriteLog("including: " + includefile);
|
||||
}
|
||||
mIncludeParser = std::make_shared<cParser>(includefile, buffer_FILE, mPath, LoadTraction, readParameters(includeparser));
|
||||
mIncludeParser->allowRandomIncludes = allowRandomIncludes;
|
||||
mIncludeParser->autoclear(m_autoclear);
|
||||
if (mIncludeParser->mSize <= 0)
|
||||
{
|
||||
ErrorLog("Bad include: can't open file \"" + includefile + "\"");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (true == Global.file_binary_terrain_state)
|
||||
{
|
||||
WriteLog("SBT found, ignoring: " + includefile);
|
||||
readParameters(includeparser);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (Global.ParserLogIncludes)
|
||||
{
|
||||
WriteLog("including terrain: " + includefile);
|
||||
}
|
||||
mIncludeParser = std::make_shared<cParser>(includefile, buffer_FILE, mPath, LoadTraction, readParameters(includeparser));
|
||||
mIncludeParser->allowRandomIncludes = allowRandomIncludes;
|
||||
mIncludeParser->autoclear(m_autoclear);
|
||||
if (mIncludeParser->mSize <= 0)
|
||||
{
|
||||
ErrorLog("Bad include: can't open file \"" + includefile + "\"");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string includefile =
|
||||
allowRandomIncludes ? deserialize_random_set(includeparser) : includeparser.readToken(ToLower);
|
||||
|
||||
startIncludeFromParser(includeparser, ToLower, std::move(includefile));
|
||||
|
||||
token = readToken(ToLower, Break);
|
||||
return true;
|
||||
}
|
||||
// all done
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string cParser::readToken(bool ToLower, const char *Break)
|
||||
{
|
||||
std::string token;
|
||||
|
||||
token = readTokenFromDelegate(ToLower, Break);
|
||||
if (token.empty())
|
||||
{
|
||||
token = readTokenFromStream(ToLower, Break);
|
||||
}
|
||||
|
||||
stripFirstTokenBOM(token, ToLower, Break);
|
||||
|
||||
substituteParameters(token, ToLower);
|
||||
|
||||
handleIncludeIfPresent(token, ToLower, Break);
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
|
||||
9
parser.h
9
parser.h
@@ -66,6 +66,11 @@ class cParser //: public std::stringstream
|
||||
return m_autoclear; }
|
||||
bool
|
||||
getTokens( unsigned int Count = 1, bool ToLower = true, char const *Break = "\n\r\t ;" );
|
||||
std::string readTokenFromDelegate(bool ToLower, const char *Break);
|
||||
std::string readTokenFromStream(bool ToLower, const char *Break);
|
||||
void stripFirstTokenBOM(std::string &token, bool ToLower, const char *Break);
|
||||
void substituteParameters(std::string &token, bool ToLower);
|
||||
void skipIncludeBlock();
|
||||
// returns next incoming token, if any, without removing it from the set
|
||||
inline
|
||||
std::string
|
||||
@@ -95,9 +100,11 @@ class cParser //: public std::stringstream
|
||||
bool skipComments = true;
|
||||
|
||||
private:
|
||||
void startIncludeFromParser(cParser &srcParser, bool ToLower, std::string includefile);
|
||||
bool handleIncludeIfPresent(std::string &token, bool ToLower, const char *Break);
|
||||
// methods:
|
||||
std::string readToken(bool ToLower = true, const char *Break = "\n\r\t ;");
|
||||
std::vector<std::string> readParameters( cParser &Input );
|
||||
static std::vector<std::string> readParameters( cParser &Input );
|
||||
std::string readQuotes( char const Quote = '\"' );
|
||||
void skipComment( std::string const &Endmark );
|
||||
bool findQuotes( std::string &String );
|
||||
|
||||
Reference in New Issue
Block a user