Work on parser

This commit is contained in:
orosmatthew 2023-09-29 17:18:32 -04:00
parent 4751f436fc
commit 8d13ea0fb8
3 changed files with 152 additions and 8 deletions

View File

@ -5,7 +5,7 @@ Defined Tokens
--- ---
<doc> ::= [<doctype>] <elem> <doc> ::= [<doctype>] <elem>*
<doctype> ::= "<" "!" "doctype" "html" ">" <doctype> ::= "<" "!" "doctype" "html" ">"
<elem> ::= <elem> ::=
| "<" <ident> <attr>* ">" (<elem> | <inner>)* "<" "/" <ident> ">" | "<" <ident> <attr>* ">" (<elem> | <inner>)* "<" "/" <ident> ">"

View File

@ -182,4 +182,94 @@ std::string Token::to_string()
} }
} }
Parser::Parser(std::vector<Token> tokens)
: m_tokens(std::move(tokens))
, m_index(0)
{
}
/// Entry point of the parser.
/// Currently a stub: it always yields a value-initialized (empty) document.
NodeDoc Parser::parse()
{
    return NodeDoc {};
}
std::optional<std::reference_wrapper<Token>> Parser::peek(size_t ahead)
{
if (m_index + ahead >= m_tokens.size()) {
return {};
}
return m_tokens.at(m_index + ahead);
}
/// Returns the token at the cursor and advances the cursor by one.
/// The lookup goes through `at()`, so it throws when the cursor is already past the end;
/// the cursor is advanced before that lookup happens.
Token& Parser::consume()
{
    const size_t current = m_index;
    ++m_index;
    return m_tokens.at(current);
}
/// Case-insensitive equality of two strings, using `std::tolower` on each character.
/// @param s1 First string.
/// @param s2 Second string.
/// @return true when both strings have the same length and every pair of
///         characters matches after lower-casing; false otherwise.
bool is_ci_equal(const std::string& s1, const std::string& s2)
{
    if (s1.size() != s2.size()) {
        return false;
    }
    for (size_t i = 0; i < s1.size(); i++) {
        // std::tolower requires a value representable as unsigned char (or EOF),
        // so convert first instead of passing a possibly negative plain char.
        const int lhs = std::tolower(static_cast<unsigned char>(s1.at(i)));
        const int rhs = std::tolower(static_cast<unsigned char>(s2.at(i)));
        // Both sides are lower-cased before comparing.
        if (lhs != rhs) {
            return false;
        }
    }
    return true;
}
std::optional<NodeDocType> Parser::parse_doc_type()
{
if (!peek().has_value()) {
return {};
}
Token& ahead1 = peek().value().get();
if (ahead1.type != TokenType::lt) {
return {};
}
if (!peek(2).has_value()) {
return {};
}
Token& ahead2 = peek(2).value().get();
if (ahead2.type != TokenType::ident || !ahead2.value.has_value() || !is_ci_equal(ahead2.value.value(), "doctype")) {
return {};
}
consume();
consume();
Token& ahead3 = peek(3).value().get();
if (ahead3.type != TokenType::ident) {
std::cerr << "[ERROR] Expected identifier" << std::endl;
exit(EXIT_FAILURE);
}
NodeDocType doc_type;
doc_type.type = consume().value.value();
}
/// Checks whether the token `ahead` positions past the cursor exists and has the given type.
/// @return false when there is no such token or its type differs.
bool Parser::peek_is(size_t ahead, TokenType type)
{
    const auto candidate = peek(ahead);
    return candidate.has_value() && candidate->get().type == type;
}
/// Like the two-argument peek_is, but additionally requires the token to carry a value.
/// @return true only when the token exists, has type `type`, and its `value` is set.
bool Parser::peek_is_with_val(size_t ahead, TokenType type)
{
    // Short-circuit keeps the second peek from running unless the type check passed.
    return peek_is(ahead, type) && peek(ahead)->get().value.has_value();
}
/// Checks whether the token `ahead` positions past the cursor has the given type
/// and a value equal to `val` under the requested comparison mode.
/// @param ahead Offset from the current index.
/// @param type  Required token type.
/// @param val   Text the token's value is compared against.
/// @param cmp   Whether the comparison is case sensitive or case insensitive.
/// @return true on a match; false when the token is missing, has another type,
///         carries no value, or the value differs.
bool Parser::peek_is(size_t ahead, TokenType type, const std::string& val, Parser::StrCmp cmp)
{
    if (!peek_is_with_val(ahead, type)) {
        return false;
    }
    const std::string& actual = peek(ahead).value().get().value.value();
    switch (cmp) {
    case StrCmp::case_sensitive:
        return actual == val;
    case StrCmp::case_insensitive:
        return is_ci_equal(actual, val);
    }
    // Reached only for a StrCmp value outside the enumerators; returning here
    // avoids flowing off the end of a non-void function (undefined behaviour).
    return false;
}
} }

View File

@ -1,7 +1,9 @@
#pragma once #pragma once
#include <memory>
#include <optional> #include <optional>
#include <string> #include <string>
#include <variant>
#include <vector> #include <vector>
#include <utf8.h> #include <utf8.h>
@ -37,12 +39,6 @@ struct Token {
std::string to_string(); std::string to_string();
}; };
enum class NodeType {
};
struct Node { };
class Tokenizer { class Tokenizer {
public: public:
explicit Tokenizer(std::string source); explicit Tokenizer(std::string source);
@ -53,4 +49,62 @@ private:
std::string m_source; std::string m_source;
}; };
// AST node for a doctype declaration (the <doctype> grammar rule).
struct NodeDocType {
// Text of the identifier that names the doctype; presumably "html" for HTML documents.
std::string type;
};
// AST node for a single attribute of an element, stored as a key/value pair of strings.
struct NodeAttr {
// Attribute name.
std::string key;
// Attribute value.
std::string val;
};
// AST node for inner content of an element that is not itself an element
// (the <inner> alternative in the grammar); presumably raw text.
struct NodeInner {
// The content as a string.
std::string value;
};
// Forward declaration: NodeElemReg refers to NodeElem before NodeElem is defined.
struct NodeElem;
// AST node for a regular element: an opening tag with attributes, a sequence of
// children, and (per the grammar) a closing tag.
struct NodeElemReg {
// Tag name of the element.
std::string tag;
// Attributes listed on the element.
std::vector<NodeAttr> attributes;
// Children in order; each child is either a nested element or inner content.
std::vector<std::variant<NodeElem, NodeInner>> inner;
};
// AST node for an element that has a tag and attributes but no children
// (judging by the name, a self-closing element).
struct NodeElemSelfClose {
// Tag name of the element.
std::string tag;
// Attributes listed on the element.
std::vector<NodeAttr> attributes;
};
// AST node for any element: holds exactly one of the two element forms.
struct NodeElem {
// Either a regular element (with children) or a self-closing one.
std::variant<NodeElemReg, NodeElemSelfClose> var;
};
// Root AST node for a whole document: an optional doctype followed by elements.
struct NodeDoc {
// The doctype declaration, if one is present.
std::optional<NodeDocType> doc_type;
// Elements of the document, in order.
std::vector<NodeElem> children;
};
// Turns a list of tokens into the Node* AST structures.
// Keeps the token list and a cursor (m_index) into it.
class Parser {
public:
// Takes ownership of the token list; parsing starts at the first token.
explicit Parser(std::vector<Token> tokens);
// Parses the token list into a document node.
NodeDoc parse();
// Tries to parse a doctype declaration at the cursor; empty when none is present.
std::optional<NodeDocType> parse_doc_type();
private:
// Returns the token `ahead` positions past the cursor, or empty when out of range.
// Does not move the cursor.
std::optional<std::reference_wrapper<Token>> peek(size_t ahead = 0);
// Selects how token values are compared against an expected string.
enum class StrCmp { case_sensitive, case_insensitive };
// True when the peeked token exists and has the given type.
bool peek_is(size_t ahead, TokenType type);
// True when the peeked token exists, has the given type, and carries a value.
bool peek_is_with_val(size_t ahead, TokenType type);
// True when the peeked token has the given type and a value equal to `val` under `cmp`.
bool peek_is(size_t ahead, TokenType type, const std::string& val, StrCmp cmp);
// Returns the token at the cursor and advances the cursor by one.
Token& consume();
// Tokens being parsed.
std::vector<Token> m_tokens;
// Index of the next token to be consumed.
size_t m_index;
};
} }