Work on parser
This commit is contained in:
parent
4751f436fc
commit
8d13ea0fb8
@ -5,7 +5,7 @@ Defined Tokens
|
||||
|
||||
---
|
||||
|
||||
<doc> ::= [<doctype>] <elem>
|
||||
<doc> ::= [<doctype>] <elem>*
|
||||
<doctype> ::= "<" "!" "doctype" "html" ">"
|
||||
<elem> ::=
|
||||
| "<" <ident> <attr>* ">" (<elem> | <inner>)* "<" "/" <ident> ">"
|
||||
|
@ -182,4 +182,94 @@ std::string Token::to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a parser over an already-produced token list. The vector is taken by
// value and moved into the parser, and reading starts at the first token.
Parser::Parser(std::vector<Token> tokens)
    : m_tokens(std::move(tokens))
    , m_index(0)
{
}
|
||||
|
||||
// Entry point for parsing a whole document. Currently a stub: it reads no
// tokens and hands back an empty NodeDoc.
NodeDoc Parser::parse()
{
    NodeDoc document {};
    return document;
}
|
||||
|
||||
std::optional<std::reference_wrapper<Token>> Parser::peek(size_t ahead)
|
||||
{
|
||||
if (m_index + ahead >= m_tokens.size()) {
|
||||
return {};
|
||||
}
|
||||
return m_tokens.at(m_index + ahead);
|
||||
}
|
||||
// Returns the token at the current read position and advances past it.
// The position is advanced before the bounds-checked lookup, so the index
// still moves forward when the lookup throws std::out_of_range.
Token& Parser::consume()
{
    const size_t current = m_index++;
    return m_tokens.at(current);
}
|
||||
|
||||
// Compares two strings for equality while ignoring letter case.
//
// Returns true when both strings have the same length and every pair of
// characters at the same position is equal after lower-casing with
// std::tolower; returns false otherwise. Two empty strings compare equal.
bool is_ci_equal(const std::string& s1, const std::string& s2)
{
    if (s1.size() != s2.size()) {
        return false;
    }
    for (size_t i = 0; i < s1.size(); i++) {
        // std::tolower requires a value representable as unsigned char (or
        // EOF), so convert first to stay well-defined for bytes >= 0x80.
        const int lhs = std::tolower(static_cast<unsigned char>(s1[i]));
        const int rhs = std::tolower(static_cast<unsigned char>(s2[i]));
        // Both sides are lower-cased before comparing.
        if (lhs != rhs) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
std::optional<NodeDocType> Parser::parse_doc_type()
|
||||
{
|
||||
if (!peek().has_value()) {
|
||||
return {};
|
||||
}
|
||||
Token& ahead1 = peek().value().get();
|
||||
if (ahead1.type != TokenType::lt) {
|
||||
return {};
|
||||
}
|
||||
if (!peek(2).has_value()) {
|
||||
return {};
|
||||
}
|
||||
Token& ahead2 = peek(2).value().get();
|
||||
if (ahead2.type != TokenType::ident || !ahead2.value.has_value() || !is_ci_equal(ahead2.value.value(), "doctype")) {
|
||||
return {};
|
||||
}
|
||||
consume();
|
||||
consume();
|
||||
Token& ahead3 = peek(3).value().get();
|
||||
if (ahead3.type != TokenType::ident) {
|
||||
std::cerr << "[ERROR] Expected identifier" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NodeDocType doc_type;
|
||||
doc_type.type = consume().value.value();
|
||||
}
|
||||
bool Parser::peek_is(size_t ahead, TokenType type)
|
||||
{
|
||||
if (!peek(ahead).has_value()) {
|
||||
return {};
|
||||
}
|
||||
return peek(ahead).value().get().type == type;
|
||||
}
|
||||
|
||||
bool Parser::peek_is_with_val(size_t ahead, TokenType type)
|
||||
{
|
||||
if (!peek_is(ahead, type)) {
|
||||
return false;
|
||||
}
|
||||
return peek(ahead).value().get().value.has_value();
|
||||
}
|
||||
bool Parser::peek_is(size_t ahead, TokenType type, const std::string& val, Parser::StrCmp cmp)
|
||||
{
|
||||
if (!peek_is_with_val(ahead, type)) {
|
||||
return false;
|
||||
}
|
||||
switch (cmp) {
|
||||
case StrCmp::case_sensitive:
|
||||
return peek(ahead).value().get().value.value() == val;
|
||||
case StrCmp::case_insensitive:
|
||||
return is_ci_equal(peek(ahead).value().get().value.value(), val);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include <utf8.h>
|
||||
@ -37,12 +39,6 @@ struct Token {
|
||||
std::string to_string();
|
||||
};
|
||||
|
||||
// Placeholder enumeration: no enumerators are defined yet.
enum class NodeType { };
|
||||
|
||||
// Placeholder node type: carries no members.
struct Node {
};
|
||||
|
||||
class Tokenizer {
|
||||
public:
|
||||
explicit Tokenizer(std::string source);
|
||||
@ -53,4 +49,62 @@ private:
|
||||
std::string m_source;
|
||||
};
|
||||
|
||||
// AST node for a doctype declaration.
struct NodeDocType {
    // Text of the identifier that follows the "doctype" keyword.
    std::string type;
};
|
||||
|
||||
// AST node for a single attribute, stored as a key/value pair of strings.
struct NodeAttr {
    // Attribute name.
    std::string key;
    // Attribute value.
    std::string val;
};
|
||||
|
||||
// AST node for non-element content that appears inside an element.
struct NodeInner {
    // The content, stored as a string.
    std::string value;
};
|
||||
|
||||
struct NodeElem;
|
||||
|
||||
struct NodeElemReg {
|
||||
std::string tag;
|
||||
std::vector<NodeAttr> attributes;
|
||||
std::vector<std::variant<NodeElem, NodeInner>> inner;
|
||||
};
|
||||
|
||||
struct NodeElemSelfClose {
|
||||
std::string tag;
|
||||
std::vector<NodeAttr> attributes;
|
||||
};
|
||||
|
||||
struct NodeElem {
|
||||
std::variant<NodeElemReg, NodeElemSelfClose> var;
|
||||
};
|
||||
|
||||
struct NodeDoc {
|
||||
std::optional<NodeDocType> doc_type;
|
||||
std::vector<NodeElem> children;
|
||||
};
|
||||
|
||||
class Parser {
|
||||
public:
|
||||
explicit Parser(std::vector<Token> tokens);
|
||||
|
||||
NodeDoc parse();
|
||||
|
||||
std::optional<NodeDocType> parse_doc_type();
|
||||
|
||||
private:
|
||||
std::optional<std::reference_wrapper<Token>> peek(size_t ahead = 0);
|
||||
|
||||
enum class StrCmp { case_sensitive, case_insensitive };
|
||||
|
||||
bool peek_is(size_t ahead, TokenType type);
|
||||
bool peek_is_with_val(size_t ahead, TokenType type);
|
||||
bool peek_is(size_t ahead, TokenType type, const std::string& val, StrCmp cmp);
|
||||
|
||||
Token& consume();
|
||||
|
||||
std::vector<Token> m_tokens;
|
||||
size_t m_index;
|
||||
};
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user