diff --git a/docs/html_grammar.txt b/docs/html_grammar.txt
index 5e2f889..412109a 100644
--- a/docs/html_grammar.txt
+++ b/docs/html_grammar.txt
@@ -5,7 +5,7 @@ Defined Tokens
---
- ::= []
+ ::= [] *
::= "<" "!" "doctype" "html" ">"
::=
| "<" * ">" ( | )* "<" "/" ">"
diff --git a/src/html_parse.cpp b/src/html_parse.cpp
index ea0abf1..f3c942f 100644
--- a/src/html_parse.cpp
+++ b/src/html_parse.cpp
@@ -182,4 +182,94 @@ std::string Token::to_string()
}
}
+// Builds a parser that takes ownership of `tokens`; the read cursor starts at
+// the first token.
+Parser::Parser(std::vector tokens)
+    : m_tokens(std::move(tokens))
+    , m_index { 0 }
+{
+}
+
+// Entry point for turning the token list into a document node.
+// Currently a stub: it reads no tokens and hands back an empty NodeDoc.
+NodeDoc Parser::parse()
+{
+    NodeDoc empty_doc {};
+    return empty_doc;
+}
+
+// Looks at the token `ahead` positions past the cursor without moving the
+// cursor. Yields an empty optional when that position is beyond the last token.
+std::optional> Parser::peek(size_t ahead)
+{
+    const size_t pos = m_index + ahead;
+    if (pos < m_tokens.size()) {
+        return m_tokens.at(pos);
+    }
+    return {};
+}
+// Returns the token under the cursor and moves the cursor one step forward.
+// Reading past the end surfaces as the exception thrown by vector::at().
+Token& Parser::consume()
+{
+    // Bump the cursor first and index with the old position, so the cursor is
+    // advanced even when at() throws.
+    const size_t current = m_index++;
+    return m_tokens.at(current);
+}
+
+// Compares two strings for equality while ignoring letter case.
+//
+// s1, s2: the strings to compare.
+// Returns true when both strings have the same length and every pair of
+// characters matches after both are passed through std::tolower; false
+// otherwise. Two empty strings compare equal.
+bool is_ci_equal(const std::string& s1, const std::string& s2)
+{
+    if (s1.size() != s2.size()) {
+        return false;
+    }
+    for (size_t i = 0; i < s1.size(); i++) {
+        // Lower-case BOTH sides before comparing. std::tolower needs a value
+        // representable as unsigned char, hence the casts.
+        const int lhs = std::tolower(static_cast<unsigned char>(s1[i]));
+        const int rhs = std::tolower(static_cast<unsigned char>(s2[i]));
+        if (lhs != rhs) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Tries to parse a doctype declaration starting at the cursor.
+//
+// The declaration is recognised when the token at the cursor is "<" and the
+// token two positions on is the identifier "doctype" (compared without regard
+// to case). On a mismatch nothing is consumed and an empty optional is
+// returned, so the caller can try another production.
+//
+// Once recognised, the three leading tokens are consumed and the next token
+// must be an identifier carrying a value; its text becomes NodeDocType::type.
+// If that identifier is missing the error is reported on stderr and the
+// process exits with EXIT_FAILURE.
+std::optional Parser::parse_doc_type()
+{
+    if (!peek_is(0, TokenType::lt)) {
+        return {};
+    }
+    if (!peek_is(2, TokenType::ident, "doctype", StrCmp::case_insensitive)) {
+        return {};
+    }
+    consume(); // "<"
+    consume(); // token between "<" and "doctype"; NOTE(review): its type is not checked here
+    consume(); // the "doctype" keyword itself
+
+    // The cursor now sits where the doctype name belongs. Checking before
+    // consuming avoids dereferencing an empty optional at end of input.
+    if (!peek_is_with_val(0, TokenType::ident)) {
+        std::cerr << "[ERROR] Expected identifier" << std::endl;
+        exit(EXIT_FAILURE);
+    }
+    NodeDocType doc_type;
+    doc_type.type = consume().value.value();
+    // NOTE(review): the closing ">" of the declaration is left in the token
+    // stream; consume it here once its TokenType is confirmed.
+    return doc_type;
+}
+// Reports whether a token exists `ahead` positions past the cursor and has the
+// given type. A position beyond the last token yields false.
+bool Parser::peek_is(size_t ahead, TokenType type)
+{
+    const auto candidate = peek(ahead);
+    return candidate.has_value() && candidate->get().type == type;
+}
+
+// Like the two-argument peek_is(), but additionally requires the token to
+// carry a value.
+bool Parser::peek_is_with_val(size_t ahead, TokenType type)
+{
+    const bool type_matches = peek_is(ahead, type);
+    // Only touch the token once peek_is() has confirmed that it exists.
+    return type_matches ? peek(ahead).value().get().value.has_value() : false;
+}
+// Reports whether the token `ahead` positions past the cursor has the given
+// type AND carries a value equal to `val`.
+//
+// ahead: offset from the cursor (0 is the token under the cursor).
+// type:  required token type.
+// val:   text the token's value is compared against.
+// cmp:   StrCmp::case_sensitive compares with ==, StrCmp::case_insensitive
+//        compares with is_ci_equal().
+// Returns false when there is no such token, its type differs, it has no
+// value, or the value does not match.
+bool Parser::peek_is(size_t ahead, TokenType type, const std::string& val, Parser::StrCmp cmp)
+{
+    if (!peek_is_with_val(ahead, type)) {
+        return false;
+    }
+    // peek_is_with_val() has established that the token and its value exist.
+    const std::string& actual = peek(ahead).value().get().value.value();
+    switch (cmp) {
+    case StrCmp::case_sensitive:
+        return actual == val;
+    case StrCmp::case_insensitive:
+        return is_ci_equal(actual, val);
+    }
+    // Reached only if `cmp` holds a value outside the enumerators. Without this
+    // return the function would run off its end, which is undefined behaviour.
+    // No `default:` label, so the compiler can still warn about unhandled
+    // enumerators added later.
+    return false;
+}
+
}
\ No newline at end of file
diff --git a/src/html_parse.hpp b/src/html_parse.hpp
index d8e1915..be9b838 100644
--- a/src/html_parse.hpp
+++ b/src/html_parse.hpp
@@ -1,7 +1,9 @@
#pragma once
+#include
#include
#include
+#include
#include
#include
@@ -37,12 +39,6 @@ struct Token {
std::string to_string();
};
-enum class NodeType {
-
-};
-
-struct Node { };
-
class Tokenizer {
public:
explicit Tokenizer(std::string source);
@@ -53,4 +49,62 @@ private:
std::string m_source;
};
-}
\ No newline at end of file
+// Doctype declaration node. Parser::parse_doc_type() fills `type` from the
+// value of an identifier token.
+struct NodeDocType {
+ std::string type;
+};
+
+// A pair of strings, `key` and `val`.
+// NOTE(review): presumably one name/value attribute of an element; nothing in
+// this change populates it yet — confirm once attribute parsing exists.
+struct NodeAttr {
+ std::string key;
+ std::string val;
+};
+
+// Node holding a single string.
+// NOTE(review): presumably the text content found inside an element — confirm
+// against the grammar in docs/html_grammar.txt.
+struct NodeInner {
+ std::string value;
+};
+
+// Declared early so NodeElem can be named before its definition further down.
+struct NodeElem;
+
+// Element node with a tag name, a list of attributes and a list of inner items.
+// NOTE(review): looks like the element form with separate opening and closing
+// tags described in docs/html_grammar.txt — confirm.
+struct NodeElemReg {
+ std::string tag;
+ std::vector attributes;
+ std::vector> inner;
+};
+
+// Element node with a tag name and a list of attributes, but no inner list.
+// NOTE(review): presumably a self-closing element — confirm.
+struct NodeElemSelfClose {
+ std::string tag;
+ std::vector attributes;
+};
+
+// Element node wrapper: `var` is a std::variant holding the concrete form.
+// NOTE(review): presumably either NodeElemReg or NodeElemSelfClose — confirm.
+struct NodeElem {
+ std::variant var;
+};
+
+// Document node; this is the type returned by Parser::parse().
+struct NodeDoc {
+ // Optional: may be empty.
+ std::optional doc_type;
+ // List of child nodes.
+ std::vector children;
+};
+
+// Parser over a token list. It keeps the tokens plus a cursor (m_index) and
+// offers look-ahead (peek / peek_is*) and consumption (consume) on top of them.
+class Parser {
+public:
+ // Takes ownership of `tokens`; the cursor starts at index 0.
+ explicit Parser(std::vector tokens);
+
+ // Parses the tokens into a NodeDoc.
+ NodeDoc parse();
+
+ // Attempts to parse a doctype declaration at the cursor; yields an empty
+ // optional when the leading tokens do not match.
+ std::optional parse_doc_type();
+
+private:
+ // Token `ahead` positions past the cursor (default: the one under the
+ // cursor), or an empty optional when that position is past the end.
+ // Does not move the cursor.
+ std::optional> peek(size_t ahead = 0);
+
+ // How the four-argument peek_is() compares a token's value with `val`.
+ enum class StrCmp { case_sensitive, case_insensitive };
+
+ // True when the token `ahead` positions on exists and has type `type`.
+ bool peek_is(size_t ahead, TokenType type);
+ // As above, and the token must also carry a value.
+ bool peek_is_with_val(size_t ahead, TokenType type);
+ // As above, and that value must equal `val` under the comparison `cmp`.
+ bool peek_is(size_t ahead, TokenType type, const std::string& val, StrCmp cmp);
+
+ // Returns the token under the cursor and advances the cursor by one.
+ Token& consume();
+
+ // Tokens being parsed.
+ std::vector m_tokens;
+ // Cursor: index into m_tokens of the next token to read.
+ size_t m_index;
+};
+
+}