From 474d4305f7cf84af7f0a8bfca15817f1cbdd7845 Mon Sep 17 00:00:00 2001
From: orosmatthew
Date: Thu, 28 Sep 2023 11:15:49 -0400
Subject: [PATCH] Initial tokenization

---
Review notes:
- Tokenizer::tokenize() now returns the tokens it collected instead of an
  empty vector.
- Tokenizer::peek() defaults to ahead = 0 so that it inspects the same
  character that consume() removes.
- Angle-bracketed text (#include targets, template arguments) was missing
  from the extracted patch and has been filled in from usage. The element
  type of Token::value and the include names in main.cpp are assumptions
  and should be confirmed.
- main.cpp widens each byte of the fetched page to char32_t rather than
  decoding UTF-8; that is left unchanged here.

 CMakeLists.txt     |  3 ++-
 src/html_parse.cpp | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/html_parse.hpp | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.cpp       | 17 ++++++++++-------
 4 files changed, 124 insertions(+), 8 deletions(-)
 create mode 100644 src/html_parse.cpp
 create mode 100644 src/html_parse.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f25f8f7..e81f568 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,8 @@ add_subdirectory(external/raylib-4.5.0)
 
 add_subdirectory(external/raylib-cpp-4.5.1)
 
-add_executable(browser src/main.cpp src/fetch.cpp)
+add_executable(browser src/main.cpp src/fetch.cpp
+        src/html_parse.cpp)
 
 target_include_directories(browser PRIVATE external/raygui-4.0/src)
 
diff --git a/src/html_parse.cpp b/src/html_parse.cpp
new file mode 100644
index 0000000..3da909a
--- /dev/null
+++ b/src/html_parse.cpp
@@ -0,0 +1,57 @@
+#include "html_parse.hpp"
+
+#include <iostream>
+#include <utility>
+
+namespace html {
+
+// Takes ownership of the source text; scanning starts at index 0.
+Tokenizer::Tokenizer(std::u32string source)
+    : m_source(std::move(source))
+    , m_index(0)
+{
+}
+
+// Scans from the current position to the end of the source and returns the
+// tokens found. Only '<' and '>' are recognized so far; any other character
+// is reported on stdout by its numeric value and skipped.
+std::vector<Token> Tokenizer::tokenize()
+{
+    std::vector<Token> tokens;
+    while (peek().has_value()) {
+        if (peek().value() == '<') {
+            consume();
+            tokens.push_back({ .type = TokenType::lt });
+        }
+        else if (peek().value() == '>') {
+            consume();
+            tokens.push_back({ .type = TokenType::gt });
+        }
+        else {
+            std::cout << "[WARN] Unexpected token: " << std::to_string(peek().value()) << std::endl;
+            consume();
+        }
+    }
+
+    // Return the collected tokens; `return {}` discarded all of them.
+    return tokens;
+}
+
+// Returns the character `ahead` positions past the current one without
+// consuming it, or an empty optional when that position is past the end.
+std::optional<char32_t> Tokenizer::peek(size_t ahead)
+{
+    if (m_index + ahead >= m_source.size()) {
+        return {};
+    }
+    return m_source.at(m_index + ahead);
+}
+
+// Returns the current character and advances past it. Call only after peek()
+// has reported a value: at end of input std::u32string::at throws.
+char32_t Tokenizer::consume()
+{
+    return m_source.at(m_index++);
+}
+
+}
diff --git a/src/html_parse.hpp b/src/html_parse.hpp
new file mode 100644
index 0000000..c011c84
--- /dev/null
+++ b/src/html_parse.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <optional>
+#include <string>
+#include <vector>
+
+namespace html {
+
+// Kinds of token. Tokenizer::tokenize() currently produces only lt and gt.
+enum class TokenType {
+    lt,
+    gt,
+    ident,
+    fslash,
+    quote,
+};
+
+// One token: its kind plus an optional payload.
+struct Token {
+    TokenType type;
+    // NOTE(review): the element type was lost when this patch was extracted;
+    // std::u32string is assumed because the source text is a std::u32string.
+    std::optional<std::u32string> value = {};
+};
+
+// Node kinds; no enumerators are defined yet.
+enum class NodeType {
+
+};
+
+// Node type; no members are defined yet.
+struct Node { };
+
+// Turns a std::u32string into a sequence of Token values.
+class Tokenizer {
+public:
+    explicit Tokenizer(std::u32string source);
+
+    // Tokenizes from the current position to the end of the source.
+    std::vector<Token> tokenize();
+
+private:
+    // Looks `ahead` characters past the current position without consuming.
+    // The default of 0 is the character consume() would return next; with a
+    // default of 1 tokenize() tested one character but consumed another.
+    std::optional<char32_t> peek(size_t ahead = 0);
+
+    // Returns the current character and advances past it.
+    char32_t consume();
+
+    std::u32string m_source;
+    size_t m_index;
+};
+
+}
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 30c38b0..c91984a 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,8 +1,9 @@
 #include "fetch.hpp"
 
-#include <iostream>
 #include <optional>
 
+#include "html_parse.hpp"
+
 #define RAYGUI_IMPLEMENTATION
 #include <raygui.h>
 #include <raylib-cpp.hpp>
@@ -11,7 +12,7 @@ int main()
 {
     init_curl();
 
-    std::optional<std::string> page_data;
+    std::optional<std::string> page_data = fetch_url("https://example.com");
 
     SetConfigFlags(ConfigFlags::FLAG_WINDOW_RESIZABLE | ConfigFlags::FLAG_MSAA_4X_HINT | ConfigFlags
                    ::FLAG_VSYNC_HINT);
@@ -24,11 +25,16 @@ int main()
     SetTargetFPS(60);
 
     bool is_editing_url = false;
-    std::string url_input;
+    std::string url_input = "https://example.com";
     url_input.reserve(1024);
-    raylib::Vector2 page_content_size;
     float scroll_pos = 0.0f;
 
+    if (page_data.has_value()) {
+        std::u32string page_data_unicode(page_data.value().begin(), page_data.value().end());
+        html::Tokenizer tokenizer(page_data_unicode);
+        tokenizer.tokenize();
+    }
+
     while (!window.ShouldClose()) {
         BeginDrawing();
         window.ClearBackground(raylib::Color::White());
@@ -43,9 +49,6 @@ int main()
 
         if (GuiButton({ .x = (float)GetScreenWidth() - 50, .y = 0, .width = 50, .height = 24 }, "Go")) {
             page_data = fetch_url(url_input);
-            if (page_data.has_value()) {
-                page_content_size = MeasureTextEx(sans_font, page_data.value().data(), 24, 1.0f);
-            }
         }
 
         scroll_pos += GetMouseWheelMove();