简易编译器C++

CMakeLists.txt

cpp 复制代码
# Build script for the toy compiler: one executable built from main.cpp.
cmake_minimum_required(VERSION 3.20)

project(test)

# The sources use C++20 designated initializers, so C++20 is mandatory.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# The headers are listed next to main.cpp as sources of the target.
add_executable(test
    ${PROJECT_SOURCE_DIR}/main.cpp
    ${PROJECT_SOURCE_DIR}/parser.hpp
    ${PROJECT_SOURCE_DIR}/tokenization.hpp
    ${PROJECT_SOURCE_DIR}/generation.hpp
)

target_compile_features(test PRIVATE cxx_std_20)

main.cpp

cpp 复制代码
#include <fstream>
#include <sstream>

#include "tokenization.hpp"
#include "parser.hpp"
#include "generation.hpp"
#include "arena.hpp"

/// Compiler driver.
///
/// Reads the source file named by the single command-line argument, runs it
/// through the tokenizer, parser and code generator, writes the assembly to
/// ../out.asm, then invokes `nasm` and `ld` to produce the `out` executable.
///
/// @param argc Must be 2 (program name plus the source path).
/// @param argv argv[1] is the path of the source file to compile.
/// @return EXIT_SUCCESS when every stage succeeded, EXIT_FAILURE otherwise.
int main(int argc, char* argv[])
{
    if (argc != 2) {
        std::cerr << "Incorrect usage. Correct usage ..." << std::endl;
        std::cerr << "./test args" << std::endl;
        return EXIT_FAILURE;
    }

    std::string contexts {};
    {
        std::fstream input(argv[1], std::ios::in);
        // Without this check a missing/unreadable file would be silently
        // compiled as an empty program.
        if (!input.is_open()) {
            std::cerr << "Unable to open input file: " << argv[1] << std::endl;
            return EXIT_FAILURE;
        }
        std::stringstream stream {};
        stream << input.rdbuf();
        contexts = stream.str();
    }

    Tokenizer tokenizer(std::move(contexts));
    std::vector<Token> tokens = tokenizer.tokenize();

    Parser parser(std::move(tokens));
    std::optional<NodeProg> prog = parser.parse_prog();

    if (!prog.has_value()) {
        std::cerr << "Invaild program" << std::endl;
        return EXIT_FAILURE;
    }
    Generator generation(prog.value());

    {
        std::fstream file("../out.asm", std::ios::out);
        if (!file.is_open()) {
            std::cerr << "Unable to open output file: ../out.asm" << std::endl;
            return EXIT_FAILURE;
        }
        file << generation.gen_prog();
    } // the stream is closed (and flushed) here, before the assembler runs

    // Stop at the first external tool that fails instead of linking stale or
    // missing object files.
    if (system("nasm -felf64 ../out.asm") != 0) {
        std::cerr << "Assembling ../out.asm failed" << std::endl;
        return EXIT_FAILURE;
    }
    if (system("ld ../out.o -o out ") != 0) {
        std::cerr << "Linking ../out.o failed" << std::endl;
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}

tokenization.hpp

cpp 复制代码
#pragma once
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Kinds of tokens the Tokenizer can produce.
enum class TokenType {
    exit,        // keyword "exit"
    int_lit,     // run of decimal digits
    semi,        // ';'
    open_paren,  // '('
    close_paren, // ')'
    ident,       // identifier: a letter followed by letters/digits, not a keyword
    let,         // keyword "let"
    eq,          // '='
    plus,        // '+'
    star,        // '*'
    sub,         // '-'
    div,         // '/'
    open_curly,  // '{'
    close_curly, // '}'
    if_          // keyword "if"
};

/// Reports whether `type` is a binary arithmetic operator token.
///
/// Covers the same four operators that bin_prec() assigns a precedence to
/// (+, -, *, /), so the two helpers agree on what counts as an operator.
/// Declared `inline` because it is defined in a header.
///
/// @param type Token kind to classify.
/// @return true for plus, sub, star and div; false for every other kind.
inline bool is_bin_op(TokenType type)
{
    switch (type) {
        case TokenType::plus:
        case TokenType::sub:
        case TokenType::star:
        case TokenType::div:
            return true;
        default:
            return false;
    }
}

/// Returns the binding precedence of a binary operator token.
///
/// Higher values bind tighter: `+` and `-` have precedence 0, `*` and `/`
/// have precedence 1. Declared `inline` because it is defined in a header
/// and would otherwise be multiply defined once two translation units
/// include it.
///
/// @param type Token kind to look up.
/// @return The precedence, or an empty optional when `type` is not a binary
///         operator.
inline std::optional<int> bin_prec(TokenType type)
{
    switch (type) {
        case TokenType::sub:
        case TokenType::plus:
            return 0;
        case TokenType::div:
        case TokenType::star:
            return 1;
        default:
            return {};
    }
}

// A single lexical token produced by the Tokenizer.
struct Token {
    // Kind of the token.
    TokenType type;
    // Source text of the token; the Tokenizer fills it only for identifiers
    // and integer literals and leaves it empty for keywords and punctuation.
    std::optional<std::string> value {};
};

class Tokenizer 
{
public:
    inline explicit Tokenizer(const std::string& src) : m_src(std::move(src))
    {}

    inline std::vector<Token> tokenize()
    {
        std::vector<Token> tokens {};
        std::string buf {};

        while(peek().has_value()) {
            if (std::isalpha(peek().value())) {
                buf += consume();
                while (peek().has_value() && std::isalnum(peek().value())) {
                    buf += consume();
                }
                std::cout << "buf : " << buf << std::endl;
                if (buf == "exit") {
                    tokens.push_back({.type = TokenType::exit});
                    buf.clear();
                } else if (buf == "let") {
                    tokens.push_back({.type = TokenType::let});
                    buf.clear();
                } else if (buf == "if") {
                    tokens.push_back({.type = TokenType::if_});
                    buf.clear();
                } else {
                    tokens.push_back({.type = TokenType::ident, .value = buf});
                    buf.clear();
                } 
                
            } else if (std::isdigit(peek().value())) {
                buf += consume();
                while (peek().has_value() && std::isdigit(peek().value())) {
                    buf += consume();
                }
                tokens.push_back({.type = TokenType::int_lit, .value = buf});
                std::cout << buf << std::endl;
                buf.clear();
            } else if (peek().value() == '(') {
                consume();
                tokens.push_back({.type = TokenType::open_paren});
            } else if (peek().value() == ')') {
                consume();
                tokens.push_back({.type = TokenType::close_paren});
            } else if (std::isspace(peek().value())) {
                consume();
            } else if (peek().value() == '{') {
                consume();
                tokens.push_back({.type = TokenType::open_curly});
            } else if (peek().value() == '}') {
                consume();
                tokens.push_back({.type = TokenType::close_curly});
            } else if (peek().value() == '=') {
                consume();
                tokens.push_back({.type = TokenType::eq});
            } else if (peek().value() == '*') {
                consume();
                tokens.push_back({.type = TokenType::star});
            } else if (peek().value() == '+') {
                consume();
                tokens.push_back({.type = TokenType::plus});
            } else if (peek().value() == '/') {
                consume();
                tokens.push_back({.type = TokenType::div});
            } else if (peek().value() == '-') {
                consume();
                tokens.push_back({.type = TokenType::sub});
            } else if (peek().value() == ';') {
                consume();
                tokens.push_back({.type = TokenType::semi});
            } else {
                std::cerr << "You messed up!" << std::endl;
                exit(EXIT_FAILURE);
            }
        }
        m_index = 0;
        return tokens;
    }
private:

    [[nodiscard]] inline std::optional<char> peek(int offset = 0) const
    {
        if (m_index + offset >= m_src.length()) {
            return {};
        } else {
            return m_src[m_index + offset];
        }
    }

    inline char consume()
    {
        return m_src[m_index++];
    }

    std::string m_src;
    size_t m_index = 0;


};

parser.hpp

cpp 复制代码
#pragma once

#include <variant>
#include "tokenization.hpp"
#include "arena.hpp"

// Term node: an integer literal.
struct NodeTermIntLit {
    // The int_lit token; its value holds the digits as text.
    Token int_lit;
};

// Term node: a reference to a variable by name.
struct NodeTermIdent {
    // The ident token; its value holds the variable name.
    Token ident;
};

// Forward declaration: terms and expressions refer to each other.
struct NodeExpr;

// Term node: a parenthesised sub-expression.
struct NodeTermParen {
    // The expression between '(' and ')'.
    NodeExpr* expr;
};

// Binary expression node for `lhs + rhs`.
struct NodeBinExprAdd {
    NodeExpr* lhs;
    NodeExpr* rhs;
};

// Binary expression node for `lhs * rhs`.
struct NodeBinExprMulti {
    NodeExpr* lhs;
    NodeExpr* rhs;
};

// Binary expression node for `lhs - rhs`.
struct NodeBinExprSub {
    NodeExpr* lhs;
    NodeExpr* rhs;
};

// Binary expression node for `lhs / rhs`.
struct NodeBinExprDiv {
    NodeExpr* lhs;
    NodeExpr* rhs;
};

// A binary expression: exactly one of the four operator nodes above.
struct NodeBinExpr {
    std::variant<NodeBinExprAdd*, NodeBinExprMulti*, NodeBinExprDiv*, NodeBinExprSub*> var;
};

// A term: an integer literal, an identifier or a parenthesised expression.
struct NodeTerm {
    std::variant<NodeTermIntLit*, NodeTermIdent*, NodeTermParen*> var;
};

// An expression: either a single term or a binary expression.
struct NodeExpr {
    std::variant<NodeTerm*, NodeBinExpr*> var;
};

// Statement node for `exit(expr);`.
struct NodeStmtExit {
    // Expression whose value becomes the process exit code.
    NodeExpr* expr;
};

// Statement node for `let ident = expr;`.
struct NodeStmtLet {
    // The ident token naming the new variable.
    Token ident;
    // Initialiser expression.
    NodeExpr* expr;
};

// Forward declaration: scopes contain statements and statements may be scopes.
struct NodeStmt;

// A `{ ... }` block of statements.
struct NodeScope {
    std::vector<NodeStmt*> stmts;
};

// Statement node for `if (expr) { ... }`.
struct NodeStmtIf {
    // Condition; the generator skips the scope when it evaluates to zero.
    NodeExpr* expr;
    // Body executed when the condition is non-zero.
    NodeScope* scope;
};

// A statement: exit, let, a nested scope, or if.
struct NodeStmt {
    std::variant<NodeStmtExit*, NodeStmtLet*, NodeScope*, NodeStmtIf*> var;
};

// Root of the AST: the top-level statements in source order.
struct NodeProg {
    std::vector<NodeStmt*> stmts;
};

class Parser {
public:

    inline explicit Parser(std::vector<Token> token)
        : m_token(std::move(token))
        , m_allocator(4 * 1024 * 1024)
    {

    }

    std::optional<NodeTerm*> parse_term()
    {
        if (auto int_lit = try_consume(TokenType::int_lit)) {
            std::cout << "value : " << int_lit.value().value.value() << std::endl;
            auto term_int_lit = m_allocator.alloc<NodeTermIntLit>();
            term_int_lit->int_lit = int_lit.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_int_lit;
            return term;

        } else if (auto ident = try_consume(TokenType::ident)) {
            auto term_ident = m_allocator.alloc<NodeTermIdent>();
            term_ident->ident = ident.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_ident;
            return term;
        } else if (auto  open_paren= try_consume(TokenType::open_paren)) {
            auto expr = parse_expr();
            if (!expr.has_value()) {
                std::cerr << "Expected expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expected ')'");
            auto term_paren = m_allocator.alloc<NodeTermParen>();
            term_paren->expr = expr.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_paren;
            return term;
        } else {
            return {};
        }
    }

    inline std::optional<NodeExpr*> parse_expr(int min_prec = 0)
    {
        std::optional<NodeTerm*> term_lhs = parse_term();
        if (!term_lhs.has_value()) {
            return {};
        }

        auto expr_lhs = m_allocator.alloc<NodeExpr>();
        expr_lhs->var = term_lhs.value();

        while (true) {
            std::optional<Token> curr_tok = peek();
            std::optional<int> prec;
            
            if (curr_tok.has_value()) {
                prec = bin_prec(curr_tok->type);
                if (!prec.has_value() || prec < min_prec) {
                    break;
                }
            } else {
                break;
            }
            Token op = consume();
            int next_min_prec = prec.value();
            auto expr_rhs = parse_expr(next_min_prec);
            if (!expr_rhs.has_value()) {
                std::cerr << "Unable to parse expression" << std::endl;
                exit(EXIT_FAILURE);
            }

            auto expr = m_allocator.alloc<NodeBinExpr>();
            auto expr_lhs2 = m_allocator.alloc<NodeExpr>();
            if (op.type == TokenType::plus) {
                auto add = m_allocator.alloc<NodeBinExprAdd>();
                expr_lhs2->var = expr_lhs->var;
                add->lhs = expr_lhs2;
                add->rhs = expr_rhs.value();
                expr->var = add;
            } else if (op.type == TokenType::star) {
                auto multi = m_allocator.alloc<NodeBinExprMulti>();
                expr_lhs2->var = expr_lhs->var;
                multi->lhs = expr_lhs2;
                multi->rhs = expr_rhs.value();
                expr->var = multi;
            } else if (op.type == TokenType::sub) {
                auto sub = m_allocator.alloc<NodeBinExprSub>();
                expr_lhs2->var = expr_lhs->var;
                sub->lhs = expr_lhs2;
                sub->rhs = expr_rhs.value();
                expr->var = sub;
            } else if (op.type == TokenType::div) {
                auto div = m_allocator.alloc<NodeBinExprDiv>();
                expr_lhs2->var = expr_lhs->var;
                div->lhs = expr_lhs2;
                div->rhs = expr_rhs.value();
                expr->var = div;
            }

            expr_lhs->var = expr;
        }
        return expr_lhs;
    }

    std::optional<NodeScope*> parse_scope()
    {
        if (!try_consume(TokenType::open_curly).has_value()) {
            return {};
        }
        auto scope = m_allocator.alloc<NodeScope>();
        while (auto stmt = parse_stmt()) {
            scope->stmts.push_back(stmt.value());
        }
        try_consume(TokenType::close_curly, "Expected '}'");
        return scope;
    }

    std::optional<NodeStmt*> parse_stmt()
    {
        if (peek().value().type == TokenType::exit && peek(1).has_value()
            && peek(1).value().type == TokenType::open_paren) {
            consume();
            consume();
            auto stmt_exit = m_allocator.alloc<NodeStmtExit>();
            if (auto expr_node = parse_expr()) {
                stmt_exit->expr = expr_node.value();
            } else {
                std::cerr << "Invaild Expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expection ')'");
            try_consume(TokenType::semi, "Expection ';'");
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_exit;
            return stmt;
        } else if (peek().has_value() && peek().value().type == TokenType::let
                  && peek(1).has_value() && peek(1).value().type == TokenType::ident
                  && peek(2).has_value() && peek(2).value().type == TokenType::eq) {
            consume();
            auto stmt_let = m_allocator.alloc<NodeStmtLet>();
            stmt_let->ident = consume(); 
            consume();
            if (auto expr = parse_expr()) {
                stmt_let->expr = expr.value();
            } else {
                std::cerr << "Invaild expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::semi, "Expection ';'");
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_let;
            return stmt;
        } else if (peek().has_value() && peek().value().type == TokenType::open_curly) {
            if (auto scope = parse_scope()) {
                auto stmt = m_allocator.alloc<NodeStmt>();
                stmt->var = scope.value();
                return stmt;
            } else {
                std::cerr << "Invaild scope" << std::endl;
                exit(EXIT_FAILURE);
            }
            
        } else if (auto if_ = try_consume(TokenType::if_)) {
            try_consume(TokenType::open_paren, "Expected '('");
            auto stmt_if = m_allocator.alloc<NodeStmtIf>();
            if (auto expr = parse_expr()) {
                stmt_if->expr = expr.value();
            } else {
                std::cerr << "Invaild expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expectede ')'");
            if (auto scope = parse_scope()) {
                stmt_if->scope = scope.value();
            } else {
                std::cerr << "Invaild scope" << std::endl;
                exit(EXIT_FAILURE);
            }
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_if;
            return stmt;
        } else {
            return {};
        }
    }

    std::optional<NodeProg> parse_prog()
    {
        NodeProg prog;

        while (peek().has_value()) {
            if (auto stmt = parse_stmt()) {
                prog.stmts.push_back(stmt.value());
            } else {
                std::cerr << "Invaild Statement" << std::endl;
                exit(EXIT_FAILURE);
            }
        }
        return prog;
    } 

private:

    [[nodiscard]] inline std::optional<Token> peek(int offset = 0) const
    {
        if (m_index + offset >= m_token.size()) {
            return {};
        } else {
            return m_token[m_index + offset];
        }
    }

    inline Token try_consume(TokenType type, const std::string& err_msg)
    {
        if (peek().has_value() && peek().value().type == type) {
            return consume();
        } else {
            std::cerr << err_msg << std::endl;
            exit(EXIT_FAILURE);
        }
    }

    inline std::optional<Token> try_consume(TokenType type)
    {
        if (peek().has_value() && peek().value().type == type) {
            return consume();
        } else {
            return {};
        }
    }

    inline Token consume()
    {
        return m_token[m_index++];
    }
    std::vector<Token> m_token;
    size_t m_index = 0;
    ArenaAllocator m_allocator; 
};

generation.hpp

cpp 复制代码
#pragma once
#include "parser.hpp"
#include <sstream>
#include <map>
#include <algorithm>
#include <cassert>

/// Emits x86-64 NASM assembly (Linux syscall convention) for a parsed program.
///
/// Code generation is stack based: every expression leaves its 8-byte result
/// on top of the machine stack, and `m_stack_size` mirrors the number of
/// pushed slots so variables can be addressed relative to `rsp`.
class Generator
{
public:
    /// Takes ownership of the program to generate code for.
    inline Generator(NodeProg prog)
        :m_prog(std::move(prog))
    {
    }

    /// Emits code that pushes the value of `term` onto the stack.
    /// Terminates the process when an identifier has not been declared.
    void gen_term (const NodeTerm* term)
    {
        struct TermVisitor {
            Generator& gen;
            void operator()(const NodeTermIntLit* term_int_lit) const
            {
                gen.m_output << "    mov rax, " << term_int_lit->int_lit.value.value() << "\n";
                gen.push("rax");
            }
            void operator()(const NodeTermIdent* term_ident) const
            {
                const std::string& name = term_ident->ident.value.value();
                const auto it = std::find_if(gen.m_vars.cbegin(), gen.m_vars.cend(),
                    [&](const Var& var){ return var.name == name; });
                if (it == gen.m_vars.cend()) {
                    std::cerr << "Undeclared identifier: " << name << std::endl;
                    exit(EXIT_FAILURE);
                }

                // Variables are addressed relative to the current stack top.
                // push() appends the line break itself, so none is added here.
                std::stringstream offset;
                offset << "QWORD [rsp + " << (gen.m_stack_size - it->stack_loc - 1) * 8 << "]";
                gen.push(offset.str());
            }
            void operator()(const NodeTermParen* term_paren) const
            {
                gen.gen_expr(term_paren->expr);
            }
        };
        TermVisitor visitor{.gen = *this};
        std::visit(visitor, term->var);
    }

    /// Emits code for a binary expression and pushes its result.
    ///
    /// The right operand is generated first so that, after both are pushed,
    /// the left operand is popped into rax and the right one into rbx.
    void gen_bin_expr (const NodeBinExpr* bin_expr)
    {
        struct BinExprVisitor {
            Generator& gen;

            void operator()(const NodeBinExprSub* sub)
            {
                operands(sub->lhs, sub->rhs);
                gen.m_output << "    sub rax, rbx\n";
                gen.push("rax");
            }
            void operator()(const NodeBinExprDiv* div)
            {
                operands(div->lhs, div->rhs);
                // `div` divides rdx:rax by its operand, so the upper half
                // must be cleared before an unsigned 64-bit division.
                gen.m_output << "    xor rdx, rdx\n";
                gen.m_output << "    div rbx\n";
                gen.push("rax");
            }
            void operator()(const NodeBinExprAdd* add)
            {
                operands(add->lhs, add->rhs);
                gen.m_output << "    add rax, rbx\n";
                gen.push("rax");
            }
            void operator()(const NodeBinExprMulti* multi)
            {
                operands(multi->lhs, multi->rhs);
                gen.m_output << "    mul rbx\n";
                gen.push("rax");
            }

            // Evaluates both operands and loads lhs into rax, rhs into rbx.
            void operands(const NodeExpr* lhs, const NodeExpr* rhs)
            {
                gen.gen_expr(rhs);
                gen.gen_expr(lhs);
                gen.pop("rax");
                gen.pop("rbx");
            }
        };
        BinExprVisitor visitor{.gen = *this};
        std::visit(visitor, bin_expr->var);
    }

    /// Emits code that pushes the value of `expr` onto the stack.
    void gen_expr(const NodeExpr* expr)
    {
        struct ExprVisitor {
            Generator& gen;
            void operator()(const NodeTerm* term)
            {
                gen.gen_term(term);
            }
            void operator()(const NodeBinExpr* bin_expr)
            {
                gen.gen_bin_expr(bin_expr);
            }
        };

        ExprVisitor visitor{.gen = *this};
        std::visit(visitor, expr->var);
    }

    /// Emits the statements of `scope`, then releases the stack slots of the
    /// variables declared inside it.
    void gen_scope(const NodeScope* scope)
    {
        begin_scope();
        for (const NodeStmt* stmt : scope->stmts) {
            gen_stmt(stmt);
        }
        end_scope();
    }

    /// Emits code for a single statement.
    /// Terminates the process when a `let` redeclares an existing variable.
    void gen_stmt(const NodeStmt* stmt)
    {
        struct StmtVisitor {
            Generator& gen;
            void operator()(const NodeStmtExit* stmt_exit)
            {
                gen.gen_expr(stmt_exit->expr);
                gen.m_output << "    mov rax, 60\n"; // syscall number loaded before the exit code is popped
                gen.pop("rdi");
                gen.m_output << "    syscall\n";
            }
            void operator()(const NodeStmtLet* stmt_let)
            {
                const std::string& name = stmt_let->ident.value.value();
                const auto it = std::find_if(gen.m_vars.cbegin(), gen.m_vars.cend(),
                    [&](const Var& var){ return var.name == name; });
                if (it != gen.m_vars.cend()) {
                    std::cerr << "Identifier already used : " << name << std::endl;
                    exit(EXIT_FAILURE);
                }
                // The initialiser's result stays on the stack and becomes the
                // variable's slot. The variable is registered only after the
                // initialiser is generated, so `let x = x;` is reported as an
                // undeclared identifier instead of reading a bogus offset.
                const size_t slot = gen.m_stack_size;
                gen.gen_expr(stmt_let->expr);
                gen.m_vars.push_back({.name = name, .stack_loc = slot});
            }
            void operator()(const NodeScope* scope) const
            {
                gen.gen_scope(scope);
            }
            void operator()(const NodeStmtIf* stmt_if)
            {
                gen.gen_expr(stmt_if->expr);
                gen.pop("rax");
                const std::string label = gen.create_label();
                // Skip the body when the condition evaluated to zero.
                gen.m_output << "    test rax, rax\n";
                gen.m_output << "    jz " << label << "\n";
                gen.gen_scope(stmt_if->scope);
                gen.m_output << label << ":\n";
            }
        };

        StmtVisitor visitor{.gen = *this};
        std::visit(visitor, stmt->var);
    }

    /// Generates the assembly for the whole program.
    ///
    /// A trailing `exit(0)` is appended so that a program which never calls
    /// `exit` still terminates cleanly instead of running past its last
    /// instruction.
    ///
    /// @return The complete assembly text accumulated so far.
    [[nodiscard]] std::string gen_prog()
    {
        m_output << "global _start\n_start:\n";

        for (const NodeStmt* stmt : m_prog.stmts) {
            gen_stmt(stmt);
        }

        m_output << "    mov rax, 60\n";
        m_output << "    mov rdi, 0\n";
        m_output << "    syscall\n";
        return m_output.str();
    }

private:
    /// Emits `push <reg>` and records the extra stack slot.
    void push (const std::string& reg)
    {
        m_output << "    push " << reg << "\n";
        m_stack_size++;
    }

    /// Emits `pop <reg>` and records the released stack slot.
    void pop (const std::string& reg)
    {
        m_output << "    pop " << reg << "\n";
        m_stack_size--;
    }

    /// Remembers how many variables exist when a scope opens.
    void begin_scope()
    {
        m_scopes.push_back(m_vars.size());
    }

    /// Drops every variable declared since the matching begin_scope() and
    /// emits the `add rsp` that frees their stack slots.
    void end_scope()
    {
        const size_t pop_count = m_vars.size() - m_scopes.back();
        m_output << "    add rsp, " << pop_count * 8 << "\n";
        m_stack_size -= pop_count;
        for (size_t i = 0; i < pop_count; ++i) {
            m_vars.pop_back();
        }
        m_scopes.pop_back();
    }

    /// Returns a fresh label name; the counter advances on every call so two
    /// `if` statements never share a label.
    std::string create_label()
    {
        return "label" + std::to_string(m_label_count++);
    }

    /// A declared variable and the stack slot (counted from the stack base)
    /// that holds its value.
    struct Var {
        std::string name;
        size_t stack_loc;
    };

    NodeProg m_prog;
    std::stringstream m_output;
    size_t m_stack_size = 0;         // number of 8-byte slots currently pushed
    std::vector<Var> m_vars {};      // variables in declaration order
    std::vector<size_t> m_scopes {}; // m_vars.size() at each open scope
    int m_label_count = 0;
};

read.txt

cpp 复制代码
let y = (10 - 2 * 3) / 2;
let x = 0;
if (x + 1) {
    exit(69);
}
exit(1);
相关推荐
00后程序员张2 小时前
iOS App 如何上架,从准备到发布的完整流程方法论
android·macos·ios·小程序·uni-app·cocoa·iphone
Ujimatsu2 小时前
AMD锐龙H 255 + 780M核显 虚拟机安装macOS 15 Sequoia(2025.11)(可登录Apple ID)
macos·vmware·虚拟机·amd·sequoia·h 255
benxin12342 小时前
macOS 上使用 Homebrew 安装和配置 PostgreSQL 的详细步骤
macos·postgresql·策略模式
他们都不看好你,偏偏你最不争气2 小时前
【iOS】TableView的优化
macos·ios·objective-c·cocoa
雪花凌落的盛夏2 小时前
电脑安装黑苹果教程,MacOS教程(未验证版)
macos·电脑·mac
Morgana_Mo2 小时前
iOS_输入框键盘跟随最佳实践
ios·计算机外设·cocoa
他们都不看好你,偏偏你最不争气2 小时前
【iOS】数据持久化
jvm·数据库·macos·ios·oracle·objective-c·cocoa
开开心心loky2 小时前
[iOS] Block 的使用
macos·ios·cocoa