CMakeLists.txt
cpp
# Oldest CMake release this build script accepts.
cmake_minimum_required(VERSION 3.20)
# Project name; the executable target below uses the same name.
project(test)
# Compile as C++20 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# One executable built from main.cpp; the headers are listed next to it.
# NOTE(review): main.cpp and parser.hpp include "arena.hpp", which is not listed here — confirm whether it should be.
add_executable(test ${PROJECT_SOURCE_DIR}/main.cpp ${PROJECT_SOURCE_DIR}/parser.hpp ${PROJECT_SOURCE_DIR}/tokenization.hpp ${PROJECT_SOURCE_DIR}/generation.hpp)
# Also request C++20 as a per-target compile feature.
target_compile_features(test PRIVATE cxx_std_20)

main.cpp
cpp
#include <fstream>
#include <sstream>
#include "tokenization.hpp"
#include "parser.hpp"
#include "generation.hpp"
#include "arena.hpp"
/// Compiler driver.
///
/// Reads the source file named by argv[1], tokenizes and parses it, writes the
/// generated assembly to ../out.asm, then invokes nasm and ld to produce `out`.
///
/// @param argc Must be 2 (program name plus one source path).
/// @param argv argv[1] is the path of the source file to compile.
/// @return EXIT_SUCCESS when every stage succeeds, EXIT_FAILURE otherwise.
int main(int argc, char* argv[])
{
    if (argc != 2) {
        std::cerr << "Incorrect usage. Correct usage ..." << std::endl;
        std::cerr << "./test args" << std::endl;
        return EXIT_FAILURE;
    }

    std::string contexts {};
    {
        std::fstream input(argv[1], std::ios::in);
        // Without this check an unreadable path is treated as an empty program.
        if (!input.is_open()) {
            std::cerr << "Unable to open input file: " << argv[1] << std::endl;
            return EXIT_FAILURE;
        }
        std::stringstream stream {};
        stream << input.rdbuf();
        contexts = stream.str();
    }

    Tokenizer tokenizer(std::move(contexts));
    std::vector<Token> tokens = tokenizer.tokenize();

    Parser parser(std::move(tokens));
    std::optional<NodeProg> prog = parser.parse_prog();
    if (!prog.has_value()) {
        std::cerr << "Invaild program" << std::endl;
        return EXIT_FAILURE;
    }

    Generator generation(prog.value());
    {
        // Scoped so the file is flushed and closed before nasm reads it.
        std::fstream file("../out.asm", std::ios::out);
        if (!file.is_open()) {
            std::cerr << "Unable to open ../out.asm for writing" << std::endl;
            return EXIT_FAILURE;
        }
        file << generation.gen_prog();
    }

    // Stop at the first failing tool instead of reporting success regardless.
    if (system("nasm -felf64 ../out.asm") != 0) {
        std::cerr << "Assembling ../out.asm failed" << std::endl;
        return EXIT_FAILURE;
    }
    if (system("ld ../out.o -o out ") != 0) {
        std::cerr << "Linking ../out.o failed" << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
tokenization.hpp
cpp
#pragma once
#include <string>
#include <optional>
#include <vector>
#include <iostream>
// Kinds of token the Tokenizer produces.
enum class TokenType {
exit, // keyword "exit"
int_lit, // run of decimal digits; the text is stored in Token::value
semi, // ';'
open_paren, // '('
close_paren, // ')'
ident, // alphanumeric word that is not a keyword; the text is stored in Token::value
let, // keyword "let"
eq, // '='
plus, // '+'
star, // '*'
sub, // '-'
div, // '/'
open_curly, // '{'
close_curly, // '}'
if_ // keyword "if"
};
/// Reports whether `type` is a binary arithmetic operator token.
///
/// Recognises all four operators (+, -, *, /), matching the set that
/// bin_prec() assigns a precedence to. Declared inline because it is defined
/// in a header.
///
/// @param type Token kind to classify.
/// @return true for plus, sub, star and div; false for every other kind.
[[nodiscard]] inline bool is_bin_op(TokenType type)
{
    switch (type) {
    case TokenType::plus:
    case TokenType::sub:
    case TokenType::star:
    case TokenType::div:
        return true;
    default:
        return false;
    }
}
/// Returns the binding precedence of a binary operator token.
///
/// Declared inline because it is defined in a header; a non-inline definition
/// would be emitted by every translation unit that includes the header.
///
/// @param type Token kind to look up.
/// @return 0 for + and -, 1 for * and /, and an empty optional when `type` is
///         not a binary operator.
[[nodiscard]] inline std::optional<int> bin_prec(TokenType type)
{
    switch (type) {
    case TokenType::sub:
    case TokenType::plus:
        return 0;
    case TokenType::div:
    case TokenType::star:
        return 1;
    default:
        return {};
    }
}
// A single lexical token.
struct Token {
// Kind of token.
TokenType type;
// Source text of the token; the Tokenizer fills it in for ident and int_lit tokens only.
std::optional<std::string> value {};
};
class Tokenizer
{
public:
inline explicit Tokenizer(const std::string& src) : m_src(std::move(src))
{}
inline std::vector<Token> tokenize()
{
std::vector<Token> tokens {};
std::string buf {};
while(peek().has_value()) {
if (std::isalpha(peek().value())) {
buf += consume();
while (peek().has_value() && std::isalnum(peek().value())) {
buf += consume();
}
std::cout << "buf : " << buf << std::endl;
if (buf == "exit") {
tokens.push_back({.type = TokenType::exit});
buf.clear();
} else if (buf == "let") {
tokens.push_back({.type = TokenType::let});
buf.clear();
} else if (buf == "if") {
tokens.push_back({.type = TokenType::if_});
buf.clear();
} else {
tokens.push_back({.type = TokenType::ident, .value = buf});
buf.clear();
}
} else if (std::isdigit(peek().value())) {
buf += consume();
while (peek().has_value() && std::isdigit(peek().value())) {
buf += consume();
}
tokens.push_back({.type = TokenType::int_lit, .value = buf});
std::cout << buf << std::endl;
buf.clear();
} else if (peek().value() == '(') {
consume();
tokens.push_back({.type = TokenType::open_paren});
} else if (peek().value() == ')') {
consume();
tokens.push_back({.type = TokenType::close_paren});
} else if (std::isspace(peek().value())) {
consume();
} else if (peek().value() == '{') {
consume();
tokens.push_back({.type = TokenType::open_curly});
} else if (peek().value() == '}') {
consume();
tokens.push_back({.type = TokenType::close_curly});
} else if (peek().value() == '=') {
consume();
tokens.push_back({.type = TokenType::eq});
} else if (peek().value() == '*') {
consume();
tokens.push_back({.type = TokenType::star});
} else if (peek().value() == '+') {
consume();
tokens.push_back({.type = TokenType::plus});
} else if (peek().value() == '/') {
consume();
tokens.push_back({.type = TokenType::div});
} else if (peek().value() == '-') {
consume();
tokens.push_back({.type = TokenType::sub});
} else if (peek().value() == ';') {
consume();
tokens.push_back({.type = TokenType::semi});
} else {
std::cerr << "You messed up!" << std::endl;
exit(EXIT_FAILURE);
}
}
m_index = 0;
return tokens;
}
private:
[[nodiscard]] inline std::optional<char> peek(int offset = 0) const
{
if (m_index + offset >= m_src.length()) {
return {};
} else {
return m_src[m_index + offset];
}
}
inline char consume()
{
return m_src[m_index++];
}
std::string m_src;
size_t m_index = 0;
};
parser.hpp
cpp
#pragma once
#include <variant>
#include "tokenization.hpp"
#include "arena.hpp"
// Expression AST nodes. Nodes refer to each other through raw pointers; the
// Parser obtains every node from its ArenaAllocator member.

// Integer literal term; holds the int_lit token.
struct NodeTermIntLit {
Token int_lit;
};
// Identifier term; holds the ident token naming the variable.
struct NodeTermIdent {
Token ident;
};
struct NodeExpr;
// Parenthesized expression used as a term.
struct NodeTermParen {
NodeExpr* expr;
};
// lhs + rhs
struct NodeBinExprAdd {
NodeExpr* lhs;
NodeExpr* rhs;
};
// lhs * rhs
struct NodeBinExprMulti {
NodeExpr* lhs;
NodeExpr* rhs;
};
// lhs - rhs
struct NodeBinExprSub {
NodeExpr* lhs;
NodeExpr* rhs;
};
// lhs / rhs
struct NodeBinExprDiv {
NodeExpr* lhs;
NodeExpr* rhs;
};
// A binary expression: exactly one of the four operator nodes.
struct NodeBinExpr {
std::variant<NodeBinExprAdd*, NodeBinExprMulti*, NodeBinExprDiv*, NodeBinExprSub*> var;
};
// A term: integer literal, identifier, or parenthesized expression.
struct NodeTerm {
std::variant<NodeTermIntLit*, NodeTermIdent*, NodeTermParen*> var;
};
// An expression: either a single term or a binary expression.
struct NodeExpr {
std::variant<NodeTerm*, NodeBinExpr*> var;
};
// Statement AST nodes.

// exit(expr);
struct NodeStmtExit {
NodeExpr* expr;
};
// let ident = expr;
struct NodeStmtLet {
Token ident;
NodeExpr* expr;
};
struct NodeStmt;
// { stmts... }
struct NodeScope {
std::vector<NodeStmt*> stmts;
};
// if (expr) scope
struct NodeStmtIf {
NodeExpr* expr;
NodeScope* scope;
};
// A statement: exit, let, nested scope, or if.
struct NodeStmt {
std::variant<NodeStmtExit*, NodeStmtLet*, NodeScope*, NodeStmtIf*> var;
};
// Whole program: the top-level statements in source order.
struct NodeProg {
std::vector<NodeStmt*> stmts;
};
/// Recursive-descent parser that turns a token list into an AST.
///
/// Every node is obtained from the parser's ArenaAllocator and handed out as a
/// raw pointer. Syntax errors print a message to stderr and terminate the
/// process with EXIT_FAILURE.
///
/// NOTE(review): nodes are assigned to right after `alloc<T>()`; if alloc does
/// not construct the object, assigning to members such as Token or std::vector
/// is undefined behaviour — confirm against arena.hpp.
class Parser {
public:
    /// @param token Tokens to parse; moved into the parser.
    inline explicit Parser(std::vector<Token> token)
        : m_token(std::move(token))
        , m_allocator(4 * 1024 * 1024) // NOTE(review): presumably the arena capacity in bytes — confirm against arena.hpp
    {
    }

    /// Parses one term: integer literal, identifier, or parenthesized expression.
    /// @return The term node, or empty when the next token cannot start a term.
    std::optional<NodeTerm*> parse_term()
    {
        if (auto int_lit = try_consume(TokenType::int_lit)) {
            std::cout << "value : " << int_lit.value().value.value() << std::endl;
            auto term_int_lit = m_allocator.alloc<NodeTermIntLit>();
            term_int_lit->int_lit = int_lit.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_int_lit;
            return term;
        }
        if (auto ident = try_consume(TokenType::ident)) {
            auto term_ident = m_allocator.alloc<NodeTermIdent>();
            term_ident->ident = ident.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_ident;
            return term;
        }
        if (try_consume(TokenType::open_paren).has_value()) {
            auto expr = parse_expr();
            if (!expr.has_value()) {
                std::cerr << "Expected expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expected ')'");
            auto term_paren = m_allocator.alloc<NodeTermParen>();
            term_paren->expr = expr.value();
            auto term = m_allocator.alloc<NodeTerm>();
            term->var = term_paren;
            return term;
        }
        return {};
    }

    /// Parses an expression by precedence climbing.
    ///
    /// @param min_prec Lowest operator precedence this call may consume.
    /// @return The expression node, or empty when no term starts here.
    inline std::optional<NodeExpr*> parse_expr(int min_prec = 0)
    {
        std::optional<NodeTerm*> term_lhs = parse_term();
        if (!term_lhs.has_value()) {
            return {};
        }
        auto expr_lhs = m_allocator.alloc<NodeExpr>();
        expr_lhs->var = term_lhs.value();

        while (true) {
            std::optional<Token> curr_tok = peek();
            if (!curr_tok.has_value()) {
                break;
            }
            const std::optional<int> prec = bin_prec(curr_tok->type);
            if (!prec.has_value() || prec.value() < min_prec) {
                break;
            }

            Token op = consume();
            // The right operand may only absorb strictly tighter operators.
            // That keeps equal-precedence chains left-associative, so
            // 10 - 2 - 3 parses as (10 - 2) - 3.
            const int next_min_prec = prec.value() + 1;
            auto expr_rhs = parse_expr(next_min_prec);
            if (!expr_rhs.has_value()) {
                std::cerr << "Unable to parse expression" << std::endl;
                exit(EXIT_FAILURE);
            }

            auto expr = m_allocator.alloc<NodeBinExpr>();
            // expr_lhs is reused as the result node, so its current content
            // moves into a fresh node that becomes the left operand.
            auto expr_lhs2 = m_allocator.alloc<NodeExpr>();
            expr_lhs2->var = expr_lhs->var;

            if (op.type == TokenType::plus) {
                auto add = m_allocator.alloc<NodeBinExprAdd>();
                add->lhs = expr_lhs2;
                add->rhs = expr_rhs.value();
                expr->var = add;
            } else if (op.type == TokenType::star) {
                auto multi = m_allocator.alloc<NodeBinExprMulti>();
                multi->lhs = expr_lhs2;
                multi->rhs = expr_rhs.value();
                expr->var = multi;
            } else if (op.type == TokenType::sub) {
                auto sub = m_allocator.alloc<NodeBinExprSub>();
                sub->lhs = expr_lhs2;
                sub->rhs = expr_rhs.value();
                expr->var = sub;
            } else if (op.type == TokenType::div) {
                auto div = m_allocator.alloc<NodeBinExprDiv>();
                div->lhs = expr_lhs2;
                div->rhs = expr_rhs.value();
                expr->var = div;
            } else {
                // Only reachable if bin_prec gains an operator this chain lacks.
                std::cerr << "Unsupported binary operator" << std::endl;
                exit(EXIT_FAILURE);
            }
            expr_lhs->var = expr;
        }
        return expr_lhs;
    }

    /// Parses `{ stmt* }`.
    /// @return The scope node, or empty when the next token is not '{'.
    std::optional<NodeScope*> parse_scope()
    {
        if (!try_consume(TokenType::open_curly).has_value()) {
            return {};
        }
        auto scope = m_allocator.alloc<NodeScope>();
        while (auto stmt = parse_stmt()) {
            scope->stmts.push_back(stmt.value());
        }
        try_consume(TokenType::close_curly, "Expected '}'");
        return scope;
    }

    /// Parses one statement: exit, let, nested scope, or if.
    /// @return The statement node, or empty when no statement starts here
    ///         (including at end of input).
    std::optional<NodeStmt*> parse_stmt()
    {
        // peek_is() is false at end of input, so an unterminated scope is
        // reported as a missing '}' instead of throwing bad_optional_access.
        if (peek_is(TokenType::exit) && peek_is(TokenType::open_paren, 1)) {
            consume();
            consume();
            auto stmt_exit = m_allocator.alloc<NodeStmtExit>();
            if (auto expr_node = parse_expr()) {
                stmt_exit->expr = expr_node.value();
            } else {
                std::cerr << "Invaild Expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expection ')'");
            try_consume(TokenType::semi, "Expection ';'");
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_exit;
            return stmt;
        }
        if (peek_is(TokenType::let) && peek_is(TokenType::ident, 1) && peek_is(TokenType::eq, 2)) {
            consume(); // let
            auto stmt_let = m_allocator.alloc<NodeStmtLet>();
            stmt_let->ident = consume();
            consume(); // =
            if (auto expr = parse_expr()) {
                stmt_let->expr = expr.value();
            } else {
                std::cerr << "Invaild expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::semi, "Expection ';'");
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_let;
            return stmt;
        }
        if (peek_is(TokenType::open_curly)) {
            if (auto scope = parse_scope()) {
                auto stmt = m_allocator.alloc<NodeStmt>();
                stmt->var = scope.value();
                return stmt;
            }
            std::cerr << "Invaild scope" << std::endl;
            exit(EXIT_FAILURE);
        }
        if (try_consume(TokenType::if_).has_value()) {
            try_consume(TokenType::open_paren, "Expected '('");
            auto stmt_if = m_allocator.alloc<NodeStmtIf>();
            if (auto expr = parse_expr()) {
                stmt_if->expr = expr.value();
            } else {
                std::cerr << "Invaild expression" << std::endl;
                exit(EXIT_FAILURE);
            }
            try_consume(TokenType::close_paren, "Expectede ')'");
            if (auto scope = parse_scope()) {
                stmt_if->scope = scope.value();
            } else {
                std::cerr << "Invaild scope" << std::endl;
                exit(EXIT_FAILURE);
            }
            auto stmt = m_allocator.alloc<NodeStmt>();
            stmt->var = stmt_if;
            return stmt;
        }
        return {};
    }

    /// Parses statements until the token list is exhausted.
    /// @return The program; an invalid statement terminates the process instead
    ///         of producing an empty optional.
    std::optional<NodeProg> parse_prog()
    {
        NodeProg prog;
        while (peek().has_value()) {
            if (auto stmt = parse_stmt()) {
                prog.stmts.push_back(stmt.value());
            } else {
                std::cerr << "Invaild Statement" << std::endl;
                exit(EXIT_FAILURE);
            }
        }
        return prog;
    }

private:
    /// Returns the token `offset` positions ahead without consuming it, or
    /// empty when that position is past the end of the token list.
    [[nodiscard]] inline std::optional<Token> peek(size_t offset = 0) const
    {
        if (m_index + offset >= m_token.size()) {
            return {};
        }
        return m_token[m_index + offset];
    }

    /// True when a token exists `offset` positions ahead and has kind `type`.
    [[nodiscard]] inline bool peek_is(TokenType type, size_t offset = 0) const
    {
        const size_t pos = m_index + offset;
        return pos < m_token.size() && m_token[pos].type == type;
    }

    /// Consumes the next token if it has kind `type`; otherwise prints
    /// `err_msg` and terminates the process.
    inline Token try_consume(TokenType type, const std::string& err_msg)
    {
        if (peek_is(type)) {
            return consume();
        }
        std::cerr << err_msg << std::endl;
        exit(EXIT_FAILURE);
    }

    /// Consumes and returns the next token if it has kind `type`; empty otherwise.
    inline std::optional<Token> try_consume(TokenType type)
    {
        if (peek_is(type)) {
            return consume();
        }
        return {};
    }

    /// Returns the current token and advances; callers check availability first.
    inline Token consume()
    {
        return m_token[m_index++];
    }

    std::vector<Token> m_token;
    size_t m_index = 0;
    ArenaAllocator m_allocator;
};
generation.hpp
cpp
#pragma once
#include "parser.hpp"
#include <sstream>
#include <map>
#include <algorithm>
#include <cassert>
class Generator
{
public:
inline Generator(NodeProg prog)
:m_prog(std::move(prog))
{
}
void gen_term (const NodeTerm* term)
{
struct TermVisitor {
Generator& gen;
void operator()(const NodeTermIntLit* term_int_lit) const
{
gen.m_output << " mov rax, " << term_int_lit->int_lit.value.value() << "\n";
gen.push("rax");
}
void operator()(const NodeTermIdent* term_ident) const
{
auto it = std::find_if(gen.m_vars.cbegin(), gen.m_vars.cend(),
[&](const Var& var){return var.name == term_ident->ident.value.value();});
if (it == gen.m_vars.cend()) {
std::cerr << "Undeclared identifier: " << term_ident->ident.value.value() << std::endl;
exit(EXIT_FAILURE);
}
std::stringstream offset;
offset << "QWORD [rsp + " << (gen.m_stack_size - (*it).stack_loc - 1) * 8 << "]\n";
gen.push(offset.str());
}
void operator()(const NodeTermParen* term_paren) const
{
gen.gen_expr(term_paren->expr);
}
};
TermVisitor visitor{.gen = *this};
std::visit(visitor, term->var);
}
void gen_bin_expr (const NodeBinExpr* bin_expr)
{
struct BinExprVisitor {
Generator& gen;
void operator()(const NodeBinExprSub* sub)
{
gen.gen_expr(sub->rhs);
gen.gen_expr(sub->lhs);
gen.pop("rax");
gen.pop("rbx");
gen.m_output << " sub rax, rbx\n";
gen.push("rax");
}
void operator()(const NodeBinExprDiv* div)
{
gen.gen_expr(div->rhs);
gen.gen_expr(div->lhs);
gen.pop("rax");
gen.pop("rbx");
gen.m_output << " div rbx\n";
gen.push("rax");
}
void operator()(const NodeBinExprAdd* add)
{
gen.gen_expr(add->rhs);
gen.gen_expr(add->lhs);
gen.pop("rax");
gen.pop("rbx");
gen.m_output << " add rax, rbx\n";
gen.push("rax");
}
void operator()(const NodeBinExprMulti* multi)
{
gen.gen_expr(multi->rhs);
gen.gen_expr(multi->lhs);
gen.pop("rax");
gen.pop("rbx");
gen.m_output << " mul rbx\n";
gen.push("rax");
}
};
BinExprVisitor visitor{.gen = *this};
std::visit(visitor, bin_expr->var);
}
void gen_expr(const NodeExpr* expr)
{
struct ExprVisitor {
Generator& gen;
void operator()(const NodeTerm* term)
{
gen.gen_term(term);
}
void operator()(const NodeBinExpr* bin_expr)
{
gen.gen_bin_expr(bin_expr);
}
};
ExprVisitor visitor{.gen = *this};
std::visit(visitor, expr->var);
}
void gen_scope(const NodeScope* scope)
{
begin_scope();
for (const NodeStmt* stmt : scope->stmts) {
gen_stmt(stmt);
}
end_scope();
}
void gen_stmt(const NodeStmt* stmt)
{
struct StmtVisitor {
Generator& gen;
void operator()(const NodeStmtExit* stmt_exit)
{
gen.gen_expr(stmt_exit->expr);
gen.m_output << " mov rax, 60\n";
gen.pop("rdi");
gen.m_output << " syscall\n";
}
void operator()(const NodeStmtLet* stmt_let)
{
auto it = std::find_if(gen.m_vars.cbegin(), gen.m_vars.cend(),
[&](const Var& var){return var.name == stmt_let->ident.value.value();});
if (it != gen.m_vars.cend()) {
std::cerr << "Identifier already used : " << stmt_let->ident.value.value() << std::endl;
exit(EXIT_FAILURE);
}
gen.m_vars.push_back({.name = stmt_let->ident.value.value(), .stack_loc = gen.m_stack_size});
gen.gen_expr(stmt_let->expr);
}
void operator()(const NodeScope* scope) const
{
gen.gen_scope(scope);
}
void operator()(const NodeStmtIf* stmt_if)
{
gen.gen_expr(stmt_if->expr);
gen.pop("rax");
std::string label = gen.create_label();
gen.m_output << " test rax, rax\n";
gen.m_output << " jz " << label << "\n";
gen.gen_scope(stmt_if->scope);
gen.m_output << label << ":\n";
}
};
StmtVisitor visitor{.gen = *this};
std::visit(visitor, stmt->var);
}
[[nodiscard]] std::string gen_prog()
{
m_output << "global _start\n_start\n";
for (const NodeStmt* stmt : m_prog.stmts) {
gen_stmt(stmt);
}
return m_output.str();
}
private:
void push (const std::string& reg)
{
m_output << " push " << reg << "\n";
m_stack_size++;
}
void pop (const std::string& reg)
{
m_output << " pop " << reg << "\n";
m_stack_size--;
}
void begin_scope()
{
m_scopes.push_back(m_vars.size());
}
void end_scope()
{
size_t pop_count = m_vars.size() - m_scopes.back();
m_output << " add rsp, " << pop_count * 8 << "\n";
m_stack_size -= pop_count;
for (int i = 0; i < pop_count; i++) {
m_vars.pop_back();
}
m_scopes.pop_back();
}
std::string create_label()
{
return "label" + std::to_string(m_label_count);
}
struct Var {
std::string name;
size_t stack_loc;
};
NodeProg m_prog;
std::stringstream m_output;
size_t m_stack_size = 0;
std::vector<Var> m_vars {};
std::vector<size_t> m_scopes {};
int m_label_count = 0;
};
read.txt
cpp
let y = (10 - 2 * 3) / 2;
let x = 0;
if (x + 1) {
exit(69);
}
exit(1);