1#pragma once
2
3#include "utils.h"
4
5#include <cctype>
6#include <map>
7#include <stdexcept>
8#include <string>
9#include <vector>
10
11namespace jinja {
12
// A single lexical token produced from Jinja template source.
//
// All members carry default values so that a default-constructed token is in
// a well-defined state (type `eof`, empty value, position 0) instead of
// holding indeterminate data. Aggregate initialization in member order
// (`token{type, value, pos}`) keeps working as before.
struct token {
    // Kind of token. The enumerator order is part of the interface: the
    // underlying numeric values follow declaration order.
    enum type {
        eof,  // end of source
        text, // The text between Jinja statements or expressions

        numeric_literal,      // e.g., 123, 1.0
        string_literal,       // 'string'
        identifier,           // Variables, functions, statements, booleans, etc.
        equals,               // =
        open_paren,           // (
        close_paren,          // )
        open_statement,       // {%
        close_statement,      // %}
        open_expression,      // {{
        close_expression,     // }}
        open_square_bracket,  // [
        close_square_bracket, // ]
        open_curly_bracket,   // {
        close_curly_bracket,  // }
        comma,                // ,
        dot,                  // .
        colon,                // :
        pipe,                 // |

        call_operator,                  // ()
        additive_binary_operator,       // + - ~
        multiplicative_binary_operator, // * / %
        comparison_binary_operator,     // < > <= >= == !=
        unary_operator,                 // ! - +
        comment,                        // {# ... #}
    };

    // Token kind; defaults to `eof`.
    type t = eof;
    // Text associated with the token; empty by default.
    std::string value;
    // Position associated with the token; defaults to 0.
    // NOTE(review): presumably an offset into the source string - confirm
    // against the tokenize() implementation.
    size_t pos = 0;
};
48
49static std::string type_to_string(token::type t) {
50 switch (t) {
51 case token::eof: return "eof";
52 case token::text: return "text";
53 case token::numeric_literal: return "numeric_literal";
54 case token::string_literal: return "string_literal";
55 case token::identifier: return "identifier";
56 case token::equals: return "equals";
57 case token::open_paren: return "open_paren";
58 case token::close_paren: return "close_paren";
59 case token::open_statement: return "open_statement";
60 case token::close_statement: return "close_statement";
61 case token::open_expression: return "open_expression";
62 case token::close_expression: return "close_expression";
63 case token::open_square_bracket: return "open_square_bracket";
64 case token::close_square_bracket: return "close_square_bracket";
65 case token::open_curly_bracket: return "open_curly_bracket";
66 case token::close_curly_bracket: return "close_curly_bracket";
67 case token::comma: return "comma";
68 case token::dot: return "dot";
69 case token::colon: return "colon";
70 case token::pipe: return "pipe";
71 case token::call_operator: return "call_operator";
72 case token::additive_binary_operator: return "additive_binary_operator";
73 case token::multiplicative_binary_operator: return "multiplicative_binary_operator";
74 case token::comparison_binary_operator: return "comparison_binary_operator";
75 case token::unary_operator: return "unary_operator";
76 case token::comment: return "comment";
77 default: return "unknown";
78 }
79}
80
81struct lexer_result {
82 std::vector<token> tokens;
83 std::string source;
84};
85
86struct lexer {
87 const std::map<char, char> escape_chars = {
88 {'n', '\n'},
89 {'t', '\t'},
90 {'r', '\r'},
91 {'b', '\b'},
92 {'f', '\f'},
93 {'v', '\v'},
94 {'\\', '\\'},
95 {'\'', '\''},
96 {'\"', '\"'},
97 };
98
99 static bool is_word(char c) {
100 return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
101 }
102
103 static bool is_integer(char c) {
104 return std::isdigit(static_cast<unsigned char>(c));
105 }
106
107 const std::vector<std::pair<std::string, token::type>> ordered_mapping_table = {
108 // Trimmed control sequences
109 {"{%-", token::open_statement},
110 {"-%}", token::close_statement},
111 {"{{-", token::open_expression},
112 {"-}}", token::close_expression},
113 // Control sequences
114 {"{%", token::open_statement},
115 {"%}", token::close_statement},
116 {"{{", token::open_expression},
117 {"}}", token::close_expression},
118 // Single character tokens
119 {"(", token::open_paren},
120 {")", token::close_paren},
121 {"{", token::open_curly_bracket},
122 {"}", token::close_curly_bracket},
123 {"[", token::open_square_bracket},
124 {"]", token::close_square_bracket},
125 {",", token::comma},
126 {".", token::dot},
127 {":", token::colon},
128 {"|", token::pipe},
129 // Comparison operators
130 {"<=", token::comparison_binary_operator},
131 {">=", token::comparison_binary_operator},
132 {"==", token::comparison_binary_operator},
133 {"!=", token::comparison_binary_operator},
134 {"<", token::comparison_binary_operator},
135 {">", token::comparison_binary_operator},
136 // Arithmetic operators
137 {"+", token::additive_binary_operator},
138 {"-", token::additive_binary_operator},
139 {"~", token::additive_binary_operator},
140 {"*", token::multiplicative_binary_operator},
141 {"/", token::multiplicative_binary_operator},
142 {"%", token::multiplicative_binary_operator},
143 // Assignment operator
144 {"=", token::equals},
145 };
146
147 // tokenize the source string into a list of tokens
148 // may throw lexer_exception on error
149 lexer_result tokenize(const std::string & source);
150};
151
152struct lexer_exception : public std::runtime_error {
153 lexer_exception(const std::string & msg, const std::string & source, size_t pos)
154 : std::runtime_error(fmt_error_with_source("lexer", msg, source, pos)) {}
155};
156
157} // namespace jinja