1 #ifndef TOKEN_H 2 #define TOKEN_H 3 /* 4 * Basic tokenization structures. NOTE! Those tokens had better 5 * be pretty small, since we're going to keep them all in memory 6 * indefinitely. 7 * 8 * Copyright (C) 2003 Transmeta Corp. 9 * 2003 Linus Torvalds 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a copy 12 * of this software and associated documentation files (the "Software"), to deal 13 * in the Software without restriction, including without limitation the rights 14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 * copies of the Software, and to permit persons to whom the Software is 16 * furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice shall be included in 19 * all copies or substantial portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 * THE SOFTWARE. 28 */ 29 30 #include <sys/types.h> 31 #include "lib.h" 32 33 /* 34 * This describes the pure lexical elements (tokens), with 35 * no semantic meaning. In other words, an identifier doesn't 36 * have a type or meaning, it is only a specific string in 37 * the input stream. 38 * 39 * Semantic meaning is handled elsewhere. 40 */ 41 42 enum constantfile { 43 CONSTANT_FILE_MAYBE, // To be determined, not inside any #ifs in this file 44 CONSTANT_FILE_IFNDEF, // To be determined, currently inside #ifndef 45 CONSTANT_FILE_NOPE, // No 46 CONSTANT_FILE_YES // Yes 47 }; 48 49 extern const char *includepath[]; 50 51 struct stream { 52 int fd; 53 const char *name; 54 const char *path; // input-file path - see set_stream_include_path() 55 const char **next_path; 56 57 /* Use these to check for "already parsed" */ 58 enum constantfile constant; 59 int dirty, next_stream, once; 60 struct ident *protect; 61 struct token *ifndef; 62 struct token *top_if; 63 }; 64 65 extern int input_stream_nr; 66 extern struct stream *input_streams; 67 extern unsigned int tabstop; 68 extern int no_lineno; 69 extern int *hash_stream(const char *name); 70 71 struct ident { 72 struct ident *next; /* Hash chain of identifiers */ 73 struct symbol *symbols; /* Pointer to semantic meaning list */ 74 unsigned char len; /* Length of identifier name */ 75 unsigned char tainted:1, 76 reserved:1, 77 keyword:1; 78 char name[]; /* Actual identifier */ 79 }; 80 81 enum token_type { 82 TOKEN_EOF, 83 TOKEN_BAD, 84 TOKEN_ERROR, 85 TOKEN_IDENT, 86 TOKEN_ZERO_IDENT, 87 TOKEN_NUMBER, 88 TOKEN_CHAR, 89 TOKEN_CHAR_EMBEDDED_0, 90 TOKEN_CHAR_EMBEDDED_1, 91 TOKEN_CHAR_EMBEDDED_2, 92 TOKEN_CHAR_EMBEDDED_3, 93 TOKEN_WIDE_CHAR, 94 TOKEN_WIDE_CHAR_EMBEDDED_0, 95 TOKEN_WIDE_CHAR_EMBEDDED_1, 96 TOKEN_WIDE_CHAR_EMBEDDED_2, 97 TOKEN_WIDE_CHAR_EMBEDDED_3, 98 TOKEN_STRING, 99 TOKEN_WIDE_STRING, 100 TOKEN_SPECIAL, 101 TOKEN_STREAMBEGIN, 102 TOKEN_STREAMEND, 103 TOKEN_MACRO_ARGUMENT, 104 TOKEN_STR_ARGUMENT, 105 TOKEN_QUOTED_ARGUMENT, 106 TOKEN_CONCAT, 107 TOKEN_GNU_KLUDGE, 108 TOKEN_UNTAINT, 109 TOKEN_ARG_COUNT, 110 TOKEN_IF, 111 TOKEN_SKIP_GROUPS, 112 TOKEN_ELSE, 113 }; 114 115 /* Combination tokens */ 116 #define COMBINATION_STRINGS { \ 117 "+=", "++", \ 118 "-=", "--", "->", \ 119 "*=", \ 120 "/=", \ 121 "%=", \ 122 "<=", ">=", \ 123 "==", "!=", \ 124 "&&", "&=", \ 125 "||", "|=", \ 126 "^=", "##", \ 127 "<<", ">>", "..", \ 128 "<<=", ">>=", "...", \ 129 "", \ 130 "<", ">", "<=", ">=" \ 131 } 132 133 extern unsigned char combinations[][4]; 134 135 enum special_token { 136 SPECIAL_BASE = 256, 137 SPECIAL_ADD_ASSIGN = SPECIAL_BASE, 138 SPECIAL_INCREMENT, 139 SPECIAL_SUB_ASSIGN, 140 SPECIAL_DECREMENT, 141 SPECIAL_DEREFERENCE, 142 SPECIAL_MUL_ASSIGN, 143 SPECIAL_DIV_ASSIGN, 144 SPECIAL_MOD_ASSIGN, 145 SPECIAL_LTE, 146 SPECIAL_GTE, 147 SPECIAL_EQUAL, 148 SPECIAL_NOTEQUAL, 149 SPECIAL_LOGICAL_AND, 150 SPECIAL_AND_ASSIGN, 151 SPECIAL_LOGICAL_OR, 152 SPECIAL_OR_ASSIGN, 153 SPECIAL_XOR_ASSIGN, 154 SPECIAL_HASHHASH, 155 SPECIAL_LEFTSHIFT, 156 SPECIAL_RIGHTSHIFT, 157 SPECIAL_DOTDOT, 158 SPECIAL_SHL_ASSIGN, 159 SPECIAL_SHR_ASSIGN, 160 SPECIAL_ELLIPSIS, 161 SPECIAL_ARG_SEPARATOR, 162 SPECIAL_UNSIGNED_LT, 163 SPECIAL_UNSIGNED_GT, 164 SPECIAL_UNSIGNED_LTE, 165 SPECIAL_UNSIGNED_GTE, 166 }; 167 168 struct string { 169 unsigned int length:31; 170 unsigned int immutable:1; 171 char data[]; 172 }; 173 174 /* will fit into 32 bits */ 175 struct argcount { 176 unsigned normal:10; 177 unsigned quoted:10; 178 unsigned str:10; 179 unsigned vararg:1; 180 }; 181 182 /* 183 * This is a very common data structure, it should be kept 184 * as small as humanly possible. Big (rare) types go as 185 * pointers. 186 */ 187 struct token { 188 struct position pos; 189 struct token *next; 190 union { 191 const char *number; 192 struct ident *ident; 193 unsigned int special; 194 struct string *string; 195 int argnum; 196 struct argcount count; 197 char embedded[4]; 198 }; 199 }; 200 201 #define MAX_STRING 8191 202 203 static inline struct token *containing_token(struct token **p) 204 { 205 void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0); 206 return addr; 207 } 208 209 #define token_type(x) ((x)->pos.type) 210 211 /* 212 * Last token in the stream - points to itself. 213 * This allows us to not test for NULL pointers 214 * when following the token->next chain.. 215 */ 216 extern struct token eof_token_entry; 217 #define eof_token(x) ((x) == &eof_token_entry) 218 219 extern int init_stream(const char *, int fd, const char **next_path); 220 extern const char *stream_name(int stream); 221 extern struct ident *hash_ident(struct ident *); 222 extern struct ident *built_in_ident(const char *); 223 extern struct token *built_in_token(int, struct ident *); 224 extern const char *show_special(int); 225 extern const char *show_ident(const struct ident *); 226 extern const char *show_string(const struct string *string); 227 extern const char *show_token(const struct token *); 228 extern const char *quote_token(const struct token *); 229 extern struct token * tokenize(const char *, int, struct token *, const char **next_path); 230 extern struct token * tokenize_buffer(void *, unsigned long, struct token **); 231 232 extern void show_identifier_stats(void); 233 extern void init_include_path(void); 234 extern struct token *preprocess(struct token *); 235 236 extern void store_all_tokens(struct token *token); 237 extern struct token *pos_get_token(struct position pos); 238 extern char *pos_ident(struct position pos); 239 240 extern void store_macro_pos(struct token *); 241 extern char *get_macro_name(struct position pos); 242 extern char *get_inner_macro(struct position pos); 243 extern struct string_list *get_all_macros(struct position pos); 244 245 static inline int match_op(struct token *token, unsigned int op) 246 { 247 return token->pos.type == TOKEN_SPECIAL && token->special == op; 248 } 249 250 static inline int match_ident(struct token *token, struct ident *id) 251 { 252 return token->pos.type == TOKEN_IDENT && token->ident == id; 253 } 254 255 #endif