1 #ifndef TOKEN_H
   2 #define TOKEN_H
   3 /*
   4  * Basic tokenization structures. NOTE! Those tokens had better
   5  * be pretty small, since we're going to keep them all in memory
   6  * indefinitely.
   7  *
   8  * Copyright (C) 2003 Transmeta Corp.
   9  *               2003 Linus Torvalds
  10  *
  11  * Permission is hereby granted, free of charge, to any person obtaining a copy
  12  * of this software and associated documentation files (the "Software"), to deal
  13  * in the Software without restriction, including without limitation the rights
  14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15  * copies of the Software, and to permit persons to whom the Software is
  16  * furnished to do so, subject to the following conditions:
  17  *
  18  * The above copyright notice and this permission notice shall be included in
  19  * all copies or substantial portions of the Software.
  20  *
  21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27  * THE SOFTWARE.
  28  */
  29 
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
  32 
  33 /*
  34  * This describes the pure lexical elements (tokens), with
  35  * no semantic meaning. In other words, an identifier doesn't
  36  * have a type or meaning, it is only a specific string in
  37  * the input stream.
  38  *
  39  * Semantic meaning is handled elsewhere.
  40  */
  41 
  42 enum constantfile {
  43   CONSTANT_FILE_MAYBE,    // To be determined, not inside any #ifs in this file
  44   CONSTANT_FILE_IFNDEF,   // To be determined, currently inside #ifndef
  45   CONSTANT_FILE_NOPE,     // No
  46   CONSTANT_FILE_YES       // Yes
  47 };
  48 
/*
 * Array of C strings defined in another translation unit; its size is
 * not visible from this header.
 * NOTE(review): presumably the include search directories - confirm at
 * the definition.
 */
extern const char *includepath[];
  50 
  51 struct stream {
  52         int fd;
  53         const char *name;
  54         const char *path;    // input-file path - see set_stream_include_path()
  55         const char **next_path;
  56 
  57         /* Use these to check for "already parsed" */
  58         enum constantfile constant;
  59         int dirty, next_stream, once;
  60         struct ident *protect;
  61         struct token *ifndef;
  62         struct token *top_if;
  63 };
  64 
  65 extern int input_stream_nr;
  66 extern struct stream *input_streams;
  67 extern unsigned int tabstop;
  68 extern int no_lineno;
  69 extern int *hash_stream(const char *name);
  70 
  71 struct ident {
  72         struct ident *next;     /* Hash chain of identifiers */
  73         struct symbol *symbols; /* Pointer to semantic meaning list */
  74         unsigned char len;      /* Length of identifier name */
  75         unsigned char tainted:1,
  76                       reserved:1,
  77                       keyword:1;
  78         char name[];            /* Actual identifier */
  79 };
  80 
  81 enum token_type {
  82         TOKEN_EOF,
  83         TOKEN_BAD,
  84         TOKEN_ERROR,
  85         TOKEN_IDENT,
  86         TOKEN_ZERO_IDENT,
  87         TOKEN_NUMBER,
  88         TOKEN_CHAR,
  89         TOKEN_CHAR_EMBEDDED_0,
  90         TOKEN_CHAR_EMBEDDED_1,
  91         TOKEN_CHAR_EMBEDDED_2,
  92         TOKEN_CHAR_EMBEDDED_3,
  93         TOKEN_WIDE_CHAR,
  94         TOKEN_WIDE_CHAR_EMBEDDED_0,
  95         TOKEN_WIDE_CHAR_EMBEDDED_1,
  96         TOKEN_WIDE_CHAR_EMBEDDED_2,
  97         TOKEN_WIDE_CHAR_EMBEDDED_3,
  98         TOKEN_STRING,
  99         TOKEN_WIDE_STRING,
 100         TOKEN_SPECIAL,
 101         TOKEN_STREAMBEGIN,
 102         TOKEN_STREAMEND,
 103         TOKEN_MACRO_ARGUMENT,
 104         TOKEN_STR_ARGUMENT,
 105         TOKEN_QUOTED_ARGUMENT,
 106         TOKEN_CONCAT,
 107         TOKEN_GNU_KLUDGE,
 108         TOKEN_UNTAINT,
 109         TOKEN_ARG_COUNT,
 110         TOKEN_IF,
 111         TOKEN_SKIP_GROUPS,
 112         TOKEN_ELSE,
 113 };
 114 
 115 /* Combination tokens */
 116 #define COMBINATION_STRINGS {   \
 117         "+=", "++",             \
 118         "-=", "--", "->",    \
 119         "*=",                   \
 120         "/=",                   \
 121         "%=",                   \
 122         "<=", ">=",               \
 123         "==", "!=",             \
 124         "&&", "&=",         \
 125         "||", "|=",             \
 126         "^=", "##",             \
 127         "<<", ">>", "..",   \
 128         "<<=", ">>=", "...",        \
 129         "",                     \
 130         "<", ">", "<=", ">="        \
 131 }
 132 
/*
 * Table of four-byte rows, defined in another translation unit.
 * NOTE(review): presumably initialised from COMBINATION_STRINGS, whose
 * longest entries are three characters plus the terminating NUL -
 * confirm at the definition.
 */
extern unsigned char combinations[][4];
 134 
 135 enum special_token {
 136         SPECIAL_BASE = 256,
 137         SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
 138         SPECIAL_INCREMENT,
 139         SPECIAL_SUB_ASSIGN,
 140         SPECIAL_DECREMENT,
 141         SPECIAL_DEREFERENCE,
 142         SPECIAL_MUL_ASSIGN,
 143         SPECIAL_DIV_ASSIGN,
 144         SPECIAL_MOD_ASSIGN,
 145         SPECIAL_LTE,
 146         SPECIAL_GTE,
 147         SPECIAL_EQUAL,
 148         SPECIAL_NOTEQUAL,
 149         SPECIAL_LOGICAL_AND,
 150         SPECIAL_AND_ASSIGN,
 151         SPECIAL_LOGICAL_OR,
 152         SPECIAL_OR_ASSIGN,
 153         SPECIAL_XOR_ASSIGN,
 154         SPECIAL_HASHHASH,
 155         SPECIAL_LEFTSHIFT,
 156         SPECIAL_RIGHTSHIFT,
 157         SPECIAL_DOTDOT,
 158         SPECIAL_SHL_ASSIGN,
 159         SPECIAL_SHR_ASSIGN,
 160         SPECIAL_ELLIPSIS,
 161         SPECIAL_ARG_SEPARATOR,
 162         SPECIAL_UNSIGNED_LT,
 163         SPECIAL_UNSIGNED_GT,
 164         SPECIAL_UNSIGNED_LTE,
 165         SPECIAL_UNSIGNED_GTE,
 166 };
 167 
 168 struct string {
 169         unsigned int length:31;
 170         unsigned int immutable:1;
 171         char data[];
 172 };
 173 
 174 /* will fit into 32 bits */
 175 struct argcount {
 176         unsigned normal:10;
 177         unsigned quoted:10;
 178         unsigned str:10;
 179         unsigned vararg:1;
 180 };
 181 
/*
 * This is a very common data structure, it should be kept
 * as small as humanly possible. Big (rare) types go as
 * pointers.
 *
 * The payload lives in an anonymous union, so only one of its members
 * is meaningful for a given token. match_op() reads 'special' for
 * TOKEN_SPECIAL tokens and match_ident() reads 'ident' for TOKEN_IDENT
 * tokens; which member applies to the other token kinds is not visible
 * in this header.
 *
 * Do not reorder the union members: the first one is what a
 * brace-enclosed initializer would set.
 */
struct token {
        struct position pos;    /* pos.type holds the token kind, see token_type() */
        struct token *next;     /* link to the next token, see containing_token() */
        union {
                const char *number;
                struct ident *ident;
                unsigned int special;
                struct string *string;
                int argnum;
                struct argcount count;
                char embedded[4];
        };
};
 200 
/*
 * NOTE(review): presumably the maximum string length accepted by the
 * tokenizer - the users are not visible in this header, confirm there.
 */
#define MAX_STRING 8191
 202 
 203 static inline struct token *containing_token(struct token **p)
 204 {
 205         void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
 206         return addr;
 207 }
 208 
 209 #define token_type(x) ((x)->pos.type)
 210 
 211 /*
 212  * Last token in the stream - points to itself.
 213  * This allows us to not test for NULL pointers
 214  * when following the token->next chain..
 215  */
 216 extern struct token eof_token_entry;
 217 #define eof_token(x) ((x) == &eof_token_entry)
 218 
 219 extern int init_stream(const char *, int fd, const char **next_path);
 220 extern const char *stream_name(int stream);
 221 extern struct ident *hash_ident(struct ident *);
 222 extern struct ident *built_in_ident(const char *);
 223 extern struct token *built_in_token(int, struct ident *);
 224 extern const char *show_special(int);
 225 extern const char *show_ident(const struct ident *);
 226 extern const char *show_string(const struct string *string);
 227 extern const char *show_token(const struct token *);
 228 extern const char *quote_token(const struct token *);
 229 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
 230 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
 231 
 232 extern void show_identifier_stats(void);
 233 extern void init_include_path(void);
 234 extern struct token *preprocess(struct token *);
 235 
 236 extern void store_all_tokens(struct token *token);
 237 extern struct token *pos_get_token(struct position pos);
 238 extern char *pos_ident(struct position pos);
 239 
 240 extern void store_macro_pos(struct token *);
 241 extern char *get_macro_name(struct position pos);
 242 extern char *get_inner_macro(struct position pos);
 243 extern struct string_list *get_all_macros(struct position pos);
 244 
 245 static inline int match_op(struct token *token, unsigned int op)
 246 {
 247         return token->pos.type == TOKEN_SPECIAL && token->special == op;
 248 }
 249 
 250 static inline int match_ident(struct token *token, struct ident *id)
 251 {
 252         return token->pos.type == TOKEN_IDENT && token->ident == id;
 253 }
 254 
 255 #endif