Print this page
OS-1723 DTrace should speak JSON (review fixes)

*** 877,886 **** --- 877,890 ---- return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); } /* * Convert a string to a signed integer using safe loads. + * + * NOTE: This function uses various macros from strtolctype.h to manipulate + * digit values, etc -- these have all been checked to ensure they make + * no additional function calls. */ static int64_t dtrace_strtoll(char *input, int base, size_t limit) { uintptr_t pos = (uintptr_t)input;
*** 888,916 **** int x; boolean_t neg = B_FALSE; char c, cc, ccc; uintptr_t end = pos + limit; ! /* eat whitespace */ while ((c = dtrace_load8(pos)) == ' ' || c == '\t') pos++; ! /* sign? */ if (c == '-' || c == '+') { if (c == '-') neg = B_TRUE; c = dtrace_load8(++pos); } ! /* hex prefix? */ if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { ! pos += 2; /* skip over leading "0x" or "0X" */ c = ccc; } ! /* read in digits */ for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; c = dtrace_load8(++pos)) val = val * base + x; return (neg ? -val : val); --- 892,929 ---- int x; boolean_t neg = B_FALSE; char c, cc, ccc; uintptr_t end = pos + limit; ! /* ! * Consume any whitespace preceding digits. ! */ while ((c = dtrace_load8(pos)) == ' ' || c == '\t') pos++; ! /* ! * Handle an explicit sign if one is present. ! */ if (c == '-' || c == '+') { if (c == '-') neg = B_TRUE; c = dtrace_load8(++pos); } ! /* ! * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it ! * if present. ! */ if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { ! pos += 2; c = ccc; } ! /* ! * Read in contiguous digits until the first non-digit character. ! */ for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; c = dtrace_load8(++pos)) val = val * base + x; return (neg ? -val : val);
*** 3392,3416 **** return (0); } } ! typedef enum json_state { ! JSON_REST = 1, ! JSON_OBJECT, ! JSON_STRING, ! JSON_STRING_ESCAPE, ! JSON_STRING_ESCAPE_UNICODE, ! JSON_COLON, ! JSON_COMMA, ! JSON_VALUE, ! JSON_IDENTIFIER, ! JSON_NUMBER, ! JSON_NUMBER_FRAC, ! JSON_NUMBER_EXP, ! JSON_COLLECT_OBJECT ! } json_state_t; /* * This function possesses just enough knowledge about JSON to extract a single * value from a JSON string and store it in the scratch buffer. It is able * to extract nested object values, and members of arrays by index. --- 3405,3429 ---- return (0); } } ! typedef enum dtrace_json_state { ! DTRACE_JSON_REST = 1, ! DTRACE_JSON_OBJECT, ! DTRACE_JSON_STRING, ! DTRACE_JSON_STRING_ESCAPE, ! DTRACE_JSON_STRING_ESCAPE_UNICODE, ! DTRACE_JSON_COLON, ! DTRACE_JSON_COMMA, ! DTRACE_JSON_VALUE, ! DTRACE_JSON_IDENTIFIER, ! DTRACE_JSON_NUMBER, ! DTRACE_JSON_NUMBER_FRAC, ! DTRACE_JSON_NUMBER_EXP, ! DTRACE_JSON_COLLECT_OBJECT ! } dtrace_json_state_t; /* * This function possesses just enough knowledge about JSON to extract a single * value from a JSON string and store it in the scratch buffer. It is able * to extract nested object values, and members of arrays by index.
*** 3418,3488 **** * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to * be looked up as we descend into the object tree. e.g. * * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL * with nelems = 5. */ static char * dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, char *dest) { ! json_state_t state = JSON_REST; uint64_t i; int64_t array_elem = INT64_MIN; int64_t array_pos = 0; uint8_t escape_unicount = 0; boolean_t string_is_key = B_FALSE; boolean_t collect_object = B_FALSE; boolean_t found_key = B_FALSE; boolean_t in_array = B_FALSE; ! uint8_t braces = 0, brackets = 0; char *elem = elemlist; char *dd = dest; uintptr_t cur; for (cur = json; cur < json + size; cur++) { char cc = dtrace_load8(cur); ! if (cc == '\0' || braces > 250) return (NULL); switch (state) { ! case JSON_REST: ! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r') ! break; /* eat whitespace */ if (cc == '{') { ! state = JSON_OBJECT; break; } if (cc == '[') { in_array = B_TRUE; array_pos = 0; array_elem = dtrace_strtoll(elem, 10, size); ! found_key = !!(array_elem == 0); ! state = JSON_VALUE; break; } ! /* ERROR: expected object or array */ return (NULL); ! case JSON_OBJECT: ! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r') ! break; /* eat whitespace */ if (cc == '"') { ! state = JSON_STRING; string_is_key = B_TRUE; break; } ! /* ERROR: key not found! */ return (NULL); ! case JSON_STRING: if (cc == '\\') { *dd++ = '\\'; ! state = JSON_STRING_ESCAPE; break; } if (cc == '"') { if (collect_object) { --- 3431,3573 ---- * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to * be looked up as we descend into the object tree. e.g. * * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL * with nelems = 5. + * + * The run time of this function must be bounded above by strsize to limit the + * amount of work done in probe context. 
As such, it is implemented as a + * simple state machine, reading one character at a time using safe loads + * until we find the requested element, hit a parsing error or run off the + * end of the object or string. + * + * As there is no way for a subroutine to return an error without interrupting + * clause execution, we simply return NULL in the event of a missing key or any + * other error condition. Each NULL return in this function is commented with + * the error condition it represents -- parsing or otherwise. + * + * The set of states for the state machine closely matches the JSON + * specification (http://json.org/). Briefly: + * + * DTRACE_JSON_REST: + * Skip whitespace until we find either a top-level Object, moving + * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_OBJECT: + * Locate the next key String in an Object. Sets a flag to denote + * the next String as a key string and moves to DTRACE_JSON_STRING. + * + * DTRACE_JSON_COLON: + * Skip whitespace until we find the colon that separates key Strings + * from their values. Once found, move to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_VALUE: + * Detects the type of the next value (String, Number, Identifier, Object + * or Array) and routes to the states that process that type. Here we also + * deal with the element selector list if we are requested to traverse down + * into the object tree. + * + * DTRACE_JSON_COMMA: + * Skip whitespace until we find the comma that separates key-value pairs + * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays + * (similarly DTRACE_JSON_VALUE). All following literal value processing + * states return to this state at the end of their value, unless otherwise + * noted. + * + * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: + * Processes a Number literal from the JSON, including any exponent + * component that may be present. 
Numbers are returned as strings, which + * may be passed to strtoll() if an integer is required. + * + * DTRACE_JSON_IDENTIFIER: + * Processes a "true", "false" or "null" literal in the JSON. + * + * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, + * DTRACE_JSON_STRING_ESCAPE_UNICODE: + * Processes a String literal from the JSON, whether the String denotes + * a key, a value or part of a larger Object. Handles all escape sequences + * present in the specification, including four-digit unicode characters, + * but merely includes the escape sequence without converting it to the + * actual escaped character. If the String is flagged as a key, we + * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. + * + * DTRACE_JSON_COLLECT_OBJECT: + * This state collects an entire Object (or Array), correctly handling + * embedded strings. If the full element selector list matches this nested + * object, we return the Object in full as a string. If not, we use this + * state to skip to the next value at this level and continue processing. + * + * NOTE: This function uses various macros from strtolctype.h to manipulate + * digit values, etc -- these have all been checked to ensure they make + * no additional function calls. */ static char * dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, char *dest) { ! dtrace_json_state_t state = DTRACE_JSON_REST; uint64_t i; int64_t array_elem = INT64_MIN; int64_t array_pos = 0; uint8_t escape_unicount = 0; boolean_t string_is_key = B_FALSE; boolean_t collect_object = B_FALSE; boolean_t found_key = B_FALSE; boolean_t in_array = B_FALSE; ! uint32_t braces = 0, brackets = 0; char *elem = elemlist; char *dd = dest; uintptr_t cur; for (cur = json; cur < json + size; cur++) { char cc = dtrace_load8(cur); ! if (cc == '\0') return (NULL); switch (state) { ! case DTRACE_JSON_REST: ! if (isspace(cc)) ! break; if (cc == '{') { ! 
state = DTRACE_JSON_OBJECT; break; } if (cc == '[') { in_array = B_TRUE; array_pos = 0; array_elem = dtrace_strtoll(elem, 10, size); ! found_key = array_elem == 0 ? B_TRUE : B_FALSE; ! state = DTRACE_JSON_VALUE; break; } ! /* ! * ERROR: expected to find a top-level object or array. ! */ return (NULL); ! case DTRACE_JSON_OBJECT: ! if (isspace(cc)) ! break; if (cc == '"') { ! state = DTRACE_JSON_STRING; string_is_key = B_TRUE; break; } ! /* ! * ERROR: either the object did not start with a key ! * string, or we've run off the end of the object ! * without finding the requested key. ! */ return (NULL); ! case DTRACE_JSON_STRING: if (cc == '\\') { *dd++ = '\\'; ! state = DTRACE_JSON_STRING_ESCAPE; break; } if (cc == '"') { if (collect_object) {
*** 3491,3501 **** * the string is part of a larger * object being collected. */ *dd++ = cc; collect_object = B_FALSE; ! state = JSON_COLLECT_OBJECT; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (string_is_key) { --- 3576,3586 ---- * the string is part of a larger * object being collected. */ *dd++ = cc; collect_object = B_FALSE; ! state = DTRACE_JSON_COLLECT_OBJECT; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (string_is_key) {
*** 3510,3575 **** */ return (NULL); } return (dest); } ! state = string_is_key ? JSON_COLON : ! JSON_COMMA; string_is_key = B_FALSE; break; } *dd++ = cc; break; ! case JSON_STRING_ESCAPE: *dd++ = cc; if (cc == 'u') { escape_unicount = 0; ! state = JSON_STRING_ESCAPE_UNICODE; } else { ! state = JSON_STRING; } break; ! case JSON_STRING_ESCAPE_UNICODE: ! if (!isxdigit(cc)) ! /* ERROR: unvalid unicode escape */ return (NULL); *dd++ = cc; if (++escape_unicount == 4) ! state = JSON_STRING; break; ! case JSON_COLON: ! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r') ! break; /* eat whitespace */ if (cc == ':') { ! state = JSON_VALUE; break; } ! /* ERROR: expected colon */ return (NULL); ! case JSON_COMMA: ! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r') ! break; /* eat whitespace */ if (cc == ',') { if (in_array) { ! state = JSON_VALUE; if (++array_pos == array_elem) found_key = B_TRUE; } else { ! state = JSON_OBJECT; } break; } ! /* ERROR: key not found or expected comma */ return (NULL); ! case JSON_IDENTIFIER: ! if (cc >= 'a' && cc <= 'z') { *dd++ = cc; break; } *dd = '\0'; --- 3595,3670 ---- */ return (NULL); } return (dest); } ! state = string_is_key ? DTRACE_JSON_COLON : ! DTRACE_JSON_COMMA; string_is_key = B_FALSE; break; } *dd++ = cc; break; ! case DTRACE_JSON_STRING_ESCAPE: *dd++ = cc; if (cc == 'u') { escape_unicount = 0; ! state = DTRACE_JSON_STRING_ESCAPE_UNICODE; } else { ! state = DTRACE_JSON_STRING; } break; ! case DTRACE_JSON_STRING_ESCAPE_UNICODE: ! if (!isxdigit(cc)) { ! /* ! * ERROR: invalid unicode escape, expected ! * four valid hexadecimal digits. ! */ return (NULL); + } *dd++ = cc; if (++escape_unicount == 4) ! state = DTRACE_JSON_STRING; break; ! case DTRACE_JSON_COLON: ! if (isspace(cc)) ! break; if (cc == ':') { ! state = DTRACE_JSON_VALUE; break; } ! /* ! * ERROR: expected a colon. ! */ return (NULL); ! case DTRACE_JSON_COMMA: ! if (isspace(cc)) ! break; if (cc == ',') { if (in_array) { ! 
state = DTRACE_JSON_VALUE; if (++array_pos == array_elem) found_key = B_TRUE; } else { ! state = DTRACE_JSON_OBJECT; } break; } ! /* ! * ERROR: either we hit an unexpected character, or ! * we reached the end of the object or array without ! * finding the requested key. ! */ return (NULL); ! case DTRACE_JSON_IDENTIFIER: ! if (islower(cc)) { *dd++ = cc; break; } *dd = '\0';
*** 3579,3659 **** dtrace_strncmp(dest, "false", 6) == 0 || dtrace_strncmp(dest, "null", 5) == 0) { if (found_key) { if (nelems > 1) { /* ! * We expected an object, not ! * this identifier. */ return (NULL); } return (dest); } else { cur--; ! state = JSON_COMMA; break; } } ! /* ERROR: unexpected identifier */ return (NULL); ! case JSON_NUMBER: if (cc == '.') { *dd++ = cc; ! state = JSON_NUMBER_FRAC; break; } ! if (cc == 'x' || cc == 'X') ! /* ERROR: spec explicitly excludes hex */ return (NULL); /* FALLTHRU */ ! case JSON_NUMBER_FRAC: if (cc == 'e' || cc == 'E') { *dd++ = cc; ! state = JSON_NUMBER_EXP; break; } if (cc == '+' || cc == '-') { /* ! * ERROR: expect sign as part of exponent only */ return (NULL); } /* FALLTHRU */ ! case JSON_NUMBER_EXP: ! if ((cc >= '0' && cc <= '9') || cc == '+' || ! cc == '-') { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (found_key) { if (nelems > 1) { /* ! * We expected an object, not this ! * number. */ return (NULL); } return (dest); } cur--; ! state = JSON_COMMA; break; ! case JSON_VALUE: ! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r') ! break; /* eat whitespace */ if (cc == '{' || cc == '[') { if (nelems > 1 && found_key) { ! in_array = !!(cc == '['); /* * If our element selector directs us * to descend into this nested object, * then move to the next selector * element in the list and restart the --- 3674,3760 ---- dtrace_strncmp(dest, "false", 6) == 0 || dtrace_strncmp(dest, "null", 5) == 0) { if (found_key) { if (nelems > 1) { /* ! * ERROR: We expected an object, ! * not this identifier. */ return (NULL); } return (dest); } else { cur--; ! state = DTRACE_JSON_COMMA; break; } } ! /* ! * ERROR: we did not recognise the identifier as one ! * of those in the JSON specification. ! */ return (NULL); ! case DTRACE_JSON_NUMBER: if (cc == '.') { *dd++ = cc; ! state = DTRACE_JSON_NUMBER_FRAC; break; } ! if (cc == 'x' || cc == 'X') { ! /* ! * ERROR: specification explicitly excludes ! 
* hexadecimal or octal numbers. ! */ return (NULL); + } /* FALLTHRU */ ! case DTRACE_JSON_NUMBER_FRAC: if (cc == 'e' || cc == 'E') { *dd++ = cc; ! state = DTRACE_JSON_NUMBER_EXP; break; } if (cc == '+' || cc == '-') { /* ! * ERROR: expect sign as part of exponent only. */ return (NULL); } /* FALLTHRU */ ! case DTRACE_JSON_NUMBER_EXP: ! if (isdigit(cc) || cc == '+' || cc == '-') { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (found_key) { if (nelems > 1) { /* ! * ERROR: We expected an object, not ! * this number. */ return (NULL); } return (dest); } cur--; ! state = DTRACE_JSON_COMMA; break; ! case DTRACE_JSON_VALUE: ! if (isspace(cc)) ! break; if (cc == '{' || cc == '[') { if (nelems > 1 && found_key) { ! in_array = cc == '[' ? B_TRUE : B_FALSE; /* * If our element selector directs us * to descend into this nested object, * then move to the next selector * element in the list and restart the
*** 3663,3680 **** elem++; elem++; /* skip the inter-element NUL */ nelems--; dd = dest; if (in_array) { ! state = JSON_VALUE; array_pos = 0; array_elem = dtrace_strtoll( elem, 10, size); ! found_key = !!(array_elem == 0); } else { found_key = B_FALSE; ! state = JSON_OBJECT; } break; } /* --- 3764,3782 ---- elem++; elem++; /* skip the inter-element NUL */ nelems--; dd = dest; if (in_array) { ! state = DTRACE_JSON_VALUE; array_pos = 0; array_elem = dtrace_strtoll( elem, 10, size); ! found_key = array_elem == 0 ? ! B_TRUE : B_FALSE; } else { found_key = B_FALSE; ! state = DTRACE_JSON_OBJECT; } break; } /*
*** 3684,3737 **** if (cc == '[') brackets = 1; else braces = 1; *dd++ = cc; ! state = JSON_COLLECT_OBJECT; break; } if (cc == '"') { ! state = JSON_STRING; break; } ! if (cc >= 'a' && cc <= 'z') { ! /* Here we deal with true, false and null */ *dd++ = cc; ! state = JSON_IDENTIFIER; break; } ! if (cc == '-' || (cc >= '0' && cc <= '9')) { *dd++ = cc; ! state = JSON_NUMBER; break; } ! /* ERROR: unexpected character */ return (NULL); ! case JSON_COLLECT_OBJECT: if (cc == '\0') ! /* ERROR: unexpected end of input */ return (NULL); *dd++ = cc; if (cc == '"') { collect_object = B_TRUE; ! state = JSON_STRING; break; } if (cc == ']') { if (brackets-- == 0) { ! /* ERROR: unbalanced brackets */ return (NULL); } } else if (cc == '}') { if (braces-- == 0) { ! /* ERROR: unbalanced braces */ return (NULL); } } else if (cc == '{') { braces++; } else if (cc == '[') { --- 3786,3849 ---- if (cc == '[') brackets = 1; else braces = 1; *dd++ = cc; ! state = DTRACE_JSON_COLLECT_OBJECT; break; } if (cc == '"') { ! state = DTRACE_JSON_STRING; break; } ! if (islower(cc)) { ! /* ! * Here we deal with true, false and null. ! */ *dd++ = cc; ! state = DTRACE_JSON_IDENTIFIER; break; } ! if (cc == '-' || isdigit(cc)) { *dd++ = cc; ! state = DTRACE_JSON_NUMBER; break; } ! /* ! * ERROR: unexpected character at start of value. ! */ return (NULL); ! case DTRACE_JSON_COLLECT_OBJECT: if (cc == '\0') ! /* ! * ERROR: unexpected end of input. ! */ return (NULL); *dd++ = cc; if (cc == '"') { collect_object = B_TRUE; ! state = DTRACE_JSON_STRING; break; } if (cc == ']') { if (brackets-- == 0) { ! /* ! * ERROR: unbalanced brackets. ! */ return (NULL); } } else if (cc == '}') { if (braces-- == 0) { ! /* ! * ERROR: unbalanced braces. ! */ return (NULL); } } else if (cc == '{') { braces++; } else if (cc == '[') {
*** 3742,3752 **** if (found_key) { *dd = '\0'; return (dest); } dd = dest; /* reset string buffer */ ! state = JSON_COMMA; } break; } } return (NULL); --- 3854,3864 ---- if (found_key) { *dd = '\0'; return (dest); } dd = dest; /* reset string buffer */ ! state = DTRACE_JSON_COMMA; } break; } } return (NULL);
*** 4482,4494 **** * of strings. */ for (cur = elem; cur < elem + elemlen; cur++) { char cc = dtrace_load8(cur); ! if (cur == elem && cc == '[') ! /* first element selector may be an array */ continue; if (cc == ']') continue; if (cc == '.' || cc == '[') { --- 4594,4611 ---- * of strings. */ for (cur = elem; cur < elem + elemlen; cur++) { char cc = dtrace_load8(cur); ! if (cur == elem && cc == '[') { ! /* ! * If the first element selector key is ! * actually an array index then ignore the ! * bracket. ! */ continue; + } if (cc == ']') continue; if (cc == '.' || cc == '[') {