Print this page
OS-1723 DTrace should speak JSON (review fixes)
*** 877,886 ****
--- 877,890 ----
return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}
/*
* Convert a string to a signed integer using safe loads.
+ *
+ * NOTE: This function uses various macros from strtolctype.h to manipulate
+ * digit values, etc -- these have all been checked to ensure they make
+ * no additional function calls.
*/
static int64_t
dtrace_strtoll(char *input, int base, size_t limit)
{
uintptr_t pos = (uintptr_t)input;
*** 888,916 ****
int x;
boolean_t neg = B_FALSE;
char c, cc, ccc;
uintptr_t end = pos + limit;
! /* eat whitespace */
while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
pos++;
! /* sign? */
if (c == '-' || c == '+') {
if (c == '-')
neg = B_TRUE;
c = dtrace_load8(++pos);
}
! /* hex prefix? */
if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
! pos += 2; /* skip over leading "0x" or "0X" */
c = ccc;
}
! /* read in digits */
for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
c = dtrace_load8(++pos))
val = val * base + x;
return (neg ? -val : val);
--- 892,929 ----
int x;
boolean_t neg = B_FALSE;
char c, cc, ccc;
uintptr_t end = pos + limit;
! /*
! * Consume any whitespace preceding digits.
! */
while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
pos++;
! /*
! * Handle an explicit sign if one is present.
! */
if (c == '-' || c == '+') {
if (c == '-')
neg = B_TRUE;
c = dtrace_load8(++pos);
}
! /*
! * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
! * if present.
! */
if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
! pos += 2;
c = ccc;
}
! /*
! * Read in contiguous digits until the first non-digit character.
! */
for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
c = dtrace_load8(++pos))
val = val * base + x;
return (neg ? -val : val);
*** 3392,3416 ****
return (0);
}
}
! typedef enum json_state {
! JSON_REST = 1,
! JSON_OBJECT,
! JSON_STRING,
! JSON_STRING_ESCAPE,
! JSON_STRING_ESCAPE_UNICODE,
! JSON_COLON,
! JSON_COMMA,
! JSON_VALUE,
! JSON_IDENTIFIER,
! JSON_NUMBER,
! JSON_NUMBER_FRAC,
! JSON_NUMBER_EXP,
! JSON_COLLECT_OBJECT
! } json_state_t;
/*
* This function possesses just enough knowledge about JSON to extract a single
* value from a JSON string and store it in the scratch buffer. It is able
* to extract nested object values, and members of arrays by index.
--- 3405,3429 ----
return (0);
}
}
! typedef enum dtrace_json_state {
! DTRACE_JSON_REST = 1,
! DTRACE_JSON_OBJECT,
! DTRACE_JSON_STRING,
! DTRACE_JSON_STRING_ESCAPE,
! DTRACE_JSON_STRING_ESCAPE_UNICODE,
! DTRACE_JSON_COLON,
! DTRACE_JSON_COMMA,
! DTRACE_JSON_VALUE,
! DTRACE_JSON_IDENTIFIER,
! DTRACE_JSON_NUMBER,
! DTRACE_JSON_NUMBER_FRAC,
! DTRACE_JSON_NUMBER_EXP,
! DTRACE_JSON_COLLECT_OBJECT
! } dtrace_json_state_t;
/*
* This function possesses just enough knowledge about JSON to extract a single
* value from a JSON string and store it in the scratch buffer. It is able
* to extract nested object values, and members of arrays by index.
*** 3418,3488 ****
* elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to
* be looked up as we descend into the object tree. e.g.
*
* foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL
* with nelems = 5.
*/
static char *
dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
char *dest)
{
! json_state_t state = JSON_REST;
uint64_t i;
int64_t array_elem = INT64_MIN;
int64_t array_pos = 0;
uint8_t escape_unicount = 0;
boolean_t string_is_key = B_FALSE;
boolean_t collect_object = B_FALSE;
boolean_t found_key = B_FALSE;
boolean_t in_array = B_FALSE;
! uint8_t braces = 0, brackets = 0;
char *elem = elemlist;
char *dd = dest;
uintptr_t cur;
for (cur = json; cur < json + size; cur++) {
char cc = dtrace_load8(cur);
! if (cc == '\0' || braces > 250)
return (NULL);
switch (state) {
! case JSON_REST:
! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r')
! break; /* eat whitespace */
if (cc == '{') {
! state = JSON_OBJECT;
break;
}
if (cc == '[') {
in_array = B_TRUE;
array_pos = 0;
array_elem = dtrace_strtoll(elem, 10, size);
! found_key = !!(array_elem == 0);
! state = JSON_VALUE;
break;
}
! /* ERROR: expected object or array */
return (NULL);
! case JSON_OBJECT:
! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r')
! break; /* eat whitespace */
if (cc == '"') {
! state = JSON_STRING;
string_is_key = B_TRUE;
break;
}
! /* ERROR: key not found! */
return (NULL);
! case JSON_STRING:
if (cc == '\\') {
*dd++ = '\\';
! state = JSON_STRING_ESCAPE;
break;
}
if (cc == '"') {
if (collect_object) {
--- 3431,3573 ----
* elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to
* be looked up as we descend into the object tree. e.g.
*
* foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL
* with nelems = 5.
+ *
+ * The run time of this function must be bounded above by strsize to limit the
+ * amount of work done in probe context. As such, it is implemented as a
+ * simple state machine, reading one character at a time using safe loads
+ * until we find the requested element, hit a parsing error or run off the
+ * end of the object or string.
+ *
+ * As there is no way for a subroutine to return an error without interrupting
+ * clause execution, we simply return NULL in the event of a missing key or any
+ * other error condition. Each NULL return in this function is commented with
+ * the error condition it represents -- parsing or otherwise.
+ *
+ * The set of states for the state machine closely matches the JSON
+ * specification (http://json.org/). Briefly:
+ *
+ * DTRACE_JSON_REST:
+ * Skip whitespace until we find either a top-level Object, moving
+ * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE.
+ *
+ * DTRACE_JSON_OBJECT:
+ * Locate the next key String in an Object. Sets a flag to denote
+ * the next String as a key string and moves to DTRACE_JSON_STRING.
+ *
+ * DTRACE_JSON_COLON:
+ * Skip whitespace until we find the colon that separates key Strings
+ * from their values. Once found, move to DTRACE_JSON_VALUE.
+ *
+ * DTRACE_JSON_VALUE:
+ * Detects the type of the next value (String, Number, Identifier, Object
+ * or Array) and routes to the states that process that type. Here we also
+ * deal with the element selector list if we are requested to traverse down
+ * into the object tree.
+ *
+ * DTRACE_JSON_COMMA:
+ * Skip whitespace until we find the comma that separates key-value pairs
+ * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays
+ * (similarly DTRACE_JSON_VALUE). All following literal value processing
+ * states return to this state at the end of their value, unless otherwise
+ * noted.
+ *
+ * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP:
+ * Processes a Number literal from the JSON, including any exponent
+ * component that may be present. Numbers are returned as strings, which
+ * may be passed to strtoll() if an integer is required.
+ *
+ * DTRACE_JSON_IDENTIFIER:
+ * Processes a "true", "false" or "null" literal in the JSON.
+ *
+ * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE,
+ * DTRACE_JSON_STRING_ESCAPE_UNICODE:
+ * Processes a String literal from the JSON, whether the String denotes
+ * a key, a value or part of a larger Object. Handles all escape sequences
+ * present in the specification, including four-digit unicode characters,
+ * but merely includes the escape sequence without converting it to the
+ * actual escaped character. If the String is flagged as a key, we
+ * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA.
+ *
+ * DTRACE_JSON_COLLECT_OBJECT:
+ * This state collects an entire Object (or Array), correctly handling
+ * embedded strings. If the full element selector list matches this nested
+ * object, we return the Object in full as a string. If not, we use this
+ * state to skip to the next value at this level and continue processing.
+ *
+ * NOTE: This function uses various macros from strtolctype.h to manipulate
+ * digit values, etc -- these have all been checked to ensure they make
+ * no additional function calls.
*/
static char *
dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
char *dest)
{
! dtrace_json_state_t state = DTRACE_JSON_REST;
uint64_t i;
int64_t array_elem = INT64_MIN;
int64_t array_pos = 0;
uint8_t escape_unicount = 0;
boolean_t string_is_key = B_FALSE;
boolean_t collect_object = B_FALSE;
boolean_t found_key = B_FALSE;
boolean_t in_array = B_FALSE;
! uint32_t braces = 0, brackets = 0;
char *elem = elemlist;
char *dd = dest;
uintptr_t cur;
for (cur = json; cur < json + size; cur++) {
char cc = dtrace_load8(cur);
! if (cc == '\0')
return (NULL);
switch (state) {
! case DTRACE_JSON_REST:
! if (isspace(cc))
! break;
if (cc == '{') {
! state = DTRACE_JSON_OBJECT;
break;
}
if (cc == '[') {
in_array = B_TRUE;
array_pos = 0;
array_elem = dtrace_strtoll(elem, 10, size);
! found_key = array_elem == 0 ? B_TRUE : B_FALSE;
! state = DTRACE_JSON_VALUE;
break;
}
! /*
! * ERROR: expected to find a top-level object or array.
! */
return (NULL);
! case DTRACE_JSON_OBJECT:
! if (isspace(cc))
! break;
if (cc == '"') {
! state = DTRACE_JSON_STRING;
string_is_key = B_TRUE;
break;
}
! /*
! * ERROR: either the object did not start with a key
! * string, or we've run off the end of the object
! * without finding the requested key.
! */
return (NULL);
! case DTRACE_JSON_STRING:
if (cc == '\\') {
*dd++ = '\\';
! state = DTRACE_JSON_STRING_ESCAPE;
break;
}
if (cc == '"') {
if (collect_object) {
*** 3491,3501 ****
* the string is part of a larger
* object being collected.
*/
*dd++ = cc;
collect_object = B_FALSE;
! state = JSON_COLLECT_OBJECT;
break;
}
*dd = '\0';
dd = dest; /* reset string buffer */
if (string_is_key) {
--- 3576,3586 ----
* the string is part of a larger
* object being collected.
*/
*dd++ = cc;
collect_object = B_FALSE;
! state = DTRACE_JSON_COLLECT_OBJECT;
break;
}
*dd = '\0';
dd = dest; /* reset string buffer */
if (string_is_key) {
*** 3510,3575 ****
*/
return (NULL);
}
return (dest);
}
! state = string_is_key ? JSON_COLON :
! JSON_COMMA;
string_is_key = B_FALSE;
break;
}
*dd++ = cc;
break;
! case JSON_STRING_ESCAPE:
*dd++ = cc;
if (cc == 'u') {
escape_unicount = 0;
! state = JSON_STRING_ESCAPE_UNICODE;
} else {
! state = JSON_STRING;
}
break;
! case JSON_STRING_ESCAPE_UNICODE:
! if (!isxdigit(cc))
! /* ERROR: unvalid unicode escape */
return (NULL);
*dd++ = cc;
if (++escape_unicount == 4)
! state = JSON_STRING;
break;
! case JSON_COLON:
! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r')
! break; /* eat whitespace */
if (cc == ':') {
! state = JSON_VALUE;
break;
}
! /* ERROR: expected colon */
return (NULL);
! case JSON_COMMA:
! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r')
! break; /* eat whitespace */
if (cc == ',') {
if (in_array) {
! state = JSON_VALUE;
if (++array_pos == array_elem)
found_key = B_TRUE;
} else {
! state = JSON_OBJECT;
}
break;
}
! /* ERROR: key not found or expected comma */
return (NULL);
! case JSON_IDENTIFIER:
! if (cc >= 'a' && cc <= 'z') {
*dd++ = cc;
break;
}
*dd = '\0';
--- 3595,3670 ----
*/
return (NULL);
}
return (dest);
}
! state = string_is_key ? DTRACE_JSON_COLON :
! DTRACE_JSON_COMMA;
string_is_key = B_FALSE;
break;
}
*dd++ = cc;
break;
! case DTRACE_JSON_STRING_ESCAPE:
*dd++ = cc;
if (cc == 'u') {
escape_unicount = 0;
! state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
} else {
! state = DTRACE_JSON_STRING;
}
break;
! case DTRACE_JSON_STRING_ESCAPE_UNICODE:
! if (!isxdigit(cc)) {
! /*
! * ERROR: invalid unicode escape, expected
! * four valid hexadecimal digits.
! */
return (NULL);
+ }
*dd++ = cc;
if (++escape_unicount == 4)
! state = DTRACE_JSON_STRING;
break;
! case DTRACE_JSON_COLON:
! if (isspace(cc))
! break;
if (cc == ':') {
! state = DTRACE_JSON_VALUE;
break;
}
! /*
! * ERROR: expected a colon.
! */
return (NULL);
! case DTRACE_JSON_COMMA:
! if (isspace(cc))
! break;
if (cc == ',') {
if (in_array) {
! state = DTRACE_JSON_VALUE;
if (++array_pos == array_elem)
found_key = B_TRUE;
} else {
! state = DTRACE_JSON_OBJECT;
}
break;
}
! /*
! * ERROR: either we hit an unexpected character, or
! * we reached the end of the object or array without
! * finding the requested key.
! */
return (NULL);
! case DTRACE_JSON_IDENTIFIER:
! if (islower(cc)) {
*dd++ = cc;
break;
}
*dd = '\0';
*** 3579,3659 ****
dtrace_strncmp(dest, "false", 6) == 0 ||
dtrace_strncmp(dest, "null", 5) == 0) {
if (found_key) {
if (nelems > 1) {
/*
! * We expected an object, not
! * this identifier.
*/
return (NULL);
}
return (dest);
} else {
cur--;
! state = JSON_COMMA;
break;
}
}
! /* ERROR: unexpected identifier */
return (NULL);
! case JSON_NUMBER:
if (cc == '.') {
*dd++ = cc;
! state = JSON_NUMBER_FRAC;
break;
}
! if (cc == 'x' || cc == 'X')
! /* ERROR: spec explicitly excludes hex */
return (NULL);
/* FALLTHRU */
! case JSON_NUMBER_FRAC:
if (cc == 'e' || cc == 'E') {
*dd++ = cc;
! state = JSON_NUMBER_EXP;
break;
}
if (cc == '+' || cc == '-') {
/*
! * ERROR: expect sign as part of exponent only
*/
return (NULL);
}
/* FALLTHRU */
! case JSON_NUMBER_EXP:
! if ((cc >= '0' && cc <= '9') || cc == '+' ||
! cc == '-') {
*dd++ = cc;
break;
}
*dd = '\0';
dd = dest; /* reset string buffer */
if (found_key) {
if (nelems > 1) {
/*
! * We expected an object, not this
! * number.
*/
return (NULL);
}
return (dest);
}
cur--;
! state = JSON_COMMA;
break;
! case JSON_VALUE:
! if (cc == ' ' || cc == '\t' || cc == '\n' || cc == '\r')
! break; /* eat whitespace */
if (cc == '{' || cc == '[') {
if (nelems > 1 && found_key) {
! in_array = !!(cc == '[');
/*
* If our element selector directs us
* to descend into this nested object,
* then move to the next selector
* element in the list and restart the
--- 3674,3760 ----
dtrace_strncmp(dest, "false", 6) == 0 ||
dtrace_strncmp(dest, "null", 5) == 0) {
if (found_key) {
if (nelems > 1) {
/*
! * ERROR: We expected an object,
! * not this identifier.
*/
return (NULL);
}
return (dest);
} else {
cur--;
! state = DTRACE_JSON_COMMA;
break;
}
}
! /*
! * ERROR: we did not recognise the identifier as one
! * of those in the JSON specification.
! */
return (NULL);
! case DTRACE_JSON_NUMBER:
if (cc == '.') {
*dd++ = cc;
! state = DTRACE_JSON_NUMBER_FRAC;
break;
}
! if (cc == 'x' || cc == 'X') {
! /*
! * ERROR: specification explicitly excludes
! * hexadecimal or octal numbers.
! */
return (NULL);
+ }
/* FALLTHRU */
! case DTRACE_JSON_NUMBER_FRAC:
if (cc == 'e' || cc == 'E') {
*dd++ = cc;
! state = DTRACE_JSON_NUMBER_EXP;
break;
}
if (cc == '+' || cc == '-') {
/*
! * ERROR: expect sign as part of exponent only.
*/
return (NULL);
}
/* FALLTHRU */
! case DTRACE_JSON_NUMBER_EXP:
! if (isdigit(cc) || cc == '+' || cc == '-') {
*dd++ = cc;
break;
}
*dd = '\0';
dd = dest; /* reset string buffer */
if (found_key) {
if (nelems > 1) {
/*
! * ERROR: We expected an object, not
! * this number.
*/
return (NULL);
}
return (dest);
}
cur--;
! state = DTRACE_JSON_COMMA;
break;
! case DTRACE_JSON_VALUE:
! if (isspace(cc))
! break;
if (cc == '{' || cc == '[') {
if (nelems > 1 && found_key) {
! in_array = cc == '[' ? B_TRUE : B_FALSE;
/*
* If our element selector directs us
* to descend into this nested object,
* then move to the next selector
* element in the list and restart the
*** 3663,3680 ****
elem++;
elem++; /* skip the inter-element NUL */
nelems--;
dd = dest;
if (in_array) {
! state = JSON_VALUE;
array_pos = 0;
array_elem = dtrace_strtoll(
elem, 10, size);
! found_key = !!(array_elem == 0);
} else {
found_key = B_FALSE;
! state = JSON_OBJECT;
}
break;
}
/*
--- 3764,3782 ----
elem++;
elem++; /* skip the inter-element NUL */
nelems--;
dd = dest;
if (in_array) {
! state = DTRACE_JSON_VALUE;
array_pos = 0;
array_elem = dtrace_strtoll(
elem, 10, size);
! found_key = array_elem == 0 ?
! B_TRUE : B_FALSE;
} else {
found_key = B_FALSE;
! state = DTRACE_JSON_OBJECT;
}
break;
}
/*
*** 3684,3737 ****
if (cc == '[')
brackets = 1;
else
braces = 1;
*dd++ = cc;
! state = JSON_COLLECT_OBJECT;
break;
}
if (cc == '"') {
! state = JSON_STRING;
break;
}
! if (cc >= 'a' && cc <= 'z') {
! /* Here we deal with true, false and null */
*dd++ = cc;
! state = JSON_IDENTIFIER;
break;
}
! if (cc == '-' || (cc >= '0' && cc <= '9')) {
*dd++ = cc;
! state = JSON_NUMBER;
break;
}
! /* ERROR: unexpected character */
return (NULL);
! case JSON_COLLECT_OBJECT:
if (cc == '\0')
! /* ERROR: unexpected end of input */
return (NULL);
*dd++ = cc;
if (cc == '"') {
collect_object = B_TRUE;
! state = JSON_STRING;
break;
}
if (cc == ']') {
if (brackets-- == 0) {
! /* ERROR: unbalanced brackets */
return (NULL);
}
} else if (cc == '}') {
if (braces-- == 0) {
! /* ERROR: unbalanced braces */
return (NULL);
}
} else if (cc == '{') {
braces++;
} else if (cc == '[') {
--- 3786,3849 ----
if (cc == '[')
brackets = 1;
else
braces = 1;
*dd++ = cc;
! state = DTRACE_JSON_COLLECT_OBJECT;
break;
}
if (cc == '"') {
! state = DTRACE_JSON_STRING;
break;
}
! if (islower(cc)) {
! /*
! * Here we deal with true, false and null.
! */
*dd++ = cc;
! state = DTRACE_JSON_IDENTIFIER;
break;
}
! if (cc == '-' || isdigit(cc)) {
*dd++ = cc;
! state = DTRACE_JSON_NUMBER;
break;
}
! /*
! * ERROR: unexpected character at start of value.
! */
return (NULL);
! case DTRACE_JSON_COLLECT_OBJECT:
if (cc == '\0')
! /*
! * ERROR: unexpected end of input.
! */
return (NULL);
*dd++ = cc;
if (cc == '"') {
collect_object = B_TRUE;
! state = DTRACE_JSON_STRING;
break;
}
if (cc == ']') {
if (brackets-- == 0) {
! /*
! * ERROR: unbalanced brackets.
! */
return (NULL);
}
} else if (cc == '}') {
if (braces-- == 0) {
! /*
! * ERROR: unbalanced braces.
! */
return (NULL);
}
} else if (cc == '{') {
braces++;
} else if (cc == '[') {
*** 3742,3752 ****
if (found_key) {
*dd = '\0';
return (dest);
}
dd = dest; /* reset string buffer */
! state = JSON_COMMA;
}
break;
}
}
return (NULL);
--- 3854,3864 ----
if (found_key) {
*dd = '\0';
return (dest);
}
dd = dest; /* reset string buffer */
! state = DTRACE_JSON_COMMA;
}
break;
}
}
return (NULL);
*** 4482,4494 ****
* of strings.
*/
for (cur = elem; cur < elem + elemlen; cur++) {
char cc = dtrace_load8(cur);
! if (cur == elem && cc == '[')
! /* first element selector may be an array */
continue;
if (cc == ']')
continue;
if (cc == '.' || cc == '[') {
--- 4594,4611 ----
* of strings.
*/
for (cur = elem; cur < elem + elemlen; cur++) {
char cc = dtrace_load8(cur);
! if (cur == elem && cc == '[') {
! /*
! * If the first element selector key is
! * actually an array index then ignore the
! * bracket.
! */
continue;
+ }
if (cc == ']')
continue;
if (cc == '.' || cc == '[') {