Print this page
4477 DTrace should speak JSON
Reviewed by: Bryan Cantrill <bmc@joyent.com>

@@ -90,10 +90,11 @@
 #include <sys/mkdev.h>
 #include <sys/kdi.h>
 #include <sys/zone.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include "strtolctype.h"
 
 /*
  * DTrace Tunable Variables
  *
  * The following variables may be tuned by adding a line to /etc/system that

@@ -857,10 +858,62 @@
 
         return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
 }
 
 /*
+ * Convert a string to a signed integer using safe loads.
+ *
+ * NOTE: This function uses various macros from strtolctype.h to manipulate
+ * digit values, etc -- these have all been checked to ensure they make
+ * no additional function calls.
+ */
+static int64_t
+dtrace_strtoll(char *input, int base, size_t limit)
+{
+        uintptr_t pos = (uintptr_t)input;
+        int64_t val = 0;
+        int x;
+        boolean_t neg = B_FALSE;
+        char c, cc, ccc;
+        uintptr_t end = pos + limit;
+
+        /*
+         * Consume any whitespace preceding digits.
+         */
+        while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
+                pos++;
+
+        /*
+         * Handle an explicit sign if one is present.
+         */
+        if (c == '-' || c == '+') {
+                if (c == '-')
+                        neg = B_TRUE;
+                c = dtrace_load8(++pos);
+        }
+
+        /*
+         * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
+         * if present.
+         */
+        if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
+            cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
+                pos += 2;
+                c = ccc;
+        }
+
+        /*
+         * Read in contiguous digits until the first non-digit character.
+         */
+        for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
+            c = dtrace_load8(++pos))
+                val = val * base + x;
+
+        return (neg ? -val : val);
+}
+
+/*
  * Compare two strings using safe loads.
  */
 static int
 dtrace_strncmp(char *s1, char *s2, size_t limit)
 {

@@ -3333,10 +3386,467 @@
                 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                 return (0);
         }
 }
 
+
+typedef enum dtrace_json_state {
+        DTRACE_JSON_REST = 1,
+        DTRACE_JSON_OBJECT,
+        DTRACE_JSON_STRING,
+        DTRACE_JSON_STRING_ESCAPE,
+        DTRACE_JSON_STRING_ESCAPE_UNICODE,
+        DTRACE_JSON_COLON,
+        DTRACE_JSON_COMMA,
+        DTRACE_JSON_VALUE,
+        DTRACE_JSON_IDENTIFIER,
+        DTRACE_JSON_NUMBER,
+        DTRACE_JSON_NUMBER_FRAC,
+        DTRACE_JSON_NUMBER_EXP,
+        DTRACE_JSON_COLLECT_OBJECT
+} dtrace_json_state_t;
+
+/*
+ * This function possesses just enough knowledge about JSON to extract a single
+ * value from a JSON string and store it in the scratch buffer.  It is able
+ * to extract nested object values, and members of arrays by index.
+ *
+ * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to
+ * be looked up as we descend into the object tree.  e.g.
+ *
+ *    foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL
+ *       with nelems = 5.
+ *
+ * The run time of this function must be bounded above by strsize to limit the
+ * amount of work done in probe context.  As such, it is implemented as a
+ * simple state machine, reading one character at a time using safe loads
+ * until we find the requested element, hit a parsing error or run off the
+ * end of the object or string.
+ *
+ * As there is no way for a subroutine to return an error without interrupting
+ * clause execution, we simply return NULL in the event of a missing key or any
+ * other error condition.  Each NULL return in this function is commented with
+ * the error condition it represents -- parsing or otherwise.
+ *
+ * The set of states for the state machine closely matches the JSON
+ * specification (http://json.org/).  Briefly:
+ *
+ *   DTRACE_JSON_REST:
+ *     Skip whitespace until we find either a top-level Object, moving
+ *     to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE.
+ *
+ *   DTRACE_JSON_OBJECT:
+ *     Locate the next key String in an Object.  Sets a flag to denote
+ *     the next String as a key string and moves to DTRACE_JSON_STRING.
+ *
+ *   DTRACE_JSON_COLON:
+ *     Skip whitespace until we find the colon that separates key Strings
+ *     from their values.  Once found, move to DTRACE_JSON_VALUE.
+ *
+ *   DTRACE_JSON_VALUE:
+ *     Detects the type of the next value (String, Number, Identifier, Object
+ *     or Array) and routes to the states that process that type.  Here we also
+ *     deal with the element selector list if we are requested to traverse down
+ *     into the object tree.
+ *
+ *   DTRACE_JSON_COMMA:
+ *     Skip whitespace until we find the comma that separates key-value pairs
+ *     in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays
+ *     (similarly DTRACE_JSON_VALUE).  All following literal value processing
+ *     states return to this state at the end of their value, unless otherwise
+ *     noted.
+ *
+ *   DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP:
+ *     Processes a Number literal from the JSON, including any exponent
+ *     component that may be present.  Numbers are returned as strings, which
+ *     may be passed to strtoll() if an integer is required.
+ *
+ *   DTRACE_JSON_IDENTIFIER:
+ *     Processes a "true", "false" or "null" literal in the JSON.
+ *
+ *   DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE,
+ *   DTRACE_JSON_STRING_ESCAPE_UNICODE:
+ *     Processes a String literal from the JSON, whether the String denotes
+ *     a key, a value or part of a larger Object.  Handles all escape sequences
+ *     present in the specification, including four-digit unicode characters,
+ *     but merely includes the escape sequence without converting it to the
+ *     actual escaped character.  If the String is flagged as a key, we
+ *     move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA.
+ *
+ *   DTRACE_JSON_COLLECT_OBJECT:
+ *     This state collects an entire Object (or Array), correctly handling
+ *     embedded strings.  If the full element selector list matches this nested
+ *     object, we return the Object in full as a string.  If not, we use this
+ *     state to skip to the next value at this level and continue processing.
+ *
+ * NOTE: This function uses various macros from strtolctype.h to manipulate
+ * digit values, etc -- these have all been checked to ensure they make
+ * no additional function calls.
+ */
+static char *
+dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
+    char *dest)
+{
+        dtrace_json_state_t state = DTRACE_JSON_REST;
+        int64_t array_elem = INT64_MIN;
+        int64_t array_pos = 0;
+        uint8_t escape_unicount = 0;
+        boolean_t string_is_key = B_FALSE;
+        boolean_t collect_object = B_FALSE;
+        boolean_t found_key = B_FALSE;
+        boolean_t in_array = B_FALSE;
+        uint32_t braces = 0, brackets = 0;
+        char *elem = elemlist;
+        char *dd = dest;
+        uintptr_t cur;
+
+        for (cur = json; cur < json + size; cur++) {
+                char cc = dtrace_load8(cur);
+                if (cc == '\0')
+                        return (NULL);
+
+                switch (state) {
+                case DTRACE_JSON_REST:
+                        if (isspace(cc))
+                                break;
+
+                        if (cc == '{') {
+                                state = DTRACE_JSON_OBJECT;
+                                break;
+                        }
+
+                        if (cc == '[') {
+                                in_array = B_TRUE;
+                                array_pos = 0;
+                                array_elem = dtrace_strtoll(elem, 10, size);
+                                found_key = array_elem == 0 ? B_TRUE : B_FALSE;
+                                state = DTRACE_JSON_VALUE;
+                                break;
+                        }
+
+                        /*
+                         * ERROR: expected to find a top-level object or array.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_OBJECT:
+                        if (isspace(cc))
+                                break;
+
+                        if (cc == '"') {
+                                state = DTRACE_JSON_STRING;
+                                string_is_key = B_TRUE;
+                                break;
+                        }
+
+                        /*
+                         * ERROR: either the object did not start with a key
+                         * string, or we've run off the end of the object
+                         * without finding the requested key.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_STRING:
+                        if (cc == '\\') {
+                                *dd++ = '\\';
+                                state = DTRACE_JSON_STRING_ESCAPE;
+                                break;
+                        }
+
+                        if (cc == '"') {
+                                if (collect_object) {
+                                        /*
+                                         * We don't reset the dest here, as
+                                         * the string is part of a larger
+                                         * object being collected.
+                                         */
+                                        *dd++ = cc;
+                                        collect_object = B_FALSE;
+                                        state = DTRACE_JSON_COLLECT_OBJECT;
+                                        break;
+                                }
+                                *dd = '\0';
+                                dd = dest; /* reset string buffer */
+                                if (string_is_key) {
+                                        if (dtrace_strncmp(dest, elem,
+                                            size) == 0)
+                                                found_key = B_TRUE;
+                                } else if (found_key) {
+                                        if (nelems > 1) {
+                                                /*
+                                                 * We expected an object, not
+                                                 * this string.
+                                                 */
+                                                return (NULL);
+                                        }
+                                        return (dest);
+                                }
+                                state = string_is_key ? DTRACE_JSON_COLON :
+                                    DTRACE_JSON_COMMA;
+                                string_is_key = B_FALSE;
+                                break;
+                        }
+
+                        *dd++ = cc;
+                        break;
+                case DTRACE_JSON_STRING_ESCAPE:
+                        *dd++ = cc;
+                        if (cc == 'u') {
+                                escape_unicount = 0;
+                                state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
+                        } else {
+                                state = DTRACE_JSON_STRING;
+                        }
+                        break;
+                case DTRACE_JSON_STRING_ESCAPE_UNICODE:
+                        if (!isxdigit(cc)) {
+                                /*
+                                 * ERROR: invalid unicode escape, expected
+                                 * four valid hexidecimal digits.
+                                 */
+                                return (NULL);
+                        }
+
+                        *dd++ = cc;
+                        if (++escape_unicount == 4)
+                                state = DTRACE_JSON_STRING;
+                        break;
+                case DTRACE_JSON_COLON:
+                        if (isspace(cc))
+                                break;
+
+                        if (cc == ':') {
+                                state = DTRACE_JSON_VALUE;
+                                break;
+                        }
+
+                        /*
+                         * ERROR: expected a colon.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_COMMA:
+                        if (isspace(cc))
+                                break;
+
+                        if (cc == ',') {
+                                if (in_array) {
+                                        state = DTRACE_JSON_VALUE;
+                                        if (++array_pos == array_elem)
+                                                found_key = B_TRUE;
+                                } else {
+                                        state = DTRACE_JSON_OBJECT;
+                                }
+                                break;
+                        }
+
+                        /*
+                         * ERROR: either we hit an unexpected character, or
+                         * we reached the end of the object or array without
+                         * finding the requested key.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_IDENTIFIER:
+                        if (islower(cc)) {
+                                *dd++ = cc;
+                                break;
+                        }
+
+                        *dd = '\0';
+                        dd = dest; /* reset string buffer */
+
+                        if (dtrace_strncmp(dest, "true", 5) == 0 ||
+                            dtrace_strncmp(dest, "false", 6) == 0 ||
+                            dtrace_strncmp(dest, "null", 5) == 0) {
+                                if (found_key) {
+                                        if (nelems > 1) {
+                                                /*
+                                                 * ERROR: We expected an object,
+                                                 * not this identifier.
+                                                 */
+                                                return (NULL);
+                                        }
+                                        return (dest);
+                                } else {
+                                        cur--;
+                                        state = DTRACE_JSON_COMMA;
+                                        break;
+                                }
+                        }
+
+                        /*
+                         * ERROR: we did not recognise the identifier as one
+                         * of those in the JSON specification.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_NUMBER:
+                        if (cc == '.') {
+                                *dd++ = cc;
+                                state = DTRACE_JSON_NUMBER_FRAC;
+                                break;
+                        }
+
+                        if (cc == 'x' || cc == 'X') {
+                                /*
+                                 * ERROR: specification explicitly excludes
+                                 * hexidecimal or octal numbers.
+                                 */
+                                return (NULL);
+                        }
+
+                        /* FALLTHRU */
+                case DTRACE_JSON_NUMBER_FRAC:
+                        if (cc == 'e' || cc == 'E') {
+                                *dd++ = cc;
+                                state = DTRACE_JSON_NUMBER_EXP;
+                                break;
+                        }
+
+                        if (cc == '+' || cc == '-') {
+                                /*
+                                 * ERROR: expect sign as part of exponent only.
+                                 */
+                                return (NULL);
+                        }
+                        /* FALLTHRU */
+                case DTRACE_JSON_NUMBER_EXP:
+                        if (isdigit(cc) || cc == '+' || cc == '-') {
+                                *dd++ = cc;
+                                break;
+                        }
+
+                        *dd = '\0';
+                        dd = dest; /* reset string buffer */
+                        if (found_key) {
+                                if (nelems > 1) {
+                                        /*
+                                         * ERROR: We expected an object, not
+                                         * this number.
+                                         */
+                                        return (NULL);
+                                }
+                                return (dest);
+                        }
+
+                        cur--;
+                        state = DTRACE_JSON_COMMA;
+                        break;
+                case DTRACE_JSON_VALUE:
+                        if (isspace(cc))
+                                break;
+
+                        if (cc == '{' || cc == '[') {
+                                if (nelems > 1 && found_key) {
+                                        in_array = cc == '[' ? B_TRUE : B_FALSE;
+                                        /*
+                                         * If our element selector directs us
+                                         * to descend into this nested object,
+                                         * then move to the next selector
+                                         * element in the list and restart the
+                                         * state machine.
+                                         */
+                                        while (*elem != '\0')
+                                                elem++;
+                                        elem++; /* skip the inter-element NUL */
+                                        nelems--;
+                                        dd = dest;
+                                        if (in_array) {
+                                                state = DTRACE_JSON_VALUE;
+                                                array_pos = 0;
+                                                array_elem = dtrace_strtoll(
+                                                    elem, 10, size);
+                                                found_key = array_elem == 0 ?
+                                                    B_TRUE : B_FALSE;
+                                        } else {
+                                                found_key = B_FALSE;
+                                                state = DTRACE_JSON_OBJECT;
+                                        }
+                                        break;
+                                }
+
+                                /*
+                                 * Otherwise, we wish to either skip this
+                                 * nested object or return it in full.
+                                 */
+                                if (cc == '[')
+                                        brackets = 1;
+                                else
+                                        braces = 1;
+                                *dd++ = cc;
+                                state = DTRACE_JSON_COLLECT_OBJECT;
+                                break;
+                        }
+
+                        if (cc == '"') {
+                                state = DTRACE_JSON_STRING;
+                                break;
+                        }
+
+                        if (islower(cc)) {
+                                /*
+                                 * Here we deal with true, false and null.
+                                 */
+                                *dd++ = cc;
+                                state = DTRACE_JSON_IDENTIFIER;
+                                break;
+                        }
+
+                        if (cc == '-' || isdigit(cc)) {
+                                *dd++ = cc;
+                                state = DTRACE_JSON_NUMBER;
+                                break;
+                        }
+
+                        /*
+                         * ERROR: unexpected character at start of value.
+                         */
+                        return (NULL);
+                case DTRACE_JSON_COLLECT_OBJECT:
+                        if (cc == '\0')
+                                /*
+                                 * ERROR: unexpected end of input.
+                                 */
+                                return (NULL);
+
+                        *dd++ = cc;
+                        if (cc == '"') {
+                                collect_object = B_TRUE;
+                                state = DTRACE_JSON_STRING;
+                                break;
+                        }
+
+                        if (cc == ']') {
+                                if (brackets-- == 0) {
+                                        /*
+                                         * ERROR: unbalanced brackets.
+                                         */
+                                        return (NULL);
+                                }
+                        } else if (cc == '}') {
+                                if (braces-- == 0) {
+                                        /*
+                                         * ERROR: unbalanced braces.
+                                         */
+                                        return (NULL);
+                                }
+                        } else if (cc == '{') {
+                                braces++;
+                        } else if (cc == '[') {
+                                brackets++;
+                        }
+
+                        if (brackets == 0 && braces == 0) {
+                                if (found_key) {
+                                        *dd = '\0';
+                                        return (dest);
+                                }
+                                dd = dest; /* reset string buffer */
+                                state = DTRACE_JSON_COMMA;
+                        }
+                        break;
+                }
+        }
+        return (NULL);
+}
+
 /*
  * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
  * Notice that we don't bother validating the proper number of arguments or
  * their types in the tuple stack.  This isn't needed because all argument
  * interpretation is safe because of our load safety -- the worst that can

@@ -4033,10 +4543,69 @@
                 mstate->dtms_scratch_ptr += size;
                 regs[rd] = (uintptr_t)d;
                 break;
         }
 
+        case DIF_SUBR_JSON: {
+                uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
+                uintptr_t json = tupregs[0].dttk_value;
+                size_t jsonlen = dtrace_strlen((char *)json, size);
+                uintptr_t elem = tupregs[1].dttk_value;
+                size_t elemlen = dtrace_strlen((char *)elem, size);
+
+                char *dest = (char *)mstate->dtms_scratch_ptr;
+                char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1;
+                char *ee = elemlist;
+                int nelems = 1;
+                uintptr_t cur;
+
+                if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) ||
+                    !dtrace_canload(elem, elemlen + 1, mstate, vstate)) {
+                        regs[rd] = NULL;
+                        break;
+                }
+
+                if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) {
+                        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
+                        regs[rd] = NULL;
+                        break;
+                }
+
+                /*
+                 * Read the element selector and split it up into a packed list
+                 * of strings.
+                 */
+                for (cur = elem; cur < elem + elemlen; cur++) {
+                        char cc = dtrace_load8(cur);
+
+                        if (cur == elem && cc == '[') {
+                                /*
+                                 * If the first element selector key is
+                                 * actually an array index then ignore the
+                                 * bracket.
+                                 */
+                                continue;
+                        }
+
+                        if (cc == ']')
+                                continue;
+
+                        if (cc == '.' || cc == '[') {
+                                nelems++;
+                                cc = '\0';
+                        }
+
+                        *ee++ = cc;
+                }
+                *ee++ = '\0';
+
+                if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist,
+                    nelems, dest)) != NULL)
+                        mstate->dtms_scratch_ptr += jsonlen + 1;
+                break;
+        }
+
         case DIF_SUBR_TOUPPER:
         case DIF_SUBR_TOLOWER: {
                 uintptr_t s = tupregs[0].dttk_value;
                 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
                 char *dest = (char *)mstate->dtms_scratch_ptr, c;

@@ -4340,10 +4909,32 @@
                 }
 
                 break;
         }
 
+        case DIF_SUBR_STRTOLL: {
+                uintptr_t s = tupregs[0].dttk_value;
+                uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
+                int base = 10;
+
+                if (nargs > 1) {
+                        if ((base = tupregs[1].dttk_value) <= 1 ||
+                            base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
+                                *flags |= CPU_DTRACE_ILLOP;
+                                break;
+                        }
+                }
+
+                if (!dtrace_strcanload(s, size, mstate, vstate)) {
+                        regs[rd] = INT64_MIN;
+                        break;
+                }
+
+                regs[rd] = dtrace_strtoll((char *)s, base, size);
+                break;
+        }
+
         case DIF_SUBR_LLTOSTR: {
                 int64_t i = (int64_t)tupregs[0].dttk_value;
                 uint64_t val, digit;
                 uint64_t size = 65;     /* enough room for 2^64 in binary */
                 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

@@ -8859,11 +9450,13 @@
                             subr == DIF_SUBR_COPYINSTR ||
                             subr == DIF_SUBR_INDEX ||
                             subr == DIF_SUBR_INET_NTOA ||
                             subr == DIF_SUBR_INET_NTOA6 ||
                             subr == DIF_SUBR_INET_NTOP ||
+                            subr == DIF_SUBR_JSON ||
                             subr == DIF_SUBR_LLTOSTR ||
+                            subr == DIF_SUBR_STRTOLL ||
                             subr == DIF_SUBR_RINDEX ||
                             subr == DIF_SUBR_STRCHR ||
                             subr == DIF_SUBR_STRJOIN ||
                             subr == DIF_SUBR_STRRCHR ||
                             subr == DIF_SUBR_STRSTR ||