Print this page
10806 mnode_range_setup() makes assumptions about mnodes
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>

@@ -22,11 +22,11 @@
  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Copyright (c) 2010, Intel Corporation.
  * All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019, Joyent, Inc.
  */
 
 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
 /*      All Rights Reserved   */
 

@@ -200,15 +200,15 @@
 
 /*
  * This combines mem_node_config and memranges into one data
  * structure to be used for page list management.
  */
-mnoderange_t    *mnoderanges;
-int             mnoderangecnt;
-int             mtype4g;
-int             mtype16m;
-int             mtypetop;       /* index of highest pfn'ed mnoderange */
+static mnoderange_t *mnoderanges;
+static int mnoderangecnt;
+static int mtype4g;     /* assigned by mnode_range_setup() when physmax4g */
+static int mtype16m;    /* can be -1; see mnode_range_setup() */
+static int mtypetop;    /* index of highest pfn'ed mnoderange */
 
 /*
  * 4g memory management variables for systems with more than 4g of memory:
  *
  * physical memory below 4g is required for 32bit dma devices and, currently,

@@ -261,11 +261,11 @@
  */
 
 #define FREEMEM16M      MTYPE_FREEMEM(mtype16m)
 #define DESFREE16M      desfree16m
+/*
+ * mnode_range_setup() can leave mtype16m as -1.  The mtype16m != -1 test
+ * comes first so that FREEMEM16M, which expands to MTYPE_FREEMEM(mtype16m),
+ * is never evaluated in that case and the macro simply yields false.
+ */
 #define RESTRICT16M_ALLOC(freemem, pgcnt, flags)                \
-        ((freemem != 0) && ((flags & PG_PANIC) == 0) &&         \
+        (mtype16m != -1 && (freemem != 0) && ((flags & PG_PANIC) == 0) && \
             ((freemem >= (FREEMEM16M)) ||                       \
             (FREEMEM16M  < (DESFREE16M + pgcnt))))
 
 static pgcnt_t  desfree16m = 0x380;
 

@@ -1387,78 +1387,78 @@
         ASSERT(mnrcnt <= MAX_MNODE_MRANGES);
         return (mnrcnt);
 #endif  /* __xpv */
 }
 
-/*
- * mnode_range_setup() initializes mnoderanges.
- */
+/*
+ * qsort() comparator for mnoderange_t entries: orders them by ascending
+ * ->mnr_pfnlo.  Returns -1, 0 or 1.
+ */
+static int
+mnoderange_cmp(const void *v1, const void *v2)
+{
+        const mnoderange_t *lhs = v1;
+        const mnoderange_t *rhs = v2;
+
+        if (lhs->mnr_pfnlo > rhs->mnr_pfnlo)
+                return (1);
+        if (lhs->mnr_pfnlo < rhs->mnr_pfnlo)
+                return (-1);
+        return (0);
+}
+
 void
 mnode_range_setup(mnoderange_t *mnoderanges)
 {
-        mnoderange_t *mp = mnoderanges;
-        int     mnode, mri;
-        int     mindex = 0;     /* current index into mnoderanges array */
-        int     i, j;
-        pfn_t   hipfn;
-        int     last, hi;
+        mnoderange_t *mp;
+        size_t nr_ranges;
+        size_t mnode;
 
-        for (mnode = 0; mnode < max_mem_nodes; mnode++) {
+        for (mnode = 0, nr_ranges = 0, mp = mnoderanges;
+            mnode < max_mem_nodes; mnode++) {
+                size_t mri = nranges - 1;
+
                 if (mem_node_config[mnode].exists == 0)
                         continue;
 
-                mri = nranges - 1;
-
                 while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
                         mri--;
 
                 while (mri >= 0 && mem_node_config[mnode].physmax >=
                     MEMRANGELO(mri)) {
-                        mnoderanges->mnr_pfnlo = MAX(MEMRANGELO(mri),
+                        mp->mnr_pfnlo = MAX(MEMRANGELO(mri),
                             mem_node_config[mnode].physbase);
-                        mnoderanges->mnr_pfnhi = MIN(MEMRANGEHI(mri),
+                        mp->mnr_pfnhi = MIN(MEMRANGEHI(mri),
                             mem_node_config[mnode].physmax);
-                        mnoderanges->mnr_mnode = mnode;
-                        mnoderanges->mnr_memrange = mri;
-                        mnoderanges->mnr_exists = 1;
-                        mnoderanges++;
-                        mindex++;
+                        mp->mnr_mnode = mnode;
+                        mp->mnr_memrange = mri;
+                        mp->mnr_next = -1;
+                        mp->mnr_exists = 1;
+                        mp++;
+                        nr_ranges++;
                         if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
                                 mri--;
                         else
                                 break;
                 }
         }
 
         /*
-         * For now do a simple sort of the mnoderanges array to fill in
-         * the mnr_next fields.  Since mindex is expected to be relatively
-         * small, using a simple O(N^2) algorithm.
+         * mnoderangecnt can be larger than nr_ranges when memory DR is
+         * supposedly supported.
          */
-        for (i = 0; i < mindex; i++) {
-                if (mp[i].mnr_pfnlo == 0)       /* find lowest */
-                        break;
-        }
-        ASSERT(i < mindex);
-        last = i;
-        mtype16m = last;
-        mp[last].mnr_next = -1;
-        for (i = 0; i < mindex - 1; i++) {
-                hipfn = (pfn_t)(-1);
-                hi = -1;
-                /* find next highest mnode range */
-                for (j = 0; j < mindex; j++) {
-                        if (mp[j].mnr_pfnlo > mp[last].mnr_pfnlo &&
-                            mp[j].mnr_pfnlo < hipfn) {
-                                hipfn = mp[j].mnr_pfnlo;
-                                hi = j;
-                        }
-                }
-                mp[hi].mnr_next = last;
-                last = hi;
-        }
-        mtypetop = last;
+        VERIFY3U(nr_ranges, <=, mnoderangecnt);
+
+        qsort(mnoderanges, nr_ranges, sizeof (mnoderange_t), mnoderange_cmp);
+
+        /*
+         * If some intrepid soul takes the axe to the memory DR code, we can
+         * remove ->mnr_next altogether, as we just sorted by ->mnr_pfnlo order.
+         *
+         * The VERIFY3U() above can be "==" then too.
+         */
+        for (size_t i = 1; i < nr_ranges; i++)
+                mnoderanges[i].mnr_next = i - 1;
+
+        mtypetop = nr_ranges - 1;
+        mtype16m = pfn_2_mtype(PFN_16MEG - 1); /* Can be -1 ... */
+        if (physmax4g)
+                mtype4g = pfn_2_mtype(0xfffff);
 }
 
 #ifndef __xpv
 /*
  * Update mnoderanges for memory hot-add DR operations.

@@ -1976,13 +1976,10 @@
         mnoderanges = (mnoderange_t *)addr;
         addr += (mnoderangecnt * sizeof (mnoderange_t));
 
         mnode_range_setup(mnoderanges);
 
-        if (physmax4g)
-                mtype4g = pfn_2_mtype(0xfffff);
-
         for (k = 0; k < NPC_MUTEX; k++) {
                 fpc_mutex[k] = (kmutex_t *)addr;
                 addr += (max_mem_nodes * sizeof (kmutex_t));
         }
         for (k = 0; k < NPC_MUTEX; k++) {