Print this page
9936 atomic ops in syscall_mstate() induce significant overhead
9942 zone secflags are not initialized correctly

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/zone.c
          +++ new/usr/src/uts/common/os/zone.c
↓ open down ↓ 162 lines elided ↑ open up ↑
 163  163   *   zone_mem_lock: This is a per-zone lock used to protect the fields
 164  164   *       related to the zone.max-locked-memory and zone.max-swap rctls.
 165  165   *   zone_rctl_lock: This is a per-zone lock used to protect other rctls,
 166  166   *       currently just max_lofi
 167  167   *   zsd_key_lock: This is a global lock protecting the key state for ZSD.
 168  168   *   zone_deathrow_lock: This is a global lock protecting the "deathrow"
 169  169   *       list (a list of zones in the ZONE_IS_DEAD state).
 170  170   *
 171  171   *   Ordering requirements:
 172  172   *       pool_lock --> cpu_lock --> zonehash_lock --> zone_status_lock -->
 173      - *              zone_lock --> zsd_key_lock --> pidlock --> p_lock
      173 + *       zone_lock --> zsd_key_lock --> pidlock --> p_lock
 174  174   *
 175  175   *   When taking zone_mem_lock or zone_nlwps_lock, the lock ordering is:
 176  176   *      zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_mem_lock
 177  177   *      zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_nlwps_lock
 178  178   *
 179  179   *   Blocking memory allocations are permitted while holding any of the
 180  180   *   zone locks.
 181  181   *
 182  182   *
 183  183   *   System Call Interface:
↓ open down ↓ 1721 lines elided ↑ open up ↑
1905 1905  
1906 1906          kstat_install(ksp);
1907 1907          return (ksp);
1908 1908  }
1909 1909  
1910 1910  static int
1911 1911  zone_misc_kstat_update(kstat_t *ksp, int rw)
1912 1912  {
1913 1913          zone_t *zone = ksp->ks_private;
1914 1914          zone_misc_kstat_t *zmp = ksp->ks_data;
1915      -        hrtime_t tmp;
     1915 +        hrtime_t hrtime;
     1916 +        uint64_t tmp;
1916 1917  
1917 1918          if (rw == KSTAT_WRITE)
1918 1919                  return (EACCES);
1919 1920  
1920      -        tmp = zone->zone_utime;
1921      -        scalehrtime(&tmp);
1922      -        zmp->zm_utime.value.ui64 = tmp;
1923      -        tmp = zone->zone_stime;
1924      -        scalehrtime(&tmp);
1925      -        zmp->zm_stime.value.ui64 = tmp;
1926      -        tmp = zone->zone_wtime;
1927      -        scalehrtime(&tmp);
1928      -        zmp->zm_wtime.value.ui64 = tmp;
     1921 +        tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_STIME);
     1922 +        hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
     1923 +        scalehrtime(&hrtime);
     1924 +        zmp->zm_stime.value.ui64 = hrtime;
1929 1925  
     1926 +        tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_UTIME);
     1927 +        hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
     1928 +        scalehrtime(&hrtime);
     1929 +        zmp->zm_utime.value.ui64 = hrtime;
     1930 +
     1931 +        tmp = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_WTIME);
     1932 +        hrtime = UINT64_OVERFLOW_TO_INT64(tmp);
     1933 +        scalehrtime(&hrtime);
     1934 +        zmp->zm_wtime.value.ui64 = hrtime;
     1935 +
1930 1936          zmp->zm_avenrun1.value.ui32 = zone->zone_avenrun[0];
1931 1937          zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
1932 1938          zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];
1933 1939  
1934 1940          zmp->zm_ffcap.value.ui32 = zone->zone_ffcap;
1935 1941          zmp->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc;
1936 1942          zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem;
1937 1943          zmp->zm_ffmisc.value.ui32 = zone->zone_ffmisc;
1938 1944  
1939 1945          zmp->zm_nested_intp.value.ui32 = zone->zone_nested_intp;
↓ open down ↓ 150 lines elided ↑ open up ↑
2090 2096          zone0.zone_rootpathlen = 2;
2091 2097          zone0.zone_psetid = ZONE_PS_INVAL;
2092 2098          zone0.zone_ncpus = 0;
2093 2099          zone0.zone_ncpus_online = 0;
2094 2100          zone0.zone_proc_initpid = 1;
2095 2101          zone0.zone_initname = initname;
2096 2102          zone0.zone_lockedmem_kstat = NULL;
2097 2103          zone0.zone_swapresv_kstat = NULL;
2098 2104          zone0.zone_nprocs_kstat = NULL;
2099 2105  
2100      -        zone0.zone_stime = 0;
2101      -        zone0.zone_utime = 0;
2102      -        zone0.zone_wtime = 0;
2103      -
2104 2106          list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
2105 2107              offsetof(zone_ref_t, zref_linkage));
2106 2108          list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
2107 2109              offsetof(struct zsd_entry, zsd_linkage));
2108 2110          list_insert_head(&zone_active, &zone0);
2109 2111  
2110 2112          /*
2111 2113           * The root filesystem is not mounted yet, so zone_rootvp cannot be set
2112 2114           * to anything meaningful.  It is assigned to be 'rootdir' in
2113 2115           * vfs_mountroot().
↓ open down ↓ 183 lines elided ↑ open up ↑
2297 2299           */
2298 2300          zone0.zone_slabel = l_admin_low;
2299 2301          rw_init(&zone0.zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
2300 2302          label_hold(l_admin_low);
2301 2303  
2302 2304          /*
2303 2305           * Initialise the lock for the database structure used by mntfs.
2304 2306           */
2305 2307          rw_init(&zone0.zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);
2306 2308  
     2309 +        zone0.zone_ustate = cpu_uarray_zalloc(ZONE_USTATE_MAX, KM_SLEEP);
     2310 +
2307 2311          mutex_enter(&zonehash_lock);
2308 2312          zone_uniqid(&zone0);
2309 2313          ASSERT(zone0.zone_uniqid == GLOBAL_ZONEUNIQID);
2310 2314  
2311 2315          zonehashbyid = mod_hash_create_idhash("zone_by_id", zone_hash_size,
2312 2316              mod_hash_null_valdtor);
2313 2317          zonehashbyname = mod_hash_create_strhash("zone_by_name",
2314 2318              zone_hash_size, mod_hash_null_valdtor);
2315 2319          /*
2316 2320           * maintain zonehashbylabel only for labeled systems
↓ open down ↓ 64 lines elided ↑ open up ↑
2381 2385                  mutex_enter(&zone_deathrow_lock);
2382 2386                  list_remove(&zone_deathrow, zone);
2383 2387                  mutex_exit(&zone_deathrow_lock);
2384 2388          }
2385 2389  
2386 2390          list_destroy(&zone->zone_ref_list);
2387 2391          zone_free_zsd(zone);
2388 2392          zone_free_datasets(zone);
2389 2393          list_destroy(&zone->zone_dl_list);
2390 2394  
     2395 +        cpu_uarray_free(zone->zone_ustate);
     2396 +
2391 2397          if (zone->zone_rootvp != NULL)
2392 2398                  VN_RELE(zone->zone_rootvp);
2393 2399          if (zone->zone_rootpath)
2394 2400                  kmem_free(zone->zone_rootpath, zone->zone_rootpathlen);
2395 2401          if (zone->zone_name != NULL)
2396 2402                  kmem_free(zone->zone_name, ZONENAME_MAX);
2397 2403          if (zone->zone_slabel != NULL)
2398 2404                  label_rele(zone->zone_slabel);
2399 2405          if (zone->zone_nodename != NULL)
2400 2406                  kmem_free(zone->zone_nodename, _SYS_NMLN);
↓ open down ↓ 795 lines elided ↑ open up ↑
3196 3202          return (zret);
3197 3203  }
3198 3204  
3199 3205  /*
3200 3206   * Public interface for updating per-zone load averages.  Called once per
3201 3207   * second.
3202 3208   *
3203 3209   * Based on loadavg_update(), genloadavg() and calcloadavg() from clock.c.
3204 3210   */
3205 3211  void
3206      -zone_loadavg_update()
     3212 +zone_loadavg_update(void)
3207 3213  {
3208 3214          zone_t *zp;
3209 3215          zone_status_t status;
3210 3216          struct loadavg_s *lavg;
3211 3217          hrtime_t zone_total;
     3218 +        uint64_t tmp;
3212 3219          int i;
3213 3220          hrtime_t hr_avg;
3214 3221          int nrun;
3215 3222          static int64_t f[3] = { 135, 27, 9 };
3216 3223          int64_t q, r;
3217 3224  
3218 3225          mutex_enter(&zonehash_lock);
3219 3226          for (zp = list_head(&zone_active); zp != NULL;
3220 3227              zp = list_next(&zone_active, zp)) {
3221 3228                  mutex_enter(&zp->zone_lock);
↓ open down ↓ 4 lines elided ↑ open up ↑
3226 3233                          /* For all practical purposes the zone doesn't exist. */
3227 3234                          mutex_exit(&zp->zone_lock);
3228 3235                          continue;
3229 3236                  }
3230 3237  
3231 3238                  /*
3232 3239                   * Update the 10 second moving average data in zone_loadavg.
3233 3240                   */
3234 3241                  lavg = &zp->zone_loadavg;
3235 3242  
3236      -                zone_total = zp->zone_utime + zp->zone_stime + zp->zone_wtime;
     3243 +                tmp = cpu_uarray_sum_all(zp->zone_ustate);
     3244 +                zone_total = UINT64_OVERFLOW_TO_INT64(tmp);
     3245 +
3237 3246                  scalehrtime(&zone_total);
3238 3247  
3239 3248                  /* The zone_total should always be increasing. */
3240 3249                  lavg->lg_loads[lavg->lg_cur] = (zone_total > lavg->lg_total) ?
3241 3250                      zone_total - lavg->lg_total : 0;
3242 3251                  lavg->lg_cur = (lavg->lg_cur + 1) % S_LOADAVG_SZ;
3243 3252                  /* lg_total holds the prev. 1 sec. total */
3244 3253                  lavg->lg_total = zone_total;
3245 3254  
3246 3255                  /*
↓ open down ↓ 978 lines elided ↑ open up ↑
4225 4234  
4226 4235  /*
4227 4236   * We make creative use of nvlists to pass in rctls from userland.  The list is
4228 4237   * a list of the following structures:
4229 4238   *
4230 4239   * (name = rctl_name, value = nvpair_list_array)
4231 4240   *
4232 4241   * Where each element of the nvpair_list_array is of the form:
4233 4242   *
4234 4243   * [(name = "privilege", value = RCPRIV_PRIVILEGED),
4235      - *      (name = "limit", value = uint64_t),
4236      - *      (name = "action", value = (RCTL_LOCAL_NOACTION || RCTL_LOCAL_DENY))]
     4244 + *      (name = "limit", value = uint64_t),
     4245 + *      (name = "action", value = (RCTL_LOCAL_NOACTION || RCTL_LOCAL_DENY))]
4237 4246   */
4238 4247  static int
4239 4248  parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
4240 4249  {
4241 4250          nvpair_t *nvp = NULL;
4242 4251          nvlist_t *nvl = NULL;
4243 4252          char *kbuf;
4244 4253          int error;
4245 4254          rctl_val_t rv;
4246 4255  
↓ open down ↓ 269 lines elided ↑ open up ↑
4516 4525          zone->zone_domain[0] = '\0';
4517 4526          zone->zone_hostid = HW_INVALID_HOSTID;
4518 4527          zone->zone_shares = 1;
4519 4528          zone->zone_shmmax = 0;
4520 4529          zone->zone_ipc.ipcq_shmmni = 0;
4521 4530          zone->zone_ipc.ipcq_semmni = 0;
4522 4531          zone->zone_ipc.ipcq_msgmni = 0;
4523 4532          zone->zone_bootargs = NULL;
4524 4533          zone->zone_fs_allowed = NULL;
4525 4534  
4526      -        secflags_zero(&zone0.zone_secflags.psf_lower);
4527      -        secflags_zero(&zone0.zone_secflags.psf_effective);
4528      -        secflags_zero(&zone0.zone_secflags.psf_inherit);
4529      -        secflags_fullset(&zone0.zone_secflags.psf_upper);
     4535 +        psecflags_default(&zone->zone_secflags);
4530 4536  
4531 4537          zone->zone_initname =
4532 4538              kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
4533 4539          (void) strcpy(zone->zone_initname, zone_default_initname);
4534 4540          zone->zone_nlwps = 0;
4535 4541          zone->zone_nlwps_ctl = INT_MAX;
4536 4542          zone->zone_nprocs = 0;
4537 4543          zone->zone_nprocs_ctl = INT_MAX;
4538 4544          zone->zone_locked_mem = 0;
4539 4545          zone->zone_locked_mem_ctl = UINT64_MAX;
4540 4546          zone->zone_max_swap = 0;
4541 4547          zone->zone_max_swap_ctl = UINT64_MAX;
4542 4548          zone->zone_max_lofi = 0;
4543 4549          zone->zone_max_lofi_ctl = UINT64_MAX;
4544 4550          zone0.zone_lockedmem_kstat = NULL;
4545 4551          zone0.zone_swapresv_kstat = NULL;
4546 4552  
     4553 +        zone->zone_ustate = cpu_uarray_zalloc(ZONE_USTATE_MAX, KM_SLEEP);
     4554 +
4547 4555          /*
4548 4556           * Zsched initializes the rctls.
4549 4557           */
4550 4558          zone->zone_rctls = NULL;
4551 4559  
4552 4560          if ((error = parse_rctls(rctlbuf, rctlbufsz, &rctls)) != 0) {
4553 4561                  zone_free(zone);
4554 4562                  return (zone_create_error(error, 0, extended_error));
4555 4563          }
4556 4564  
↓ open down ↓ 2826 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX