Print this page
9936 atomic ops in syscall_mstate() induce significant overhead
9942 zone secflags are not initialized correctly


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2012 Joyent, Inc.  All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/user.h>
  31 #include <sys/proc.h>
  32 #include <sys/cpuvar.h>
  33 #include <sys/thread.h>
  34 #include <sys/debug.h>
  35 #include <sys/msacct.h>
  36 #include <sys/time.h>
  37 #include <sys/zone.h>
  38 
  39 /*
  40  * Mega-theory block comment:
  41  *
  42  * Microstate accounting uses finite states and the transitions between these
  43  * states to measure timing and accounting information.  The state information
  44  * is presently tracked for threads (via microstate accounting) and cpus (via


 399         klwp_t *lwp;
 400         hrtime_t newtime;
 401         cpu_t *cpu;
 402         uint16_t gen;
 403 
 404         if ((lwp = ttolwp(t)) == NULL)
 405                 return;
 406 
 407         ASSERT(fromms < NMSTATES);
 408         ASSERT(toms < NMSTATES);
 409 
 410         ms = &lwp->lwp_mstate;
 411         mstimep = &ms->ms_acct[fromms];
 412         curtime = gethrtime_unscaled();
 413         newtime = curtime - ms->ms_state_start;
 414         while (newtime < 0) {
 415                 curtime = gethrtime_unscaled();
 416                 newtime = curtime - ms->ms_state_start;
 417         }
 418         *mstimep += newtime;
 419         if (fromms == LMS_USER)
 420                 atomic_add_64(&z->zone_utime, newtime);
 421         else if (fromms == LMS_SYSTEM)
 422                 atomic_add_64(&z->zone_stime, newtime);
 423         t->t_mstate = toms;
 424         ms->ms_state_start = curtime;
 425         ms->ms_prev = fromms;
 426         kpreempt_disable(); /* don't change CPU while changing CPU's state */
 427         cpu = CPU;
 428         ASSERT(cpu == t->t_cpu);









 429         if ((toms != LMS_USER) && (cpu->cpu_mstate != CMS_SYSTEM)) {
 430                 NEW_CPU_MSTATE(CMS_SYSTEM);
 431         } else if ((toms == LMS_USER) && (cpu->cpu_mstate != CMS_USER)) {
 432                 NEW_CPU_MSTATE(CMS_USER);
 433         }
 434         kpreempt_enable();
 435 }
 436 
 437 #undef NEW_CPU_MSTATE
 438 
 439 /*
 440  * The following is for computing the percentage of cpu time used recently
 441  * by an lwp.  The function cpu_decay() is also called from /proc code.
 442  *
 443  * exp_x(x):
 444  * Given x as a 64-bit non-negative scaled integer of arbitrary magnitude,
 445  * return exp(-x) as a 64-bit scaled integer in the range [0 .. 1].
 446  *
 447  * Scaling for 64-bit scaled integer:
 448  * The binary point is to the right of the high-order bit


 636                         mstimep = &ms->ms_acct[LMS_SYSTEM];
 637                         break;
 638                 default:
 639                         mstimep = &ms->ms_acct[state];
 640                         break;
 641                 }
 642                 ztime = newtime = curtime - ms->ms_state_start;
 643                 if (newtime < 0) {
 644                         curtime = gethrtime_unscaled();
 645                         oldtime = *mstimep - 1; /* force CAS to fail */
 646                         continue;
 647                 }
 648                 oldtime = *mstimep;
 649                 newtime += oldtime;
 650                 t->t_mstate = new_state;
 651                 ms->ms_state_start = curtime;
 652         } while (atomic_cas_64((uint64_t *)mstimep, oldtime, newtime) !=
 653             oldtime);
 654 
 655         /*
 656          * When the system boots, the initial startup thread will have a
 657          * ms_state_start of 0, which would add a huge system time to the
 658          * global zone.  We want to skip aggregating that initial bit of work.
 659          */
 660         if (origstart != 0) {
 661                 z = ttozone(t);
 662                 if (state == LMS_USER)
 663                         atomic_add_64(&z->zone_utime, ztime);
 664                 else if (state == LMS_SYSTEM)
 665                         atomic_add_64(&z->zone_stime, ztime);
 666         }
 667 
 668         /*
 669          * Remember the previous running microstate.
 670          */
 671         if (state != LMS_SLEEP && state != LMS_STOPPED)
 672                 ms->ms_prev = state;
 673 
 674         /*
 675          * Switch CPU microstate if appropriate
 676          */
 677 
 678         kpreempt_disable(); /* MUST disable kpreempt before touching t->cpu */

 679         ASSERT(t->t_cpu == CPU);

















 680         if (!CPU_ON_INTR(t->t_cpu) && curthread->t_intr == NULL) {
 681                 if (new_state == LMS_USER && t->t_cpu->cpu_mstate != CMS_USER)
 682                         new_cpu_mstate(CMS_USER, curtime);
 683                 else if (new_state != LMS_USER &&
 684                     t->t_cpu->cpu_mstate != CMS_SYSTEM)
 685                         new_cpu_mstate(CMS_SYSTEM, curtime);
 686         }
 687         kpreempt_enable();
 688 
 689         return (ms->ms_prev);
 690 }
 691 
 692 /*
 693  * Restore the LWP microstate to the previous runnable state.
 694  * Called from disp() with the newly selected lwp.
 695  */
 696 void
 697 restore_mstate(kthread_t *t)
 698 {
 699         struct mstate *ms;


 766                         waitrq = curtime;
 767                 }
 768                 t->t_waitrq = 0;
 769                 newtime = waitrq - ms->ms_state_start;
 770                 if (newtime < 0) {
 771                         curtime = gethrtime_unscaled();
 772                         oldtime = *mstimep - 1; /* force CAS to fail */
 773                         continue;
 774                 }
 775                 oldtime = *mstimep;
 776                 newtime += oldtime;
 777         } while (atomic_cas_64((uint64_t *)mstimep, oldtime, newtime) !=
 778             oldtime);
 779 
 780         /*
 781          * Update the WAIT_CPU timer and per-cpu waitrq total.
 782          */
 783         z = ttozone(t);
 784         waittime = curtime - waitrq;
 785         ms->ms_acct[LMS_WAIT_CPU] += waittime;
 786         atomic_add_64(&z->zone_wtime, waittime);






 787         CPU->cpu_waitrq += waittime;
 788         ms->ms_state_start = curtime;
 789 }
 790 
 791 /*
 792  * Copy lwp microstate accounting and resource usage information
 793  * to the process.  (lwp is terminating)
 794  */
 795 void
 796 term_mstate(kthread_t *t)
 797 {
 798         struct mstate *ms;
 799         proc_t *p = ttoproc(t);
 800         klwp_t *lwp = ttolwp(t);
 801         int i;
 802         hrtime_t tmp;
 803 
 804         ASSERT(MUTEX_HELD(&p->p_lock));
 805 
 806         ms = &lwp->lwp_mstate;




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2018, Joyent, Inc.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/user.h>
  31 #include <sys/proc.h>
  32 #include <sys/cpuvar.h>
  33 #include <sys/thread.h>
  34 #include <sys/debug.h>
  35 #include <sys/msacct.h>
  36 #include <sys/time.h>
  37 #include <sys/zone.h>
  38 
  39 /*
  40  * Mega-theory block comment:
  41  *
  42  * Microstate accounting uses finite states and the transitions between these
  43  * states to measure timing and accounting information.  The state information
  44  * is presently tracked for threads (via microstate accounting) and cpus (via


 399         klwp_t *lwp;
 400         hrtime_t newtime;
 401         cpu_t *cpu;
 402         uint16_t gen;
 403 
 404         if ((lwp = ttolwp(t)) == NULL)
 405                 return;
 406 
 407         ASSERT(fromms < NMSTATES);
 408         ASSERT(toms < NMSTATES);
 409 
 410         ms = &lwp->lwp_mstate;
 411         mstimep = &ms->ms_acct[fromms];
 412         curtime = gethrtime_unscaled();
 413         newtime = curtime - ms->ms_state_start;
 414         while (newtime < 0) {
 415                 curtime = gethrtime_unscaled();
 416                 newtime = curtime - ms->ms_state_start;
 417         }
 418         *mstimep += newtime;




 419         t->t_mstate = toms;
 420         ms->ms_state_start = curtime;
 421         ms->ms_prev = fromms;
 422         kpreempt_disable(); /* don't change CPU while changing CPU's state */
 423         cpu = CPU;
 424         ASSERT(cpu == t->t_cpu);
 425 
 426         if (fromms == LMS_USER) {
 427                 CPU_UARRAY_VAL(z->zone_ustate, cpu->cpu_id,
 428                     ZONE_USTATE_UTIME) += newtime;
 429         } else if (fromms == LMS_SYSTEM) {
 430                 CPU_UARRAY_VAL(z->zone_ustate, cpu->cpu_id,
 431                     ZONE_USTATE_STIME) += newtime;
 432         }
 433 
 434         if ((toms != LMS_USER) && (cpu->cpu_mstate != CMS_SYSTEM)) {
 435                 NEW_CPU_MSTATE(CMS_SYSTEM);
 436         } else if ((toms == LMS_USER) && (cpu->cpu_mstate != CMS_USER)) {
 437                 NEW_CPU_MSTATE(CMS_USER);
 438         }
 439         kpreempt_enable();
 440 }
 441 
 442 #undef NEW_CPU_MSTATE
 443 
 444 /*
 445  * The following is for computing the percentage of cpu time used recently
 446  * by an lwp.  The function cpu_decay() is also called from /proc code.
 447  *
 448  * exp_x(x):
 449  * Given x as a 64-bit non-negative scaled integer of arbitrary magnitude,
 450  * return exp(-x) as a 64-bit scaled integer in the range [0 .. 1].
 451  *
 452  * Scaling for 64-bit scaled integer:
 453  * The binary point is to the right of the high-order bit


 641                         mstimep = &ms->ms_acct[LMS_SYSTEM];
 642                         break;
 643                 default:
 644                         mstimep = &ms->ms_acct[state];
 645                         break;
 646                 }
 647                 ztime = newtime = curtime - ms->ms_state_start;
 648                 if (newtime < 0) {
 649                         curtime = gethrtime_unscaled();
 650                         oldtime = *mstimep - 1; /* force CAS to fail */
 651                         continue;
 652                 }
 653                 oldtime = *mstimep;
 654                 newtime += oldtime;
 655                 t->t_mstate = new_state;
 656                 ms->ms_state_start = curtime;
 657         } while (atomic_cas_64((uint64_t *)mstimep, oldtime, newtime) !=
 658             oldtime);
 659 
 660         /*













 661          * Remember the previous running microstate.
 662          */
 663         if (state != LMS_SLEEP && state != LMS_STOPPED)
 664                 ms->ms_prev = state;
 665 
 666         /*
 667          * Switch CPU microstate if appropriate
 668          */
 669 
 670         kpreempt_disable(); /* MUST disable kpreempt before touching t->cpu */
 671 
 672         ASSERT(t->t_cpu == CPU);
 673 
 674         /*
 675          * When the system boots, the initial startup thread will have a
 676          * ms_state_start of 0, which would add a huge system time to the
 677          * global zone.  We want to skip aggregating that initial bit of work.
 678          */
 679         if (origstart != 0) {
 680                 z = ttozone(t);
 681                 if (state == LMS_USER) {
 682                         CPU_UARRAY_VAL(z->zone_ustate, t->t_cpu->cpu_id,
 683                             ZONE_USTATE_UTIME) += ztime;
 684                 } else if (state == LMS_SYSTEM) {
 685                         CPU_UARRAY_VAL(z->zone_ustate, t->t_cpu->cpu_id,
 686                             ZONE_USTATE_STIME) += ztime;
 687                 }
 688         }
 689 
 690         if (!CPU_ON_INTR(t->t_cpu) && curthread->t_intr == NULL) {
 691                 if (new_state == LMS_USER && t->t_cpu->cpu_mstate != CMS_USER)
 692                         new_cpu_mstate(CMS_USER, curtime);
 693                 else if (new_state != LMS_USER &&
 694                     t->t_cpu->cpu_mstate != CMS_SYSTEM)
 695                         new_cpu_mstate(CMS_SYSTEM, curtime);
 696         }
 697         kpreempt_enable();
 698 
 699         return (ms->ms_prev);
 700 }
 701 
 702 /*
 703  * Restore the LWP microstate to the previous runnable state.
 704  * Called from disp() with the newly selected lwp.
 705  */
 706 void
 707 restore_mstate(kthread_t *t)
 708 {
 709         struct mstate *ms;


 776                         waitrq = curtime;
 777                 }
 778                 t->t_waitrq = 0;
 779                 newtime = waitrq - ms->ms_state_start;
 780                 if (newtime < 0) {
 781                         curtime = gethrtime_unscaled();
 782                         oldtime = *mstimep - 1; /* force CAS to fail */
 783                         continue;
 784                 }
 785                 oldtime = *mstimep;
 786                 newtime += oldtime;
 787         } while (atomic_cas_64((uint64_t *)mstimep, oldtime, newtime) !=
 788             oldtime);
 789 
 790         /*
 791          * Update the WAIT_CPU timer and per-cpu waitrq total.
 792          */
 793         z = ttozone(t);
 794         waittime = curtime - waitrq;
 795         ms->ms_acct[LMS_WAIT_CPU] += waittime;
 796 
 797         /*
 798          * We are in a disp context where we're not going to migrate CPUs.
 799          */
 800         CPU_UARRAY_VAL(z->zone_ustate, CPU->cpu_id,
 801             ZONE_USTATE_WTIME) += waittime;
 802 
 803         CPU->cpu_waitrq += waittime;
 804         ms->ms_state_start = curtime;
 805 }
 806 
 807 /*
 808  * Copy lwp microstate accounting and resource usage information
 809  * to the process.  (lwp is terminating)
 810  */
 811 void
 812 term_mstate(kthread_t *t)
 813 {
 814         struct mstate *ms;
 815         proc_t *p = ttoproc(t);
 816         klwp_t *lwp = ttolwp(t);
 817         int i;
 818         hrtime_t tmp;
 819 
 820         ASSERT(MUTEX_HELD(&p->p_lock));
 821 
 822         ms = &lwp->lwp_mstate;