1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/ksynch.h>
  28 #include <sys/cmn_err.h>
  29 #include <sys/errno.h>
  30 #include <sys/kmem.h>
  31 #include <sys/ddi.h>
  32 #include <sys/nsc_thread.h>
  33 
  34 #include "sd_bcache.h"
  35 #include "sd_trace.h"
  36 #include "sd_misc.h"
  37 
  38 #ifndef _SD_NOTRACE
  39 
  40 #ifndef SM_SDTRSEMA
  41 #define SM_SDTRSEMA 1
  42 #define SM_SDTRLCK  1
  43 #endif
  44 
  45 int _sd_trace_mask = 0;
  46 
  47 /*
  48  * _sdbd_trace_t _sd_trace_table[-1, 0 .. sdbc_max_devs - 1]
  49  *      allocate memory, shift pointer up by one.
  50  */
  51 static _sdbc_trace_t    *_sd_trace_table;
  52 
  53 static kcondvar_t       _sd_adump_cv;
  54 static int _sd_trace_configed;
  55 static kmutex_t  _sd_adump_lk;
  56 
  57 static int              _alert_cd = SDT_ANY_CD;
  58 static int              _last_cd = SDT_ANY_CD;
  59 #define XMEM(x, y)      (void)(x = y, y = (SDT_ANY_CD), x)
  60 
  61 /*
  62  * Forward declare all statics that are used before defined to enforce
  63  * parameter checking.
  64  * Some (if not all) of these could be removed if the code were reordered
  65  */
  66 
  67 static int _sd_set_adump(int cd, int flag, _sdtr_table_t *table);
  68 
  69 /*
  70  * _sdbc_tr_unload - cache is being unloaded. Release any memory/lock/sv's
  71  * created by _sdbc_tr_unload and null the stale pointers.
  72  *
  73  */
  74 void
  75 _sdbc_tr_unload(void)
  76 {
  77         if (_sd_trace_table)
  78                 nsc_kmem_free((_sd_trace_table - 1),
  79                     sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1));
  80         cv_destroy(&_sd_adump_cv);
  81         mutex_destroy(&_sd_adump_lk);
  82 
  83         _sd_trace_table = NULL;
  84 }
  85 
  86 /*
  87  * _sdbc_tr_load - cache is being loaded. Allocate the memory/lock/sv's
  88  * which need to be present regardless of state of cache configuration.
  89  *
  90  */
  91 int
  92 _sdbc_tr_load(void)
  93 {
  94         _sdbc_trace_t *m;
  95 
  96         cv_init(&_sd_adump_cv, NULL, CV_DRIVER, NULL);
  97         mutex_init(&_sd_adump_lk, NULL, MUTEX_DRIVER, NULL);
  98 
  99         /*
 100          * this maybe ought to wait to see if traces are configured, but it
 101          * is only 4k
 102          */
 103 
 104         m = (_sdbc_trace_t *)nsc_kmem_zalloc(
 105             sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1),
 106             KM_NOSLEEP, sdbc_stats_mem);
 107 
 108         if (m == NULL) {
 109                 cmn_err(CE_WARN,
 110                     "sdbc(_sdbc_tr_load) cannot allocate trace table");
 111                 return (-1);
 112         }
 113         _sd_trace_table = m + 1;
 114 
 115         return (0);
 116 
 117 }
 118 
 119 /*
 120  * _sdbc_tr_configure - configure a trace area for the descriptor "cd".
 121  * Unlike other ..._configure routines this routine is called multiple
 122  * times since there will be an unknown number of open descriptors. At
 123  * cache config time if tracing is enabled only the slot for SDT_INV_CD
 124  * is created.
 125  *
 126  * Allocate the SD cache trace area (per device)
 127  */
 128 
 129 int
 130 _sdbc_tr_configure(int cd)
 131 {
 132         int size;
 133         _sdtr_table_t *t;
 134         kmutex_t *lk;
 135 
 136         if (!_sd_cache_config.trace_size)
 137                 return (0);
 138 
 139         if (cd == SDT_INV_CD)
 140                 _sd_trace_configed = 1;
 141 
 142         if (_sd_trace_table[cd].tbl)
 143                 return (0);
 144 
 145         size = sizeof (_sdtr_table_t) +
 146             _sd_cache_config.trace_size * sizeof (_sdtr_t);
 147 
 148         if ((t = (_sdtr_table_t *)nsc_kmem_zalloc(size,
 149             KM_NOSLEEP, sdbc_stats_mem)) == NULL) {
 150                 cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) failed to "
 151                     "allocate %d bytes for trace, cd=%d", size, cd);
 152                 return (-1);
 153         }
 154 
 155         lk = nsc_kmem_zalloc(sizeof (kmutex_t), KM_NOSLEEP, sdbc_local_mem);
 156         if (!lk) {
 157                 nsc_kmem_free(t, size);
 158                 cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) cannot "
 159                     "alloc trace lock for cd %d", cd);
 160                 return (-1);
 161         }
 162         mutex_init(lk, NULL, MUTEX_DRIVER, NULL);
 163 
 164         _sd_trace_table[cd].t_lock = lk;
 165         t->tt_cd   = cd;
 166         t->tt_max  = _sd_cache_config.trace_size;
 167         t->tt_mask = _sd_cache_config.trace_mask;
 168         t->tt_lbolt = (char)_sd_cache_config.trace_lbolt;
 169         t->tt_good = (char)_sd_cache_config.trace_good;
 170         _sd_trace_mask |= t->tt_mask;
 171         _sd_trace_table[cd].tbl = t;
 172         return (0);
 173 }
 174 
 175 
 176 /*
 177  * _sdbc_tr_deconfigure
 178  *      free all trace memory (regions) when deconfiguring cache
 179  */
 180 void
 181 _sdbc_tr_deconfigure(void)
 182 {
 183         int i, size;
 184         _sdbc_trace_t *tt;
 185 
 186         if (!_sd_cache_config.trace_size || !_sd_trace_configed)
 187                 return;
 188 
 189         mutex_enter(&_sd_adump_lk);
 190         _sd_trace_configed = 0;
 191         cv_broadcast(&_sd_adump_cv);
 192         mutex_exit(&_sd_adump_lk);
 193 
 194         for (i = -1, tt = &_sd_trace_table[-1]; i < sdbc_max_devs; i++, tt++) {
 195                 if (tt->tbl == NULL) continue;
 196                 size = tt->tbl->tt_max * sizeof (_sdtr_t) +
 197                     sizeof (_sdtr_table_t);
 198                 if (tt->t_lock) {
 199                         mutex_destroy(tt->t_lock);
 200                         nsc_kmem_free(tt->t_lock, sizeof (kmutex_t));
 201                 }
 202                 nsc_kmem_free(tt->tbl, size);
 203                 tt->t_lock = NULL;
 204                 tt->tbl = NULL;
 205         }
 206         _alert_cd = SDT_ANY_CD;
 207         _last_cd = SDT_ANY_CD;
 208 }
 209 
 210 static int first_alert = 0;
 211 /*
 212  * SDALERT(f,cd,len,fba,flg,ret) \
 213  *      _sd_alert(f,cd,len,fba,flg,ret)
 214  *  Build a ALERT trace entry and place it into the trace table.
 215  */
 216 void
 217 _sd_alert(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
 218 {
 219         int tin;
 220         _sdtr_t *tp;
 221         _sdtr_table_t *t;
 222         kmutex_t *lk;
 223 
 224         if (!first_alert) {
 225                 first_alert++;
 226                 cmn_err(CE_WARN,
 227                     "sdbc(_sd_alert) cd=%x f=%x len=%x fba=%" NSC_SZFMT
 228                     " flg=%x ret=%x", cd, f, len, fba, flg, ret);
 229 
 230         }
 231 
 232         /* Watch out for negative error codes or simply bogus cd's */
 233 
 234         if (cd < -1 || cd >= sdbc_max_devs) {
 235                 /*
 236                  * no device trace buffer -- use SDT_INV_CD table?
 237                  */
 238                 if ((t = _sd_trace_table[-1].tbl) == NULL)
 239                         return;
 240                 lk = _sd_trace_table[-1].t_lock;
 241         } else {
 242                 lk = _sd_trace_table[cd].t_lock;
 243                 if ((t = _sd_trace_table[cd].tbl) == NULL) {
 244                         /*
 245                          * no device trace buffer -- use SDT_INV_CD table?
 246                          */
 247                         if ((t = _sd_trace_table[-1].tbl) == NULL)
 248                                 return;
 249                         lk = _sd_trace_table[-1].t_lock;
 250                 }
 251         }
 252 
 253         if (!(t->tt_mask & ST_ALERT))
 254                 return; /* check per-device mask */
 255 
 256         if (t->tt_good) mutex_enter(lk);
 257         t->tt_alert++;       /* alert on this device */
 258         t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */
 259 
 260         tin = t->tt_in++;
 261         if (tin >= t->tt_max) tin = t->tt_in = 0;
 262         tp = &t->tt_buf[tin];
 263         tp->t_time = 0;              /* not filled in yet */
 264         if (t->tt_good) mutex_exit(lk);
 265 
 266         tp->t_func = (ushort_t)f | ST_ALERT;
 267         tp->t_len = (ushort_t)len;
 268         tp->t_fba = fba;
 269         tp->t_flg = flg;
 270         tp->t_ret = ret;
 271         /*
 272          * On LP64 systems we will only capture the low 32 bits of the
 273          * time this really should be good enough for our purposes.
 274          *
 275          */
 276         if (t->tt_lbolt)
 277                 tp->t_time = (int)nsc_lbolt();
 278         else
 279                 tp->t_time = (int)nsc_usec();
 280 
 281         /* wakeup trace daemon, with hint */
 282         _alert_cd = cd;
 283 
 284         if (_sd_trace_configed)
 285                 cv_signal(&_sd_adump_cv);
 286 }
 287 
 288 
 289 /*
 290  * SDTRACE(f,cd,len,fba,flg,ret) \
 291  *      if (_sd_trace_mask & (f)) _sd_trace(f,cd,len,fba,flg,ret)
 292  *  Build a trace entry and place it into the trace table.
 293  */
 294 void
 295 _sd_trace(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
 296 {
 297         int tin;
 298         _sdtr_t *tp;
 299         _sdtr_table_t *t;
 300         kmutex_t *lk;
 301 
 302         /* Watch out for negative error codes or simply bogus cd's */
 303 
 304         if (cd < -1 || cd >= sdbc_max_devs) {
 305                 /*
 306                  * no device trace buffer -- use SDT_INV_CD table?
 307                  */
 308                 if ((t = _sd_trace_table[-1].tbl) == NULL)
 309                         return;
 310                 lk = _sd_trace_table[-1].t_lock;
 311         } else {
 312                 lk = _sd_trace_table[cd].t_lock;
 313                 if ((t = _sd_trace_table[cd].tbl) == NULL)
 314                         return;
 315         }
 316 
 317         if (!(t->tt_mask & f))
 318                 return; /* check per-device mask */
 319 
 320         /*
 321          * Don't overwrite if alert signaled (count lost instead)
 322          * Locking only if 'trace_good' parameter set.
 323          */
 324         if (t->tt_good) mutex_enter(lk);
 325         if (t->tt_alert && (t->tt_cnt >= t->tt_max)) {
 326                 t->tt_lost++; /* lost during alert */
 327                 if (t->tt_good) mutex_exit(lk);
 328                 return;
 329         }
 330         t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */
 331 
 332         tin = t->tt_in++;
 333         if (tin >= t->tt_max) tin = t->tt_in = 0;
 334         tp = &t->tt_buf[tin];
 335         tp->t_time = 0;              /* not filled in yet */
 336         if (t->tt_good) mutex_exit(lk);
 337 
 338         tp->t_func = (ushort_t)f;
 339         tp->t_len = (ushort_t)len;
 340         tp->t_fba = fba;
 341         tp->t_flg = flg;
 342         tp->t_ret = ret;
 343         /*
 344          * On LP64 systems we will only capture the low 32 bits of the
 345          * time this really should be good enough for our purposes.
 346          *
 347          */
 348         if (t->tt_lbolt)
 349                 tp->t_time = (int)nsc_lbolt();
 350         else
 351                 tp->t_time = (int)nsc_usec();
 352 }
 353 
 354 /*
 355  * _sd_scan_alert -- search for device with trace alert
 356  */
 357 static int
 358 _sd_scan_alert(void)
 359 {
 360         int cd;
 361 
 362         XMEM(cd, _alert_cd);
 363         if ((cd != SDT_ANY_CD) && _sd_trace_table[cd].tbl->tt_alert)
 364                 return (cd);
 365         for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
 366                 if (_sd_trace_table[cd].tbl &&
 367                     _sd_trace_table[cd].tbl->tt_alert)
 368                         return (_last_cd = cd);
 369         for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
 370                 if (_sd_trace_table[cd].tbl &&
 371                     _sd_trace_table[cd].tbl->tt_alert)
 372                         return (_last_cd = cd);
 373         return (SDT_ANY_CD);
 374 }
 375 
 376 /*
 377  * _sd_scan_entries -- search for next device with trace entries
 378  */
 379 static int
 380 _sd_scan_entries(void)
 381 {
 382         int cd;
 383 
 384         for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
 385                 if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
 386                         return (_last_cd = cd);
 387         for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
 388                 if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
 389                         return (_last_cd = cd);
 390         return (SDT_ANY_CD);
 391 }
 392 
 393 
 394 /*
 395  * _sd_adump
 396  *      copy information about new trace records to trace daemon,
 397  *      or modify trace parameters.
 398  *
 399  * Some tracing parameters can be modified
 400  * [Either per-device if cd specified, or the defaults if cd = SDT_ANY_CD]
 401  *  SD_LOGSIZE:   table.tt_max (size for future opens)
 402  *  SD_SET_LBOLT: table.tt_lbolt
 403  *  SD_SET_MASK:  table.tt_mask
 404  *  SD_SET_GOOD:  table.tt_good
 405  *
 406  * if (cd >= 0) dump specific device records;
 407  * if (cd == SDT_INV_CD) dump records which don't apply to any one device.
 408  * if (cd == SDT_ANY_CD), then choose a device:
 409  *      1) most recent alert, block if (flag & SD_ALERT_WAIT)
 410  *      2) "next" device with unprocessed records.
 411  */
 412 int
 413 _sd_adump(void *args, int *rvp)
 414 {
 415         struct a {
 416                 long cd;
 417                 _sdtr_table_t *table;
 418                 _sdtr_t *buf;
 419                 long size;
 420                 long flag;
 421         } *uap = (struct a *)args;
 422         _sdtr_t *ubuf;
 423         _sdtr_table_t tt, *t;
 424         kmutex_t *lk;
 425         int cd, count, lost, new_cnt;
 426 
 427         if (uap->flag & (SD_SET_SIZE|SD_SET_MASK|SD_SET_LBOLT|SD_SET_GOOD)) {
 428                 return (_sd_set_adump(uap->cd, uap->flag, uap->table));
 429         }
 430         if (! _sd_trace_configed) {
 431                 return (EINVAL); /* not initialized yet */
 432         }
 433         if (uap->cd >= SDT_INV_CD) {
 434                 /* specific device: check if configured. dump current state. */
 435                 if ((uap->cd > (long)sdbc_max_devs) ||
 436                     !(t = _sd_trace_table[uap->cd].tbl)) {
 437                         return (ENOSPC); /* no space configured */
 438                 }
 439                 lk = _sd_trace_table[uap->cd].t_lock;
 440                 cd = uap->cd;
 441         } else {
 442                 /*
 443                  * SDT_ANY_CD:
 444                  * SD_ALERT_WAIT - wait for alert
 445                  */
 446         scan:
 447                 if ((cd = _sd_scan_alert()) != SDT_ANY_CD)
 448                         goto dump;
 449                 if ((uap->flag & SD_ALERT_WAIT)) {
 450                         mutex_enter(&_sd_adump_lk);
 451                         if (!_sd_trace_configed) {
 452                                 mutex_exit(&_sd_adump_lk);
 453                                 return (EINVAL);
 454                         }
 455 
 456                         if (!cv_wait_sig(&_sd_adump_cv, &_sd_adump_lk)) {
 457                                 mutex_exit(&_sd_adump_lk);
 458                                 return (EINTR);
 459                         }
 460                         mutex_exit(&_sd_adump_lk);
 461 
 462                         if (!_sd_trace_configed || !_sd_cache_initialized) {
 463                                 return (EIDRM);
 464                         }
 465                         goto scan;
 466                 }
 467                 /* any device with entries */
 468                 if ((cd = _sd_scan_entries()) == SDT_INV_CD)
 469                         return (0);             /* no new entries */
 470 
 471         dump:
 472                 lk = _sd_trace_table[cd].t_lock;
 473                 if ((t = _sd_trace_table[cd].tbl) == NULL) {
 474                         if (uap->flag & SD_ALERT_WAIT) {
 475                                 t = _sd_trace_table[-1].tbl;
 476                                 lk = _sd_trace_table[-1].t_lock;
 477                         } else {
 478                                 return (ENOSPC); /* no space configured */
 479                         }
 480                 }
 481         }
 482 
 483         /*
 484          * take a snapshot of the table state
 485          */
 486         if (t->tt_good)
 487                 mutex_enter(lk);
 488         tt = *t;
 489         if (t->tt_good)
 490                 mutex_exit(lk);
 491 
 492         /*
 493          * copy trace log entries to daemon
 494          *
 495          * size:   entries in user-level 'buf'
 496          * count:  how many entries to copy [force count <= size]
 497          * tt_max: size of kernel buffer
 498          * tt_cnt: written entries [lossage if tt_cnt > tt_max]
 499          * cnt:    for wrap-around calculations
 500          */
 501         if ((count = tt.tt_cnt) > tt.tt_max) { /* lost from beginning */
 502                 tt.tt_out = tt.tt_in;
 503                 count = tt.tt_max;
 504                 lost = tt.tt_cnt - tt.tt_max;
 505         } else
 506                 lost = 0;
 507         if (count <= 0)
 508                 return (0);
 509         if ((long)count > uap->size)
 510                 count = uap->size;
 511         ubuf = uap->buf;
 512         if ((tt.tt_out + count) > tt.tt_max) {
 513                 int cnt = tt.tt_max - tt.tt_out;
 514                 if (cnt > count)
 515                         cnt = count;
 516                 if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
 517                     cnt * sizeof (_sdtr_t))) {
 518                         return (EFAULT);
 519                 }
 520                 ubuf += cnt;
 521                 cnt = count - cnt;
 522                 if (copyout(&(t->tt_buf[0]), ubuf, cnt * sizeof (_sdtr_t))) {
 523                         return (EFAULT);
 524                 }
 525                 tt.tt_out = cnt;
 526         } else {
 527                 if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
 528                     count * sizeof (_sdtr_t))) {
 529                         return (EFAULT);
 530                 }
 531                 tt.tt_out += count;
 532                 if (tt.tt_out == tt.tt_max)
 533                         tt.tt_out = 0;
 534         }
 535 
 536         /*
 537          * tt_alert uses fuzzy counting.
 538          * if multiple alerts signaled, leave it at 1.
 539          */
 540         if (t->tt_alert)
 541                 t->tt_alert = (t->tt_alert > 1) ? 1 : 0;
 542 
 543         /*
 544          * tt_cntout is tt_cnt after dump
 545          * update tt_cnt for copied entries
 546          */
 547         if (t->tt_good)
 548                 mutex_enter(lk);
 549         tt.tt_cntout = t->tt_cnt;
 550         t->tt_out = tt.tt_out;
 551         new_cnt = t->tt_cnt;
 552         if ((new_cnt -= count+lost) < 0)
 553                 new_cnt = 0;
 554         t->tt_cnt = new_cnt; /* race with new traces if not "tt_good" */
 555         if (t->tt_good)
 556                 mutex_exit(lk);
 557 
 558         if (copyout(&tt, uap->table, sizeof (tt) - sizeof (_sdtr_t))) {
 559                 return (EFAULT);
 560         }
 561         *rvp = count;
 562 
 563         first_alert = 0;
 564         return (0);
 565 }
 566 
 567 
 568 /* set size, mask, lbolt, or good(locks) */
 569 static int
 570 _sd_set_adump(int cd, int flag, _sdtr_table_t *table)
 571 {
 572         _sdtr_table_t tt, *t;
 573 
 574         if (copyin(table, &tt, sizeof (tt) - sizeof (_sdtr_t))) {
 575                 return (EFAULT);
 576         }
 577         if (cd == SDT_ANY_CD) {         /* modify config parameter */
 578                 if (flag & SD_SET_SIZE)
 579                         _sd_cache_config.trace_size = tt.tt_max;
 580                 if (flag & SD_SET_MASK) {
 581                         _sd_cache_config.trace_mask = tt.tt_mask;
 582                         /* explicitly set global mask, not bitwise or */
 583                         _sd_trace_mask = tt.tt_mask;
 584                 }
 585                 if (flag & SD_SET_LBOLT)
 586                         _sd_cache_config.trace_lbolt = tt.tt_lbolt;
 587                 if (flag & SD_SET_GOOD)
 588                         _sd_cache_config.trace_good = tt.tt_good;
 589                 return (0);
 590         }
 591         if (flag & SD_SET_SIZE)
 592                 _sd_cache_config.trace_size = tt.tt_max;
 593         /* modify particular device parameters */
 594         if (!_sd_trace_table[cd].tbl)
 595                 (void) _sdbc_tr_configure(cd);
 596         if ((t = _sd_trace_table[cd].tbl) == NULL)
 597                 return (0);
 598         if (flag & SD_SET_MASK) {
 599                 t->tt_mask = tt.tt_mask;
 600                 _sd_trace_mask |= tt.tt_mask; /* or-ed with global mask */
 601         }
 602         if (flag & SD_SET_LBOLT)
 603                 t->tt_lbolt = tt.tt_lbolt;
 604         if (flag & SD_SET_GOOD)
 605                 t->tt_good = tt.tt_good;
 606         if (copyout(t, table, sizeof (*t) - sizeof (_sdtr_t))) {
 607                 return (EFAULT);
 608         }
 609         return (0);
 610 }
 611 
 612 #else /* ! _SD_NOTRACE */
 613 
 614 int _sd_adump()         { return (ENOSYS); }
 615 int _sdbc_tr_load(void)         { return (0); }
 616 int _sdbc_tr_configure(void)    { return (0); }
 617 void _sdbc_tr_deconfigure(void) { return; }
 618 void _sdbc_tr_unload(void) { return; }
 619 
 620 #endif /* ! _SD_NOTRACE */