1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/xpv_user.h> 28 29 #include <sys/types.h> 30 #include <sys/file.h> 31 #include <sys/errno.h> 32 #include <sys/open.h> 33 #include <sys/cred.h> 34 #include <sys/conf.h> 35 #include <sys/stat.h> 36 #include <sys/modctl.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/vmsystm.h> 40 #include <sys/sdt.h> 41 #include <sys/hypervisor.h> 42 #include <sys/xen_errno.h> 43 #include <sys/policy.h> 44 45 #include <vm/hat_i86.h> 46 #include <vm/hat_pte.h> 47 #include <vm/seg_mf.h> 48 49 #include <xen/sys/privcmd.h> 50 #include <sys/privcmd_impl.h> 51 52 static dev_info_t *privcmd_devi; 53 54 /*ARGSUSED*/ 55 static int 56 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 57 { 58 switch (cmd) { 59 case DDI_INFO_DEVT2DEVINFO: 60 case DDI_INFO_DEVT2INSTANCE: 61 break; 62 default: 63 return (DDI_FAILURE); 64 } 65 66 switch (getminor((dev_t)arg)) { 67 case PRIVCMD_MINOR: 68 break; 69 default: 70 return (DDI_FAILURE); 71 } 72 73 if (cmd == DDI_INFO_DEVT2INSTANCE) 74 *result = 0; 75 else 76 *result = privcmd_devi; 77 return (DDI_SUCCESS); 78 } 79 80 static int 81 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 82 { 83 if (cmd != DDI_ATTACH) 84 return (DDI_FAILURE); 85 86 if (ddi_create_minor_node(devi, PRIVCMD_NODE, 87 S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) 88 return (DDI_FAILURE); 89 90 privcmd_devi = devi; 91 ddi_report_dev(devi); 92 return (DDI_SUCCESS); 93 } 94 95 static int 96 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 97 { 98 if (cmd != DDI_DETACH) 99 return (DDI_FAILURE); 100 ddi_remove_minor_node(devi, NULL); 101 privcmd_devi = NULL; 102 return (DDI_SUCCESS); 103 } 104 105 /*ARGSUSED1*/ 106 static int 107 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr) 108 { 109 return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO); 110 } 111 112 /* 113 * Map a contiguous set of machine frames in a foreign domain. 114 * Used in the following way: 115 * 116 * privcmd_mmap_t p; 117 * privcmd_mmap_entry_t e; 118 * 119 * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 120 * p.num = number of privcmd_mmap_entry_t's 121 * p.dom = domid; 122 * p.entry = &e; 123 * e.va = addr; 124 * e.mfn = mfn; 125 * e.npages = btopr(size); 126 * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p); 127 */ 128 /*ARGSUSED2*/ 129 int 130 do_privcmd_mmap(void *uarg, int mode, cred_t *cr) 131 { 132 privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd; 133 privcmd_mmap_entry_t *umme; 134 struct as *as = curproc->p_as; 135 struct seg *seg; 136 int i, error = 0; 137 138 if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode)) 139 return (EFAULT); 140 141 DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num, 142 privcmd_mmap_entry_t *, mmc->entry); 143 144 if (mmc->dom == DOMID_SELF) { 145 error = ENOTSUP; /* Too paranoid? */ 146 goto done; 147 } 148 149 for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) { 150 privcmd_mmap_entry_t __mmapent, *mme = &__mmapent; 151 caddr_t addr; 152 153 if (ddi_copyin(umme, mme, sizeof (*mme), mode)) { 154 error = EFAULT; 155 break; 156 } 157 158 DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn, 159 ulong_t, mme->npages); 160 161 if (mme->mfn == MFN_INVALID) { 162 error = EINVAL; 163 break; 164 } 165 166 addr = (caddr_t)mme->va; 167 168 /* 169 * Find the segment we want to mess with, then add 170 * the mfn range to the segment. 171 */ 172 AS_LOCK_ENTER(as, RW_READER); 173 if ((seg = as_findseg(as, addr, 0)) == NULL || 174 addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size) 175 error = EINVAL; 176 else 177 error = segmf_add_mfns(seg, addr, 178 mme->mfn, mme->npages, mmc->dom); 179 AS_LOCK_EXIT(as); 180 181 if (error != 0) 182 break; 183 } 184 185 done: 186 DTRACE_XPV1(mmap__end, int, error); 187 188 return (error); 189 } 190 191 /* 192 * Set up the address range to map to an array of mfns in 193 * a foreign domain. Used in the following way: 194 * 195 * privcmd_mmap_batch_t p; 196 * 197 * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 198 * p.num = number of pages 199 * p.dom = domid 200 * p.addr = addr; 201 * p.arr = array of mfns, indexed 0 .. p.num - 1 202 * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p); 203 */ 204 /*ARGSUSED2*/ 205 static int 206 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) 207 { 208 privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch; 209 struct as *as = curproc->p_as; 210 struct seg *seg; 211 int i, error = 0; 212 caddr_t addr; 213 ulong_t *ulp; 214 215 if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode)) 216 return (EFAULT); 217 218 DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num, 219 caddr_t, mmb->addr); 220 221 addr = (caddr_t)mmb->addr; 222 AS_LOCK_ENTER(as, RW_READER); 223 if ((seg = as_findseg(as, addr, 0)) == NULL || 224 addr + ptob(mmb->num) > seg->s_base + seg->s_size) { 225 error = EINVAL; 226 goto done; 227 } 228 229 for (i = 0, ulp = mmb->arr; 230 i < mmb->num; i++, addr += PAGESIZE, ulp++) { 231 mfn_t mfn; 232 233 if (fulword(ulp, &mfn) != 0) { 234 error = EFAULT; 235 break; 236 } 237 238 if (mfn == MFN_INVALID) { 239 /* 240 * This mfn is invalid and should not be added to 241 * segmf, as we'd only cause an immediate EFAULT when 242 * we tried to fault it in. 243 */ 244 mfn |= XEN_DOMCTL_PFINFO_XTAB; 245 continue; 246 } 247 248 if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) 249 continue; 250 251 /* 252 * Tell the process that this MFN could not be mapped, so it 253 * won't later try to access it. 254 */ 255 mfn |= XEN_DOMCTL_PFINFO_XTAB; 256 if (sulword(ulp, mfn) != 0) { 257 error = EFAULT; 258 break; 259 } 260 } 261 262 done: 263 AS_LOCK_EXIT(as); 264 265 DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t, 266 mmb->addr); 267 268 return (error); 269 } 270 271 /*ARGSUSED*/ 272 static int 273 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval) 274 { 275 if (secpolicy_xvm_control(cr)) 276 return (EPERM); 277 278 /* 279 * Everything is a -native- data type. 280 */ 281 if ((mode & FMODELS) != FNATIVE) 282 return (EOVERFLOW); 283 284 switch (cmd) { 285 case IOCTL_PRIVCMD_HYPERCALL: 286 return (do_privcmd_hypercall((void *)arg, mode, cr, rval)); 287 case IOCTL_PRIVCMD_MMAP: 288 if (DOMAIN_IS_PRIVILEGED(xen_info)) 289 return (do_privcmd_mmap((void *)arg, mode, cr)); 290 break; 291 case IOCTL_PRIVCMD_MMAPBATCH: 292 if (DOMAIN_IS_PRIVILEGED(xen_info)) 293 return (do_privcmd_mmapbatch((void *)arg, mode, cr)); 294 break; 295 default: 296 break; 297 } 298 return (EINVAL); 299 } 300 301 /* 302 * The real magic happens in the segmf segment driver. 303 */ 304 /*ARGSUSED8*/ 305 static int 306 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, 307 off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr) 308 { 309 struct segmf_crargs a; 310 int error; 311 312 if (secpolicy_xvm_control(cr)) 313 return (EPERM); 314 315 as_rangelock(as); 316 if ((flags & MAP_FIXED) == 0) { 317 map_addr(addrp, len, (offset_t)off, 0, flags); 318 if (*addrp == NULL) { 319 error = ENOMEM; 320 goto rangeunlock; 321 } 322 } else { 323 /* 324 * User specified address 325 */ 326 (void) as_unmap(as, *addrp, len); 327 } 328 329 /* 330 * The mapping *must* be MAP_SHARED at offset 0. 331 * 332 * (Foreign pages are treated like device memory; the 333 * ioctl interface allows the backing objects to be 334 * arbitrarily redefined to point at any machine frame.) 335 */ 336 if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) { 337 error = EINVAL; 338 goto rangeunlock; 339 } 340 341 a.dev = dev; 342 a.prot = (uchar_t)prot; 343 a.maxprot = (uchar_t)maxprot; 344 error = as_map(as, *addrp, len, segmf_create, &a); 345 346 rangeunlock: 347 as_rangeunlock(as); 348 return (error); 349 } 350 351 static struct cb_ops privcmd_cb_ops = { 352 privcmd_open, 353 nulldev, /* close */ 354 nodev, /* strategy */ 355 nodev, /* print */ 356 nodev, /* dump */ 357 nodev, /* read */ 358 nodev, /* write */ 359 privcmd_ioctl, 360 nodev, /* devmap */ 361 nodev, /* mmap */ 362 privcmd_segmap, 363 nochpoll, /* poll */ 364 ddi_prop_op, 365 NULL, 366 D_64BIT | D_NEW | D_MP 367 }; 368 369 static struct dev_ops privcmd_dv_ops = { 370 DEVO_REV, 371 0, 372 privcmd_getinfo, 373 nulldev, /* identify */ 374 nulldev, /* probe */ 375 privcmd_attach, 376 privcmd_detach, 377 nodev, /* reset */ 378 &privcmd_cb_ops, 379 0, /* struct bus_ops */ 380 NULL, /* power */ 381 ddi_quiesce_not_needed, /* quiesce */ 382 }; 383 384 static struct modldrv modldrv = { 385 &mod_driverops, 386 "privcmd driver", 387 &privcmd_dv_ops 388 }; 389 390 static struct modlinkage modl = { 391 MODREV_1, 392 &modldrv 393 }; 394 395 int 396 _init(void) 397 { 398 return (mod_install(&modl)); 399 } 400 401 int 402 _fini(void) 403 { 404 return (mod_remove(&modl)); 405 } 406 407 int 408 _info(struct modinfo *modinfo) 409 { 410 return (mod_info(&modl, modinfo)); 411 }