/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
 * It has the following characteristics:
 *
 * - Thread Safe. libzfs_core is accessible concurrently from multiple
 *   threads. This is accomplished primarily by avoiding global data
 *   (e.g. caching). Since it's thread-safe, there is no reason for a
 *   process to have multiple libzfs "instances". Therefore, we store
 *   our few pieces of data (e.g. the file descriptor) in global
 *   variables. The fd is reference-counted so that the libzfs_core
 *   library can be "initialized" multiple times (e.g. by different
 *   consumers within the same process).
 *
 * - Committed Interface. The libzfs_core interface will be committed,
 *   therefore consumers can compile against it and be confident that
 *   their code will continue to work on future releases of this code.
 *   Currently, the interface is Evolving (not Committed), but we intend
 *   to commit to it once it is more complete and we determine that it
 *   meets the needs of all consumers.
 *
 * - Programmatic Error Handling. libzfs_core communicates errors with
 *   defined error numbers, and doesn't print anything to stdout/stderr.
 *
 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
 *   to/from the kernel ioctls. There is generally a 1:1 correspondence
 *   between libzfs_core functions and ioctls to /dev/zfs.
 *
 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
 *   with kernel ioctls, and kernel ioctls are generally atomic, each
 *   libzfs_core function is atomic. For example, creating multiple
 *   snapshots with a single call to lzc_snapshot() is atomic -- it
 *   can't fail with only some of the requested snapshots created, even
 *   in the event of power loss or system crash.
 *
 * - Continued libzfs Support. Some higher-level operations (e.g.
 *   support for "zfs send -R") are too complicated to fit the scope of
 *   libzfs_core. This functionality will continue to live in libzfs.
 *   Where appropriate, libzfs will use the underlying atomic operations
 *   of libzfs_core. For example, libzfs may implement "zfs send -R |
 *   zfs receive" by using individual "send one snapshot", rename,
 *   destroy, and "receive one snapshot" operations in libzfs_core.
 *   /sbin/zfs and /sbin/zpool will link with both libzfs and
 *   libzfs_core. Other consumers should aim to use only libzfs_core,
 *   since that will be the supported, stable interface going forward.
 */
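
/*
 * Example (an illustrative sketch, not part of this library): a consumer
 * brackets its libzfs_core calls with libzfs_core_init() and
 * libzfs_core_fini(). Because the fd is reference-counted, independent
 * consumers within the same process can each do this safely. The dataset
 * name below is hypothetical.
 *
 *	#include <libzfs_core.h>
 *
 *	int error = libzfs_core_init();
 *	if (error == 0) {
 *		if (lzc_exists("pool/fs"))
 *			... operate on the dataset ...
 *		libzfs_core_fini();
 *	}
 */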

#include <libzfs_core.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/nvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>

static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

int
libzfs_core_init(void)
{
	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			(void) pthread_mutex_unlock(&g_lock);
			return (errno);
		}
	}
	g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);
	return (0);
}

void
libzfs_core_fini(void)
{
	(void) pthread_mutex_lock(&g_lock);
	ASSERT3S(g_refcount, >, 0);
	g_refcount--;
	if (g_refcount == 0)
		(void) close(g_fd);
	(void) pthread_mutex_unlock(&g_lock);
}

/*
 * Pack 'source', issue the ioctl 'ioc' against /dev/zfs for the dataset or
 * pool 'name', and retry with a doubled destination buffer for as long as
 * the kernel reports ENOMEM. On success, if 'resultp' is non-NULL, the
 * kernel's output nvlist is unpacked into *resultp (caller must free).
 */
static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
	zfs_cmd_t zc = { 0 };
	int error = 0;
	char *packed;
	size_t size;

	ASSERT3S(g_refcount, >, 0);

	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));

	packed = fnvlist_pack(source, &size);
	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
	zc.zc_nvlist_src_size = size;

	if (resultp != NULL) {
		*resultp = NULL;
		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
		    malloc(zc.zc_nvlist_dst_size);
		if (zc.zc_nvlist_dst == NULL) {
			error = ENOMEM;
			goto out;
		}
	}

	while (ioctl(g_fd, ioc, &zc) != 0) {
		if (errno == ENOMEM && resultp != NULL) {
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			zc.zc_nvlist_dst_size *= 2;
			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
			    malloc(zc.zc_nvlist_dst_size);
			if (zc.zc_nvlist_dst == NULL) {
				error = ENOMEM;
				goto out;
			}
		} else {
			error = errno;
			break;
		}
	}
	if (zc.zc_nvlist_dst_filled) {
		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
		    zc.zc_nvlist_dst_size);
	}

out:
	fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}

/*
 * Create a new filesystem or volume of the given 'type', with the given
 * properties (which may be NULL).
 */
int
lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_int32(args, "type", type);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}

/*
 * Create a clone of the snapshot 'origin', named 'fsname', with the given
 * properties (which may be NULL).
 */
int
lzc_clone(const char *fsname, const char *origin,
    nvlist_t *props)
{
	int error;
	nvlist_t *args = fnvlist_alloc();
	fnvlist_add_string(args, "origin", origin);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);
	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
	nvlist_free(args);
	return (error);
}
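
/*
 * Example (an illustrative sketch; names are hypothetical): create a
 * filesystem with a user property set at creation time, then clone an
 * existing snapshot of it. A user property is shown because user
 * properties are always strings; native properties would need their
 * native (e.g. uint64) representations.
 *
 *	nvlist_t *props = fnvlist_alloc();
 *	int error;
 *
 *	fnvlist_add_string(props, "com.example:note", "created via lzc");
 *	error = lzc_create("pool/fs", DMU_OST_ZFS, props);
 *	nvlist_free(props);
 *	if (error == 0)
 *		error = lzc_clone("pool/clone", "pool/fs@snap", NULL);
 */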

/*
 * Creates snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be created.
 * They must all be in the same pool.
 *
 * The props nvlist is properties to set. Currently only user properties
 * are supported. { user:prop_name -> string value }
 *
 * The returned results nvlist will have an entry for each snapshot that
 * failed. The value will be the (int32) error code.
 *
 * The return value will be 0 if all snapshots were created, otherwise it will
 * be the errno of an (unspecified) snapshot that failed.
 */
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[MAXNAMELEN];

	*errlist = NULL;

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (props != NULL)
		fnvlist_add_nvlist(args, "props", props);

	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
	nvlist_free(args);

	return (error);
}

/*
 * Destroys snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be destroyed.
 * They must all be in the same pool.
 *
 * Snapshots that do not exist will be silently ignored.
 *
 * If 'defer' is not set, and a snapshot has user holds or clones, the
 * destroy operation will fail and none of the snapshots will be
 * destroyed.
 *
 * If 'defer' is set, and a snapshot has user holds or clones, it will be
 * marked for deferred destruction, and will be destroyed when the last hold
 * or clone is removed/destroyed.
 *
 * The return value will be 0 if all snapshots were destroyed (or marked for
 * later destruction if 'defer' is set) or didn't exist to begin with.
 *
 * Otherwise the return value will be the errno of an (unspecified) snapshot
 * that failed, no snapshots will be destroyed, and the errlist will have an
 * entry for each snapshot that failed. The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
{
	nvpair_t *elem;
	nvlist_t *args;
	int error;
	char pool[MAXNAMELEN];

	/* determine the pool name */
	elem = nvlist_next_nvpair(snaps, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "snaps", snaps);
	if (defer)
		fnvlist_add_boolean(args, "defer");

	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
	nvlist_free(args);

	return (error);
}

/*
 * Determine how much space is used by the inclusive range of snapshots
 * from 'firstsnap' to 'lastsnap'. On success, *usedp is set to the
 * kernel-reported "used" value.
 */
int
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
    uint64_t *usedp)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;
	char fs[MAXNAMELEN];
	char *atp;

	/* determine the fs name */
	(void) strlcpy(fs, firstsnap, sizeof (fs));
	atp = strchr(fs, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	args = fnvlist_alloc();
	fnvlist_add_string(args, "firstsnap", firstsnap);

	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
	nvlist_free(args);
	if (err == 0)
		*usedp = fnvlist_lookup_uint64(result, "used");
	fnvlist_free(result);

	return (err);
}
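
/*
 * Example (an illustrative sketch; names are hypothetical): atomically
 * snapshot two datasets in the same pool, then destroy the snapshots with
 * deferred semantics. Only the nvpair names in 'snaps' are significant,
 * so valueless boolean pairs are assumed here.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int error;
 *
 *	fnvlist_add_boolean(snaps, "pool/fs@backup");
 *	fnvlist_add_boolean(snaps, "pool/vol@backup");
 *	error = lzc_snapshot(snaps, NULL, &errlist);
 *	if (error != 0)
 *		... each nvpair in errlist names a failed snapshot,
 *		    with an int32 errno value ...
 *	nvlist_free(errlist);
 *	error = lzc_destroy_snaps(snaps, B_TRUE, &errlist);
 *	nvlist_free(errlist);
 *	nvlist_free(snaps);
 */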

/*
 * Check if a dataset (a filesystem, volume, or snapshot) with the given
 * name exists.
 */
boolean_t
lzc_exists(const char *dataset)
{
	/*
	 * The objset_stats ioctl is still legacy, so we need to construct our
	 * own zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };

	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}

/*
 * Create "user holds" on snapshots. If there is a hold on a snapshot,
 * the snapshot cannot be destroyed. (However, it can be marked for deletion
 * by lzc_destroy_snaps(defer=B_TRUE).)
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is the name of the hold (string type).
 *
 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 * In this case, when the cleanup_fd is closed (including on process
 * termination), the holds will be released. If the system is shut down
 * uncleanly, the holds will be released when the pool is next opened
 * or imported.
 *
 * Holds for snapshots which don't exist will be skipped and have an entry
 * added to errlist, but will not cause an overall failure, except in the
 * case that all holds were skipped.
 *
 * The return value will be 0 if the 'holds' nvlist was empty, or if all
 * holds for snapshots that existed were successfully created and at least
 * one hold was created.
 *
 * If none of the snapshots for the requested holds existed, ENOENT will be
 * returned.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed, and no holds will be created.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * (name = snapshot), with its value being the error code (int32).
 */
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
	char pool[MAXNAMELEN];
	nvlist_t *args;
	nvpair_t *elem;
	int error;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	args = fnvlist_alloc();
	fnvlist_add_nvlist(args, "holds", holds);
	if (cleanup_fd != -1)
		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);

	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
	nvlist_free(args);
	return (error);
}
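
/*
 * Example (an illustrative sketch; names are hypothetical): place a hold
 * named "recv-keep" on a snapshot, tied to a cleanup fd so that the hold
 * is released automatically when the fd is closed or the process exits.
 * The O_RDWR flag is an assumption; the contract above only requires
 * O_EXCL on /dev/zfs.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int cleanup_fd = open("/dev/zfs", O_RDWR | O_EXCL);
 *	int error;
 *
 *	fnvlist_add_string(holds, "pool/fs@backup", "recv-keep");
 *	error = lzc_hold(holds, cleanup_fd, &errlist);
 *	nvlist_free(holds);
 *	nvlist_free(errlist);
 *	...
 *	(void) close(cleanup_fd);	(the hold is released here)
 */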

/*
 * Release "user holds" on snapshots. If the snapshot has been marked for
 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 * any clones, and all the user holds are removed, then the snapshot will be
 * destroyed.
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is an nvlist whose keys are the holds to remove.
 *
 * Holds which failed to release because they didn't exist will have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if the 'holds' nvlist was empty, or if all
 * holds that existed were successfully removed and at least one hold was
 * removed.
 *
 * If none of the holds specified existed, ENOENT will be returned.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed to release, and no holds will be released.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * to release.
 */
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
	char pool[MAXNAMELEN];
	nvpair_t *elem;

	/* determine the pool name */
	elem = nvlist_next_nvpair(holds, NULL);
	if (elem == NULL)
		return (0);
	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
	pool[strcspn(pool, "/@")] = '\0';

	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
}

/*
 * Retrieve list of user holds on the specified snapshot.
 *
 * On success, *holdsp will be set to an nvlist which the caller must free.
 * The keys are the names of the holds, and the value is the creation time
 * of the hold (uint64) in seconds since the epoch.
 */
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
	int error;
	nvlist_t *innvl = fnvlist_alloc();
	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
	fnvlist_free(innvl);
	return (error);
}

/*
 * Generate a send stream for the specified snapshot and write it to the
 * file descriptor 'fd'.
 *
 * If fromsnap is NULL, a full (non-incremental) stream will be sent.
 */
int
lzc_send(const char *snapname, const char *fromsnap, int fd)
{
	nvlist_t *args;
	int err;

	args = fnvlist_alloc();
	fnvlist_add_int32(args, "fd", fd);
	if (fromsnap != NULL)
		fnvlist_add_string(args, "fromsnap", fromsnap);
	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
	nvlist_free(args);
	return (err);
}

/*
 * Estimate the size of the stream that lzc_send() would generate.
 * On success, *spacep is set to the estimate.
 *
 * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
 */
int
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
{
	nvlist_t *args;
	nvlist_t *result;
	int err;

	args = fnvlist_alloc();
	if (fromsnap != NULL)
		fnvlist_add_string(args, "fromsnap", fromsnap);
	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
	nvlist_free(args);
	if (err == 0)
		*spacep = fnvlist_lookup_uint64(result, "space");
	nvlist_free(result);
	return (err);
}
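
/*
 * Example (an illustrative sketch; names and paths are hypothetical):
 * estimate the size of an incremental stream between two snapshots, then
 * generate it into a file.
 *
 *	uint64_t space;
 *	int error;
 *
 *	error = lzc_send_space("pool/fs@tue", "pool/fs@mon", &space);
 *	if (error == 0) {
 *		int fd = open("/var/tmp/stream", O_WRONLY | O_CREAT, 0644);
 *		error = lzc_send("pool/fs@tue", "pool/fs@mon", fd);
 *		(void) close(fd);
 *	}
 */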

/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
 * reads. Returns EIO on read error or premature end of stream.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int rv;
	int len = ilen;

	do {
		rv = read(fd, cp, len);
		cp += rv;
		len -= rv;
	} while (rv > 0);

	if (rv < 0 || len != 0)
		return (EIO);

	return (0);
}

/*
 * The simplest receive case: receive from the specified fd, creating the
 * specified snapshot. Apply the specified properties as "received" properties
 * (which can be overridden by locally-set properties). If the stream is a
 * clone, its origin snapshot must be specified by 'origin'. The 'force'
 * flag will cause the target filesystem to be rolled back or destroyed if
 * necessary to receive.
 *
 * Return 0 on success or an errno on failure.
 *
 * Note: this interface does not work on dedup'd streams
 * (those with DMU_BACKUP_FEATURE_DEDUP).
 */
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
	/*
	 * The receive ioctl is still legacy, so we need to construct our own
	 * zfs_cmd_t rather than using lzc_ioctl().
	 */
	zfs_cmd_t zc = { 0 };
	char *atp;
	char *packed = NULL;
	size_t size;
	dmu_replay_record_t drr;
	int error;

	ASSERT3S(g_refcount, >, 0);

	/* zc_name is name of containing filesystem */
	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
	atp = strchr(zc.zc_name, '@');
	if (atp == NULL)
		return (EINVAL);
	*atp = '\0';

	/* if the fs does not exist, try its parent */
	if (!lzc_exists(zc.zc_name)) {
		char *slashp = strrchr(zc.zc_name, '/');
		if (slashp == NULL)
			return (ENOENT);
		*slashp = '\0';
	}

	/* zc_value is full name of the snapshot to create */
	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));

	if (props != NULL) {
		/* zc_nvlist_src is props to set */
		packed = fnvlist_pack(props, &size);
		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
		zc.zc_nvlist_src_size = size;
	}

	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
	if (origin != NULL)
		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));

	/* zc_begin_record is non-byteswapped BEGIN record */
	error = recv_read(fd, &drr, sizeof (drr));
	if (error != 0)
		goto out;
	zc.zc_begin_record = drr.drr_u.drr_begin;

	/* zc_cookie is fd to read from */
	zc.zc_cookie = fd;

	/* zc_guid is force flag */
	zc.zc_guid = force;

	/* zc_cleanup_fd is unused */
	zc.zc_cleanup_fd = -1;

	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
	if (error != 0)
		error = errno;

out:
	if (packed != NULL)
		fnvlist_pack_free(packed, size);
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (error);
}
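
/*
 * Example (an illustrative sketch; names and paths are hypothetical):
 * receive a stream generated earlier by lzc_send() into a new snapshot,
 * forcing a rollback of the target filesystem if necessary.
 *
 *	int fd = open("/var/tmp/stream", O_RDONLY);
 *	int error = lzc_receive("pool/copy@tue", NULL, NULL, B_TRUE, fd);
 *	(void) close(fd);
 */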