OpenCloudOS-Kernel/include/uapi/linux/btrfs.h

993 lines
30 KiB
C
Raw Normal View History

License cleanup: add SPDX license identifier to uapi header files with a license Many user space API headers have licensing information, which is either incomplete, badly formatted or just a shorthand for referring to the license under which the file is supposed to be. This makes it hard for compliance tools to determine the correct license. Update these files with an SPDX license identifier. The identifier was chosen based on the license information in the file. GPL/LGPL licensed headers get the matching GPL/LGPL SPDX license identifier with the added 'WITH Linux-syscall-note' exception, which is the officially assigned exception identifier for the kernel syscall exception: NOTE! This copyright does *not* cover user programs that use kernel services by normal system calls - this is merely considered normal use of the kernel, and does *not* fall under the heading of "derived work". This exception makes it possible to include GPL headers into non GPL code, without confusing license compliance tools. Headers which have either explicit dual licensing or are just licensed under a non GPL license are updated with the corresponding SPDX identifier and the GPLv2 with syscall exception identifier. The format is: ((GPL-2.0 WITH Linux-syscall-note) OR SPDX-ID-OF-OTHER-LICENSE) SPDX license identifiers are a legally binding shorthand, which can be used instead of the full boiler plate text. The update does not remove existing license information as this has to be done on a case by case basis and the copyright holders might have to be consulted. This will happen in a separate step. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. See the previous patch in this series for the methodology of how this patch was researched. Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-01 22:09:13 +08:00
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef _UAPI_LINUX_BTRFS_H
#define _UAPI_LINUX_BTRFS_H
#include <linux/types.h>
#include <linux/ioctl.h>
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
#define BTRFS_LABEL_SIZE 256
/* this should be 4k */
#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
__s64 fd;
char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
#define BTRFS_SUBVOL_NAME_MAX 4039
#ifndef __KERNEL__
/* Deprecated since 5.7 */
# define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#endif
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3)
#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \
(BTRFS_SUBVOL_RDONLY | \
BTRFS_SUBVOL_QGROUP_INHERIT | \
BTRFS_DEVICE_SPEC_BY_ID | \
BTRFS_SUBVOL_SPEC_BY_ID)
#define BTRFS_FSID_SIZE 16
#define BTRFS_UUID_SIZE 16
#define BTRFS_UUID_UNPARSED_SIZE 37
/*
* flags definition for qgroup limits
*
* Used by:
* struct btrfs_qgroup_limit.flags
* struct btrfs_qgroup_limit_item.flags
*/
#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
struct btrfs_qgroup_limit {
__u64 flags;
__u64 max_rfer;
__u64 max_excl;
__u64 rsv_rfer;
__u64 rsv_excl;
};
/*
* flags definition for qgroup inheritance
*
* Used by:
* struct btrfs_qgroup_inherit.flags
*/
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
struct btrfs_qgroup_inherit {
__u64 flags;
__u64 num_qgroups;
__u64 num_ref_copies;
__u64 num_excl_copies;
struct btrfs_qgroup_limit lim;
__u64 qgroups[0];
};
struct btrfs_ioctl_qgroup_limit_args {
__u64 qgroupid;
struct btrfs_qgroup_limit lim;
};
/*
* Arguments for specification of subvolumes or devices, supporting by-name or
* by-id and flags
*
* The set of supported flags depends on the ioctl
*
* BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls:
* - BTRFS_IOC_SUBVOL_GETFLAGS
* - BTRFS_IOC_SUBVOL_SETFLAGS
*/
/* Supported flags for BTRFS_IOC_RM_DEV_V2 */
#define BTRFS_DEVICE_REMOVE_ARGS_MASK \
(BTRFS_DEVICE_SPEC_BY_ID)
/* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */
#define BTRFS_SUBVOL_CREATE_ARGS_MASK \
(BTRFS_SUBVOL_RDONLY | \
BTRFS_SUBVOL_QGROUP_INHERIT)
/* Supported flags for BTRFS_IOC_SNAP_DESTROY_V2 */
#define BTRFS_SUBVOL_DELETE_ARGS_MASK \
(BTRFS_SUBVOL_SPEC_BY_ID)
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
union {
struct {
__u64 size;
struct btrfs_qgroup_inherit __user *qgroup_inherit;
};
__u64 unused[4];
};
union {
char name[BTRFS_SUBVOL_NAME_MAX + 1];
__u64 devid;
__u64 subvolid;
};
};
/*
* structure to report errors and progress to userspace, either as a
* result of a finished scrub, a canceled scrub or a progress inquiry
*/
struct btrfs_scrub_progress {
__u64 data_extents_scrubbed; /* # of data extents scrubbed */
__u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
__u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
__u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
__u64 read_errors; /* # of read errors encountered (EIO) */
__u64 csum_errors; /* # of failed csum checks */
__u64 verify_errors; /* # of occurrences, where the metadata
* of a tree block did not match the
* expected values, like generation or
* logical */
__u64 no_csum; /* # of 4k data block for which no csum
* is present, probably the result of
* data written with nodatasum */
__u64 csum_discards; /* # of csum for which no data was found
* in the extent tree. */
__u64 super_errors; /* # of bad super blocks encountered */
__u64 malloc_errors; /* # of internal kmalloc errors. These
* will likely cause an incomplete
* scrub */
__u64 uncorrectable_errors; /* # of errors where either no intact
* copy was found or the writeback
* failed */
__u64 corrected_errors; /* # of errors corrected */
__u64 last_physical; /* last physical address scrubbed. In
* case a scrub was aborted, this can
* be used to restart the scrub */
__u64 unverified_errors; /* # of occurrences where a read for a
* full (64k) bio failed, but the re-
* check succeeded for each 4k piece.
* Intermittent error. */
};
#define BTRFS_SCRUB_READONLY 1
struct btrfs_ioctl_scrub_args {
__u64 devid; /* in */
__u64 start; /* in */
__u64 end; /* in */
__u64 flags; /* in */
struct btrfs_scrub_progress progress; /* out */
/* pad to 1k */
__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
};
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
struct btrfs_ioctl_dev_replace_start_params {
__u64 srcdevid; /* in, if 0, use srcdev_name instead */
__u64 cont_reading_from_srcdev_mode; /* in, see #define
* above */
__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */
};
#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0
#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2
#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3
#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4
struct btrfs_ioctl_dev_replace_status_params {
__u64 replace_state; /* out, see #define above */
__u64 progress_1000; /* out, 0 <= x <= 1000 */
__u64 time_started; /* out, seconds since 1-Jan-1970 */
__u64 time_stopped; /* out, seconds since 1-Jan-1970 */
__u64 num_write_errors; /* out */
__u64 num_uncorrectable_read_errors; /* out */
};
#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0
#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1
#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2
#define BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS 3
struct btrfs_ioctl_dev_replace_args {
__u64 cmd; /* in */
__u64 result; /* out */
union {
struct btrfs_ioctl_dev_replace_start_params start;
struct btrfs_ioctl_dev_replace_status_params status;
}; /* in/out */
__u64 spare[64];
};
struct btrfs_ioctl_dev_info_args {
__u64 devid; /* in/out */
__u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
__u64 bytes_used; /* out */
__u64 total_bytes; /* out */
__u64 unused[379]; /* pad to 4k */
__u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
};
btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl With the recent addition of filesystem checksum types other than CRC32c, it is not anymore hard-coded which checksum type a btrfs filesystem uses. Up to now there is no good way to read the filesystem checksum, apart from reading the filesystem UUID and then query sysfs for the checksum type. Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl command which usually is used to query filesystem features. Also add a flags member indicating that the kernel responded with a set csum_type and csum_size field. For compatibility reasons, only return the csum_type and csum_size if the BTRFS_FS_INFO_FLAG_CSUM_INFO flag was passed to the kernel. Also clear any unknown flags so we don't pass false positives to user-space newer than the kernel. To simplify further additions to the ioctl, also switch the padding to a u8 array. Pahole was used to verify the result of this switch: The csum members are added before flags, which might look odd, but this is to keep the alignment requirements and not to introduce holes in the structure. $ pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko struct btrfs_ioctl_fs_info_args { __u64 max_id; /* 0 8 */ __u64 num_devices; /* 8 8 */ __u8 fsid[16]; /* 16 16 */ __u32 nodesize; /* 32 4 */ __u32 sectorsize; /* 36 4 */ __u32 clone_alignment; /* 40 4 */ __u16 csum_type; /* 44 2 */ __u16 csum_size; /* 46 2 */ __u64 flags; /* 48 8 */ __u8 reserved[968]; /* 56 968 */ /* size: 1024, cachelines: 16, members: 10 */ }; Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms") Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm") CC: stable@vger.kernel.org # 5.5+ Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-07-13 20:28:58 +08:00
/*
* Retrieve information about the filesystem
*/
/* Request information about checksum type and size */
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
/* Request information about filesystem generation */
#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
/* Request information about filesystem metadata UUID */
#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* out */
__u64 num_devices; /* out */
__u8 fsid[BTRFS_FSID_SIZE]; /* out */
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl With the recent addition of filesystem checksum types other than CRC32c, it is not anymore hard-coded which checksum type a btrfs filesystem uses. Up to now there is no good way to read the filesystem checksum, apart from reading the filesystem UUID and then query sysfs for the checksum type. Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl command which usually is used to query filesystem features. Also add a flags member indicating that the kernel responded with a set csum_type and csum_size field. For compatibility reasons, only return the csum_type and csum_size if the BTRFS_FS_INFO_FLAG_CSUM_INFO flag was passed to the kernel. Also clear any unknown flags so we don't pass false positives to user-space newer than the kernel. To simplify further additions to the ioctl, also switch the padding to a u8 array. Pahole was used to verify the result of this switch: The csum members are added before flags, which might look odd, but this is to keep the alignment requirements and not to introduce holes in the structure. $ pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko struct btrfs_ioctl_fs_info_args { __u64 max_id; /* 0 8 */ __u64 num_devices; /* 8 8 */ __u8 fsid[16]; /* 16 16 */ __u32 nodesize; /* 32 4 */ __u32 sectorsize; /* 36 4 */ __u32 clone_alignment; /* 40 4 */ __u16 csum_type; /* 44 2 */ __u16 csum_size; /* 46 2 */ __u64 flags; /* 48 8 */ __u8 reserved[968]; /* 56 968 */ /* size: 1024, cachelines: 16, members: 10 */ }; Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms") Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm") CC: stable@vger.kernel.org # 5.5+ Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
2020-07-13 20:28:58 +08:00
/* See BTRFS_FS_INFO_FLAG_* */
__u16 csum_type; /* out */
__u16 csum_size; /* out */
__u64 flags; /* in/out */
__u64 generation; /* out */
__u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
__u8 reserved[944]; /* pad to 1k */
};
/*
* feature flags
*
* Used by:
* struct btrfs_ioctl_feature_flags
*/
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
/*
* Older kernels (< 4.9) on big-endian systems produced broken free space tree
* bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
* < 4.7.3). If this bit is clear, then the free space tree cannot be trusted.
* btrfs-progs can also intentionally clear this bit to ask the kernel to
* rebuild the free space tree, however this might not work on older kernels
* that do not know about this bit. If not sure, clear the cache manually on
* first mount when booting older kernel versions.
*/
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
btrfs: Add zstd support Add zstd compression and decompression support to BtrFS. zstd at its fastest level compresses almost as well as zlib, while offering much faster compression and decompression, approaching lzo speeds. I benchmarked btrfs with zstd compression against no compression, lzo compression, and zlib compression. I benchmarked two scenarios. Copying a set of files to btrfs, and then reading the files. Copying a tarball to btrfs, extracting it to btrfs, and then reading the extracted files. After every operation, I call `sync` and include the sync time. Between every pair of operations I unmount and remount the filesystem to avoid caching. The benchmark files can be found in the upstream zstd source repository under `contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}` [1] [2]. I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD. The first compression benchmark is copying 10 copies of the unzipped Silesia corpus [3] into a BtrFS filesystem mounted with `-o compress-force=Method`. The decompression benchmark times how long it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is measured by comparing the output of `df` and `du`. See the benchmark file [1] for details. I benchmarked multiple zstd compression levels, although the patch uses zstd level 1. | Method | Ratio | Compression MB/s | Decompression speed | |---------|-------|------------------|---------------------| | None | 0.99 | 504 | 686 | | lzo | 1.66 | 398 | 442 | | zlib | 2.58 | 65 | 241 | | zstd 1 | 2.57 | 260 | 383 | | zstd 3 | 2.71 | 174 | 408 | | zstd 6 | 2.87 | 70 | 398 | | zstd 9 | 2.92 | 43 | 406 | | zstd 12 | 2.93 | 21 | 408 | | zstd 15 | 3.01 | 11 | 354 | The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it measures the compression ratio, extracts the tar, and deletes the tar. Then it measures the compression ratio again, and `tar`s the extracted files into `/dev/null`. See the benchmark file [2] for details. | Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) | |--------|-----------|---------------|----------|------------|----------| | None | 0.97 | 0.78 | 0.981 | 5.501 | 8.807 | | lzo | 2.06 | 1.38 | 1.631 | 8.458 | 8.585 | | zlib | 3.40 | 1.86 | 7.750 | 21.544 | 11.744 | | zstd 1 | 3.57 | 1.85 | 2.579 | 11.479 | 9.389 | [1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh [3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia [4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz zstd source repository: https://github.com/facebook/zstd Signed-off-by: Nick Terrell <terrelln@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
2017-08-10 10:39:02 +08:00
#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4)
/*
* older kernels tried to do bigger metadata blocks, but the
* code was pretty buggy. Lets not let them try anymore.
*/
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
btrfs: Introduce support for FSID change without metadata rewrite This field is going to be used when the user wants to change the UUID of the filesystem without having to rewrite all metadata blocks. This field adds another level of indirection such that when the FSID is changed what really happens is the current UUID (the one with which the fs was created) is copied to the 'metadata_uuid' field in the superblock as well as a new incompat flag is set METADATA_UUID. When the kernel detects this flag is set it knows that the superblock in fact has 2 UUIDs: 1. Is the UUID which is user-visible, currently known as FSID. 2. Metadata UUID - this is the UUID which is stamped into all on-disk datastructures belonging to this file system. When the new incompat flag is present device scanning checks whether both fsid/metadata_uuid of the scanned device match any of the registered filesystems. When the flag is not set then both UUIDs are equal and only the FSID is retained on disk, metadata_uuid is set only in-memory during mount. Additionally a new metadata_uuid field is also added to the fs_info struct. It's initialised either with the FSID in case METADATA_UUID incompat flag is not set or with the metdata_uuid of the superblock otherwise. This commit introduces the new fields as well as the new incompat flag and switches all users of the fsid to the new logic. Signed-off-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> [ minor updates in comments ] Signed-off-by: David Sterba <dsterba@suse.com>
2018-10-30 22:43:23 +08:00
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
struct btrfs_ioctl_feature_flags {
__u64 compat_flags;
__u64 compat_ro_flags;
__u64 incompat_flags;
};
/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE 1
#define BTRFS_BALANCE_CTL_CANCEL 2
/*
* this is packed, because it should be exactly the same as its disk
* byte order counterpart (struct btrfs_disk_balance_args)
*/
struct btrfs_balance_args {
__u64 profiles;
union {
__u64 usage;
struct {
__u32 usage_min;
__u32 usage_max;
};
};
__u64 devid;
__u64 pstart;
__u64 pend;
__u64 vstart;
__u64 vend;
__u64 target;
__u64 flags;
/*
* BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
* BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
* and maximum
*/
union {
__u64 limit; /* limit number of processed chunks */
struct {
__u32 limit_min;
__u32 limit_max;
};
};
/*
* Process chunks that cross stripes_min..stripes_max devices,
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
*/
__u32 stripes_min;
__u32 stripes_max;
__u64 unused[6];
} __attribute__ ((__packed__));
/* report balance progress to userspace */
struct btrfs_balance_progress {
__u64 expected; /* estimated # of chunks that will be
* relocated to fulfill the request */
__u64 considered; /* # of chunks we have considered so far */
__u64 completed; /* # of chunks relocated so far */
};
/*
* flags definition for balance
*
* Restriper's general type filter
*
* Used by:
* btrfs_ioctl_balance_args.flags
* btrfs_balance_control.flags (internal)
*/
#define BTRFS_BALANCE_DATA (1ULL << 0)
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
#define BTRFS_BALANCE_METADATA (1ULL << 2)
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
BTRFS_BALANCE_SYSTEM | \
BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/*
* flags definitions for per-type balance args
*
* Balance filters
*
* Used by:
* struct btrfs_balance_args
*/
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
#define BTRFS_BALANCE_ARGS_MASK \
(BTRFS_BALANCE_ARGS_PROFILES | \
BTRFS_BALANCE_ARGS_USAGE | \
BTRFS_BALANCE_ARGS_DEVID | \
BTRFS_BALANCE_ARGS_DRANGE | \
BTRFS_BALANCE_ARGS_VRANGE | \
BTRFS_BALANCE_ARGS_LIMIT | \
BTRFS_BALANCE_ARGS_LIMIT_RANGE | \
BTRFS_BALANCE_ARGS_STRIPES_RANGE | \
BTRFS_BALANCE_ARGS_USAGE_RANGE)
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
* half-filled).
*/
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
/*
* flags definition for balance state
*
* Used by:
* struct btrfs_ioctl_balance_args.state
*/
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
struct btrfs_ioctl_balance_args {
__u64 flags; /* in/out */
__u64 state; /* out */
struct btrfs_balance_args data; /* in/out */
struct btrfs_balance_args meta; /* in/out */
struct btrfs_balance_args sys; /* in/out */
struct btrfs_balance_progress stat; /* out */
__u64 unused[72]; /* pad to 1k */
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
__u64 objectid;
char name[BTRFS_INO_LOOKUP_PATH_MAX];
};
#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
struct btrfs_ioctl_ino_lookup_user_args {
/* in, inode number containing the subvolume of 'subvolid' */
__u64 dirid;
/* in */
__u64 treeid;
/* out, name of the subvolume of 'treeid' */
char name[BTRFS_VOL_NAME_MAX + 1];
/*
* out, constructed path from the directory with which the ioctl is
* called to dirid
*/
char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
};
/* Search criteria for the btrfs SEARCH ioctl family. */
struct btrfs_ioctl_search_key {
/*
* The tree we're searching in. 1 is the tree of tree roots, 2 is the
* extent tree, etc...
*
* A special tree_id value of 0 will cause a search in the subvolume
* tree that the inode which is passed to the ioctl is part of.
*/
__u64 tree_id; /* in */
/*
* When doing a tree search, we're actually taking a slice from a
* linear search space of 136-bit keys.
*
* A full 136-bit tree key is composed as:
* (objectid << 72) + (type << 64) + offset
*
* The individual min and max values for objectid, type and offset
* define the min_key and max_key values for the search range. All
* metadata items with a key in the interval [min_key, max_key] will be
* returned.
*
* Additionally, we can filter the items returned on transaction id of
* the metadata block they're stored in by specifying a transid range.
* Be aware that this transaction id only denotes when the metadata
* page that currently contains the item got written the last time as
* result of a COW operation. The number does not have any meaning
* related to the transaction in which an individual item that is being
* returned was created or changed.
*/
__u64 min_objectid; /* in */
__u64 max_objectid; /* in */
__u64 min_offset; /* in */
__u64 max_offset; /* in */
__u64 min_transid; /* in */
__u64 max_transid; /* in */
__u32 min_type; /* in */
__u32 max_type; /* in */
/*
* input: The maximum amount of results desired.
* output: The actual amount of items returned, restricted by any of:
* - reaching the upper bound of the search range
* - reaching the input nr_items amount of items
* - completely filling the supplied memory buffer
*/
__u32 nr_items; /* in/out */
/* align to 64 bits */
__u32 unused;
/* some extra for later */
__u64 unused1;
__u64 unused2;
__u64 unused3;
__u64 unused4;
};
struct btrfs_ioctl_search_header {
__u64 transid;
__u64 objectid;
__u64 offset;
__u32 type;
__u32 len;
};
#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
/*
* the buf is an array of search headers where
* each header is followed by the actual item
* the type field is expanded to 32 bits for alignment
*/
struct btrfs_ioctl_search_args {
struct btrfs_ioctl_search_key key;
char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
};
struct btrfs_ioctl_search_args_v2 {
struct btrfs_ioctl_search_key key; /* in/out - search parameters */
__u64 buf_size; /* in - size of buffer
* out - on EOVERFLOW: needed size
* to store item */
__u64 buf[0]; /* out - found items */
};
struct btrfs_ioctl_clone_range_args {
__s64 src_fd;
__u64 src_offset, src_length;
__u64 dest_offset;
};
/*
* flags definition for the defrag range ioctl
*
* Used by:
* struct btrfs_ioctl_defrag_range_args.flags
*/
#define BTRFS_DEFRAG_RANGE_COMPRESS 1
#define BTRFS_DEFRAG_RANGE_START_IO 2
struct btrfs_ioctl_defrag_range_args {
/* start of the defrag operation */
__u64 start;
/* number of bytes to defrag, use (u64)-1 to say all */
__u64 len;
/*
* flags for the operation, which can include turning
* on compression for this one defrag
*/
__u64 flags;
/*
* any extent bigger than this will be considered
* already defragged. Use 0 to take the kernel default
* Use 1 to say every single extent must be rewritten
*/
__u32 extent_thresh;
/*
* which compression method to use if turning on compression
* for this defrag operation. If unspecified, zlib will
* be used
*/
__u32 compress_type;
/* spare for later */
__u32 unused[4];
};
#define BTRFS_SAME_DATA_DIFFERS 1
/* For extent-same ioctl */
struct btrfs_ioctl_same_extent_info {
__s64 fd; /* in - destination file */
__u64 logical_offset; /* in - start of extent in destination */
__u64 bytes_deduped; /* out - total # of bytes we were able
* to dedupe from this file */
/* status of this dedupe operation:
* 0 if dedup succeeds
* < 0 for error
* == BTRFS_SAME_DATA_DIFFERS if data differs
*/
__s32 status; /* out - see above description */
__u32 reserved;
};
struct btrfs_ioctl_same_args {
__u64 logical_offset; /* in - start of extent in source */
__u64 length; /* in - length of extent */
__u16 dest_count; /* in - total elements in info array */
__u16 reserved1;
__u32 reserved2;
struct btrfs_ioctl_same_extent_info info[0];
};
struct btrfs_ioctl_space_info {
__u64 flags;
__u64 total_bytes;
__u64 used_bytes;
};
struct btrfs_ioctl_space_args {
__u64 space_slots;
__u64 total_spaces;
struct btrfs_ioctl_space_info spaces[0];
};
struct btrfs_data_container {
__u32 bytes_left; /* out -- bytes not needed to deliver output */
__u32 bytes_missing; /* out -- additional bytes needed for result */
__u32 elem_cnt; /* out */
__u32 elem_missed; /* out */
__u64 val[0]; /* out */
};
struct btrfs_ioctl_ino_path_args {
__u64 inum; /* in */
__u64 size; /* in */
__u64 reserved[4];
/* struct btrfs_data_container *fspath; out */
__u64 fspath; /* out */
};
struct btrfs_ioctl_logical_ino_args {
__u64 logical; /* in */
__u64 size; /* in */
btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2 Now that check_extent_in_eb()'s extent offset filter can be turned off, we need a way to do it from userspace. Add a 'flags' field to the btrfs_logical_ino_args structure to disable extent offset filtering, taking the place of one of the existing reserved[] fields. Previous versions of LOGICAL_INO neglected to check whether any of the reserved fields have non-zero values. Assigning meaning to those fields now may change the behavior of existing programs that left these fields uninitialized. The lack of a zero check also means that new programs have no way to know whether the kernel is honoring the flags field. To avoid these problems, define a new ioctl LOGICAL_INO_V2. We can use the same argument layout as LOGICAL_INO, but shorten the reserved[] array by one element and turn it into the 'flags' field. The V2 ioctl explicitly checks that reserved fields and unsupported flag bits are zero so that userspace can negotiate future feature bits as they are defined. Since the memory layouts of the two ioctls' arguments are compatible, there is no need for a separate function for logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search where the layout and code are quite different). A version parameter and an 'if' statement will suffice. Now that we have a flags field in logical_ino_args, add a flag BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, and pass it down the stack to iterate_inodes_from_logical. Motivation and background, copied from the patchset cover letter: Suppose we have a file with one extent: root@tester:~# zcat /usr/share/doc/cpio/changelog.gz > /test/a root@tester:~# sync Split the extent by overwriting it in the middle: root@tester:~# cat /dev/urandom | dd bs=4k seek=2 skip=2 count=1 conv=notrunc of=/test/a We should now have 3 extent refs to 2 extents, with one block unreachable. The extent tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 2 [...] item 9 key (1103101952 EXTENT_ITEM 73728) itemoff 15942 itemsize 53 extent refs 2 gen 29 flags DATA extent data backref root 5 objectid 261 offset 0 count 2 [...] item 11 key (1103175680 EXTENT_ITEM 4096) itemoff 15865 itemsize 53 extent refs 1 gen 30 flags DATA extent data backref root 5 objectid 261 offset 8192 count 1 [...] and the ref tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 5 [...] item 6 key (261 EXTENT_DATA 0) itemoff 15825 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 0 nr 8192 ram 73728 extent compression(none) item 7 key (261 EXTENT_DATA 8192) itemoff 15772 itemsize 53 extent data disk byte 1103175680 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression(none) item 8 key (261 EXTENT_DATA 12288) itemoff 15719 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 12288 nr 61440 ram 73728 extent compression(none) [...] There are two references to the same extent with different, non-overlapping byte offsets: [------------------72K extent at 1103101952----------------------] [--8K----------------|--4K unreachable----|--60K-----------------] ^ ^ | | [--8K ref offset 0--][--4K ref offset 0--][--60K ref offset 12K--] | v [-----4K extent-----] at 1103175680 We want to find all of the references to extent bytenr 1103101952. Without the patch (and without running btrfs-debug-tree), we have to do it with 18 LOGICAL_INO calls: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO inode 261 offset 0 root 5 root@tester:~# for x in $(seq 0 17); do btrfs ins log $((1103101952 + x * 4096)) -P /test/; done 2>&1 | grep inode inode 261 offset 0 root 5 inode 261 offset 4096 root 5 <- same extent ref as offset 0 (offset 8192 returns empty set, not reachable) inode 261 offset 12288 root 5 inode 261 offset 16384 root 5 \ inode 261 offset 20480 root 5 | inode 261 offset 24576 root 5 | inode 261 offset 28672 root 5 | inode 261 offset 32768 root 5 | inode 261 offset 36864 root 5 \ inode 261 offset 40960 root 5 > all the same extent ref as offset 12288. inode 261 offset 45056 root 5 / More processing required in userspace inode 261 offset 49152 root 5 | to figure out these are all duplicates. inode 261 offset 53248 root 5 | inode 261 offset 57344 root 5 | inode 261 offset 61440 root 5 | inode 261 offset 65536 root 5 | inode 261 offset 69632 root 5 / In the worst case the extents are 128MB long, and we have to do 32768 iterations of the loop to find one 4K extent ref. With the patch, we just use one call to map all refs to the extent at once: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO_V2 inode 261 offset 0 root 5 inode 261 offset 12288 root 5 The TREE_SEARCH ioctl allows userspace to retrieve the offset and extent bytenr fields easily once the root, inode and offset are known. This is sufficient information to build a complete map of the extent and all of its references. Userspace can use this information to make better choices to dedup or defrag. Signed-off-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org> Reviewed-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> Tested-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> [ copy background and motivation from cover letter ] Signed-off-by: David Sterba <dsterba@suse.com>
2017-09-23 01:58:46 +08:00
__u64 reserved[3]; /* must be 0 for now */
__u64 flags; /* in, v2 only */
/* struct btrfs_data_container *inodes; out */
__u64 inodes;
};
btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2 Now that check_extent_in_eb()'s extent offset filter can be turned off, we need a way to do it from userspace. Add a 'flags' field to the btrfs_logical_ino_args structure to disable extent offset filtering, taking the place of one of the existing reserved[] fields. Previous versions of LOGICAL_INO neglected to check whether any of the reserved fields have non-zero values. Assigning meaning to those fields now may change the behavior of existing programs that left these fields uninitialized. The lack of a zero check also means that new programs have no way to know whether the kernel is honoring the flags field. To avoid these problems, define a new ioctl LOGICAL_INO_V2. We can use the same argument layout as LOGICAL_INO, but shorten the reserved[] array by one element and turn it into the 'flags' field. The V2 ioctl explicitly checks that reserved fields and unsupported flag bits are zero so that userspace can negotiate future feature bits as they are defined. Since the memory layouts of the two ioctls' arguments are compatible, there is no need for a separate function for logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search where the layout and code are quite different). A version parameter and an 'if' statement will suffice. Now that we have a flags field in logical_ino_args, add a flag BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, and pass it down the stack to iterate_inodes_from_logical. Motivation and background, copied from the patchset cover letter: Suppose we have a file with one extent: root@tester:~# zcat /usr/share/doc/cpio/changelog.gz > /test/a root@tester:~# sync Split the extent by overwriting it in the middle: root@tester:~# cat /dev/urandom | dd bs=4k seek=2 skip=2 count=1 conv=notrunc of=/test/a We should now have 3 extent refs to 2 extents, with one block unreachable. The extent tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 2 [...] item 9 key (1103101952 EXTENT_ITEM 73728) itemoff 15942 itemsize 53 extent refs 2 gen 29 flags DATA extent data backref root 5 objectid 261 offset 0 count 2 [...] item 11 key (1103175680 EXTENT_ITEM 4096) itemoff 15865 itemsize 53 extent refs 1 gen 30 flags DATA extent data backref root 5 objectid 261 offset 8192 count 1 [...] and the ref tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 5 [...] item 6 key (261 EXTENT_DATA 0) itemoff 15825 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 0 nr 8192 ram 73728 extent compression(none) item 7 key (261 EXTENT_DATA 8192) itemoff 15772 itemsize 53 extent data disk byte 1103175680 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression(none) item 8 key (261 EXTENT_DATA 12288) itemoff 15719 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 12288 nr 61440 ram 73728 extent compression(none) [...] There are two references to the same extent with different, non-overlapping byte offsets: [------------------72K extent at 1103101952----------------------] [--8K----------------|--4K unreachable----|--60K-----------------] ^ ^ | | [--8K ref offset 0--][--4K ref offset 0--][--60K ref offset 12K--] | v [-----4K extent-----] at 1103175680 We want to find all of the references to extent bytenr 1103101952. Without the patch (and without running btrfs-debug-tree), we have to do it with 18 LOGICAL_INO calls: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO inode 261 offset 0 root 5 root@tester:~# for x in $(seq 0 17); do btrfs ins log $((1103101952 + x * 4096)) -P /test/; done 2>&1 | grep inode inode 261 offset 0 root 5 inode 261 offset 4096 root 5 <- same extent ref as offset 0 (offset 8192 returns empty set, not reachable) inode 261 offset 12288 root 5 inode 261 offset 16384 root 5 \ inode 261 offset 20480 root 5 | inode 261 offset 24576 root 5 | inode 261 offset 28672 root 5 | inode 261 offset 32768 root 5 | inode 261 offset 36864 root 5 \ inode 261 offset 40960 root 5 > all the same extent ref as offset 12288. inode 261 offset 45056 root 5 / More processing required in userspace inode 261 offset 49152 root 5 | to figure out these are all duplicates. inode 261 offset 53248 root 5 | inode 261 offset 57344 root 5 | inode 261 offset 61440 root 5 | inode 261 offset 65536 root 5 | inode 261 offset 69632 root 5 / In the worst case the extents are 128MB long, and we have to do 32768 iterations of the loop to find one 4K extent ref. With the patch, we just use one call to map all refs to the extent at once: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO_V2 inode 261 offset 0 root 5 inode 261 offset 12288 root 5 The TREE_SEARCH ioctl allows userspace to retrieve the offset and extent bytenr fields easily once the root, inode and offset are known. This is sufficient information to build a complete map of the extent and all of its references. Userspace can use this information to make better choices to dedup or defrag. Signed-off-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org> Reviewed-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> Tested-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> [ copy background and motivation from cover letter ] Signed-off-by: David Sterba <dsterba@suse.com>
2017-09-23 01:58:46 +08:00
/* Return every ref to the extent, not just those containing logical block.
* Requires logical == extent bytenr. */
#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0)
enum btrfs_dev_stat_values {
/* disk I/O failure stats */
BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
/* stats for indirect indications for I/O failures */
BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
* contents is illegal: this is an
* indication that the block was damaged
* during read or write, or written to
* wrong location or read from wrong
* location */
BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
* been written */
BTRFS_DEV_STAT_VALUES_MAX
};
/* Reset statistics after reading; needs SYS_ADMIN capability */
#define BTRFS_DEV_STATS_RESET (1ULL << 0)
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
__u64 flags; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
/*
* This pads the struct to 1032 bytes. It was originally meant to pad to
* 1024 bytes, but when adding the flags field, the padding calculation
* was not adjusted.
*/
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX];
};
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3
struct btrfs_ioctl_quota_ctl_args {
__u64 cmd;
__u64 status;
};
struct btrfs_ioctl_quota_rescan_args {
__u64 flags;
__u64 progress;
__u64 reserved[6];
};
struct btrfs_ioctl_qgroup_assign_args {
__u64 assign;
__u64 src;
__u64 dst;
};
struct btrfs_ioctl_qgroup_create_args {
__u64 create;
__u64 qgroupid;
};
struct btrfs_ioctl_timespec {
__u64 sec;
__u32 nsec;
};
struct btrfs_ioctl_received_subvol_args {
char uuid[BTRFS_UUID_SIZE]; /* in */
__u64 stransid; /* in */
__u64 rtransid; /* out */
struct btrfs_ioctl_timespec stime; /* in */
struct btrfs_ioctl_timespec rtime; /* out */
__u64 flags; /* in */
__u64 reserved[16]; /* in */
};
/*
* Caller doesn't want file data in the send stream, even if the
* search of clone sources doesn't find an extent. UPDATE_EXTENT
* commands will be sent instead of WRITE commands.
*/
Btrfs: allow omitting stream header and end-cmd for btrfs send Two new flags are added to allow omitting the stream header and the end command for btrfs send streams. This is used in cases where you send multiple snapshots back-to-back in one stream. This used to be encoded like this (with 2 snapshots in this example): <stream header> + <sequence of commands> + <end cmd> + <stream header> + <sequence of commands> + <end cmd> + EOF The new format (if the two new flags are used) is this one: <stream header> + <sequence of commands> + <sequence of commands> + <end cmd> Note that the currently existing receivers treat <end cmd> only as an indication that a new <stream header> is following. This means, you can just skip the sequence <end cmd> <stream header> without loosing compatibility. As long as an EOF is following, the currently existing receivers handle the new format (if the two new flags are used) exactly as the old one. So what is the benefit of this change? The goal is to be able to use a single stream (one TCP connection) to multiplex a request/response handshake plus Btrfs send streams, all in the same stream. In this case you cannot evaluate an EOF condition as an end of the Btrfs send stream. You need something else, and the <end cmd> is just perfect for this purpose. The summary is: The format change is driven by the need to send several Btrfs send streams over a single TCP connections, with the ability for a repeated request/response handshake in the middle. And this format change does not break any existing tool, it is completely compatible. You could compare the old behaviour of the Btrfs send stream to the one of ftp where you need a seperate request/response channel and newly opened data transfer channels for each file, while the new behaviour is more like http using a single stream for everything. Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
2013-04-11 01:10:52 +08:00
#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
/*
* Do not add the leading stream header. Used when multiple snapshots
* are sent back to back.
*/
#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER 0x2
/*
* Omit the command at the end of the stream that indicated the end
* of the stream. This option is used when multiple snapshots are
* sent back to back.
*/
#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
#define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
BTRFS_SEND_FLAG_OMIT_END_CMD)
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
__u64 __user *clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
};
/*
* Information about a fs tree root.
*
* All items are filled by the ioctl
*/
struct btrfs_ioctl_get_subvol_info_args {
/* Id of this subvolume */
__u64 treeid;
/* Name of this subvolume, used to get the real name at mount point */
char name[BTRFS_VOL_NAME_MAX + 1];
/*
* Id of the subvolume which contains this subvolume.
* Zero for top-level subvolume or a deleted subvolume.
*/
__u64 parent_id;
/*
* Inode number of the directory which contains this subvolume.
* Zero for top-level subvolume or a deleted subvolume
*/
__u64 dirid;
/* Latest transaction id of this subvolume */
__u64 generation;
/* Flags of this subvolume */
__u64 flags;
/* UUID of this subvolume */
__u8 uuid[BTRFS_UUID_SIZE];
/*
* UUID of the subvolume of which this subvolume is a snapshot.
* All zero for a non-snapshot subvolume.
*/
__u8 parent_uuid[BTRFS_UUID_SIZE];
/*
* UUID of the subvolume from which this subvolume was received.
* All zero for non-received subvolume.
*/
__u8 received_uuid[BTRFS_UUID_SIZE];
/* Transaction id indicating when change/create/send/receive happened */
__u64 ctransid;
__u64 otransid;
__u64 stransid;
__u64 rtransid;
/* Time corresponding to c/o/s/rtransid */
struct btrfs_ioctl_timespec ctime;
struct btrfs_ioctl_timespec otime;
struct btrfs_ioctl_timespec stime;
struct btrfs_ioctl_timespec rtime;
/* Must be zero */
__u64 reserved[8];
};
#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
struct btrfs_ioctl_get_subvol_rootref_args {
/* in/out, minimum id of rootref's treeid to be searched */
__u64 min_treeid;
/* out */
struct {
__u64 treeid;
__u64 dirid;
} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
/* out, number of found items */
__u8 num_items;
__u8 align[7];
};
/* Error codes as returned by the kernel */
enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
BTRFS_ERROR_DEV_TGT_REPLACE,
BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
BTRFS_ERROR_DEV_ONLY_WRITABLE,
BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
struct btrfs_ioctl_vol_args)
/* trans start and trans end are dangerous, and only for
* use by applications that know how to avoid the
* resulting deadlocks
*/
#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
struct btrfs_ioctl_clone_range_args)
#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
struct btrfs_ioctl_defrag_range_args)
#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
struct btrfs_ioctl_search_args)
#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
struct btrfs_ioctl_search_args_v2)
#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
struct btrfs_ioctl_ino_lookup_args)
#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
struct btrfs_ioctl_space_args)
Btrfs: add START_SYNC, WAIT_SYNC ioctls START_SYNC will start a sync/commit, but not wait for it to complete. Any modification started after the ioctl returns is guaranteed not to be included in the commit. If a non-NULL pointer is passed, the transaction id will be returned to userspace. WAIT_SYNC will wait for any in-progress commit to complete. If a transaction id is specified, the ioctl will block and then return (success) when the specified transaction has committed. If it has already committed when we call the ioctl, it returns immediately. If the specified transaction doesn't exist, it returns EINVAL. If no transaction id is specified, WAIT_SYNC will wait for the currently committing transaction to finish it's commit to disk. If there is no currently committing transaction, it returns success. These ioctls are useful for applications which want to impose an ordering on when fs modifications reach disk, but do not want to wait for the full (slow) commit process to do so. Picky callers can take the transid returned by START_SYNC and feed it to WAIT_SYNC, and be certain to wait only as long as necessary for the transaction _they_ started to reach disk. Sloppy callers can START_SYNC and WAIT_SYNC without a transid, and provided they didn't wait too long between the calls, they will get the same result. However, if a second commit starts before they call WAIT_SYNC, they may end up waiting longer for it to commit as well. Even so, a START_SYNC+WAIT_SYNC still guarantees that any operation completed before the START_SYNC reaches disk. Signed-off-by: Sage Weil <sage@newdream.net> Signed-off-by: Chris Mason <chris.mason@oracle.com>
2010-10-30 03:41:32 +08:00
#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
struct btrfs_ioctl_scrub_args)
#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
struct btrfs_ioctl_scrub_args)
#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
struct btrfs_ioctl_logical_ino_args)
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
struct btrfs_ioctl_quota_ctl_args)
#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
struct btrfs_ioctl_qgroup_assign_args)
#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
struct btrfs_ioctl_qgroup_create_args)
#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
struct btrfs_ioctl_quota_rescan_args)
#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
struct btrfs_ioctl_quota_rescan_args)
#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
#define BTRFS_IOC_GET_FSLABEL FS_IOC_GETFSLABEL
#define BTRFS_IOC_SET_FSLABEL FS_IOC_SETFSLABEL
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_dev_replace_args)
#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
struct btrfs_ioctl_same_args)
#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
struct btrfs_ioctl_feature_flags)
#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
struct btrfs_ioctl_feature_flags[2])
#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
struct btrfs_ioctl_feature_flags[3])
#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
struct btrfs_ioctl_vol_args_v2)
btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2 Now that check_extent_in_eb()'s extent offset filter can be turned off, we need a way to do it from userspace. Add a 'flags' field to the btrfs_logical_ino_args structure to disable extent offset filtering, taking the place of one of the existing reserved[] fields. Previous versions of LOGICAL_INO neglected to check whether any of the reserved fields have non-zero values. Assigning meaning to those fields now may change the behavior of existing programs that left these fields uninitialized. The lack of a zero check also means that new programs have no way to know whether the kernel is honoring the flags field. To avoid these problems, define a new ioctl LOGICAL_INO_V2. We can use the same argument layout as LOGICAL_INO, but shorten the reserved[] array by one element and turn it into the 'flags' field. The V2 ioctl explicitly checks that reserved fields and unsupported flag bits are zero so that userspace can negotiate future feature bits as they are defined. Since the memory layouts of the two ioctls' arguments are compatible, there is no need for a separate function for logical_to_ino_v2 (contrast with tree_search_v2 vs tree_search where the layout and code are quite different). A version parameter and an 'if' statement will suffice. Now that we have a flags field in logical_ino_args, add a flag BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET to get the behavior we want, and pass it down the stack to iterate_inodes_from_logical. Motivation and background, copied from the patchset cover letter: Suppose we have a file with one extent: root@tester:~# zcat /usr/share/doc/cpio/changelog.gz > /test/a root@tester:~# sync Split the extent by overwriting it in the middle: root@tester:~# cat /dev/urandom | dd bs=4k seek=2 skip=2 count=1 conv=notrunc of=/test/a We should now have 3 extent refs to 2 extents, with one block unreachable. The extent tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 2 [...] item 9 key (1103101952 EXTENT_ITEM 73728) itemoff 15942 itemsize 53 extent refs 2 gen 29 flags DATA extent data backref root 5 objectid 261 offset 0 count 2 [...] item 11 key (1103175680 EXTENT_ITEM 4096) itemoff 15865 itemsize 53 extent refs 1 gen 30 flags DATA extent data backref root 5 objectid 261 offset 8192 count 1 [...] and the ref tree looks like: root@tester:~# btrfs-debug-tree /dev/vdc -t 5 [...] item 6 key (261 EXTENT_DATA 0) itemoff 15825 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 0 nr 8192 ram 73728 extent compression(none) item 7 key (261 EXTENT_DATA 8192) itemoff 15772 itemsize 53 extent data disk byte 1103175680 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression(none) item 8 key (261 EXTENT_DATA 12288) itemoff 15719 itemsize 53 extent data disk byte 1103101952 nr 73728 extent data offset 12288 nr 61440 ram 73728 extent compression(none) [...] There are two references to the same extent with different, non-overlapping byte offsets: [------------------72K extent at 1103101952----------------------] [--8K----------------|--4K unreachable----|--60K-----------------] ^ ^ | | [--8K ref offset 0--][--4K ref offset 0--][--60K ref offset 12K--] | v [-----4K extent-----] at 1103175680 We want to find all of the references to extent bytenr 1103101952. Without the patch (and without running btrfs-debug-tree), we have to do it with 18 LOGICAL_INO calls: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO inode 261 offset 0 root 5 root@tester:~# for x in $(seq 0 17); do btrfs ins log $((1103101952 + x * 4096)) -P /test/; done 2>&1 | grep inode inode 261 offset 0 root 5 inode 261 offset 4096 root 5 <- same extent ref as offset 0 (offset 8192 returns empty set, not reachable) inode 261 offset 12288 root 5 inode 261 offset 16384 root 5 \ inode 261 offset 20480 root 5 | inode 261 offset 24576 root 5 | inode 261 offset 28672 root 5 | inode 261 offset 32768 root 5 | inode 261 offset 36864 root 5 \ inode 261 offset 40960 root 5 > all the same extent ref as offset 12288. inode 261 offset 45056 root 5 / More processing required in userspace inode 261 offset 49152 root 5 | to figure out these are all duplicates. inode 261 offset 53248 root 5 | inode 261 offset 57344 root 5 | inode 261 offset 61440 root 5 | inode 261 offset 65536 root 5 | inode 261 offset 69632 root 5 / In the worst case the extents are 128MB long, and we have to do 32768 iterations of the loop to find one 4K extent ref. With the patch, we just use one call to map all refs to the extent at once: root@tester:~# btrfs ins log 1103101952 -P /test/ Using LOGICAL_INO_V2 inode 261 offset 0 root 5 inode 261 offset 12288 root 5 The TREE_SEARCH ioctl allows userspace to retrieve the offset and extent bytenr fields easily once the root, inode and offset are known. This is sufficient information to build a complete map of the extent and all of its references. Userspace can use this information to make better choices to dedup or defrag. Signed-off-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org> Reviewed-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> Tested-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com> [ copy background and motivation from cover letter ] Signed-off-by: David Sterba <dsterba@suse.com>
2017-09-23 01:58:46 +08:00
#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
struct btrfs_ioctl_logical_ino_args)
#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
struct btrfs_ioctl_get_subvol_info_args)
#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
struct btrfs_ioctl_get_subvol_rootref_args)
#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
struct btrfs_ioctl_ino_lookup_user_args)
#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
struct btrfs_ioctl_vol_args_v2)
#endif /* _UAPI_LINUX_BTRFS_H */