From d08854f5bcf3ea0cabc6fd2fc49c2d97e00c7c88 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 26 Jun 2016 18:24:01 -0400 Subject: [PATCH 01/21] ext4: optimize ext4_should_retry_alloc() to improve ENOSPC performance If there are no pending blocks to be released after a commit, forcing a journal commit has no hope of helping. It's possible that a commit had just completed, so if there are now free blocks available for allocation, it's worth retrying the commit. Reported-by: Chao Yu Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 4 +++- fs/ext4/ext4.h | 1 + fs/ext4/ext4_jbd2.h | 10 +++++++++- fs/ext4/mballoc.c | 12 ++++++++++-- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3020fd70c392..0b8105b3293d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -610,7 +610,9 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + smp_mb(); + if (EXT4_SB(sb)->s_mb_free_pending) + jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); return 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b84aa1ca480a..96c73e6fec6e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1430,6 +1430,7 @@ struct ext4_sb_info { unsigned short *s_mb_offsets; unsigned int *s_mb_maxs; unsigned int s_group_info_size; + unsigned int s_mb_free_pending; /* tunables */ unsigned long s_stripe; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 09c1ef38cbe6..b1d52c14098e 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -175,6 +175,13 @@ struct ext4_journal_cb_entry { * There is no guaranteed calling order of multiple registered callbacks on * the same transaction. 
*/ +static inline void _ext4_journal_callback_add(handle_t *handle, + struct ext4_journal_cb_entry *jce) +{ + /* Add the jce to transaction's private list */ + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); +} + static inline void ext4_journal_callback_add(handle_t *handle, void (*func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, @@ -187,10 +194,11 @@ static inline void ext4_journal_callback_add(handle_t *handle, /* Add the jce to transaction's private list */ jce->jce_func = func; spin_lock(&sbi->s_md_lock); - list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); + _ext4_journal_callback_add(handle, jce); spin_unlock(&sbi->s_md_lock); } + /** * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c1ab3ec30423..77249e1f5c3a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2627,6 +2627,7 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); + sbi->s_mb_free_pending = 0; sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; @@ -2814,6 +2815,9 @@ static void ext4_free_data_callback(struct super_block *sb, /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); + spin_lock(&EXT4_SB(sb)->s_md_lock); + EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count; + spin_unlock(&EXT4_SB(sb)->s_md_lock); db = e4b.bd_info; /* there are blocks to put in buddy to make them really free */ @@ -4583,6 +4587,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, { ext4_group_t group = e4b->bd_group; ext4_grpblk_t cluster; + ext4_grpblk_t clusters = new_entry->efd_count; struct ext4_free_data *entry; struct ext4_group_info *db = e4b->bd_info; struct super_block *sb = e4b->bd_sb; @@ -4649,8 +4654,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy 
*e4b, } } /* Add the extent to transaction's private list */ - ext4_journal_callback_add(handle, ext4_free_data_callback, - &new_entry->efd_jce); + new_entry->efd_jce.jce_func = ext4_free_data_callback; + spin_lock(&sbi->s_md_lock); + _ext4_journal_callback_add(handle, &new_entry->efd_jce); + sbi->s_mb_free_pending += clusters; + spin_unlock(&sbi->s_md_lock); return 0; } From 78d962510796fdf39ccc5efd23d2eea2eca1ed99 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 26 Jun 2016 18:25:01 -0400 Subject: [PATCH 02/21] ext4: respect the nobarrier mount option in nojournal mode Also, if we are going to issue the barrier, we should do this after we write out the parent directories if necessary. Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/fsync.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 8850254136ae..5c4372512ef7 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -106,9 +106,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } if (!journal) { - ret = generic_file_fsync(file, start, end, datasync); + ret = __generic_file_fsync(file, start, end, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); + if (test_opt(inode->i_sb, BARRIER)) + goto issue_flush; goto out; } @@ -140,6 +142,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) needs_barrier = true; ret = jbd2_complete_transaction(journal, commit_tid); if (needs_barrier) { + issue_flush: err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); if (!ret) ret = err; From 7a4b188f0c0b49ed8ae41489494a9669ad7f1f8c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Jun 2016 11:30:21 -0400 Subject: [PATCH 03/21] jbd2: move lockdep instrumentation for jbd2 handles The transaction the handle references is free to commit once we've decremented t_updates counter. Move the lockdep instrumentation to that place. 
Currently it was a bit later which did not really matter but subsequent improvements to lockdep instrumentation would cause false positives with it. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 1749519b362f..41249538c047 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1750,11 +1750,11 @@ int jbd2_journal_stop(handle_t *handle) wake_up(&journal->j_wait_transaction_locked); } + lock_map_release(&handle->h_lockdep_map); + if (wait_for_commit) err = jbd2_log_wait_commit(journal, tid); - lock_map_release(&handle->h_lockdep_map); - if (handle->h_rsv_handle) jbd2_journal_free_reserved(handle->h_rsv_handle); free_and_exit: From ab714aff4f744f52f0beae93ed441f2f5585eb7a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Jun 2016 11:39:38 -0400 Subject: [PATCH 04/21] jbd2: move lockdep tracking to journal_s Currently lockdep map is tracked in each journal handle. To be able to expand lockdep support to cover also other cases where we depend on transaction commit and where handle is not available, move lockdep map into struct journal_s. Since this makes the lockdep map shared for all handles, we have to use rwsem_acquire_read() for acquisitions now. 
Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 4 ++++ fs/jbd2/transaction.c | 11 +++-------- include/linux/jbd2.h | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b31852f76f46..208e4058040b 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1091,6 +1091,7 @@ static void jbd2_stats_proc_exit(journal_t *journal) static journal_t * journal_init_common (void) { + static struct lock_class_key jbd2_trans_commit_key; journal_t *journal; int err; @@ -1126,6 +1127,9 @@ static journal_t * journal_init_common (void) spin_lock_init(&journal->j_history_lock); + lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", + &jbd2_trans_commit_key, 0); + return journal; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 41249538c047..c0065040c5be 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -382,13 +382,11 @@ repeat: read_unlock(&journal->j_state_lock); current->journal_info = handle; - lock_map_acquire(&handle->h_lockdep_map); + rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_); jbd2_journal_free_transaction(new_transaction); return 0; } -static struct lock_class_key jbd2_handle_key; - /* Allocate a new handle. This should probably be in a slab... 
*/ static handle_t *new_handle(int nblocks) { @@ -398,9 +396,6 @@ static handle_t *new_handle(int nblocks) handle->h_buffer_credits = nblocks; handle->h_ref = 1; - lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle", - &jbd2_handle_key, 0); - return handle; } @@ -672,7 +667,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) if (need_to_start) jbd2_log_start_commit(journal, tid); - lock_map_release(&handle->h_lockdep_map); + rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_); handle->h_buffer_credits = nblocks; ret = start_this_handle(journal, handle, gfp_mask); return ret; @@ -1750,7 +1745,7 @@ int jbd2_journal_stop(handle_t *handle) wake_up(&journal->j_wait_transaction_locked); } - lock_map_release(&handle->h_lockdep_map); + rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_); if (wait_for_commit) err = jbd2_log_wait_commit(journal, tid); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index efb232c5f668..3d210cbe4e1b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -491,10 +491,6 @@ struct jbd2_journal_handle unsigned long h_start_jiffies; unsigned int h_requested_credits; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map h_lockdep_map; -#endif }; @@ -793,6 +789,7 @@ jbd2_time_diff(unsigned long start, unsigned long end) * @j_proc_entry: procfs entry for the jbd statistics directory * @j_stats: Overall statistics * @j_private: An opaque pointer to fs-private information. + * @j_trans_commit_map: Lockdep entity to track transaction commit dependencies */ struct journal_s @@ -1035,6 +1032,18 @@ struct journal_s /* Precomputed journal UUID checksum for seeding other checksums */ __u32 j_csum_seed; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Lockdep entity to track transaction commit dependencies. Handles + * hold this "lock" for read, when we wait for commit, we acquire the + * "lock" for writing. 
This matches the properties of jbd2 journalling + * where the running transaction has to wait for all handles to be + * dropped to commit that transaction and also acquiring a handle may + * require transaction commit to finish. + */ + struct lockdep_map j_trans_commit_map; +#endif }; /* journal feature predicate functions */ From 1eaa566d368b214d99cbb973647c1b0b8102a9ae Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Jun 2016 11:40:54 -0400 Subject: [PATCH 05/21] jbd2: track more dependencies on transaction commit So far we were tracking only dependency on transaction commit due to starting a new handle (which may require commit to start a new transaction). Now add tracking also for other cases where we wait for transaction commit. This way lockdep can catch deadlocks e. g. because we call jbd2_journal_stop() for a synchronous handle with some locks held which rank below transaction start. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 1 + fs/jbd2/transaction.c | 4 ++++ include/linux/jbd2.h | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 208e4058040b..fc1d7a39b082 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -691,6 +691,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; + jbd2_might_wait_for_commit(journal); read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c0065040c5be..b5bc3e249163 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -182,6 +182,8 @@ static int add_transaction_credits(journal_t *journal, int blocks, int needed; int total = blocks + rsv_blocks; + jbd2_might_wait_for_commit(journal); + /* * If the current transaction is locked down for commit, wait * for the lock to be released. 
@@ -695,6 +697,8 @@ void jbd2_journal_lock_updates(journal_t *journal) { DEFINE_WAIT(wait); + jbd2_might_wait_for_commit(journal); + write_lock(&journal->j_state_lock); ++journal->j_barrier_count; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3d210cbe4e1b..dfaa1f4dcb0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1046,6 +1046,12 @@ struct journal_s #endif }; +#define jbd2_might_wait_for_commit(j) \ + do { \ + rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \ + rwsem_release(&j->j_trans_commit_map, 1, _THIS_IP_); \ + } while (0) + /* journal feature predicate functions */ #define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ From abcfb5d979892fc8b12574551fc907c05fe1b11b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Jun 2016 11:49:01 -0400 Subject: [PATCH 06/21] jbd2: make journal y2038 safe The jbd2 journal stores the commit time in 64-bit seconds and 32-bit nanoseconds, which avoids an overflow in 2038, but it gets the numbers from current_kernel_time(), which uses 'long' seconds on 32-bit architectures. This simply changes the code to call current_kernel_time64() so we use 64-bit seconds consistently. 
Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/jbd2/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 70078096117d..78313adb3c95 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -124,7 +124,7 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - struct timespec now = current_kernel_time(); + struct timespec64 now = current_kernel_time64(); *cbh = NULL; From f70749ca42943faa4d4dcce46dfdcaadb1d0c4b6 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 30 Jun 2016 11:53:46 -0400 Subject: [PATCH 07/21] ext4: check for extents that wrap around An extent with lblock = 4294967295 and len = 1 will pass the ext4_valid_extent() test: ext4_lblk_t last = lblock + len - 1; if (len == 0 || lblock > last) return 0; since last = 4294967295 + 1 - 1 = 4294967295. This would later trigger the BUG_ON(es->es_lblk + es->es_len < es->es_lblk) in ext4_es_end(). We can simplify it by removing the - 1 altogether and changing the test to use lblock + len <= lblock, since now if len = 0, then lblock + 0 == lblock and it fails, and if len > 0 then lblock + len > lblock in order to pass (i.e. it doesn't overflow). 
Fixes: 5946d0893 ("ext4: check for overlapping extents in ext4_valid_extent_entries()") Fixes: 2f974865f ("ext4: check for zero length extent explicitly") Cc: Eryu Guan Cc: stable@vger.kernel.org Signed-off-by: Phil Turnbull Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2a2eef9c14e4..2f258c68d3e0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -381,9 +381,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); - ext4_lblk_t last = lblock + len - 1; - if (len == 0 || lblock > last) + /* + * We allow neither: + * - zero length + * - overflow/wrap-around + */ + if (lblock + len <= lblock) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } From b47820edd1634dc1208f9212b7ecfb4230610a23 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 3 Jul 2016 17:51:39 -0400 Subject: [PATCH 08/21] ext4: avoid modifying checksum fields directly during checksum verification We temporarily change checksum fields in buffers of some types of metadata into '0' for verifying the checksum values. By doing this without locking the buffer, some metadata's checksums, which are being committed or written back to the storage, could be damaged. In our test, several metadata blocks were found with damaged metadata checksum value during recovery process. When we only verify the checksum value, we have to avoid modifying checksum fields directly. Signed-off-by: Daeho Jeong Signed-off-by: Youngjin Gil Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J.
Wong --- fs/ext4/inode.c | 38 ++++++++++++++++++++++---------------- fs/ext4/namei.c | 9 ++++----- fs/ext4/super.c | 18 +++++++++--------- fs/ext4/xattr.c | 13 +++++++------ 4 files changed, 42 insertions(+), 36 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f7140ca66e3b..44ee5d933b36 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -51,26 +51,32 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, struct ext4_inode_info *ei) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u16 csum_lo; - __u16 csum_hi = 0; __u32 csum; + __u16 dummy_csum = 0; + int offset = offsetof(struct ext4_inode, i_checksum_lo); + unsigned int csum_size = sizeof(dummy_csum); - csum_lo = le16_to_cpu(raw->i_checksum_lo); - raw->i_checksum_lo = 0; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { - csum_hi = le16_to_cpu(raw->i_checksum_hi); - raw->i_checksum_hi = 0; + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset); + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); + offset += csum_size; + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_GOOD_OLD_INODE_SIZE - offset); + + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + offset = offsetof(struct ext4_inode, i_checksum_hi); + csum = ext4_chksum(sbi, csum, (__u8 *)raw + + EXT4_GOOD_OLD_INODE_SIZE, + offset - EXT4_GOOD_OLD_INODE_SIZE); + if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, + csum_size); + offset += csum_size; + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_INODE_SIZE(inode->i_sb) - + offset); + } } - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, - EXT4_INODE_SIZE(inode->i_sb)); - - raw->i_checksum_lo = cpu_to_le16(csum_lo); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - raw->i_checksum_hi = cpu_to_le16(csum_hi); - return csum; } 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ec4c39952e84..5bb46b6ed456 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -420,15 +420,14 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum; - __le32 save_csum; int size; + __u32 dummy_csum = 0; + int offset = offsetof(struct dx_tail, dt_checksum); size = count_offset + (count * sizeof(struct dx_entry)); - save_csum = t->dt_checksum; - t->dt_checksum = 0; csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); - csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); - t->dt_checksum = save_csum; + csum = ext4_chksum(sbi, csum, (__u8 *)t, offset); + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum)); return cpu_to_le32(csum); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3822a5aedc61..6e2f9d628c48 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2068,23 +2068,25 @@ failed: static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { - int offset; + int offset = offsetof(struct ext4_group_desc, bg_checksum); __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); struct ext4_sb_info *sbi = EXT4_SB(sb); if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ - __le16 save_csum; __u32 csum32; + __u16 dummy_csum = 0; - save_csum = gdp->bg_checksum; - gdp->bg_checksum = 0; csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, sizeof(le_group)); - csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, - sbi->s_desc_size); - gdp->bg_checksum = save_csum; + csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset); + csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum, + sizeof(dummy_csum)); + offset += sizeof(dummy_csum); + if (offset < sbi->s_desc_size) + csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset, + sbi->s_desc_size - offset); crc = csum32 & 
0xFFFF; goto out; @@ -2094,8 +2096,6 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, if (!ext4_has_feature_gdt_csum(sb)) return 0; - offset = offsetof(struct ext4_group_desc, bg_checksum); - crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); crc = crc16(crc, (__u8 *)gdp, offset); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e79bd32b9b79..39e9cfb1b371 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -121,17 +121,18 @@ static __le32 ext4_xattr_block_csum(struct inode *inode, { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 csum; - __le32 save_csum; __le64 dsk_block_nr = cpu_to_le64(block_nr); + __u32 dummy_csum = 0; + int offset = offsetof(struct ext4_xattr_header, h_checksum); - save_csum = hdr->h_checksum; - hdr->h_checksum = 0; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr)); - csum = ext4_chksum(sbi, csum, (__u8 *)hdr, - EXT4_BLOCK_SIZE(inode->i_sb)); + csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset); + csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum)); + offset += sizeof(dummy_csum); + csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset, + EXT4_BLOCK_SIZE(inode->i_sb) - offset); - hdr->h_checksum = save_csum; return cpu_to_le32(csum); } From fa96454069b85a7e5d10f38b7d95edcd5dc64b9a Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 3 Jul 2016 21:11:08 -0400 Subject: [PATCH 09/21] ext4: correct error value of function verifying dx checksum ext4_dx_csum_verify() returns the success return value in two checksum verification failure cases. We need to set the return values to zero as failure like ext4_dirent_csum_verify() returning zero when failing to find a checksum dirent at the tail. Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J. 
Wong --- fs/ext4/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5bb46b6ed456..94d22e78a7dd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -445,14 +445,14 @@ static int ext4_dx_csum_verify(struct inode *inode, c = get_dx_countlimit(inode, dirent, &count_offset); if (!c) { EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); - return 1; + return 0; } limit = le16_to_cpu(c->limit); count = le16_to_cpu(c->count); if (count_offset + (limit * sizeof(struct dx_entry)) > EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { warn_no_space_for_csum(inode); - return 1; + return 0; } t = (struct dx_tail *)(((struct dx_entry *)c) + limit); From 646caa9c8e196880b41cd3e3d33a2ebc752bdb85 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 4 Jul 2016 10:14:01 -0400 Subject: [PATCH 10/21] ext4: fix deadlock during page writeback Commit 06bd3c36a733 (ext4: fix data exposure after a crash) uncovered a deadlock in ext4_writepages() which was previously much harder to hit. After this commit xfstest generic/130 reproduces the deadlock on small filesystems. The problem happens when ext4_do_update_inode() sets LARGE_FILE feature and marks current inode handle as synchronous. That subsequently results in ext4_journal_stop() called from ext4_writepages() to block waiting for transaction commit while still holding page locks, reference to io_end, and some prepared bio in mpd structure each of which can possibly block transaction commit from completing and thus results in deadlock. Fix the problem by releasing page locks, io_end reference, and submitting prepared bio before calling ext4_journal_stop(). [ Changed to defer the call to ext4_journal_stop() only if the handle is synchronous. 
--tytso ] Reported-and-tested-by: Eryu Guan Signed-off-by: Theodore Ts'o CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/ext4/inode.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 44ee5d933b36..321a31cef59c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2754,13 +2754,36 @@ retry: done = true; } } - ext4_journal_stop(handle); + /* + * Caution: If the handle is synchronous, + * ext4_journal_stop() can wait for transaction commit + * to finish which may depend on writeback of pages to + * complete or on page lock to be released. In that + * case, we have to wait until after we have + * submitted all the IO, released page locks we hold, + * and dropped io_end reference (for extent conversion + * to be able to complete) before stopping the handle. + */ + if (!ext4_handle_valid(handle) || handle->h_sync == 0) { + ext4_journal_stop(handle); + handle = NULL; + } /* Submit prepared bio */ ext4_io_submit(&mpd.io_submit); /* Unlock pages we didn't use */ mpage_release_unused_pages(&mpd, give_up_on_write); - /* Drop our io_end reference we got from init */ - ext4_put_io_end(mpd.io_submit.io_end); + /* + * Drop our io_end reference we got from init. We have + * to be careful and use deferred io_end finishing if + * we are still holding the transaction as we can + * release the last reference to io_end which may end + * up doing unwritten extent conversion. + */ + if (handle) { + ext4_put_io_end_defer(mpd.io_submit.io_end); + ext4_journal_stop(handle); + } else + ext4_put_io_end(mpd.io_submit.io_end); if (ret == -ENOSPC && sbi->s_journal) { /* From 4743f83990614af6adb09ea7aa3c37b78c4031ab Mon Sep 17 00:00:00 2001 From: "Pranay Kr.
Srivastava" Date: Mon, 4 Jul 2016 10:24:52 -0400 Subject: [PATCH 11/21] ext4: Fix WARN_ON_ONCE in ext4_commit_super() If there are racing calls to ext4_commit_super() it's possible for another writeback of the superblock to result in the buffer being marked with an error after we check if the buffer is marked as having a write error and the buffer up-to-date flag is set again. If that happens mark_buffer_dirty() can end up throwing a WARN_ON_ONCE. Fix this by moving this check to right before we call mark_buffer_dirty(), and keeping the buffer locked during this whole sequence. Signed-off-by: Pranay Kr. Srivastava Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6e2f9d628c48..5664ee66b301 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4327,20 +4327,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; - if (buffer_write_io_error(sbh)) { - /* - * Oh, dear. A previous attempt to write the - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. - */ - ext4_msg(sb, KERN_ERR, "previous I/O error to " - "superblock detected"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock @@ -4371,7 +4357,23 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeinodes_counter)); BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); + lock_buffer(sbh); + if (buffer_write_io_error(sbh)) { + /* + * Oh, dear. A previous attempt to write the + * superblock failed.
This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. + */ + ext4_msg(sb, KERN_ERR, "previous I/O error to " + "superblock detected"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + } mark_buffer_dirty(sbh); + unlock_buffer(sbh); if (sync) { error = __sync_dirty_buffer(sbh, test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); From 6a7fd522a7c94cdef0a3b08acf8e6702056e635c Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 4 Jul 2016 11:03:00 -0400 Subject: [PATCH 12/21] ext4: don't call ext4_should_journal_data() on the journal inode If ext4_fill_super() fails early, it's possible for ext4_evict_inode() to call ext4_should_journal_data() before superblock options and flags are fully set up. In that case, the iput() on the journal inode can end up causing a BUG(). Work around this problem by reordering the tests so we only call ext4_should_journal_data() after we know it's not the journal inode. Fixes: 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data") Fixes: 2b405bfa84 ("ext4: fix data=journal fast mount/umount hang") Cc: Jan Kara Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 321a31cef59c..ea39d191dbcb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -211,9 +211,9 @@ void ext4_evict_inode(struct inode *inode) * Note that directories do not have this problem because they * don't use page cache. 
*/ - if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT4_JOURNAL_INO) { + if (inode->i_ino != EXT4_JOURNAL_INO && + ext4_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; From de9e9181bc066d63d78b768e95b5d949e2a8673a Mon Sep 17 00:00:00 2001 From: yalin wang Date: Tue, 5 Jul 2016 16:32:32 -0400 Subject: [PATCH 13/21] ext4: remove unused page_idx Signed-off-by: yalin wang Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/readpage.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index dc54a4b60eba..e24ec3bfe1b5 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -135,7 +135,6 @@ int ext4_mpage_readpages(struct address_space *mapping, unsigned nr_pages) { struct bio *bio = NULL; - unsigned page_idx; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; @@ -157,7 +156,7 @@ int ext4_mpage_readpages(struct address_space *mapping, map.m_len = 0; map.m_flags = 0; - for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { + for (; nr_pages; nr_pages--) { int fully_mapped = 1; unsigned first_hole = blocks_per_page; From 5b9554dc5bf008ae7f68a52e3d7e76c0920938a2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 5 Jul 2016 20:01:52 -0400 Subject: [PATCH 14/21] ext4: validate s_reserved_gdt_blocks on mount If s_reserved_gdt_blocks is extremely large, it's possible for ext4_init_block_bitmap(), which is called when ext4 sets up an uninitialized block bitmap, to corrupt random kernel memory. Add the same checks which e2fsck has --- it must never be larger than blocksize / sizeof(__u32) --- and then add a backup check in ext4_init_block_bitmap() in case the superblock gets modified after the file system is mounted. 
Reported-by: Vegard Nossum Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/balloc.c | 3 +++ fs/ext4/super.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0b8105b3293d..799a92bdf577 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb, memset(bh->b_data, 0, sb->s_blocksize); bit_max = ext4_num_base_meta_clusters(sb, block_group); + if ((bit_max >> 3) >= bh->b_size) + return -EFSCORRUPTED; + for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5664ee66b301..13c49af7a06a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3416,6 +3416,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { + ext4_msg(sb, KERN_ERR, + "Number of reserved GDT blocks insanely large: %d", + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); + goto failed_mount; + } + if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { err = bdev_dax_supported(sb, blocksize); if (err) From 079788d01e7ba9d7366d7bd2a0db9cab5944e85b Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 5 Jul 2016 21:33:52 -0400 Subject: [PATCH 15/21] ext4: fix project quota accounting without quota limits enabled We should always transfer quota accounting, regardless of whether quota limits are enabled. 
Steps to reproduce: # mkfs.ext4 /dev/sda4 -O quota,project # mount /dev/sda4 /mnt/test # cp /bin/bash /mnt/test # chattr -p 123 /mnt/test/bash # quota -v -P 123 Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 28cc412852af..b5a39b00265e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -308,6 +308,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) kprojid_t kprojid; struct ext4_iloc iloc; struct ext4_inode *raw_inode; + struct dquot *transfer_to[MAXQUOTAS] = { }; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT)) { @@ -361,17 +362,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) if (err) goto out_stop; - if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) { - struct dquot *transfer_to[MAXQUOTAS] = { }; - - transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); - if (!IS_ERR(transfer_to[PRJQUOTA])) { - err = __dquot_transfer(inode, transfer_to); - dqput(transfer_to[PRJQUOTA]); - if (err) - goto out_dirty; - } + transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); + if (!IS_ERR(transfer_to[PRJQUOTA])) { + err = __dquot_transfer(inode, transfer_to); + dqput(transfer_to[PRJQUOTA]); + if (err) + goto out_dirty; } + EXT4_I(inode)->i_projid = kprojid; inode->i_ctime = ext4_current_time(inode); out_dirty: From ff0031d848a0cd7002606f9feef958de8d5edf19 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 5 Jul 2016 22:02:41 -0400 Subject: [PATCH 16/21] ext2: fix filesystem deadlock while reading corrupted xattr block This bug can be reproducible with fsfuzzer, although, I couldn't reproduce it 100% of my tries, it is quite easily reproducible. 
During the deletion of an inode, ext2_xattr_delete_inode() does not check whether the block pointed to by EXT2_I(inode)->i_file_acl is a valid data block. This can lead to a deadlock when i_file_acl == 1 and the filesystem block size is 1024. In that situation, ext2_xattr_delete_inode() will load the superblock's buffer head (instead of a valid i_file_acl block) and then lock that buffer head, which ext2_sync_super() will also try to lock, making the filesystem deadlock with the following stack trace: root 17180 0.0 0.0 113660 660 pts/0 D+ 07:08 0:00 rmdir /media/test/dir1 [] __sync_dirty_buffer+0xaf/0x100 [] sync_dirty_buffer+0x13/0x20 [] ext2_sync_super+0xb7/0xc0 [ext2] [] ext2_error+0x119/0x130 [ext2] [] ext2_free_blocks+0x83/0x350 [ext2] [] ext2_xattr_delete_inode+0x173/0x190 [ext2] [] ext2_evict_inode+0xc9/0x130 [ext2] [] evict+0xb3/0x180 [] iput+0x1b8/0x240 [] d_delete+0x11c/0x150 [] vfs_rmdir+0xfe/0x120 [] do_rmdir+0x17e/0x1f0 [] SyS_rmdir+0x16/0x20 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 [] 0xffffffffffffffff Fix this by using the same approach ext4 uses to test data block validity, implementing ext2_data_block_valid(). Another possibility, when the superblock is badly corrupted, is that i_file_acl is 1, block_count is 1 and first_data_block is 0. In such situations, i_file_acl may point to a 'valid' block but still step over the superblock.
The approach I used was to also test if the superblock is not in the range described by ext2_data_block_valid() arguments. Signed-off-by: Carlos Maiolino Signed-off-by: Theodore Ts'o --- fs/ext2/balloc.c | 21 +++++++++++++++++++++ fs/ext2/ext2.h | 3 +++ fs/ext2/inode.c | 10 ++++++++++ fs/ext2/xattr.c | 9 +++++++++ 4 files changed, 43 insertions(+) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 9f9992b37924..4c40c0786e16 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1193,6 +1193,27 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) return 1; } +/* + * Returns 1 if the passed-in block region is valid; 0 if some part overlaps + * with filesystem metadata blocks. + */ +int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, + unsigned int count) +{ + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count < start_blk) || + (start_blk > le32_to_cpu(sbi->s_es->s_blocks_count))) + return 0; + + /* Ensure we do not step over superblock */ + if ((start_blk <= sbi->s_sb_block) && + (start_blk + count >= sbi->s_sb_block)) + return 0; + + + return 1; +} + /* * ext2_new_blocks() -- core block(s) allocation function * @inode: file inode diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 170939f379d7..3fb93681bf7f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -367,6 +367,7 @@ struct ext2_inode { */ #define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT2_ERROR_FS 0x0002 /* Errors detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ /* * Mount flags @@ -739,6 +740,8 @@ extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); extern ext2_fsblk_t ext2_new_block(struct inode *, unsigned long, int *); extern ext2_fsblk_t ext2_new_blocks(struct inode *, unsigned long, unsigned long *, int *); +extern int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, + unsigned int count); extern void ext2_free_blocks (struct inode *, unsigned long,
unsigned long); extern unsigned long ext2_count_free_blocks (struct super_block *); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fcbe58641e40..d5c7d09919f3 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1389,6 +1389,16 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) ei->i_frag_size = raw_inode->i_fsize; ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); ei->i_dir_acl = 0; + + if (ei->i_file_acl && + !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) { + ext2_error(sb, "ext2_iget", "bad extended attribute block %u", + ei->i_file_acl); + brelse(bh); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (S_ISREG(inode->i_mode)) inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; else diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 1a5e3bff0b63..b7f896f3f7a7 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -759,10 +759,19 @@ void ext2_xattr_delete_inode(struct inode *inode) { struct buffer_head *bh = NULL; + struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) goto cleanup; + + if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 0)) { + ext2_error(inode->i_sb, "ext2_xattr_delete_inode", + "inode %ld: xattr block %d is out of data blocks range", + inode->i_ino, EXT2_I(inode)->i_file_acl); + goto cleanup; + } + bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", From a7550b30ab709ffb9bbe48669adf7d8556f3698f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 10 Jul 2016 14:01:03 -0400 Subject: [PATCH 17/21] ext4 crypto: migrate into vfs's crypto engine This patch removes the most parts of internal crypto codes. And then, it modifies and adds some ext4-specific crypt codes to use the generic facility. 
Signed-off-by: Jaegeuk Kim Signed-off-by: Theodore Ts'o --- fs/ext4/Kconfig | 12 +- fs/ext4/Makefile | 2 - fs/ext4/crypto.c | 536 ---------------------------------------- fs/ext4/crypto_fname.c | 468 ----------------------------------- fs/ext4/crypto_key.c | 274 -------------------- fs/ext4/crypto_policy.c | 229 ----------------- fs/ext4/dir.c | 26 +- fs/ext4/ext4.h | 220 ++++++----------- fs/ext4/ext4_crypto.h | 159 ------------ fs/ext4/file.c | 10 +- fs/ext4/ialloc.c | 7 +- fs/ext4/inline.c | 14 +- fs/ext4/inode.c | 8 +- fs/ext4/ioctl.c | 20 +- fs/ext4/namei.c | 131 +++++----- fs/ext4/page-io.c | 13 +- fs/ext4/readpage.c | 45 +--- fs/ext4/super.c | 97 +++++++- fs/ext4/symlink.c | 35 ++- 19 files changed, 303 insertions(+), 2003 deletions(-) delete mode 100644 fs/ext4/crypto.c delete mode 100644 fs/ext4/crypto_fname.c delete mode 100644 fs/ext4/crypto_key.c delete mode 100644 fs/ext4/crypto_policy.c delete mode 100644 fs/ext4/ext4_crypto.h diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index b46e9fc64196..e38039fd96ff 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -99,17 +99,9 @@ config EXT4_FS_SECURITY extended attributes for file security labels, say N. config EXT4_ENCRYPTION - tristate "Ext4 Encryption" + bool "Ext4 Encryption" depends on EXT4_FS - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_XTS - select CRYPTO_CTS - select CRYPTO_CTR - select CRYPTO_SHA256 - select KEYS - select ENCRYPTED_KEYS + select FS_ENCRYPTION help Enable encryption of ext4 files and directories. 
This feature is similar to ecryptfs, but it is more memory diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index f52cf54f0cbc..354103f3490c 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -12,5 +12,3 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o -ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \ - crypto_key.o crypto_fname.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c deleted file mode 100644 index 6a6c27373b54..000000000000 --- a/fs/ext4/crypto.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * linux/fs/ext4/crypto.c - * - * Copyright (C) 2015, Google, Inc. - * - * This contains encryption functions for ext4 - * - * Written by Michael Halcrow, 2014. - * - * Filename encryption additions - * Uday Savagaonkar, 2014 - * Encryption policy handling additions - * Ildar Muslukhov, 2014 - * - * This has not yet undergone a rigorous security audit. - * - * The usage of AES-XTS should conform to recommendations in NIST - * Special Publication 800-38E and IEEE P1619/D16. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ext4_extents.h" -#include "xattr.h" - -/* Encryption added and removed here! 
(L: */ - -static unsigned int num_prealloc_crypto_pages = 32; -static unsigned int num_prealloc_crypto_ctxs = 128; - -module_param(num_prealloc_crypto_pages, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_pages, - "Number of crypto pages to preallocate"); -module_param(num_prealloc_crypto_ctxs, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_ctxs, - "Number of crypto contexts to preallocate"); - -static mempool_t *ext4_bounce_page_pool; - -static LIST_HEAD(ext4_free_crypto_ctxs); -static DEFINE_SPINLOCK(ext4_crypto_ctx_lock); - -static struct kmem_cache *ext4_crypto_ctx_cachep; -struct kmem_cache *ext4_crypt_info_cachep; - -/** - * ext4_release_crypto_ctx() - Releases an encryption context - * @ctx: The encryption context to release. - * - * If the encryption context was allocated from the pre-allocated pool, returns - * it to that pool. Else, frees it. - * - * If there's a bounce page in the context, this frees that. - */ -void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx) -{ - unsigned long flags; - - if (ctx->flags & EXT4_WRITE_PATH_FL && ctx->w.bounce_page) - mempool_free(ctx->w.bounce_page, ext4_bounce_page_pool); - ctx->w.bounce_page = NULL; - ctx->w.control_page = NULL; - if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) { - kmem_cache_free(ext4_crypto_ctx_cachep, ctx); - } else { - spin_lock_irqsave(&ext4_crypto_ctx_lock, flags); - list_add(&ctx->free_list, &ext4_free_crypto_ctxs); - spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); - } -} - -/** - * ext4_get_crypto_ctx() - Gets an encryption context - * @inode: The inode for which we are doing the crypto - * - * Allocates and initializes an encryption context. - * - * Return: An allocated and initialized encryption context on success; error - * value or NULL otherwise. 
- */ -struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, - gfp_t gfp_flags) -{ - struct ext4_crypto_ctx *ctx = NULL; - int res = 0; - unsigned long flags; - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - - if (ci == NULL) - return ERR_PTR(-ENOKEY); - - /* - * We first try getting the ctx from a free list because in - * the common case the ctx will have an allocated and - * initialized crypto tfm, so it's probably a worthwhile - * optimization. For the bounce page, we first try getting it - * from the kernel allocator because that's just about as fast - * as getting it from a list and because a cache of free pages - * should generally be a "last resort" option for a filesystem - * to be able to do its job. - */ - spin_lock_irqsave(&ext4_crypto_ctx_lock, flags); - ctx = list_first_entry_or_null(&ext4_free_crypto_ctxs, - struct ext4_crypto_ctx, free_list); - if (ctx) - list_del(&ctx->free_list); - spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); - if (!ctx) { - ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, gfp_flags); - if (!ctx) { - res = -ENOMEM; - goto out; - } - ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; - } - ctx->flags &= ~EXT4_WRITE_PATH_FL; - -out: - if (res) { - if (!IS_ERR_OR_NULL(ctx)) - ext4_release_crypto_ctx(ctx); - ctx = ERR_PTR(res); - } - return ctx; -} - -struct workqueue_struct *ext4_read_workqueue; -static DEFINE_MUTEX(crypto_init); - -/** - * ext4_exit_crypto() - Shutdown the ext4 encryption system - */ -void ext4_exit_crypto(void) -{ - struct ext4_crypto_ctx *pos, *n; - - list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) - kmem_cache_free(ext4_crypto_ctx_cachep, pos); - INIT_LIST_HEAD(&ext4_free_crypto_ctxs); - if (ext4_bounce_page_pool) - mempool_destroy(ext4_bounce_page_pool); - ext4_bounce_page_pool = NULL; - if (ext4_read_workqueue) - destroy_workqueue(ext4_read_workqueue); - ext4_read_workqueue = NULL; - if 
(ext4_crypto_ctx_cachep) - kmem_cache_destroy(ext4_crypto_ctx_cachep); - ext4_crypto_ctx_cachep = NULL; - if (ext4_crypt_info_cachep) - kmem_cache_destroy(ext4_crypt_info_cachep); - ext4_crypt_info_cachep = NULL; -} - -/** - * ext4_init_crypto() - Set up for ext4 encryption. - * - * We only call this when we start accessing encrypted files, since it - * results in memory getting allocated that wouldn't otherwise be used. - * - * Return: Zero on success, non-zero otherwise. - */ -int ext4_init_crypto(void) -{ - int i, res = -ENOMEM; - - mutex_lock(&crypto_init); - if (ext4_read_workqueue) - goto already_initialized; - ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0); - if (!ext4_read_workqueue) - goto fail; - - ext4_crypto_ctx_cachep = KMEM_CACHE(ext4_crypto_ctx, - SLAB_RECLAIM_ACCOUNT); - if (!ext4_crypto_ctx_cachep) - goto fail; - - ext4_crypt_info_cachep = KMEM_CACHE(ext4_crypt_info, - SLAB_RECLAIM_ACCOUNT); - if (!ext4_crypt_info_cachep) - goto fail; - - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { - struct ext4_crypto_ctx *ctx; - - ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS); - if (!ctx) { - res = -ENOMEM; - goto fail; - } - list_add(&ctx->free_list, &ext4_free_crypto_ctxs); - } - - ext4_bounce_page_pool = - mempool_create_page_pool(num_prealloc_crypto_pages, 0); - if (!ext4_bounce_page_pool) { - res = -ENOMEM; - goto fail; - } -already_initialized: - mutex_unlock(&crypto_init); - return 0; -fail: - ext4_exit_crypto(); - mutex_unlock(&crypto_init); - return res; -} - -void ext4_restore_control_page(struct page *data_page) -{ - struct ext4_crypto_ctx *ctx = - (struct ext4_crypto_ctx *)page_private(data_page); - - set_page_private(data_page, (unsigned long)NULL); - ClearPagePrivate(data_page); - unlock_page(data_page); - ext4_release_crypto_ctx(ctx); -} - -/** - * ext4_crypt_complete() - The completion callback for page encryption - * @req: The asynchronous encryption request context - * @res: The result of the encryption 
operation - */ -static void ext4_crypt_complete(struct crypto_async_request *req, int res) -{ - struct ext4_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - -typedef enum { - EXT4_DECRYPT = 0, - EXT4_ENCRYPT, -} ext4_direction_t; - -static int ext4_page_crypto(struct inode *inode, - ext4_direction_t rw, - pgoff_t index, - struct page *src_page, - struct page *dest_page, - gfp_t gfp_flags) - -{ - u8 xts_tweak[EXT4_XTS_TWEAK_SIZE]; - struct skcipher_request *req = NULL; - DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct scatterlist dst, src; - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; - int res = 0; - - req = skcipher_request_alloc(tfm, gfp_flags); - if (!req) { - printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", - __func__); - return -ENOMEM; - } - skcipher_request_set_callback( - req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - ext4_crypt_complete, &ecr); - - BUILD_BUG_ON(EXT4_XTS_TWEAK_SIZE < sizeof(index)); - memcpy(xts_tweak, &index, sizeof(index)); - memset(&xts_tweak[sizeof(index)], 0, - EXT4_XTS_TWEAK_SIZE - sizeof(index)); - - sg_init_table(&dst, 1); - sg_set_page(&dst, dest_page, PAGE_SIZE, 0); - sg_init_table(&src, 1); - sg_set_page(&src, src_page, PAGE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, - xts_tweak); - if (rw == EXT4_DECRYPT) - res = crypto_skcipher_decrypt(req); - else - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - skcipher_request_free(req); - if (res) { - printk_ratelimited( - KERN_ERR - "%s: crypto_skcipher_encrypt() returned %d\n", - __func__, res); - return res; - } - return 0; -} - -static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx, - gfp_t gfp_flags) -{ - ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, gfp_flags); - if 
(ctx->w.bounce_page == NULL) - return ERR_PTR(-ENOMEM); - ctx->flags |= EXT4_WRITE_PATH_FL; - return ctx->w.bounce_page; -} - -/** - * ext4_encrypt() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @plaintext_page: The page to encrypt. Must be locked. - * - * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx - * encryption context. - * - * Called on the page write path. The caller must call - * ext4_restore_control_page() on the returned ciphertext page to - * release the bounce buffer and the encryption context. - * - * Return: An allocated page with the encrypted content on success. Else, an - * error value or NULL. - */ -struct page *ext4_encrypt(struct inode *inode, - struct page *plaintext_page, - gfp_t gfp_flags) -{ - struct ext4_crypto_ctx *ctx; - struct page *ciphertext_page = NULL; - int err; - - BUG_ON(!PageLocked(plaintext_page)); - - ctx = ext4_get_crypto_ctx(inode, gfp_flags); - if (IS_ERR(ctx)) - return (struct page *) ctx; - - /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx, gfp_flags); - if (IS_ERR(ciphertext_page)) - goto errout; - ctx->w.control_page = plaintext_page; - err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page, gfp_flags); - if (err) { - ciphertext_page = ERR_PTR(err); - errout: - ext4_release_crypto_ctx(ctx); - return ciphertext_page; - } - SetPagePrivate(ciphertext_page); - set_page_private(ciphertext_page, (unsigned long)ctx); - lock_page(ciphertext_page); - return ciphertext_page; -} - -/** - * ext4_decrypt() - Decrypts a page in-place - * @ctx: The encryption context. - * @page: The page to decrypt. Must be locked. - * - * Decrypts page in-place using the ctx encryption context. - * - * Called from the read completion callback. - * - * Return: Zero on success, non-zero otherwise. 
- */ -int ext4_decrypt(struct page *page) -{ - BUG_ON(!PageLocked(page)); - - return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT, - page->index, page, page, GFP_NOFS); -} - -int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, ext4_lblk_t len) -{ - struct ext4_crypto_ctx *ctx; - struct page *ciphertext_page = NULL; - struct bio *bio; - int ret, err = 0; - -#if 0 - ext4_msg(inode->i_sb, KERN_CRIT, - "ext4_encrypted_zeroout ino %lu lblk %u len %u", - (unsigned long) inode->i_ino, lblk, len); -#endif - - BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - - ctx = ext4_get_crypto_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT); - if (IS_ERR(ciphertext_page)) { - err = PTR_ERR(ciphertext_page); - goto errout; - } - - while (len--) { - err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - GFP_NOFS); - if (err) - goto errout; - - bio = bio_alloc(GFP_NOWAIT, 1); - if (!bio) { - err = -ENOMEM; - goto errout; - } - bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = - pblk << (inode->i_sb->s_blocksize_bits - 9); - ret = bio_add_page(bio, ciphertext_page, - inode->i_sb->s_blocksize, 0); - if (ret != inode->i_sb->s_blocksize) { - /* should never happen! */ - ext4_msg(inode->i_sb, KERN_ERR, - "bio_add_page failed: %d", ret); - WARN_ON(1); - bio_put(bio); - err = -EIO; - goto errout; - } - err = submit_bio_wait(WRITE, bio); - if ((err == 0) && bio->bi_error) - err = -EIO; - bio_put(bio); - if (err) - goto errout; - lblk++; pblk++; - } - err = 0; -errout: - ext4_release_crypto_ctx(ctx); - return err; -} - -bool ext4_valid_contents_enc_mode(uint32_t mode) -{ - return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS); -} - -/** - * ext4_validate_encryption_key_size() - Validate the encryption key size - * @mode: The key mode. - * @size: The key size to validate. - * - * Return: The validated key size for @mode. Zero if invalid. 
- */ -uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) -{ - if (size == ext4_encryption_key_size(mode)) - return size; - return 0; -} - -/* - * Validate dentries for encrypted directories to make sure we aren't - * potentially caching stale data after a key has been added or - * removed. - */ -static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct dentry *dir; - struct ext4_crypt_info *ci; - int dir_has_key, cached_with_key; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dget_parent(dentry); - if (!ext4_encrypted_inode(d_inode(dir))) { - dput(dir); - return 0; - } - ci = EXT4_I(d_inode(dir))->i_crypt_info; - if (ci && ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD)))) - ci = NULL; - - /* this should eventually be an flag in d_flags */ - cached_with_key = dentry->d_fsdata != NULL; - dir_has_key = (ci != NULL); - dput(dir); - - /* - * If the dentry was cached without the key, and it is a - * negative dentry, it might be a valid name. We can't check - * if the key has since been made available due to locking - * reasons, so we fail the validation so ext4_lookup() can do - * this check. - * - * We also fail the validation if the dentry was created with - * the key present, but we no longer have the key, or vice versa. 
- */ - if ((!cached_with_key && d_is_negative(dentry)) || - (!cached_with_key && dir_has_key) || - (cached_with_key && !dir_has_key)) { -#if 0 /* Revalidation debug */ - char buf[80]; - char *cp = simple_dname(dentry, buf, sizeof(buf)); - - if (IS_ERR(cp)) - cp = (char *) "???"; - pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata, - cached_with_key, d_is_negative(dentry), - dir_has_key); -#endif - return 0; - } - return 1; -} - -const struct dentry_operations ext4_encrypted_d_ops = { - .d_revalidate = ext4_d_revalidate, -}; diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c deleted file mode 100644 index 1a2f360405db..000000000000 --- a/fs/ext4/crypto_fname.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * linux/fs/ext4/crypto_fname.c - * - * Copyright (C) 2015, Google, Inc. - * - * This contains functions for filename crypto management in ext4 - * - * Written by Uday Savagaonkar, 2014. - * - * This has not yet undergone a rigorous security audit. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ext4.h" -#include "ext4_crypto.h" -#include "xattr.h" - -/** - * ext4_dir_crypt_complete() - - */ -static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) -{ - struct ext4_completion_result *ecr = req->data; - - if (res == -EINPROGRESS) - return; - ecr->res = res; - complete(&ecr->completion); -} - -bool ext4_valid_filenames_enc_mode(uint32_t mode) -{ - return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); -} - -static unsigned max_name_len(struct inode *inode) -{ - return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : - EXT4_NAME_LEN; -} - -/** - * ext4_fname_encrypt() - - * - * This function encrypts the input filename, and returns the length of the - * ciphertext. Errors are returned as negative numbers. We trust the caller to - * allocate sufficient memory to oname string. 
- */ -static int ext4_fname_encrypt(struct inode *inode, - const struct qstr *iname, - struct ext4_str *oname) -{ - u32 ciphertext_len; - struct skcipher_request *req = NULL; - DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[EXT4_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; - int padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim = max_name_len(inode); - - if (iname->len <= 0 || iname->len > lim) - return -EIO; - - ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ? - EXT4_CRYPTO_BLOCK_SIZE : iname->len; - ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding); - ciphertext_len = (ciphertext_len > lim) - ? lim : ciphertext_len; - - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } - - /* Allocate request */ - req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited( - KERN_ERR "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); - return -ENOMEM; - } - skcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - ext4_dir_crypt_complete, &ecr); - - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - kfree(alloc_buf); - 
skcipher_request_free(req); - if (res < 0) { - printk_ratelimited( - KERN_ERR "%s: Error (error code %d)\n", __func__, res); - } - oname->len = ciphertext_len; - return res; -} - -/* - * ext4_fname_decrypt() - * This function decrypts the input filename, and returns - * the length of the plaintext. - * Errors are returned as negative numbers. - * We trust the caller to allocate sufficient memory to oname string. - */ -static int ext4_fname_decrypt(struct inode *inode, - const struct ext4_str *iname, - struct ext4_str *oname) -{ - struct ext4_str tmp_in[2], tmp_out[1]; - struct skcipher_request *req = NULL; - DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct scatterlist src_sg, dst_sg; - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; - int res = 0; - char iv[EXT4_CRYPTO_BLOCK_SIZE]; - unsigned lim = max_name_len(inode); - - if (iname->len <= 0 || iname->len > lim) - return -EIO; - - tmp_in[0].name = iname->name; - tmp_in[0].len = iname->len; - tmp_out[0].name = oname->name; - - /* Allocate request */ - req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - printk_ratelimited( - KERN_ERR "%s: crypto_request_alloc() failed\n", __func__); - return -ENOMEM; - } - skcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - ext4_dir_crypt_complete, &ecr); - - /* Initialize IV */ - memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, iname->name, iname->len); - sg_init_one(&dst_sg, oname->name, oname->len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); - res = crypto_skcipher_decrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - skcipher_request_free(req); - if (res < 0) { - printk_ratelimited( - KERN_ERR "%s: Error in ext4_fname_encrypt (error code %d)\n", - __func__, res); - return res; - } - - oname->len = strnlen(oname->name, iname->len); - 
return oname->len; -} - -static const char *lookup_table = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; - -/** - * ext4_fname_encode_digest() - - * - * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. - * The encoded string is roughly 4/3 times the size of the input string. - */ -static int digest_encode(const char *src, int len, char *dst) -{ - int i = 0, bits = 0, ac = 0; - char *cp = dst; - - while (i < len) { - ac += (((unsigned char) src[i]) << bits); - bits += 8; - do { - *cp++ = lookup_table[ac & 0x3f]; - ac >>= 6; - bits -= 6; - } while (bits >= 6); - i++; - } - if (bits) - *cp++ = lookup_table[ac & 0x3f]; - return cp - dst; -} - -static int digest_decode(const char *src, int len, char *dst) -{ - int i = 0, bits = 0, ac = 0; - const char *p; - char *cp = dst; - - while (i < len) { - p = strchr(lookup_table, src[i]); - if (p == NULL || src[i] == 0) - return -2; - ac += (p - lookup_table) << bits; - bits += 6; - if (bits >= 8) { - *cp++ = ac & 0xff; - ac >>= 8; - bits -= 8; - } - i++; - } - if (ac) - return -1; - return cp - dst; -} - -/** - * ext4_fname_crypto_round_up() - - * - * Return: The next multiple of block size - */ -u32 ext4_fname_crypto_round_up(u32 size, u32 blksize) -{ - return ((size+blksize-1)/blksize)*blksize; -} - -unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen) -{ - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - int padding = 32; - - if (ci) - padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK); - if (ilen < EXT4_CRYPTO_BLOCK_SIZE) - ilen = EXT4_CRYPTO_BLOCK_SIZE; - return ext4_fname_crypto_round_up(ilen, padding); -} - -/* - * ext4_fname_crypto_alloc_buffer() - - * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. 
- */ -int ext4_fname_crypto_alloc_buffer(struct inode *inode, - u32 ilen, struct ext4_str *crypto_str) -{ - unsigned int olen = ext4_fname_encrypted_size(inode, ilen); - - crypto_str->len = olen; - if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) - olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2; - /* Allocated buffer can hold one more character to null-terminate the - * string */ - crypto_str->name = kmalloc(olen+1, GFP_NOFS); - if (!(crypto_str->name)) - return -ENOMEM; - return 0; -} - -/** - * ext4_fname_crypto_free_buffer() - - * - * Frees the buffer allocated for crypto operation. - */ -void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str) -{ - if (!crypto_str) - return; - kfree(crypto_str->name); - crypto_str->name = NULL; -} - -/** - * ext4_fname_disk_to_usr() - converts a filename from disk space to user space - */ -int _ext4_fname_disk_to_usr(struct inode *inode, - struct dx_hash_info *hinfo, - const struct ext4_str *iname, - struct ext4_str *oname) -{ - char buf[24]; - int ret; - - if (iname->len < 3) { - /*Check for . and .. */ - if (iname->name[0] == '.' 
&& iname->name[iname->len-1] == '.') { - oname->name[0] = '.'; - oname->name[iname->len-1] = '.'; - oname->len = iname->len; - return oname->len; - } - } - if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) { - EXT4_ERROR_INODE(inode, "encrypted inode too small"); - return -EUCLEAN; - } - if (EXT4_I(inode)->i_crypt_info) - return ext4_fname_decrypt(inode, iname, oname); - - if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { - ret = digest_encode(iname->name, iname->len, oname->name); - oname->len = ret; - return ret; - } - if (hinfo) { - memcpy(buf, &hinfo->hash, 4); - memcpy(buf+4, &hinfo->minor_hash, 4); - } else - memset(buf, 0, 8); - memcpy(buf + 8, iname->name + iname->len - 16, 16); - oname->name[0] = '_'; - ret = digest_encode(buf, 24, oname->name+1); - oname->len = ret + 1; - return ret + 1; -} - -int ext4_fname_disk_to_usr(struct inode *inode, - struct dx_hash_info *hinfo, - const struct ext4_dir_entry_2 *de, - struct ext4_str *oname) -{ - struct ext4_str iname = {.name = (unsigned char *) de->name, - .len = de->name_len }; - - return _ext4_fname_disk_to_usr(inode, hinfo, &iname, oname); -} - - -/** - * ext4_fname_usr_to_disk() - converts a filename from user space to disk space - */ -int ext4_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct ext4_str *oname) -{ - int res; - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - - if (iname->len < 3) { - /*Check for . and .. */ - if (iname->name[0] == '.' && - iname->name[iname->len-1] == '.') { - oname->name[0] = '.'; - oname->name[iname->len-1] = '.'; - oname->len = iname->len; - return oname->len; - } - } - if (ci) { - res = ext4_fname_encrypt(inode, iname, oname); - return res; - } - /* Without a proper key, a user is not allowed to modify the filenames - * in a directory. 
Consequently, a user space name cannot be mapped to - * a disk-space name */ - return -EACCES; -} - -int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, - int lookup, struct ext4_filename *fname) -{ - struct ext4_crypt_info *ci; - int ret = 0, bigname = 0; - - memset(fname, 0, sizeof(struct ext4_filename)); - fname->usr_fname = iname; - - if (!ext4_encrypted_inode(dir) || - ((iname->name[0] == '.') && - ((iname->len == 1) || - ((iname->name[1] == '.') && (iname->len == 2))))) { - fname->disk_name.name = (unsigned char *) iname->name; - fname->disk_name.len = iname->len; - return 0; - } - ret = ext4_get_encryption_info(dir); - if (ret) - return ret; - ci = EXT4_I(dir)->i_crypt_info; - if (ci) { - ret = ext4_fname_crypto_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret < 0) - return ret; - ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf); - if (ret < 0) - goto errout; - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; - return 0; - } - if (!lookup) - return -EACCES; - - /* We don't have the key and we are doing a lookup; decode the - * user-supplied name - */ - if (iname->name[0] == '_') - bigname = 1; - if ((bigname && (iname->len != 33)) || - (!bigname && (iname->len > 43))) - return -ENOENT; - - fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); - if (fname->crypto_buf.name == NULL) - return -ENOMEM; - ret = digest_decode(iname->name + bigname, iname->len - bigname, - fname->crypto_buf.name); - if (ret < 0) { - ret = -ENOENT; - goto errout; - } - fname->crypto_buf.len = ret; - if (bigname) { - memcpy(&fname->hinfo.hash, fname->crypto_buf.name, 4); - memcpy(&fname->hinfo.minor_hash, fname->crypto_buf.name + 4, 4); - } else { - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; - } - return 0; -errout: - kfree(fname->crypto_buf.name); - fname->crypto_buf.name = NULL; - return ret; -} - -void ext4_fname_free_filename(struct 
ext4_filename *fname) -{ - kfree(fname->crypto_buf.name); - fname->crypto_buf.name = NULL; - fname->usr_fname = NULL; - fname->disk_name.name = NULL; -} diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c deleted file mode 100644 index 0129d688d1f7..000000000000 --- a/fs/ext4/crypto_key.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * linux/fs/ext4/crypto_key.c - * - * Copyright (C) 2015, Google, Inc. - * - * This contains encryption key functions for ext4 - * - * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. - */ - -#include -#include -#include -#include -#include -#include - -#include "ext4.h" -#include "xattr.h" - -static void derive_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct ext4_completion_result *ecr = req->data; - - if (rc == -EINPROGRESS) - return; - - ecr->res = rc; - complete(&ecr->completion); -} - -/** - * ext4_derive_key_aes() - Derive a key using AES-128-ECB - * @deriving_key: Encryption key used for derivation. - * @source_key: Source key to which to apply derivation. - * @derived_key: Derived key. - * - * Return: Zero on success; non-zero otherwise. 
- */ -static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], - char source_key[EXT4_AES_256_XTS_KEY_SIZE], - char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) -{ - int res = 0; - struct skcipher_request *req = NULL; - DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct scatterlist src_sg, dst_sg; - struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); - - if (IS_ERR(tfm)) { - res = PTR_ERR(tfm); - tfm = NULL; - goto out; - } - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); - req = skcipher_request_alloc(tfm, GFP_NOFS); - if (!req) { - res = -ENOMEM; - goto out; - } - skcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - derive_crypt_complete, &ecr); - res = crypto_skcipher_setkey(tfm, deriving_key, - EXT4_AES_128_ECB_KEY_SIZE); - if (res < 0) - goto out; - sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE); - sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, - EXT4_AES_256_XTS_KEY_SIZE, NULL); - res = crypto_skcipher_encrypt(req); - if (res == -EINPROGRESS || res == -EBUSY) { - wait_for_completion(&ecr.completion); - res = ecr.res; - } - -out: - skcipher_request_free(req); - crypto_free_skcipher(tfm); - return res; -} - -void ext4_free_crypt_info(struct ext4_crypt_info *ci) -{ - if (!ci) - return; - - if (ci->ci_keyring_key) - key_put(ci->ci_keyring_key); - crypto_free_skcipher(ci->ci_ctfm); - kmem_cache_free(ext4_crypt_info_cachep, ci); -} - -void ext4_free_encryption_info(struct inode *inode, - struct ext4_crypt_info *ci) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_crypt_info *prev; - - if (ci == NULL) - ci = ACCESS_ONCE(ei->i_crypt_info); - if (ci == NULL) - return; - prev = cmpxchg(&ei->i_crypt_info, ci, NULL); - if (prev != ci) - return; - - ext4_free_crypt_info(ci); -} - -int _ext4_get_encryption_info(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct 
ext4_crypt_info *crypt_info; - char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE + - (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1]; - struct key *keyring_key = NULL; - struct ext4_encryption_key *master_key; - struct ext4_encryption_context ctx; - const struct user_key_payload *ukp; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct crypto_skcipher *ctfm; - const char *cipher_str; - char raw_key[EXT4_MAX_KEY_SIZE]; - char mode; - int res; - - if (!ext4_read_workqueue) { - res = ext4_init_crypto(); - if (res) - return res; - } - -retry: - crypt_info = ACCESS_ONCE(ei->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - ext4_free_encryption_info(inode, crypt_info); - goto retry; - } - - res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx)); - if (res < 0) { - if (!DUMMY_ENCRYPTION_ENABLED(sbi)) - return res; - ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; - ctx.filenames_encryption_mode = - EXT4_ENCRYPTION_MODE_AES_256_CTS; - ctx.flags = 0; - } else if (res != sizeof(ctx)) - return -EINVAL; - res = 0; - - crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL); - if (!crypt_info) - return -ENOMEM; - - crypt_info->ci_flags = ctx.flags; - crypt_info->ci_data_mode = ctx.contents_encryption_mode; - crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; - crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); - if (S_ISREG(inode->i_mode)) - mode = crypt_info->ci_data_mode; - else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - mode = crypt_info->ci_filename_mode; - else - BUG(); - switch (mode) { - case EXT4_ENCRYPTION_MODE_AES_256_XTS: - cipher_str = "xts(aes)"; - break; - case EXT4_ENCRYPTION_MODE_AES_256_CTS: - cipher_str = "cts(cbc(aes))"; - break; - default: - 
printk_once(KERN_WARNING - "ext4: unsupported key mode %d (ino %u)\n", - mode, (unsigned) inode->i_ino); - res = -ENOKEY; - goto out; - } - if (DUMMY_ENCRYPTION_ENABLED(sbi)) { - memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); - goto got_key; - } - memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX, - EXT4_KEY_DESC_PREFIX_SIZE); - sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE, - "%*phN", EXT4_KEY_DESCRIPTOR_SIZE, - ctx.master_key_descriptor); - full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE + - (2 * EXT4_KEY_DESCRIPTOR_SIZE)] = '\0'; - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); - if (IS_ERR(keyring_key)) { - res = PTR_ERR(keyring_key); - keyring_key = NULL; - goto out; - } - crypt_info->ci_keyring_key = keyring_key; - if (keyring_key->type != &key_type_logon) { - printk_once(KERN_WARNING - "ext4: key type must be logon\n"); - res = -ENOKEY; - goto out; - } - down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); - if (ukp->datalen != sizeof(struct ext4_encryption_key)) { - res = -EINVAL; - up_read(&keyring_key->sem); - goto out; - } - master_key = (struct ext4_encryption_key *)ukp->data; - BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != - EXT4_KEY_DERIVATION_NONCE_SIZE); - if (master_key->size != EXT4_AES_256_XTS_KEY_SIZE) { - printk_once(KERN_WARNING - "ext4: key size incorrect: %d\n", - master_key->size); - res = -ENOKEY; - up_read(&keyring_key->sem); - goto out; - } - res = ext4_derive_key_aes(ctx.nonce, master_key->raw, - raw_key); - up_read(&keyring_key->sem); - if (res) - goto out; -got_key: - ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); - if (!ctfm || IS_ERR(ctfm)) { - res = ctfm ? 
PTR_ERR(ctfm) : -ENOMEM; - printk(KERN_DEBUG - "%s: error %d (inode %u) allocating crypto tfm\n", - __func__, res, (unsigned) inode->i_ino); - goto out; - } - crypt_info->ci_ctfm = ctfm; - crypto_skcipher_clear_flags(ctfm, ~0); - crypto_tfm_set_flags(crypto_skcipher_tfm(ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_skcipher_setkey(ctfm, raw_key, - ext4_encryption_key_size(mode)); - if (res) - goto out; - memzero_explicit(raw_key, sizeof(raw_key)); - if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) { - ext4_free_crypt_info(crypt_info); - goto retry; - } - return 0; - -out: - if (res == -ENOKEY) - res = 0; - ext4_free_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); - return res; -} - -int ext4_has_encryption_key(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - - return (ei->i_crypt_info != NULL); -} diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c deleted file mode 100644 index ad050698143f..000000000000 --- a/fs/ext4/crypto_policy.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * linux/fs/ext4/crypto_policy.c - * - * Copyright (C) 2015, Google, Inc. - * - * This contains encryption policy functions for ext4 - * - * Written by Michael Halcrow, 2015. 
- */ - -#include -#include -#include - -#include "ext4_jbd2.h" -#include "ext4.h" -#include "xattr.h" - -static int ext4_inode_has_encryption_context(struct inode *inode) -{ - int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0); - return (res > 0); -} - -/* - * check whether the policy is consistent with the encryption context - * for the inode - */ -static int ext4_is_encryption_context_consistent_with_policy( - struct inode *inode, const struct ext4_encryption_policy *policy) -{ - struct ext4_encryption_context ctx; - int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx)); - if (res != sizeof(ctx)) - return 0; - return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, - EXT4_KEY_DESCRIPTOR_SIZE) == 0 && - (ctx.flags == - policy->flags) && - (ctx.contents_encryption_mode == - policy->contents_encryption_mode) && - (ctx.filenames_encryption_mode == - policy->filenames_encryption_mode)); -} - -static int ext4_create_encryption_context_from_policy( - struct inode *inode, const struct ext4_encryption_policy *policy) -{ - struct ext4_encryption_context ctx; - handle_t *handle; - int res, res2; - - res = ext4_convert_inline_data(inode); - if (res) - return res; - - ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; - memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, - EXT4_KEY_DESCRIPTOR_SIZE); - if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid contents encryption mode %d\n", __func__, - policy->contents_encryption_mode); - return -EINVAL; - } - if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { - printk(KERN_WARNING - "%s: Invalid filenames encryption mode %d\n", __func__, - policy->filenames_encryption_mode); - return -EINVAL; - } - if (policy->flags & ~EXT4_POLICY_FLAGS_VALID) - return -EINVAL; - ctx.contents_encryption_mode = 
policy->contents_encryption_mode; - ctx.filenames_encryption_mode = policy->filenames_encryption_mode; - ctx.flags = policy->flags; - BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); - get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); - - handle = ext4_journal_start(inode, EXT4_HT_MISC, - ext4_jbd2_credits_xattr(inode)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), 0); - if (!res) { - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - res = ext4_mark_inode_dirty(handle, inode); - if (res) - EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); - } - res2 = ext4_journal_stop(handle); - if (!res) - res = res2; - return res; -} - -int ext4_process_policy(const struct ext4_encryption_policy *policy, - struct inode *inode) -{ - if (policy->version != 0) - return -EINVAL; - - if (!ext4_inode_has_encryption_context(inode)) { - if (!S_ISDIR(inode->i_mode)) - return -EINVAL; - if (!ext4_empty_dir(inode)) - return -ENOTEMPTY; - return ext4_create_encryption_context_from_policy(inode, - policy); - } - - if (ext4_is_encryption_context_consistent_with_policy(inode, policy)) - return 0; - - printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", - __func__); - return -EINVAL; -} - -int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy) -{ - struct ext4_encryption_context ctx; - - int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx)); - if (res != sizeof(ctx)) - return -ENOENT; - if (ctx.format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1) - return -EINVAL; - policy->version = 0; - policy->contents_encryption_mode = ctx.contents_encryption_mode; - policy->filenames_encryption_mode = ctx.filenames_encryption_mode; - policy->flags = ctx.flags; - memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, - 
EXT4_KEY_DESCRIPTOR_SIZE); - return 0; -} - -int ext4_is_child_context_consistent_with_parent(struct inode *parent, - struct inode *child) -{ - struct ext4_crypt_info *parent_ci, *child_ci; - int res; - - if ((parent == NULL) || (child == NULL)) { - pr_err("parent %p child %p\n", parent, child); - WARN_ON(1); /* Should never happen */ - return 0; - } - /* no restrictions if the parent directory is not encrypted */ - if (!ext4_encrypted_inode(parent)) - return 1; - /* if the child directory is not encrypted, this is always a problem */ - if (!ext4_encrypted_inode(child)) - return 0; - res = ext4_get_encryption_info(parent); - if (res) - return 0; - res = ext4_get_encryption_info(child); - if (res) - return 0; - parent_ci = EXT4_I(parent)->i_crypt_info; - child_ci = EXT4_I(child)->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) - return 0; - - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - EXT4_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); -} - -/** - * ext4_inherit_context() - Sets a child context from its parent - * @parent: Parent inode from which the context is inherited. - * @child: Child inode that inherits the context from @parent. 
- * - * Return: Zero on success, non-zero otherwise - */ -int ext4_inherit_context(struct inode *parent, struct inode *child) -{ - struct ext4_encryption_context ctx; - struct ext4_crypt_info *ci; - int res; - - res = ext4_get_encryption_info(parent); - if (res < 0) - return res; - ci = EXT4_I(parent)->i_crypt_info; - if (ci == NULL) - return -ENOKEY; - - ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; - if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) { - ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; - ctx.filenames_encryption_mode = - EXT4_ENCRYPTION_MODE_AES_256_CTS; - ctx.flags = 0; - memset(ctx.master_key_descriptor, 0x42, - EXT4_KEY_DESCRIPTOR_SIZE); - res = 0; - } else { - ctx.contents_encryption_mode = ci->ci_data_mode; - ctx.filenames_encryption_mode = ci->ci_filename_mode; - ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, - EXT4_KEY_DESCRIPTOR_SIZE); - } - get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); - res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, - sizeof(ctx), 0); - if (!res) { - ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT); - ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA); - res = ext4_get_encryption_info(child); - } - return res; -} diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 68323e3da3fa..67415e0e6af0 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -109,10 +109,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; int dir_has_error = 0; - struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; + struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (ext4_encrypted_inode(inode)) { - err = ext4_get_encryption_info(inode); + err = fscrypt_get_encryption_info(inode); if (err && err != -ENOKEY) return err; } @@ -139,8 +139,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if 
(ext4_encrypted_inode(inode)) { - err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN, - &fname_crypto_str); + err = fscrypt_fname_alloc_buffer(inode, EXT4_NAME_LEN, &fstr); if (err < 0) return err; } @@ -253,16 +252,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) get_dtype(sb, de->file_type))) goto done; } else { - int save_len = fname_crypto_str.len; + int save_len = fstr.len; + struct fscrypt_str de_name = + FSTR_INIT(de->name, + de->name_len); /* Directory is encrypted */ - err = ext4_fname_disk_to_usr(inode, - NULL, de, &fname_crypto_str); - fname_crypto_str.len = save_len; + err = fscrypt_fname_disk_to_usr(inode, + 0, 0, &de_name, &fstr); + fstr.len = save_len; if (err < 0) goto errout; if (!dir_emit(ctx, - fname_crypto_str.name, err, + fstr.name, err, le32_to_cpu(de->inode), get_dtype(sb, de->file_type))) goto done; @@ -281,7 +283,7 @@ done: err = 0; errout: #ifdef CONFIG_EXT4_FS_ENCRYPTION - ext4_fname_crypto_free_buffer(&fname_crypto_str); + fscrypt_fname_free_buffer(&fstr); #endif brelse(bh); return err; @@ -432,7 +434,7 @@ void ext4_htree_free_dir_info(struct dir_private_info *p) int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, __u32 minor_hash, struct ext4_dir_entry_2 *dirent, - struct ext4_str *ent_name) + struct fscrypt_str *ent_name) { struct rb_node **p, *parent = NULL; struct fname *fname, *new_fn; @@ -609,7 +611,7 @@ finished: static int ext4_dir_open(struct inode * inode, struct file * filp) { if (ext4_encrypted_inode(inode)) - return ext4_get_encryption_info(inode) ? -EACCES : 0; + return fscrypt_get_encryption_info(inode) ? 
-EACCES : 0; return 0; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 96c73e6fec6e..ea31931386ec 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #ifdef __KERNEL__ @@ -608,15 +609,6 @@ enum { #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 -/* Encryption algorithms */ -#define EXT4_ENCRYPTION_MODE_INVALID 0 -#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1 -#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 -#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 -#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 - -#include "ext4_crypto.h" - /* * ioctl commands */ @@ -638,9 +630,9 @@ enum { #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) #define EXT4_IOC_SWAP_BOOT _IO('f', 17) #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) -#define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy) -#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) +#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY +#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT +#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY #ifndef FS_IOC_FSGETXATTR /* Until the uapi changes get merged for project quota... 
*/ @@ -1082,10 +1074,6 @@ struct ext4_inode_info { /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ __u32 i_csum_seed; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - /* Encryption params */ - struct ext4_crypt_info *i_crypt_info; -#endif kprojid_t i_projid; }; @@ -1344,6 +1332,11 @@ struct ext4_super_block { /* Number of quota types we support */ #define EXT4_MAXQUOTAS 3 +#ifdef CONFIG_EXT4_FS_ENCRYPTION +#define EXT4_KEY_DESC_PREFIX "ext4:" +#define EXT4_KEY_DESC_PREFIX_SIZE 5 +#endif + /* * fourth extended-fs super-block data in memory */ @@ -1513,6 +1506,12 @@ struct ext4_sb_info { /* Barrier between changing inodes' journal flags and writepages ops. */ struct percpu_rw_semaphore s_journal_flag_rwsem; + + /* Encryption support */ +#ifdef CONFIG_EXT4_FS_ENCRYPTION + u8 key_prefix[EXT4_KEY_DESC_PREFIX_SIZE]; + u8 key_prefix_size; +#endif }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1611,15 +1610,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) /* * Returns true if the inode is inode is encrypted */ -static inline int ext4_encrypted_inode(struct inode *inode) -{ -#ifdef CONFIG_EXT4_FS_ENCRYPTION - return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT); -#else - return 0; -#endif -} - #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime /* @@ -2083,10 +2073,10 @@ struct dx_hash_info struct ext4_filename { const struct qstr *usr_fname; - struct ext4_str disk_name; + struct fscrypt_str disk_name; struct dx_hash_info hinfo; #ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_str crypto_buf; + struct fscrypt_str crypto_buf; #endif }; @@ -2297,81 +2287,51 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -/* crypto_policy.c */ -int ext4_is_child_context_consistent_with_parent(struct inode *parent, - struct inode *child); -int ext4_inherit_context(struct inode *parent, struct inode *child); -void 
ext4_to_hex(char *dst, char *src, size_t src_size); -int ext4_process_policy(const struct ext4_encryption_policy *policy, - struct inode *inode); -int ext4_get_policy(struct inode *inode, - struct ext4_encryption_policy *policy); - -/* crypto.c */ -extern struct kmem_cache *ext4_crypt_info_cachep; -bool ext4_valid_contents_enc_mode(uint32_t mode); -uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); -extern struct workqueue_struct *ext4_read_workqueue; -struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, - gfp_t gfp_flags); -void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); -void ext4_restore_control_page(struct page *data_page); -struct page *ext4_encrypt(struct inode *inode, - struct page *plaintext_page, - gfp_t gfp_flags); -int ext4_decrypt(struct page *page); -int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, - ext4_fsblk_t pblk, ext4_lblk_t len); -extern const struct dentry_operations ext4_encrypted_d_ops; - -#ifdef CONFIG_EXT4_FS_ENCRYPTION -int ext4_init_crypto(void); -void ext4_exit_crypto(void); static inline int ext4_sb_has_crypto(struct super_block *sb) { return ext4_has_feature_encrypt(sb); } -#else -static inline int ext4_init_crypto(void) { return 0; } -static inline void ext4_exit_crypto(void) { } -static inline int ext4_sb_has_crypto(struct super_block *sb) -{ - return 0; -} -#endif -/* crypto_fname.c */ -bool ext4_valid_filenames_enc_mode(uint32_t mode); -u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); -unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen); -int ext4_fname_crypto_alloc_buffer(struct inode *inode, - u32 ilen, struct ext4_str *crypto_str); -int _ext4_fname_disk_to_usr(struct inode *inode, - struct dx_hash_info *hinfo, - const struct ext4_str *iname, - struct ext4_str *oname); -int ext4_fname_disk_to_usr(struct inode *inode, - struct dx_hash_info *hinfo, - const struct ext4_dir_entry_2 *de, - struct ext4_str *oname); -int ext4_fname_usr_to_disk(struct 
inode *inode, - const struct qstr *iname, - struct ext4_str *oname); -#ifdef CONFIG_EXT4_FS_ENCRYPTION -void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str); -int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, - int lookup, struct ext4_filename *fname); -void ext4_fname_free_filename(struct ext4_filename *fname); -#else -static inline -int ext4_setup_fname_crypto(struct inode *inode) +static inline bool ext4_encrypted_inode(struct inode *inode) { - return 0; + return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT); } -static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { } + +#ifdef CONFIG_EXT4_FS_ENCRYPTION static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct ext4_filename *fname) + const struct qstr *iname, + int lookup, struct ext4_filename *fname) +{ + struct fscrypt_name name; + int err; + + memset(fname, 0, sizeof(struct ext4_filename)); + + err = fscrypt_setup_filename(dir, iname, lookup, &name); + + fname->usr_fname = name.usr_fname; + fname->disk_name = name.disk_name; + fname->hinfo.hash = name.hash; + fname->hinfo.minor_hash = name.minor_hash; + fname->crypto_buf = name.crypto_buf; + return err; +} + +static inline void ext4_fname_free_filename(struct ext4_filename *fname) +{ + struct fscrypt_name name; + + name.crypto_buf = fname->crypto_buf; + fscrypt_free_filename(&name); + + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; +} +#else +static inline int ext4_fname_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct ext4_filename *fname) { fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; @@ -2379,51 +2339,31 @@ static inline int ext4_fname_setup_filename(struct inode *dir, return 0; } static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } + +#define fscrypt_set_d_op(i) +#define fscrypt_get_ctx fscrypt_notsupp_get_ctx 
+#define fscrypt_release_ctx fscrypt_notsupp_release_ctx +#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page +#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page +#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages +#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page +#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page +#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range +#define fscrypt_process_policy fscrypt_notsupp_process_policy +#define fscrypt_get_policy fscrypt_notsupp_get_policy +#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context +#define fscrypt_inherit_context fscrypt_notsupp_inherit_context +#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info +#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info +#define fscrypt_setup_filename fscrypt_notsupp_setup_filename +#define fscrypt_free_filename fscrypt_notsupp_free_filename +#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size +#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer +#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer +#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr +#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk #endif - -/* crypto_key.c */ -void ext4_free_crypt_info(struct ext4_crypt_info *ci); -void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); -int _ext4_get_encryption_info(struct inode *inode); - -#ifdef CONFIG_EXT4_FS_ENCRYPTION -int ext4_has_encryption_key(struct inode *inode); - -static inline int ext4_get_encryption_info(struct inode *inode) -{ - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return _ext4_get_encryption_info(inode); - return 0; -} - -static 
inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) -{ - return EXT4_I(inode)->i_crypt_info; -} - -#else -static inline int ext4_has_encryption_key(struct inode *inode) -{ - return 0; -} -static inline int ext4_get_encryption_info(struct inode *inode) -{ - return 0; -} -static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) -{ - return NULL; -} -#endif - - /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, struct file *, @@ -2436,7 +2376,7 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, __u32 minor_hash, struct ext4_dir_entry_2 *dirent, - struct ext4_str *ent_name); + struct fscrypt_str *ent_name); extern void ext4_htree_free_dir_info(struct dir_private_info *p); extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, struct buffer_head *bh, @@ -2624,7 +2564,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, void *entry_buf, int buf_size, int csum_size); -extern int ext4_empty_dir(struct inode *inode); +extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ extern int ext4_group_add(struct super_block *sb, @@ -3106,7 +3046,7 @@ extern int ext4_delete_inline_entry(handle_t *handle, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, int *has_inline_data); -extern int empty_inline_dir(struct inode *dir, int *has_inline_data); +extern bool empty_inline_dir(struct inode *dir, int *has_inline_data); extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, struct ext4_dir_entry_2 **parent_de, int *retval); diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h deleted file mode 100644 index 1f73c29717e1..000000000000 --- a/fs/ext4/ext4_crypto.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * linux/fs/ext4/ext4_crypto.h - * - * Copyright (C) 2015, Google, Inc. 
- * - * This contains encryption header content for ext4 - * - * Written by Michael Halcrow, 2015. - */ - -#ifndef _EXT4_CRYPTO_H -#define _EXT4_CRYPTO_H - -#include - -#define EXT4_KEY_DESCRIPTOR_SIZE 8 - -/* Policy provided via an ioctl on the topmost directory */ -struct ext4_encryption_policy { - char version; - char contents_encryption_mode; - char filenames_encryption_mode; - char flags; - char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; -} __attribute__((__packed__)); - -#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1 -#define EXT4_KEY_DERIVATION_NONCE_SIZE 16 - -#define EXT4_POLICY_FLAGS_PAD_4 0x00 -#define EXT4_POLICY_FLAGS_PAD_8 0x01 -#define EXT4_POLICY_FLAGS_PAD_16 0x02 -#define EXT4_POLICY_FLAGS_PAD_32 0x03 -#define EXT4_POLICY_FLAGS_PAD_MASK 0x03 -#define EXT4_POLICY_FLAGS_VALID 0x03 - -/** - * Encryption context for inode - * - * Protector format: - * 1 byte: Protector format (1 = this version) - * 1 byte: File contents encryption mode - * 1 byte: File names encryption mode - * 1 byte: Reserved - * 8 bytes: Master Key descriptor - * 16 bytes: Encryption Key derivation nonce - */ -struct ext4_encryption_context { - char format; - char contents_encryption_mode; - char filenames_encryption_mode; - char flags; - char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; - char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE]; -} __attribute__((__packed__)); - -/* Encryption parameters */ -#define EXT4_XTS_TWEAK_SIZE 16 -#define EXT4_AES_128_ECB_KEY_SIZE 16 -#define EXT4_AES_256_GCM_KEY_SIZE 32 -#define EXT4_AES_256_CBC_KEY_SIZE 32 -#define EXT4_AES_256_CTS_KEY_SIZE 32 -#define EXT4_AES_256_XTS_KEY_SIZE 64 -#define EXT4_MAX_KEY_SIZE 64 - -#define EXT4_KEY_DESC_PREFIX "ext4:" -#define EXT4_KEY_DESC_PREFIX_SIZE 5 - -/* This is passed in from userspace into the kernel keyring */ -struct ext4_encryption_key { - __u32 mode; - char raw[EXT4_MAX_KEY_SIZE]; - __u32 size; -} __attribute__((__packed__)); - -struct ext4_crypt_info { - char ci_data_mode; - char ci_filename_mode; - 
char ci_flags; - struct crypto_skcipher *ci_ctfm; - struct key *ci_keyring_key; - char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; -}; - -#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define EXT4_WRITE_PATH_FL 0x00000002 - -struct ext4_crypto_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - char flags; /* Flags */ - char mode; /* Encryption mode for tfm */ -}; - -struct ext4_completion_result { - struct completion completion; - int res; -}; - -#define DECLARE_EXT4_COMPLETION_RESULT(ecr) \ - struct ext4_completion_result ecr = { \ - COMPLETION_INITIALIZER((ecr).completion), 0 } - -static inline int ext4_encryption_key_size(int mode) -{ - switch (mode) { - case EXT4_ENCRYPTION_MODE_AES_256_XTS: - return EXT4_AES_256_XTS_KEY_SIZE; - case EXT4_ENCRYPTION_MODE_AES_256_GCM: - return EXT4_AES_256_GCM_KEY_SIZE; - case EXT4_ENCRYPTION_MODE_AES_256_CBC: - return EXT4_AES_256_CBC_KEY_SIZE; - case EXT4_ENCRYPTION_MODE_AES_256_CTS: - return EXT4_AES_256_CTS_KEY_SIZE; - default: - BUG(); - } - return 0; -} - -#define EXT4_FNAME_NUM_SCATTER_ENTRIES 4 -#define EXT4_CRYPTO_BLOCK_SIZE 16 -#define EXT4_FNAME_CRYPTO_DIGEST_SIZE 32 - -struct ext4_str { - unsigned char *name; - u32 len; -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct ext4_encrypted_symlink_data { - __le16 len; - char encrypted_path[1]; -} __attribute__((__packed__)); - -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. 
- */ -static inline u32 encrypted_symlink_data_len(u32 l) -{ - if (l < EXT4_CRYPTO_BLOCK_SIZE) - l = EXT4_CRYPTO_BLOCK_SIZE; - return (l + sizeof(struct ext4_encrypted_symlink_data) - 1); -} - -#endif /* _EXT4_CRYPTO_H */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index df44c877892a..4f615cdd22ca 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -303,10 +303,10 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file->f_mapping->host; if (ext4_encrypted_inode(inode)) { - int err = ext4_get_encryption_info(inode); + int err = fscrypt_get_encryption_info(inode); if (err) return 0; - if (ext4_encryption_info(inode) == NULL) + if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } file_accessed(file); @@ -362,16 +362,16 @@ static int ext4_file_open(struct inode * inode, struct file * filp) } } if (ext4_encrypted_inode(inode)) { - ret = ext4_get_encryption_info(inode); + ret = fscrypt_get_encryption_info(inode); if (ret) return -EACCES; - if (ext4_encryption_info(inode) == NULL) + if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } dir = dget_parent(file_dentry(filp)); if (ext4_encrypted_inode(d_inode(dir)) && - !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { + !fscrypt_has_permitted_context(d_inode(dir), inode)) { ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", (unsigned long) d_inode(dir)->i_ino, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3da4cf8d18b6..35f351895b89 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -767,10 +767,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { - err = ext4_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); if (err) return ERR_PTR(err); - if (ext4_encryption_info(dir) == NULL) + if (!fscrypt_has_encryption_key(dir)) return ERR_PTR(-EPERM); if 
(!handle) nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); @@ -1115,7 +1115,8 @@ got: } if (encrypt) { - err = ext4_inherit_context(dir, inode); + /* give pointer to avoid set_context with journal ops. */ + err = fscrypt_inherit_context(dir, inode, &encrypt, true); if (err) goto fail_free_drop; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index ff7538c26992..f74d5ee2cdec 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1326,7 +1326,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, struct ext4_iloc iloc; void *dir_buf = NULL; struct ext4_dir_entry_2 fake; - struct ext4_str tmp_str; + struct fscrypt_str tmp_str; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1739,20 +1739,20 @@ ext4_get_inline_entry(struct inode *inode, return (struct ext4_dir_entry_2 *)(inline_pos + offset); } -int empty_inline_dir(struct inode *dir, int *has_inline_data) +bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - int ret = 1; + bool ret = true; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE(dir, "error %d getting inode %lu block", err, dir->i_ino); - return 1; + return true; } down_read(&EXT4_I(dir)->xattr_sem); @@ -1766,7 +1766,7 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = 1; + ret = true; goto out; } @@ -1784,11 +1784,11 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = 1; + ret = true; goto out; } if (le32_to_cpu(de->inode)) { - ret = 0; + ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea39d191dbcb..5a6277d80f7c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -392,7 +392,7 @@ int 
ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, int ret; if (ext4_encrypted_inode(inode)) - return ext4_encrypted_zeroout(inode, lblk, pblk, len); + return fscrypt_zeroout_range(inode, lblk, pblk, len); ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); if (ret > 0) @@ -1158,7 +1158,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = ext4_decrypt(page); + err = fscrypt_decrypt_page(page); return err; } #endif @@ -3735,9 +3735,9 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode)) { /* We expect the key to be set. */ - BUG_ON(!ext4_has_encryption_key(inode)); + BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(ext4_decrypt(page)); + WARN_ON_ONCE(fscrypt_decrypt_page(page)); } } if (ext4_should_journal_data(inode)) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b5a39b00265e..10686fd67fb4 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -770,19 +770,13 @@ resizefs_out: return ext4_ext_precache(inode); case EXT4_IOC_SET_ENCRYPTION_POLICY: { #ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_encryption_policy policy; - int err = 0; + struct fscrypt_policy policy; if (copy_from_user(&policy, - (struct ext4_encryption_policy __user *)arg, - sizeof(policy))) { - err = -EFAULT; - goto encryption_policy_out; - } - - err = ext4_process_policy(&policy, inode); -encryption_policy_out: - return err; + (struct fscrypt_policy __user *)arg, + sizeof(policy))) + return -EFAULT; + return fscrypt_process_policy(inode, &policy); #else return -EOPNOTSUPP; #endif @@ -825,12 +819,12 @@ encryption_policy_out: } case EXT4_IOC_GET_ENCRYPTION_POLICY: { #ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_encryption_policy policy; + struct fscrypt_policy policy; int err = 0; if (!ext4_encrypted_inode(inode)) return -ENOENT; - err = 
ext4_get_policy(inode, &policy); + err = fscrypt_get_policy(inode, &policy); if (err) return err; if (copy_to_user((void __user *)arg, &policy, sizeof(policy))) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 94d22e78a7dd..4637c439ca54 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -611,19 +611,19 @@ static struct stats dx_show_leaf(struct inode *dir, #ifdef CONFIG_EXT4_FS_ENCRYPTION int len; char *name; - struct ext4_str fname_crypto_str - = {.name = NULL, .len = 0}; + struct fscrypt_str fname_crypto_str = + FSTR_INIT(NULL, 0); int res = 0; name = de->name; len = de->name_len; - if (ext4_encrypted_inode(inode)) - res = ext4_get_encryption_info(dir); + if (ext4_encrypted_inode(dir)) + res = fscrypt_get_encryption_info(dir); if (res) { printk(KERN_WARNING "Error setting up" " fname crypto: %d\n", res); } - if (ctx == NULL) { + if (!fscrypt_has_encryption_key(dir)) { /* Directory is not encrypted */ ext4fs_dirhash(de->name, de->name_len, &h); @@ -632,19 +632,21 @@ static struct stats dx_show_leaf(struct inode *dir, (unsigned) ((char *) de - base)); } else { + struct fscrypt_str de_name = + FSTR_INIT(name, len); + /* Directory is encrypted */ - res = ext4_fname_crypto_alloc_buffer( - ctx, de->name_len, + res = fscrypt_fname_alloc_buffer( + dir, len, &fname_crypto_str); - if (res < 0) { + if (res < 0) printk(KERN_WARNING "Error " "allocating crypto " "buffer--skipping " "crypto\n"); - ctx = NULL; - } - res = ext4_fname_disk_to_usr(ctx, NULL, de, - &fname_crypto_str); + res = fscrypt_fname_disk_to_usr(dir, + 0, 0, &de_name, + &fname_crypto_str); if (res < 0) { printk(KERN_WARNING "Error " "converting filename " @@ -661,8 +663,8 @@ static struct stats dx_show_leaf(struct inode *dir, printk("%*.s:(E)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); - ext4_fname_crypto_free_buffer( - &fname_crypto_str); + fscrypt_fname_free_buffer( + &fname_crypto_str); } #else int len = de->name_len; @@ -951,7 +953,7 @@ static int htree_dirblock_to_tree(struct file 
*dir_file, struct buffer_head *bh; struct ext4_dir_entry_2 *de, *top; int err = 0, count = 0; - struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str; + struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str; dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); @@ -966,12 +968,12 @@ static int htree_dirblock_to_tree(struct file *dir_file, #ifdef CONFIG_EXT4_FS_ENCRYPTION /* Check if the directory is encrypted */ if (ext4_encrypted_inode(dir)) { - err = ext4_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); if (err < 0) { brelse(bh); return err; } - err = ext4_fname_crypto_alloc_buffer(dir, EXT4_NAME_LEN, + err = fscrypt_fname_alloc_buffer(dir, EXT4_NAME_LEN, &fname_crypto_str); if (err < 0) { brelse(bh); @@ -1002,10 +1004,13 @@ static int htree_dirblock_to_tree(struct file *dir_file, &tmp_str); } else { int save_len = fname_crypto_str.len; + struct fscrypt_str de_name = FSTR_INIT(de->name, + de->name_len); /* Directory is encrypted */ - err = ext4_fname_disk_to_usr(dir, hinfo, de, - &fname_crypto_str); + err = fscrypt_fname_disk_to_usr(dir, hinfo->hash, + hinfo->minor_hash, &de_name, + &fname_crypto_str); if (err < 0) { count = err; goto errout; @@ -1024,7 +1029,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, errout: brelse(bh); #ifdef CONFIG_EXT4_FS_ENCRYPTION - ext4_fname_crypto_free_buffer(&fname_crypto_str); + fscrypt_fname_free_buffer(&fname_crypto_str); #endif return count; } @@ -1049,7 +1054,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, int count = 0; int ret, err; __u32 hashval; - struct ext4_str tmp_str; + struct fscrypt_str tmp_str; dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); @@ -1562,26 +1567,23 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct ext4_dir_entry_2 *de; struct buffer_head *bh; - if (ext4_encrypted_inode(dir)) { - int res = 
ext4_get_encryption_info(dir); + if (ext4_encrypted_inode(dir)) { + int res = fscrypt_get_encryption_info(dir); /* - * This should be a properly defined flag for - * dentry->d_flags when we uplift this to the VFS. - * d_fsdata is set to (void *) 1 if if the dentry is + * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is * created while the directory was encrypted and we - * don't have access to the key. + * have access to the key. */ - dentry->d_fsdata = NULL; - if (ext4_encryption_info(dir)) - dentry->d_fsdata = (void *) 1; - d_set_d_op(dentry, &ext4_encrypted_d_ops); - if (res && res != -ENOKEY) - return ERR_PTR(res); - } + if (fscrypt_has_encryption_key(dir)) + fscrypt_set_encrypted_dentry(dentry); + fscrypt_set_d_op(dentry); + if (res && res != -ENOKEY) + return ERR_PTR(res); + } - if (dentry->d_name.len > EXT4_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > EXT4_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); if (IS_ERR(bh)) @@ -1608,11 +1610,9 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && - !ext4_is_child_context_consistent_with_parent(dir, - inode)) { + !fscrypt_has_permitted_context(dir, inode)) { int nokey = ext4_encrypted_inode(inode) && - !ext4_encryption_info(inode); - + !fscrypt_has_encryption_key(inode); iput(inode); if (nokey) return ERR_PTR(-ENOKEY); @@ -2689,30 +2689,30 @@ out_stop: /* * routine to check that the specified directory is empty (for rmdir) */ -int ext4_empty_dir(struct inode *inode) +bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; struct ext4_dir_entry_2 *de, *de1; struct super_block *sb; - int err = 0; if (ext4_has_inline_data(inode)) { int has_inline_data = 1; + int ret; - err = empty_inline_dir(inode, &has_inline_data); + ret = empty_inline_dir(inode, &has_inline_data); if 
(has_inline_data) - return err; + return ret; } sb = inode->i_sb; if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) { EXT4_ERROR_INODE(inode, "invalid size"); - return 1; + return true; } bh = ext4_read_dirblock(inode, 0, EITHER); if (IS_ERR(bh)) - return 1; + return true; de = (struct ext4_dir_entry_2 *) bh->b_data; de1 = ext4_next_entry(de, sb->s_blocksize); @@ -2721,7 +2721,7 @@ int ext4_empty_dir(struct inode *inode) strcmp(".", de->name) || strcmp("..", de1->name)) { ext4_warning_inode(inode, "directory missing '.' and/or '..'"); brelse(bh); - return 1; + return true; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); @@ -2729,12 +2729,11 @@ int ext4_empty_dir(struct inode *inode) while (offset < inode->i_size) { if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { unsigned int lblock; - err = 0; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); if (IS_ERR(bh)) - return 1; + return true; de = (struct ext4_dir_entry_2 *) bh->b_data; } if (ext4_check_dir_entry(inode, NULL, de, bh, @@ -2746,13 +2745,13 @@ int ext4_empty_dir(struct inode *inode) } if (le32_to_cpu(de->inode)) { brelse(bh); - return 0; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); - return 1; + return true; } /* @@ -3075,8 +3074,8 @@ static int ext4_symlink(struct inode *dir, int err, len = strlen(symname); int credits; bool encryption_required; - struct ext4_str disk_link; - struct ext4_encrypted_symlink_data *sd = NULL; + struct fscrypt_str disk_link; + struct fscrypt_symlink_data *sd = NULL; disk_link.len = len + 1; disk_link.name = (char *) symname; @@ -3084,13 +3083,13 @@ static int ext4_symlink(struct inode *dir, encryption_required = (ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); if (encryption_required) { - err = 
ext4_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); if (err) return err; - if (ext4_encryption_info(dir) == NULL) + if (!fscrypt_has_encryption_key(dir)) return -EPERM; - disk_link.len = (ext4_fname_encrypted_size(dir, len) + - sizeof(struct ext4_encrypted_symlink_data)); + disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + + sizeof(struct fscrypt_symlink_data)); sd = kzalloc(disk_link.len, GFP_KERNEL); if (!sd) return -ENOMEM; @@ -3138,13 +3137,12 @@ static int ext4_symlink(struct inode *dir, if (encryption_required) { struct qstr istr; - struct ext4_str ostr; + struct fscrypt_str ostr = + FSTR_INIT(sd->encrypted_path, disk_link.len); istr.name = (const unsigned char *) symname; istr.len = len; - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - err = ext4_fname_usr_to_disk(inode, &istr, &ostr); + err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); if (err < 0) goto err_drop_inode; sd->len = cpu_to_le16(ostr.len); @@ -3233,7 +3231,7 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; if (ext4_encrypted_inode(dir) && - !ext4_is_child_context_consistent_with_parent(dir, inode)) + !fscrypt_has_permitted_context(dir, inode)) return -EPERM; if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && @@ -3556,8 +3554,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if ((old.dir != new.dir) && ext4_encrypted_inode(new.dir) && - !ext4_is_child_context_consistent_with_parent(new.dir, - old.inode)) { + !fscrypt_has_permitted_context(new.dir, old.inode)) { retval = -EPERM; goto end_rename; } @@ -3729,10 +3726,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((ext4_encrypted_inode(old_dir) || ext4_encrypted_inode(new_dir)) && (old_dir != new_dir) && - (!ext4_is_child_context_consistent_with_parent(new_dir, - old.inode) || - !ext4_is_child_context_consistent_with_parent(old_dir, - new.inode))) + 
(!fscrypt_has_permitted_context(new_dir, old.inode) || + !fscrypt_has_permitted_context(old_dir, new.inode))) return -EPERM; if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 2a01df9cc1c3..5ad05af51dd8 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -67,7 +68,6 @@ static void ext4_finish_bio(struct bio *bio) struct page *page = bvec->bv_page; #ifdef CONFIG_EXT4_FS_ENCRYPTION struct page *data_page = NULL; - struct ext4_crypto_ctx *ctx = NULL; #endif struct buffer_head *bh, *head; unsigned bio_start = bvec->bv_offset; @@ -82,8 +82,7 @@ static void ext4_finish_bio(struct bio *bio) if (!page->mapping) { /* The bounce data pages are unmapped. */ data_page = page; - ctx = (struct ext4_crypto_ctx *)page_private(data_page); - page = ctx->w.control_page; + fscrypt_pullback_bio_page(&page, false); } #endif @@ -113,8 +112,8 @@ static void ext4_finish_bio(struct bio *bio) local_irq_restore(flags); if (!under_io) { #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (ctx) - ext4_restore_control_page(data_page); + if (data_page) + fscrypt_restore_control_page(data_page); #endif end_page_writeback(page); } @@ -472,7 +471,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; retry_encrypt: - data_page = ext4_encrypt(inode, page, gfp_flags); + data_page = fscrypt_encrypt_page(inode, page, gfp_flags); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { @@ -510,7 +509,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, if (ret) { out: if (data_page) - ext4_restore_control_page(data_page); + fscrypt_restore_control_page(data_page); printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); redirty_page_for_writepage(wbc, page); do { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index e24ec3bfe1b5..18b2cf23d40f 100644 --- 
a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -46,37 +46,6 @@ #include "ext4.h" -/* - * Call ext4_decrypt on every single page, reusing the encryption - * context. - */ -static void completion_pages(struct work_struct *work) -{ -#ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_crypto_ctx *ctx = - container_of(work, struct ext4_crypto_ctx, r.work); - struct bio *bio = ctx->r.bio; - struct bio_vec *bv; - int i; - - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; - - int ret = ext4_decrypt(page); - if (ret) { - WARN_ON_ONCE(1); - SetPageError(page); - } else - SetPageUptodate(page); - unlock_page(page); - } - ext4_release_crypto_ctx(ctx); - bio_put(bio); -#else - BUG(); -#endif -} - static inline bool ext4_bio_encrypted(struct bio *bio) { #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -104,14 +73,10 @@ static void mpage_end_io(struct bio *bio) int i; if (ext4_bio_encrypted(bio)) { - struct ext4_crypto_ctx *ctx = bio->bi_private; - if (bio->bi_error) { - ext4_release_crypto_ctx(ctx); + fscrypt_release_ctx(bio->bi_private); } else { - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - queue_work(ext4_read_workqueue, &ctx->r.work); + fscrypt_decrypt_bio_pages(bio->bi_private, bio); return; } } @@ -274,11 +239,11 @@ int ext4_mpage_readpages(struct address_space *mapping, bio = NULL; } if (bio == NULL) { - struct ext4_crypto_ctx *ctx = NULL; + struct fscrypt_ctx *ctx = NULL; if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = ext4_get_crypto_ctx(inode, GFP_NOFS); + ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; } @@ -286,7 +251,7 @@ int ext4_mpage_readpages(struct address_space *mapping, min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) { if (ctx) - ext4_release_crypto_ctx(ctx); + fscrypt_release_ctx(ctx); goto set_error_page; } bio->bi_bdev = bdev; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 13c49af7a06a..1e3fd5c9a72b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -945,9 +945,6 @@ 
static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_datasync_tid = 0; atomic_set(&ei->i_unwritten, 0); INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); -#ifdef CONFIG_EXT4_FS_ENCRYPTION - ei->i_crypt_info = NULL; -#endif return &ei->vfs_inode; } @@ -1026,8 +1023,7 @@ void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->jinode = NULL; } #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (EXT4_I(inode)->i_crypt_info) - ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info); + fscrypt_put_encryption_info(inode, NULL); #endif } @@ -1094,6 +1090,90 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return try_to_free_buffers(page); } +#ifdef CONFIG_EXT4_FS_ENCRYPTION +static int ext4_get_context(struct inode *inode, void *ctx, size_t len) +{ + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); +} + +static int ext4_key_prefix(struct inode *inode, u8 **key) +{ + *key = EXT4_SB(inode->i_sb)->key_prefix; + return EXT4_SB(inode->i_sb)->key_prefix_size; +} + +static int ext4_prepare_context(struct inode *inode) +{ + return ext4_convert_inline_data(inode); +} + +static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, + void *fs_data) +{ + handle_t *handle; + int res, res2; + + /* fs_data is null when internally used. 
*/ + if (fs_data) { + res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, + len, 0); + if (!res) { + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + ext4_clear_inode_state(inode, + EXT4_STATE_MAY_INLINE_DATA); + } + return res; + } + + handle = ext4_journal_start(inode, EXT4_HT_MISC, + ext4_jbd2_credits_xattr(inode)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, + len, 0); + if (!res) { + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + res = ext4_mark_inode_dirty(handle, inode); + if (res) + EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); + } + res2 = ext4_journal_stop(handle); + if (!res) + res = res2; + return res; +} + +static int ext4_dummy_context(struct inode *inode) +{ + return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); +} + +static unsigned ext4_max_namelen(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? 
inode->i_sb->s_blocksize : + EXT4_NAME_LEN; +} + +static struct fscrypt_operations ext4_cryptops = { + .get_context = ext4_get_context, + .key_prefix = ext4_key_prefix, + .prepare_context = ext4_prepare_context, + .set_context = ext4_set_context, + .dummy_context = ext4_dummy_context, + .is_encrypted = ext4_encrypted_inode, + .empty_dir = ext4_empty_dir, + .max_namelen = ext4_max_namelen, +}; +#else +static struct fscrypt_operations ext4_cryptops = { + .is_encrypted = ext4_encrypted_inode, +}; +#endif + #ifdef CONFIG_QUOTA static char *quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) @@ -3693,6 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; + sb->s_cop = &ext4_cryptops; #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb)) @@ -4003,6 +4084,11 @@ no_journal: ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); kfree(orig_data); +#ifdef CONFIG_EXT4_FS_ENCRYPTION + memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX, + EXT4_KEY_DESC_PREFIX_SIZE); + sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE; +#endif return 0; cantfind_ext4: @@ -5431,7 +5517,6 @@ out5: static void __exit ext4_exit_fs(void) { - ext4_exit_crypto(); ext4_destroy_lazyinit_thread(); unregister_as_ext2(); unregister_as_ext3(); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 75ed5c2f0c16..4d83d9e05f2e 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -22,23 +22,22 @@ #include "ext4.h" #include "xattr.h" -#ifdef CONFIG_EXT4_FS_ENCRYPTION static const char *ext4_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct page *cpage = NULL; char *caddr, *paddr = NULL; - struct ext4_str cstr, pstr; - struct ext4_encrypted_symlink_data *sd; + struct fscrypt_str cstr, pstr; + struct fscrypt_symlink_data *sd; loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 
1); int res; - u32 plen, max_size = inode->i_sb->s_blocksize; + u32 max_size = inode->i_sb->s_blocksize; if (!dentry) return ERR_PTR(-ECHILD); - res = ext4_get_encryption_info(inode); + res = fscrypt_get_encryption_info(inode); if (res) return ERR_PTR(res); @@ -54,30 +53,27 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, } /* Symlink is encrypted */ - sd = (struct ext4_encrypted_symlink_data *)caddr; + sd = (struct fscrypt_symlink_data *)caddr; cstr.name = sd->encrypted_path; cstr.len = le16_to_cpu(sd->len); - if ((cstr.len + - sizeof(struct ext4_encrypted_symlink_data) - 1) > - max_size) { + if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { /* Symlink data on the disk is corrupted */ res = -EFSCORRUPTED; goto errout; } - plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ? - EXT4_FNAME_CRYPTO_DIGEST_SIZE*2 : cstr.len; - paddr = kmalloc(plen + 1, GFP_NOFS); - if (!paddr) { - res = -ENOMEM; + + res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); + if (res) goto errout; - } - pstr.name = paddr; - pstr.len = plen; - res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr); + + res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); if (res < 0) goto errout; + + paddr = pstr.name; + /* Null-terminate the name */ - if (res <= plen) + if (res <= pstr.len) paddr[res] = '\0'; if (cpage) put_page(cpage); @@ -99,7 +95,6 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = { .listxattr = ext4_listxattr, .removexattr = generic_removexattr, }; -#endif const struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, From 598c7d7abc832e35677b851f6afb93141c09993b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jul 2016 22:44:13 -0400 Subject: [PATCH 18/21] MAINTAINERS: fs-crypto maintainers update Signed-off-by: Theodore Ts'o Cc: Jaegeuk Kim --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index
7304d2e37a98..b2234f85eaca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4925,6 +4925,13 @@ F: Documentation/filesystems/caching/ F: fs/fscache/ F: include/linux/fscache*.h +FS-CRYPTO: FILE SYSTEM LEVEL ENCRYPTION SUPPORT +M: Theodore Y. Ts'o +M: Jaegeuk Kim +S: Supported +F: fs/crypto/ +F: include/linux/fscrypto.h + F2FS FILE SYSTEM M: Jaegeuk Kim M: Changman Lee From 554a5ccc4e4a20c5f3ec859de0842db4b4b9c77e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 14 Jul 2016 23:02:47 -0400 Subject: [PATCH 19/21] ext4: fix reference counting bug on block allocation error If we hit this error when mounted with errors=continue or errors=remount-ro: EXT4-fs error (device loop0): ext4_mb_mark_diskspace_used:2940: comm ext4.exe: Allocating blocks 5090-6081 which overlap fs metadata then ext4_mb_new_blocks() will call ext4_mb_release_context() and try to continue. However, ext4_mb_release_context() is the wrong thing to call here since we are still actually using the allocation context. Instead, just error out. We could retry the allocation, but there is a possibility of getting stuck in an infinite loop instead, so this seems safer. [ Fixed up so we don't return EAGAIN to userspace. --tytso ] Fixes: 8556e8f3b6 ("ext4: Don't allow new groups to be added during block allocation") Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Cc: Aneesh Kumar K.V Cc: stable@vger.kernel.org --- fs/ext4/mballoc.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 77249e1f5c3a..11562161e24a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2943,7 +2943,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error - * Fix the bitmap and repeat the block allocation + * Fix the bitmap and return EFSCORRUPTED * We leak some of the blocks here. 
*/ ext4_lock_group(sb, ac->ac_b_ex.fe_group); @@ -2952,7 +2952,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) - err = -EAGAIN; + err = -EFSCORRUPTED; goto out_err; } @@ -4517,18 +4517,7 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); - if (*errp == -EAGAIN) { - /* - * drop the reference that we took - * in ext4_mb_use_best_found - */ - ext4_mb_release_context(ac); - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; - ac->ac_status = AC_STATUS_CONTINUE; - goto repeat; - } else if (*errp) { + if (*errp) { ext4_discard_allocated_blocks(ac); goto errout; } else { From c65d5c6c81a1f27dec5f627f67840726fcd146de Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 14 Jul 2016 23:21:35 -0400 Subject: [PATCH 20/21] ext4: short-cut orphan cleanup on error If we encounter a filesystem error during orphan cleanup, we should stop. Otherwise, we may end up in an infinite loop where the same inode is processed again and again. EXT4-fs (loop0): warning: checktime reached, running e2fsck is recommended EXT4-fs error (device loop0): ext4_mb_generate_buddy:758: group 2, block bitmap and bg descriptor inconsistent: 6117 vs 0 free clusters Aborting journal on device loop0-8. 
EXT4-fs (loop0): Remounting filesystem read-only EXT4-fs error (device loop0) in ext4_free_blocks:4895: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs error (device loop0) in ext4_ext_remove_space:3068: IO failure EXT4-fs error (device loop0) in ext4_ext_truncate:4667: Journal has aborted EXT4-fs error (device loop0) in ext4_orphan_del:2927: Journal has aborted EXT4-fs error (device loop0) in ext4_do_update_inode:4893: Journal has aborted EXT4-fs (loop0): Inode 16 (00000000618192a0): orphan list check failed! [...] EXT4-fs (loop0): Inode 16 (0000000061819748): orphan list check failed! [...] EXT4-fs (loop0): Inode 16 (0000000061819bf0): orphan list check failed! [...] See-also: c9eb13a9105 ("ext4: fix hang when processing corrupted orphaned inode list") Cc: Jan Kara Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1e3fd5c9a72b..c13a4e464738 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2358,6 +2358,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, while (es->s_last_orphan) { struct inode *inode; + /* + * We may have encountered an error during cleanup; if + * so, skip the rest. 
+ */ + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); + es->s_last_orphan = 0; + break; + } + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); if (IS_ERR(inode)) { es->s_last_orphan = 0; From 7bc9491645118c9461bd21099c31755ff6783593 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 15 Jul 2016 00:22:07 -0400 Subject: [PATCH 21/21] ext4: verify extent header depth Although an extent tree depth of 5 should be enough for the worst case of 2**32 extents of length 1, the extent tree code does not currently merge nodes which are less than half-full with a sibling node, or shrink the tree depth if possible. So it's possible, at least in theory, for the tree depth to be greater than 5. However, even in the worst case, a tree depth of 32 is highly unlikely, and if the file system is maliciously corrupted, an insanely large eh_depth can cause memory allocation failures that will trigger kernel warnings (here, eh_depth = 65280): JBD2: ext4.exe wants too many credits credits:195849 rsv_credits:0 max:256 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 50 at fs/jbd2/transaction.c:293 start_this_handle+0x569/0x580 CPU: 0 PID: 50 Comm: ext4.exe Not tainted 4.7.0-rc5+ #508 Stack: 604a8947 625badd8 0002fd09 00000000 60078643 00000000 62623910 601bf9bc 62623970 6002fc84 626239b0 900000125 Call Trace: [<6001c2dc>] show_stack+0xdc/0x1a0 [<601bf9bc>] dump_stack+0x2a/0x2e [<6002fc84>] __warn+0x114/0x140 [<6002fdff>] warn_slowpath_null+0x1f/0x30 [<60165829>] start_this_handle+0x569/0x580 [<60165d4e>] jbd2__journal_start+0x11e/0x220 [<60146690>] __ext4_journal_start_sb+0x60/0xa0 [<60120a81>] ext4_truncate+0x131/0x3a0 [<60123677>] ext4_setattr+0x757/0x840 [<600d5d0f>] notify_change+0x16f/0x2a0 [<600b2b16>] do_truncate+0x76/0xc0 [<600c3e56>] path_openat+0x806/0x1300 [<600c55c9>] do_filp_open+0x89/0xf0 [<600b4074>] do_sys_open+0x134/0x1e0 [<600b4140>] SyS_open+0x20/0x30 [<6001ea68>]
handle_syscall+0x88/0x90 [<600295fd>] userspace+0x3fd/0x500 [<6001ac55>] fork_handler+0x85/0x90 ---[ end trace 08b0b88b6387a244 ]--- [ Commit message modified and the extent tree depth check changed from 5 to 32 -- tytso ] Cc: Darrick J. Wong Signed-off-by: Vegard Nossum Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2f258c68d3e0..d7ccb7f51dfc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -478,6 +478,10 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid extent entries"; goto corrupted; } + if (unlikely(depth > 32)) { + error_msg = "too large eh_depth"; + goto corrupted; + } /* Verify checksum on non-root extent tree nodes */ if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) {