2019-05-19 20:08:20 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2015-05-16 07:26:10 +08:00
|
|
|
/*
|
|
|
|
* This contains encryption functions for per-file encryption.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2015, Google, Inc.
|
|
|
|
* Copyright (C) 2015, Motorola Mobility
|
|
|
|
*
|
|
|
|
* Written by Michael Halcrow, 2014.
|
|
|
|
*
|
|
|
|
* Filename encryption additions
|
|
|
|
* Uday Savagaonkar, 2014
|
|
|
|
* Encryption policy handling additions
|
|
|
|
* Ildar Muslukhov, 2014
|
|
|
|
* Add fscrypt_pullback_bio_page()
|
|
|
|
* Jaegeuk Kim, 2015.
|
|
|
|
*
|
|
|
|
* This has not yet undergone a rigorous security audit.
|
|
|
|
*
|
|
|
|
* The usage of AES-XTS should conform to recommendations in NIST
|
|
|
|
* Special Publication 800-38E and IEEE P1619/D16.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/mempool.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/scatterlist.h>
|
|
|
|
#include <linux/ratelimit.h>
|
|
|
|
#include <linux/dcache.h>
|
2016-04-13 07:05:36 +08:00
|
|
|
#include <linux/namei.h>
|
2017-06-19 15:27:58 +08:00
|
|
|
#include <crypto/aes.h>
|
2018-01-06 02:45:00 +08:00
|
|
|
#include <crypto/skcipher.h>
|
2016-11-27 11:05:18 +08:00
|
|
|
#include "fscrypt_private.h"
|
2015-05-16 07:26:10 +08:00
|
|
|
|
|
|
|
static unsigned int num_prealloc_crypto_pages = 32;
|
|
|
|
static unsigned int num_prealloc_crypto_ctxs = 128;
|
|
|
|
|
|
|
|
module_param(num_prealloc_crypto_pages, uint, 0444);
|
|
|
|
MODULE_PARM_DESC(num_prealloc_crypto_pages,
|
|
|
|
"Number of crypto pages to preallocate");
|
|
|
|
module_param(num_prealloc_crypto_ctxs, uint, 0444);
|
|
|
|
MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
|
|
|
|
"Number of crypto contexts to preallocate");
|
|
|
|
|
|
|
|
static mempool_t *fscrypt_bounce_page_pool = NULL;
|
|
|
|
|
|
|
|
static LIST_HEAD(fscrypt_free_ctxs);
|
|
|
|
static DEFINE_SPINLOCK(fscrypt_ctx_lock);
|
|
|
|
|
2018-04-19 02:09:47 +08:00
|
|
|
static struct workqueue_struct *fscrypt_read_workqueue;
|
2015-05-16 07:26:10 +08:00
|
|
|
static DEFINE_MUTEX(fscrypt_init_mutex);
|
|
|
|
|
|
|
|
static struct kmem_cache *fscrypt_ctx_cachep;
|
|
|
|
struct kmem_cache *fscrypt_info_cachep;
|
|
|
|
|
2018-04-19 02:09:47 +08:00
|
|
|
void fscrypt_enqueue_decrypt_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
queue_work(fscrypt_read_workqueue, work);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
|
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
/**
|
2019-05-21 00:29:40 +08:00
|
|
|
* fscrypt_release_ctx() - Release a decryption context
|
|
|
|
* @ctx: The decryption context to release.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:40 +08:00
|
|
|
* If the decryption context was allocated from the pre-allocated pool, return
|
|
|
|
* it to that pool. Else, free it.
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
|
|
|
void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
|
|
|
|
kmem_cache_free(fscrypt_ctx_cachep, ctx);
|
|
|
|
} else {
|
|
|
|
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
|
|
|
|
list_add(&ctx->free_list, &fscrypt_free_ctxs);
|
|
|
|
spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_release_ctx);
|
|
|
|
|
|
|
|
/**
|
2019-05-21 00:29:40 +08:00
|
|
|
* fscrypt_get_ctx() - Get a decryption context
|
2016-04-12 06:51:57 +08:00
|
|
|
* @gfp_flags: The gfp flag for memory allocation
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:40 +08:00
|
|
|
* Allocate and initialize a decryption context.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:40 +08:00
|
|
|
* Return: A new decryption context on success; an ERR_PTR() otherwise.
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
2019-03-19 01:23:33 +08:00
|
|
|
struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
|
2015-05-16 07:26:10 +08:00
|
|
|
{
|
2019-03-19 01:23:33 +08:00
|
|
|
struct fscrypt_ctx *ctx;
|
2015-05-16 07:26:10 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
/*
|
2019-05-21 00:29:39 +08:00
|
|
|
* First try getting a ctx from the free list so that we don't have to
|
|
|
|
* call into the slab allocator.
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
|
|
|
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
|
|
|
|
ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
|
|
|
|
struct fscrypt_ctx, free_list);
|
|
|
|
if (ctx)
|
|
|
|
list_del(&ctx->free_list);
|
|
|
|
spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
|
|
|
|
if (!ctx) {
|
2016-04-12 06:51:57 +08:00
|
|
|
ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags);
|
2015-05-16 07:26:10 +08:00
|
|
|
if (!ctx)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
|
|
|
|
} else {
|
|
|
|
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
|
|
|
|
}
|
|
|
|
return ctx;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_get_ctx);
|
|
|
|
|
2019-05-21 00:29:39 +08:00
|
|
|
/*
 * Allocate a page from fscrypt_bounce_page_pool using @gfp_flags.  The return
 * value is whatever mempool_alloc() returns.
 */
struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
{
	return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fscrypt_free_bounce_page() - free a ciphertext bounce page
|
|
|
|
*
|
2019-05-21 00:29:44 +08:00
|
|
|
* Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(),
|
|
|
|
* or by fscrypt_alloc_bounce_page() directly.
|
2019-05-21 00:29:39 +08:00
|
|
|
*/
|
|
|
|
void fscrypt_free_bounce_page(struct page *bounce_page)
|
|
|
|
{
|
|
|
|
if (!bounce_page)
|
|
|
|
return;
|
|
|
|
set_page_private(bounce_page, (unsigned long)NULL);
|
|
|
|
ClearPagePrivate(bounce_page);
|
|
|
|
mempool_free(bounce_page, fscrypt_bounce_page_pool);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_free_bounce_page);
|
|
|
|
|
fscrypt: add Adiantum support
Add support for the Adiantum encryption mode to fscrypt. Adiantum is a
tweakable, length-preserving encryption mode with security provably
reducible to that of XChaCha12 and AES-256, subject to a security bound.
It's also a true wide-block mode, unlike XTS. See the paper
"Adiantum: length-preserving encryption for entry-level processors"
(https://eprint.iacr.org/2018/720.pdf) for more details. Also see
commit 059c2a4d8e16 ("crypto: adiantum - add Adiantum support").
On sufficiently long messages, Adiantum's bottlenecks are XChaCha12 and
the NH hash function. These algorithms are fast even on processors
without dedicated crypto instructions. Adiantum makes it feasible to
enable storage encryption on low-end mobile devices that lack AES
instructions; currently such devices are unencrypted. On ARM Cortex-A7,
on 4096-byte messages Adiantum encryption is about 4 times faster than
AES-256-XTS encryption; decryption is about 5 times faster.
In fscrypt, Adiantum is suitable for encrypting both file contents and
names. With filenames, it fixes a known weakness: when two filenames in
a directory share a common prefix of >= 16 bytes, with CTS-CBC their
encrypted filenames share a common prefix too, leaking information.
Adiantum does not have this problem.
Since Adiantum also accepts long tweaks (IVs), it's also safe to use the
master key directly for Adiantum encryption rather than deriving
per-file keys, provided that the per-file nonce is included in the IVs
and the master key isn't used for any other encryption mode. This
configuration saves memory and improves performance. A new fscrypt
policy flag is added to allow users to opt-in to this configuration.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2019-01-06 21:36:21 +08:00
|
|
|
/*
 * Build the IV for logical block @lblk_num of the file described by @ci.
 *
 * The first ci->ci_mode->ivsize bytes of @iv are zeroed, then the block number
 * is stored in little-endian form.  With FSCRYPT_POLICY_FLAG_DIRECT_KEY the
 * nonce from @ci is copied in as well.  Finally, if @ci has an ESSIV cipher,
 * the raw IV is encrypted in place with it.
 */
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
			 const struct fscrypt_info *ci)
{
	memset(iv, 0, ci->ci_mode->ivsize);
	iv->lblk_num = cpu_to_le64(lblk_num);

	if ((ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) != 0)
		memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);

	if (ci->ci_essiv_tfm)
		crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
}
|
|
|
|
|
2019-05-21 00:29:41 +08:00
|
|
|
/* Encrypt or decrypt a single filesystem block of file contents */
|
|
|
|
int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
|
|
|
|
u64 lblk_num, struct page *src_page,
|
|
|
|
struct page *dest_page, unsigned int len,
|
|
|
|
unsigned int offs, gfp_t gfp_flags)
|
2015-05-16 07:26:10 +08:00
|
|
|
{
|
fscrypt: add Adiantum support
Add support for the Adiantum encryption mode to fscrypt. Adiantum is a
tweakable, length-preserving encryption mode with security provably
reducible to that of XChaCha12 and AES-256, subject to a security bound.
It's also a true wide-block mode, unlike XTS. See the paper
"Adiantum: length-preserving encryption for entry-level processors"
(https://eprint.iacr.org/2018/720.pdf) for more details. Also see
commit 059c2a4d8e16 ("crypto: adiantum - add Adiantum support").
On sufficiently long messages, Adiantum's bottlenecks are XChaCha12 and
the NH hash function. These algorithms are fast even on processors
without dedicated crypto instructions. Adiantum makes it feasible to
enable storage encryption on low-end mobile devices that lack AES
instructions; currently such devices are unencrypted. On ARM Cortex-A7,
on 4096-byte messages Adiantum encryption is about 4 times faster than
AES-256-XTS encryption; decryption is about 5 times faster.
In fscrypt, Adiantum is suitable for encrypting both file contents and
names. With filenames, it fixes a known weakness: when two filenames in
a directory share a common prefix of >= 16 bytes, with CTS-CBC their
encrypted filenames share a common prefix too, leaking information.
Adiantum does not have this problem.
Since Adiantum also accepts long tweaks (IVs), it's also safe to use the
master key directly for Adiantum encryption rather than deriving
per-file keys, provided that the per-file nonce is included in the IVs
and the master key isn't used for any other encryption mode. This
configuration saves memory and improves performance. A new fscrypt
policy flag is added to allow users to opt-in to this configuration.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2019-01-06 21:36:21 +08:00
|
|
|
union fscrypt_iv iv;
|
2016-03-22 02:03:02 +08:00
|
|
|
struct skcipher_request *req = NULL;
|
2017-10-18 15:00:44 +08:00
|
|
|
DECLARE_CRYPTO_WAIT(wait);
|
2015-05-16 07:26:10 +08:00
|
|
|
struct scatterlist dst, src;
|
|
|
|
struct fscrypt_info *ci = inode->i_crypt_info;
|
2016-03-22 02:03:02 +08:00
|
|
|
struct crypto_skcipher *tfm = ci->ci_ctfm;
|
2015-05-16 07:26:10 +08:00
|
|
|
int res = 0;
|
|
|
|
|
2019-05-21 00:29:42 +08:00
|
|
|
if (WARN_ON_ONCE(len <= 0))
|
|
|
|
return -EINVAL;
|
|
|
|
if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
|
|
|
|
return -EINVAL;
|
2016-12-07 06:53:55 +08:00
|
|
|
|
fscrypt: add Adiantum support
Add support for the Adiantum encryption mode to fscrypt. Adiantum is a
tweakable, length-preserving encryption mode with security provably
reducible to that of XChaCha12 and AES-256, subject to a security bound.
It's also a true wide-block mode, unlike XTS. See the paper
"Adiantum: length-preserving encryption for entry-level processors"
(https://eprint.iacr.org/2018/720.pdf) for more details. Also see
commit 059c2a4d8e16 ("crypto: adiantum - add Adiantum support").
On sufficiently long messages, Adiantum's bottlenecks are XChaCha12 and
the NH hash function. These algorithms are fast even on processors
without dedicated crypto instructions. Adiantum makes it feasible to
enable storage encryption on low-end mobile devices that lack AES
instructions; currently such devices are unencrypted. On ARM Cortex-A7,
on 4096-byte messages Adiantum encryption is about 4 times faster than
AES-256-XTS encryption; decryption is about 5 times faster.
In fscrypt, Adiantum is suitable for encrypting both file contents and
names. With filenames, it fixes a known weakness: when two filenames in
a directory share a common prefix of >= 16 bytes, with CTS-CBC their
encrypted filenames share a common prefix too, leaking information.
Adiantum does not have this problem.
Since Adiantum also accepts long tweaks (IVs), it's also safe to use the
master key directly for Adiantum encryption rather than deriving
per-file keys, provided that the per-file nonce is included in the IVs
and the master key isn't used for any other encryption mode. This
configuration saves memory and improves performance. A new fscrypt
policy flag is added to allow users to opt-in to this configuration.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2019-01-06 21:36:21 +08:00
|
|
|
fscrypt_generate_iv(&iv, lblk_num, ci);
|
2017-06-19 15:27:58 +08:00
|
|
|
|
2016-04-12 06:51:57 +08:00
|
|
|
req = skcipher_request_alloc(tfm, gfp_flags);
|
2018-05-01 06:51:38 +08:00
|
|
|
if (!req)
|
2015-05-16 07:26:10 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2016-03-22 02:03:02 +08:00
|
|
|
skcipher_request_set_callback(
|
2015-05-16 07:26:10 +08:00
|
|
|
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
|
2017-10-18 15:00:44 +08:00
|
|
|
crypto_req_done, &wait);
|
2015-05-16 07:26:10 +08:00
|
|
|
|
|
|
|
sg_init_table(&dst, 1);
|
2016-12-07 06:53:55 +08:00
|
|
|
sg_set_page(&dst, dest_page, len, offs);
|
2015-05-16 07:26:10 +08:00
|
|
|
sg_init_table(&src, 1);
|
2016-12-07 06:53:55 +08:00
|
|
|
sg_set_page(&src, src_page, len, offs);
|
2017-06-19 15:27:58 +08:00
|
|
|
skcipher_request_set_crypt(req, &src, &dst, len, &iv);
|
2015-05-16 07:26:10 +08:00
|
|
|
if (rw == FS_DECRYPT)
|
2017-10-18 15:00:44 +08:00
|
|
|
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
|
2015-05-16 07:26:10 +08:00
|
|
|
else
|
2017-10-18 15:00:44 +08:00
|
|
|
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
|
2016-03-22 02:03:02 +08:00
|
|
|
skcipher_request_free(req);
|
2015-05-16 07:26:10 +08:00
|
|
|
if (res) {
|
2019-07-25 02:07:58 +08:00
|
|
|
fscrypt_err(inode, "%scryption failed for block %llu: %d",
|
|
|
|
(rw == FS_DECRYPT ? "De" : "En"), lblk_num, res);
|
2015-05-16 07:26:10 +08:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-05-21 00:29:44 +08:00
|
|
|
* fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page
|
|
|
|
* @page: The locked pagecache page containing the block(s) to encrypt
|
|
|
|
* @len: Total size of the block(s) to encrypt. Must be a nonzero
|
|
|
|
* multiple of the filesystem's block size.
|
|
|
|
* @offs: Byte offset within @page of the first block to encrypt. Must be
|
|
|
|
* a multiple of the filesystem's block size.
|
|
|
|
* @gfp_flags: Memory allocation flags
|
|
|
|
*
|
|
|
|
* A new bounce page is allocated, and the specified block(s) are encrypted into
|
|
|
|
* it. In the bounce page, the ciphertext block(s) will be located at the same
|
|
|
|
* offsets at which the plaintext block(s) were located in the source page; any
|
|
|
|
* other parts of the bounce page will be left uninitialized. However, normally
|
|
|
|
* blocksize == PAGE_SIZE and the whole page is encrypted at once.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:44 +08:00
|
|
|
* This is for use by the filesystem's ->writepages() method.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:44 +08:00
|
|
|
* Return: the new encrypted bounce page on success; an ERR_PTR() on failure
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
2019-05-21 00:29:44 +08:00
|
|
|
struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
|
|
|
|
unsigned int len,
|
|
|
|
unsigned int offs,
|
|
|
|
gfp_t gfp_flags)
|
2016-11-14 05:20:46 +08:00
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
{
|
2019-05-21 00:29:44 +08:00
|
|
|
const struct inode *inode = page->mapping->host;
|
|
|
|
const unsigned int blockbits = inode->i_blkbits;
|
|
|
|
const unsigned int blocksize = 1 << blockbits;
|
2019-05-21 00:29:43 +08:00
|
|
|
struct page *ciphertext_page;
|
2019-05-21 00:29:44 +08:00
|
|
|
u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
|
|
|
|
(offs >> blockbits);
|
|
|
|
unsigned int i;
|
2015-05-16 07:26:10 +08:00
|
|
|
int err;
|
|
|
|
|
2019-05-21 00:29:42 +08:00
|
|
|
if (WARN_ON_ONCE(!PageLocked(page)))
|
|
|
|
return ERR_PTR(-EINVAL);
|
2016-12-07 06:53:56 +08:00
|
|
|
|
2019-05-21 00:29:44 +08:00
|
|
|
if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2019-05-21 00:29:39 +08:00
|
|
|
ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags);
|
|
|
|
if (!ciphertext_page)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
2015-05-16 07:26:10 +08:00
|
|
|
|
2019-05-21 00:29:44 +08:00
|
|
|
for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
|
|
|
|
err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num,
|
|
|
|
page, ciphertext_page,
|
|
|
|
blocksize, i, gfp_flags);
|
|
|
|
if (err) {
|
|
|
|
fscrypt_free_bounce_page(ciphertext_page);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
2015-05-16 07:26:10 +08:00
|
|
|
}
|
2016-12-07 06:53:54 +08:00
|
|
|
SetPagePrivate(ciphertext_page);
|
2019-05-21 00:29:39 +08:00
|
|
|
set_page_private(ciphertext_page, (unsigned long)page);
|
2015-05-16 07:26:10 +08:00
|
|
|
return ciphertext_page;
|
|
|
|
}
|
2019-05-21 00:29:44 +08:00
|
|
|
EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks);
|
2015-05-16 07:26:10 +08:00
|
|
|
|
2019-05-21 00:29:43 +08:00
|
|
|
/**
|
|
|
|
* fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place
|
|
|
|
* @inode: The inode to which this block belongs
|
|
|
|
* @page: The page containing the block to encrypt
|
|
|
|
* @len: Size of block to encrypt. Doesn't need to be a multiple of the
|
|
|
|
* fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
|
|
|
|
* @offs: Byte offset within @page at which the block to encrypt begins
|
|
|
|
* @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
|
|
|
|
* number of the block within the file
|
|
|
|
* @gfp_flags: Memory allocation flags
|
|
|
|
*
|
|
|
|
* Encrypt a possibly-compressed filesystem block that is located in an
|
|
|
|
* arbitrary page, not necessarily in the original pagecache page. The @inode
|
|
|
|
* and @lblk_num must be specified, as they can't be determined from @page.
|
|
|
|
*
|
|
|
|
* Return: 0 on success; -errno on failure
|
|
|
|
*/
|
|
|
|
int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
|
|
|
|
unsigned int len, unsigned int offs,
|
|
|
|
u64 lblk_num, gfp_t gfp_flags)
|
|
|
|
{
|
|
|
|
return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page,
|
|
|
|
len, offs, gfp_flags);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
|
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
/**
|
2019-05-21 00:29:47 +08:00
|
|
|
* fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page
|
|
|
|
* @page: The locked pagecache page containing the block(s) to decrypt
|
|
|
|
* @len: Total size of the block(s) to decrypt. Must be a nonzero
|
|
|
|
* multiple of the filesystem's block size.
|
|
|
|
* @offs: Byte offset within @page of the first block to decrypt. Must be
|
|
|
|
* a multiple of the filesystem's block size.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:47 +08:00
|
|
|
* The specified block(s) are decrypted in-place within the pagecache page,
|
|
|
|
* which must still be locked and not uptodate. Normally, blocksize ==
|
|
|
|
* PAGE_SIZE and the whole page is decrypted at once.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:47 +08:00
|
|
|
* This is for use by the filesystem's ->readpages() method.
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
2019-05-21 00:29:47 +08:00
|
|
|
* Return: 0 on success; -errno on failure
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
2019-05-21 00:29:47 +08:00
|
|
|
int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
|
|
|
|
unsigned int offs)
|
2015-05-16 07:26:10 +08:00
|
|
|
{
|
2019-05-21 00:29:47 +08:00
|
|
|
const struct inode *inode = page->mapping->host;
|
|
|
|
const unsigned int blockbits = inode->i_blkbits;
|
|
|
|
const unsigned int blocksize = 1 << blockbits;
|
|
|
|
u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
|
|
|
|
(offs >> blockbits);
|
|
|
|
unsigned int i;
|
|
|
|
int err;
|
|
|
|
|
2019-05-21 00:29:46 +08:00
|
|
|
if (WARN_ON_ONCE(!PageLocked(page)))
|
2019-05-21 00:29:42 +08:00
|
|
|
return -EINVAL;
|
2016-12-07 06:53:56 +08:00
|
|
|
|
2019-05-21 00:29:47 +08:00
|
|
|
if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
|
|
|
|
err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page,
|
|
|
|
page, blocksize, i, GFP_NOFS);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
return 0;
|
2015-05-16 07:26:10 +08:00
|
|
|
}
|
2019-05-21 00:29:47 +08:00
|
|
|
EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks);
|
2015-05-16 07:26:10 +08:00
|
|
|
|
2019-05-21 00:29:46 +08:00
|
|
|
/**
|
|
|
|
* fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place
|
|
|
|
* @inode: The inode to which this block belongs
|
|
|
|
* @page: The page containing the block to decrypt
|
|
|
|
* @len: Size of block to decrypt. Doesn't need to be a multiple of the
|
|
|
|
* fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
|
|
|
|
* @offs: Byte offset within @page at which the block to decrypt begins
|
|
|
|
* @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
|
|
|
|
* number of the block within the file
|
|
|
|
*
|
|
|
|
* Decrypt a possibly-compressed filesystem block that is located in an
|
|
|
|
* arbitrary page, not necessarily in the original pagecache page. The @inode
|
|
|
|
* and @lblk_num must be specified, as they can't be determined from @page.
|
|
|
|
*
|
|
|
|
* Return: 0 on success; -errno on failure
|
|
|
|
*/
|
|
|
|
int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
|
|
|
|
unsigned int len, unsigned int offs,
|
|
|
|
u64 lblk_num)
|
|
|
|
{
|
|
|
|
return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page,
|
|
|
|
len, offs, GFP_NOFS);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
|
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
/*
|
2019-03-21 02:39:09 +08:00
|
|
|
* Validate dentries in encrypted directories to make sure we aren't potentially
|
|
|
|
* caching stale dentries after a key has been added.
|
2015-05-16 07:26:10 +08:00
|
|
|
*/
|
|
|
|
static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
|
|
|
|
{
|
2016-04-12 06:10:11 +08:00
|
|
|
struct dentry *dir;
|
2019-03-21 02:39:09 +08:00
|
|
|
int err;
|
|
|
|
int valid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Plaintext names are always valid, since fscrypt doesn't support
|
|
|
|
* reverting to ciphertext names without evicting the directory's inode
|
|
|
|
* -- which implies eviction of the dentries in the directory.
|
|
|
|
*/
|
|
|
|
if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ciphertext name; valid if the directory's key is still unavailable.
|
|
|
|
*
|
|
|
|
* Although fscrypt forbids rename() on ciphertext names, we still must
|
|
|
|
* use dget_parent() here rather than use ->d_parent directly. That's
|
|
|
|
* because a corrupted fs image may contain directory hard links, which
|
|
|
|
* the VFS handles by moving the directory's dentry tree in the dcache
|
|
|
|
* each time ->lookup() finds the directory and it already has a dentry
|
|
|
|
* elsewhere. Thus ->d_parent can be changing, and we must safely grab
|
|
|
|
* a reference to some ->d_parent to prevent it from being freed.
|
|
|
|
*/
|
2015-05-16 07:26:10 +08:00
|
|
|
|
2016-04-13 07:05:36 +08:00
|
|
|
if (flags & LOOKUP_RCU)
|
|
|
|
return -ECHILD;
|
|
|
|
|
2016-04-12 06:10:11 +08:00
|
|
|
dir = dget_parent(dentry);
|
2019-03-21 02:39:09 +08:00
|
|
|
err = fscrypt_get_encryption_info(d_inode(dir));
|
|
|
|
valid = !fscrypt_has_encryption_key(d_inode(dir));
|
2016-04-12 06:10:11 +08:00
|
|
|
dput(dir);
|
2015-05-16 07:26:10 +08:00
|
|
|
|
2019-03-21 02:39:09 +08:00
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
return valid;
|
2015-05-16 07:26:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct dentry_operations fscrypt_d_ops = {
|
|
|
|
.d_revalidate = fscrypt_d_revalidate,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void fscrypt_destroy(void)
|
|
|
|
{
|
|
|
|
struct fscrypt_ctx *pos, *n;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list)
|
|
|
|
kmem_cache_free(fscrypt_ctx_cachep, pos);
|
|
|
|
INIT_LIST_HEAD(&fscrypt_free_ctxs);
|
|
|
|
mempool_destroy(fscrypt_bounce_page_pool);
|
|
|
|
fscrypt_bounce_page_pool = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fscrypt_initialize() - allocate major buffers for fs encryption.
|
2016-12-07 06:53:57 +08:00
|
|
|
* @cop_flags: fscrypt operations flags
|
2015-05-16 07:26:10 +08:00
|
|
|
*
|
|
|
|
* We only call this when we start accessing encrypted files, since it
|
|
|
|
* results in memory getting allocated that wouldn't otherwise be used.
|
|
|
|
*
|
|
|
|
* Return: Zero on success, non-zero otherwise.
|
|
|
|
*/
|
2016-12-07 06:53:57 +08:00
|
|
|
int fscrypt_initialize(unsigned int cop_flags)
|
2015-05-16 07:26:10 +08:00
|
|
|
{
|
|
|
|
int i, res = -ENOMEM;
|
|
|
|
|
2017-10-29 18:30:19 +08:00
|
|
|
/* No need to allocate a bounce page pool if this FS won't use it. */
|
|
|
|
if (cop_flags & FS_CFLG_OWN_PAGES)
|
2015-05-16 07:26:10 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
mutex_lock(&fscrypt_init_mutex);
|
|
|
|
if (fscrypt_bounce_page_pool)
|
|
|
|
goto already_initialized;
|
|
|
|
|
|
|
|
for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
|
|
|
|
struct fscrypt_ctx *ctx;
|
|
|
|
|
|
|
|
ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS);
|
|
|
|
if (!ctx)
|
|
|
|
goto fail;
|
|
|
|
list_add(&ctx->free_list, &fscrypt_free_ctxs);
|
|
|
|
}
|
|
|
|
|
|
|
|
fscrypt_bounce_page_pool =
|
|
|
|
mempool_create_page_pool(num_prealloc_crypto_pages, 0);
|
|
|
|
if (!fscrypt_bounce_page_pool)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
already_initialized:
|
|
|
|
mutex_unlock(&fscrypt_init_mutex);
|
|
|
|
return 0;
|
|
|
|
fail:
|
|
|
|
fscrypt_destroy();
|
|
|
|
mutex_unlock(&fscrypt_init_mutex);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2019-07-25 02:07:58 +08:00
|
|
|
void fscrypt_msg(const struct inode *inode, const char *level,
|
2018-05-01 06:51:47 +08:00
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
|
|
|
|
DEFAULT_RATELIMIT_BURST);
|
|
|
|
struct va_format vaf;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!__ratelimit(&rs))
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
vaf.fmt = fmt;
|
|
|
|
vaf.va = &args;
|
2019-07-25 02:07:58 +08:00
|
|
|
if (inode)
|
|
|
|
printk("%sfscrypt (%s, inode %lu): %pV\n",
|
|
|
|
level, inode->i_sb->s_id, inode->i_ino, &vaf);
|
2018-05-01 06:51:47 +08:00
|
|
|
else
|
|
|
|
printk("%sfscrypt: %pV\n", level, &vaf);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
/**
|
|
|
|
* fscrypt_init() - Set up for fs encryption.
|
|
|
|
*/
|
|
|
|
static int __init fscrypt_init(void)
|
|
|
|
{
|
fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
Add a new fscrypt ioctl, FS_IOC_ADD_ENCRYPTION_KEY. This ioctl adds an
encryption key to the filesystem's fscrypt keyring ->s_master_keys,
making any files encrypted with that key appear "unlocked".
Why we need this
~~~~~~~~~~~~~~~~
The main problem is that the "locked/unlocked" (ciphertext/plaintext)
status of encrypted files is global, but the fscrypt keys are not.
fscrypt only looks for keys in the keyring(s) the process accessing the
filesystem is subscribed to: the thread keyring, process keyring, and
session keyring, where the session keyring may contain the user keyring.
Therefore, userspace has to put fscrypt keys in the keyrings for
individual users or sessions. But this means that when a process with a
different keyring tries to access encrypted files, whether they appear
"unlocked" or not is nondeterministic. This is because it depends on
whether the files are currently present in the inode cache.
Fixing this by consistently providing each process its own view of the
filesystem depending on whether it has the key or not isn't feasible due
to how the VFS caches work. Furthermore, while sometimes users expect
this behavior, it is misguided for two reasons. First, it would be an
OS-level access control mechanism largely redundant with existing access
control mechanisms such as UNIX file permissions, ACLs, LSMs, etc.
Encryption is actually for protecting the data at rest.
Second, almost all users of fscrypt actually do need the keys to be
global. The largest users of fscrypt, Android and Chromium OS, achieve
this by having PID 1 create a "session keyring" that is inherited by
every process. This works, but it isn't scalable because it prevents
session keyrings from being used for any other purpose.
On general-purpose Linux distros, the 'fscrypt' userspace tool [1] can't
similarly abuse the session keyring, so to make 'sudo' work on all
systems it has to link all the user keyrings into root's user keyring
[2]. This is ugly and raises security concerns. Moreover it can't make
the keys available to system services, such as sshd trying to access the
user's '~/.ssh' directory (see [3], [4]) or NetworkManager trying to
read certificates from the user's home directory (see [5]); or to Docker
containers (see [6], [7]).
By having an API to add a key to the *filesystem* we'll be able to fix
the above bugs, remove userspace workarounds, and clearly express the
intended semantics: the locked/unlocked status of an encrypted directory
is global, and encryption is orthogonal to OS-level access control.
Why not use the add_key() syscall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We use an ioctl for this API rather than the existing add_key() system
call because the ioctl gives us the flexibility needed to implement
fscrypt-specific semantics that will be introduced in later patches:
- Supporting key removal with the semantics such that the secret is
removed immediately and any unused inodes using the key are evicted;
also, the eviction of any in-use inodes can be retried.
- Calculating a key-dependent cryptographic identifier and returning it
to userspace.
- Allowing keys to be added and removed by non-root users, but only keys
for v2 encryption policies; and to prevent denial-of-service attacks,
users can only remove keys they themselves have added, and a key is
only really removed after all users who added it have removed it.
Trying to shoehorn these semantics into the keyrings syscalls would be
very difficult, whereas the ioctls make things much easier.
However, to reuse code the implementation still uses the keyrings
service internally. Thus we get lockless RCU-mode key lookups without
having to re-implement it, and the keys automatically show up in
/proc/keys for debugging purposes.
References:
[1] https://github.com/google/fscrypt
[2] https://goo.gl/55cCrI#heading=h.vf09isp98isb
[3] https://github.com/google/fscrypt/issues/111#issuecomment-444347939
[4] https://github.com/google/fscrypt/issues/116
[5] https://bugs.launchpad.net/ubuntu/+source/fscrypt/+bug/1770715
[6] https://github.com/google/fscrypt/issues/128
[7] https://askubuntu.com/questions/1130306/cannot-run-docker-on-an-encrypted-filesystem
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-08-05 10:35:46 +08:00
|
|
|
int err = -ENOMEM;
|
|
|
|
|
fscrypt: use unbound workqueue for decryption
Improve fscrypt read performance by switching the decryption workqueue
from bound to unbound. With the bound workqueue, when multiple bios
completed on the same CPU, they were decrypted on that same CPU. But
with the unbound queue, they are now decrypted in parallel on any CPU.
Although fscrypt read performance can be tough to measure due to the
many sources of variation, this change is most beneficial when
decryption is slow, e.g. on CPUs without AES instructions. For example,
I timed tarring up encrypted directories on f2fs. On x86 with AES-NI
instructions disabled, the unbound workqueue improved performance by
about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available. But
with AES-NI enabled, performance was unchanged to within ~2%.
I also did the same test on a quad-core ARM CPU using xts-speck128-neon
encryption. There performance was usually about 10% better with the
unbound workqueue, bringing it closer to the unencrypted speed.
The unbound workqueue may be worse in some cases due to worse locality,
but I think it's still the better default. dm-crypt uses an unbound
workqueue by default too, so this change makes fscrypt match.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2018-04-21 07:30:02 +08:00
|
|
|
/*
|
|
|
|
* Use an unbound workqueue to allow bios to be decrypted in parallel
|
|
|
|
* even when they happen to complete on the same CPU. This sacrifices
|
|
|
|
* locality, but it's worthwhile since decryption is CPU-intensive.
|
|
|
|
*
|
|
|
|
* Also use a high-priority workqueue to prioritize decryption work,
|
|
|
|
* which blocks reads from completing, over regular application tasks.
|
|
|
|
*/
|
2015-05-16 07:26:10 +08:00
|
|
|
fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
|
fscrypt: use unbound workqueue for decryption
Improve fscrypt read performance by switching the decryption workqueue
from bound to unbound. With the bound workqueue, when multiple bios
completed on the same CPU, they were decrypted on that same CPU. But
with the unbound queue, they are now decrypted in parallel on any CPU.
Although fscrypt read performance can be tough to measure due to the
many sources of variation, this change is most beneficial when
decryption is slow, e.g. on CPUs without AES instructions. For example,
I timed tarring up encrypted directories on f2fs. On x86 with AES-NI
instructions disabled, the unbound workqueue improved performance by
about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available. But
with AES-NI enabled, performance was unchanged to within ~2%.
I also did the same test on a quad-core ARM CPU using xts-speck128-neon
encryption. There performance was usually about 10% better with the
unbound workqueue, bringing it closer to the unencrypted speed.
The unbound workqueue may be worse in some cases due to worse locality,
but I think it's still the better default. dm-crypt uses an unbound
workqueue by default too, so this change makes fscrypt match.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2018-04-21 07:30:02 +08:00
|
|
|
WQ_UNBOUND | WQ_HIGHPRI,
|
|
|
|
num_online_cpus());
|
2015-05-16 07:26:10 +08:00
|
|
|
if (!fscrypt_read_workqueue)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT);
|
|
|
|
if (!fscrypt_ctx_cachep)
|
|
|
|
goto fail_free_queue;
|
|
|
|
|
|
|
|
fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT);
|
|
|
|
if (!fscrypt_info_cachep)
|
|
|
|
goto fail_free_ctx;
|
|
|
|
|
fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
Add a new fscrypt ioctl, FS_IOC_ADD_ENCRYPTION_KEY. This ioctl adds an
encryption key to the filesystem's fscrypt keyring ->s_master_keys,
making any files encrypted with that key appear "unlocked".
Why we need this
~~~~~~~~~~~~~~~~
The main problem is that the "locked/unlocked" (ciphertext/plaintext)
status of encrypted files is global, but the fscrypt keys are not.
fscrypt only looks for keys in the keyring(s) the process accessing the
filesystem is subscribed to: the thread keyring, process keyring, and
session keyring, where the session keyring may contain the user keyring.
Therefore, userspace has to put fscrypt keys in the keyrings for
individual users or sessions. But this means that when a process with a
different keyring tries to access encrypted files, whether they appear
"unlocked" or not is nondeterministic. This is because it depends on
whether the files are currently present in the inode cache.
Fixing this by consistently providing each process its own view of the
filesystem depending on whether it has the key or not isn't feasible due
to how the VFS caches work. Furthermore, while sometimes users expect
this behavior, it is misguided for two reasons. First, it would be an
OS-level access control mechanism largely redundant with existing access
control mechanisms such as UNIX file permissions, ACLs, LSMs, etc.
Encryption is actually for protecting the data at rest.
Second, almost all users of fscrypt actually do need the keys to be
global. The largest users of fscrypt, Android and Chromium OS, achieve
this by having PID 1 create a "session keyring" that is inherited by
every process. This works, but it isn't scalable because it prevents
session keyrings from being used for any other purpose.
On general-purpose Linux distros, the 'fscrypt' userspace tool [1] can't
similarly abuse the session keyring, so to make 'sudo' work on all
systems it has to link all the user keyrings into root's user keyring
[2]. This is ugly and raises security concerns. Moreover it can't make
the keys available to system services, such as sshd trying to access the
user's '~/.ssh' directory (see [3], [4]) or NetworkManager trying to
read certificates from the user's home directory (see [5]); or to Docker
containers (see [6], [7]).
By having an API to add a key to the *filesystem* we'll be able to fix
the above bugs, remove userspace workarounds, and clearly express the
intended semantics: the locked/unlocked status of an encrypted directory
is global, and encryption is orthogonal to OS-level access control.
Why not use the add_key() syscall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We use an ioctl for this API rather than the existing add_key() system
call because the ioctl gives us the flexibility needed to implement
fscrypt-specific semantics that will be introduced in later patches:
- Supporting key removal with the semantics such that the secret is
removed immediately and any unused inodes using the key are evicted;
also, the eviction of any in-use inodes can be retried.
- Calculating a key-dependent cryptographic identifier and returning it
to userspace.
- Allowing keys to be added and removed by non-root users, but only keys
for v2 encryption policies; and to prevent denial-of-service attacks,
users can only remove keys they themselves have added, and a key is
only really removed after all users who added it have removed it.
Trying to shoehorn these semantics into the keyrings syscalls would be
very difficult, whereas the ioctls make things much easier.
However, to reuse code the implementation still uses the keyrings
service internally. Thus we get lockless RCU-mode key lookups without
having to re-implement it, and the keys automatically show up in
/proc/keys for debugging purposes.
References:
[1] https://github.com/google/fscrypt
[2] https://goo.gl/55cCrI#heading=h.vf09isp98isb
[3] https://github.com/google/fscrypt/issues/111#issuecomment-444347939
[4] https://github.com/google/fscrypt/issues/116
[5] https://bugs.launchpad.net/ubuntu/+source/fscrypt/+bug/1770715
[6] https://github.com/google/fscrypt/issues/128
[7] https://askubuntu.com/questions/1130306/cannot-run-docker-on-an-encrypted-filesystem
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-08-05 10:35:46 +08:00
|
|
|
err = fscrypt_init_keyring();
|
|
|
|
if (err)
|
|
|
|
goto fail_free_info;
|
|
|
|
|
2015-05-16 07:26:10 +08:00
|
|
|
return 0;
|
|
|
|
|
fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
Add a new fscrypt ioctl, FS_IOC_ADD_ENCRYPTION_KEY. This ioctl adds an
encryption key to the filesystem's fscrypt keyring ->s_master_keys,
making any files encrypted with that key appear "unlocked".
Why we need this
~~~~~~~~~~~~~~~~
The main problem is that the "locked/unlocked" (ciphertext/plaintext)
status of encrypted files is global, but the fscrypt keys are not.
fscrypt only looks for keys in the keyring(s) the process accessing the
filesystem is subscribed to: the thread keyring, process keyring, and
session keyring, where the session keyring may contain the user keyring.
Therefore, userspace has to put fscrypt keys in the keyrings for
individual users or sessions. But this means that when a process with a
different keyring tries to access encrypted files, whether they appear
"unlocked" or not is nondeterministic. This is because it depends on
whether the files are currently present in the inode cache.
Fixing this by consistently providing each process its own view of the
filesystem depending on whether it has the key or not isn't feasible due
to how the VFS caches work. Furthermore, while sometimes users expect
this behavior, it is misguided for two reasons. First, it would be an
OS-level access control mechanism largely redundant with existing access
control mechanisms such as UNIX file permissions, ACLs, LSMs, etc.
Encryption is actually for protecting the data at rest.
Second, almost all users of fscrypt actually do need the keys to be
global. The largest users of fscrypt, Android and Chromium OS, achieve
this by having PID 1 create a "session keyring" that is inherited by
every process. This works, but it isn't scalable because it prevents
session keyrings from being used for any other purpose.
On general-purpose Linux distros, the 'fscrypt' userspace tool [1] can't
similarly abuse the session keyring, so to make 'sudo' work on all
systems it has to link all the user keyrings into root's user keyring
[2]. This is ugly and raises security concerns. Moreover it can't make
the keys available to system services, such as sshd trying to access the
user's '~/.ssh' directory (see [3], [4]) or NetworkManager trying to
read certificates from the user's home directory (see [5]); or to Docker
containers (see [6], [7]).
By having an API to add a key to the *filesystem* we'll be able to fix
the above bugs, remove userspace workarounds, and clearly express the
intended semantics: the locked/unlocked status of an encrypted directory
is global, and encryption is orthogonal to OS-level access control.
Why not use the add_key() syscall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We use an ioctl for this API rather than the existing add_key() system
call because the ioctl gives us the flexibility needed to implement
fscrypt-specific semantics that will be introduced in later patches:
- Supporting key removal with the semantics such that the secret is
removed immediately and any unused inodes using the key are evicted;
also, the eviction of any in-use inodes can be retried.
- Calculating a key-dependent cryptographic identifier and returning it
to userspace.
- Allowing keys to be added and removed by non-root users, but only keys
for v2 encryption policies; and to prevent denial-of-service attacks,
users can only remove keys they themselves have added, and a key is
only really removed after all users who added it have removed it.
Trying to shoehorn these semantics into the keyrings syscalls would be
very difficult, whereas the ioctls make things much easier.
However, to reuse code the implementation still uses the keyrings
service internally. Thus we get lockless RCU-mode key lookups without
having to re-implement it, and the keys automatically show up in
/proc/keys for debugging purposes.
References:
[1] https://github.com/google/fscrypt
[2] https://goo.gl/55cCrI#heading=h.vf09isp98isb
[3] https://github.com/google/fscrypt/issues/111#issuecomment-444347939
[4] https://github.com/google/fscrypt/issues/116
[5] https://bugs.launchpad.net/ubuntu/+source/fscrypt/+bug/1770715
[6] https://github.com/google/fscrypt/issues/128
[7] https://askubuntu.com/questions/1130306/cannot-run-docker-on-an-encrypted-filesystem
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-08-05 10:35:46 +08:00
|
|
|
fail_free_info:
|
|
|
|
kmem_cache_destroy(fscrypt_info_cachep);
|
2015-05-16 07:26:10 +08:00
|
|
|
fail_free_ctx:
|
|
|
|
kmem_cache_destroy(fscrypt_ctx_cachep);
|
|
|
|
fail_free_queue:
|
|
|
|
destroy_workqueue(fscrypt_read_workqueue);
|
|
|
|
fail:
|
fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl
Add a new fscrypt ioctl, FS_IOC_ADD_ENCRYPTION_KEY. This ioctl adds an
encryption key to the filesystem's fscrypt keyring ->s_master_keys,
making any files encrypted with that key appear "unlocked".
Why we need this
~~~~~~~~~~~~~~~~
The main problem is that the "locked/unlocked" (ciphertext/plaintext)
status of encrypted files is global, but the fscrypt keys are not.
fscrypt only looks for keys in the keyring(s) the process accessing the
filesystem is subscribed to: the thread keyring, process keyring, and
session keyring, where the session keyring may contain the user keyring.
Therefore, userspace has to put fscrypt keys in the keyrings for
individual users or sessions. But this means that when a process with a
different keyring tries to access encrypted files, whether they appear
"unlocked" or not is nondeterministic. This is because it depends on
whether the files are currently present in the inode cache.
Fixing this by consistently providing each process its own view of the
filesystem depending on whether it has the key or not isn't feasible due
to how the VFS caches work. Furthermore, while sometimes users expect
this behavior, it is misguided for two reasons. First, it would be an
OS-level access control mechanism largely redundant with existing access
control mechanisms such as UNIX file permissions, ACLs, LSMs, etc.
Encryption is actually for protecting the data at rest.
Second, almost all users of fscrypt actually do need the keys to be
global. The largest users of fscrypt, Android and Chromium OS, achieve
this by having PID 1 create a "session keyring" that is inherited by
every process. This works, but it isn't scalable because it prevents
session keyrings from being used for any other purpose.
On general-purpose Linux distros, the 'fscrypt' userspace tool [1] can't
similarly abuse the session keyring, so to make 'sudo' work on all
systems it has to link all the user keyrings into root's user keyring
[2]. This is ugly and raises security concerns. Moreover it can't make
the keys available to system services, such as sshd trying to access the
user's '~/.ssh' directory (see [3], [4]) or NetworkManager trying to
read certificates from the user's home directory (see [5]); or to Docker
containers (see [6], [7]).
By having an API to add a key to the *filesystem* we'll be able to fix
the above bugs, remove userspace workarounds, and clearly express the
intended semantics: the locked/unlocked status of an encrypted directory
is global, and encryption is orthogonal to OS-level access control.
Why not use the add_key() syscall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We use an ioctl for this API rather than the existing add_key() system
call because the ioctl gives us the flexibility needed to implement
fscrypt-specific semantics that will be introduced in later patches:
- Supporting key removal with the semantics such that the secret is
removed immediately and any unused inodes using the key are evicted;
also, the eviction of any in-use inodes can be retried.
- Calculating a key-dependent cryptographic identifier and returning it
to userspace.
- Allowing keys to be added and removed by non-root users, but only keys
for v2 encryption policies; and to prevent denial-of-service attacks,
users can only remove keys they themselves have added, and a key is
only really removed after all users who added it have removed it.
Trying to shoehorn these semantics into the keyrings syscalls would be
very difficult, whereas the ioctls make things much easier.
However, to reuse code the implementation still uses the keyrings
service internally. Thus we get lockless RCU-mode key lookups without
having to re-implement it, and the keys automatically show up in
/proc/keys for debugging purposes.
References:
[1] https://github.com/google/fscrypt
[2] https://goo.gl/55cCrI#heading=h.vf09isp98isb
[3] https://github.com/google/fscrypt/issues/111#issuecomment-444347939
[4] https://github.com/google/fscrypt/issues/116
[5] https://bugs.launchpad.net/ubuntu/+source/fscrypt/+bug/1770715
[6] https://github.com/google/fscrypt/issues/128
[7] https://askubuntu.com/questions/1130306/cannot-run-docker-on-an-encrypted-filesystem
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
2019-08-05 10:35:46 +08:00
|
|
|
return err;
|
2015-05-16 07:26:10 +08:00
|
|
|
}
|
2019-07-25 02:07:57 +08:00
|
|
|
/*
 * Pass fscrypt_init() to the late_initcall() macro so that the setup it
 * performs (read workqueue, slab caches, keyring) is invoked as an initcall.
 * NOTE(review): presumably this runs at the "late" initcall level during
 * boot -- confirm against <linux/init.h>.
 */
late_initcall(fscrypt_init)
|