OpenCloudOS-Kernel/include/linux/ext3_fs_i.h

/*
 *  linux/include/linux/ext3_fs_i.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/include/linux/minix_fs_i.h
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#ifndef _LINUX_EXT3_FS_I
#define _LINUX_EXT3_FS_I

#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>

/* data type for block offset of block group */
typedef int ext3_grpblk_t;

/* data type for filesystem-wide blocks number */
typedef unsigned long ext3_fsblk_t;

#define E3FSBLK "%lu"

struct ext3_reserve_window {
	ext3_fsblk_t	_rsv_start;	/* First byte reserved */
	ext3_fsblk_t	_rsv_end;	/* Last byte reserved or 0 */
};

struct ext3_reserve_window_node {
	struct rb_node		rsv_node;
	__u32			rsv_goal_size;
	__u32			rsv_alloc_hit;
	struct ext3_reserve_window	rsv_window;
};

struct ext3_block_alloc_info {
	/* information about reservation window */
	struct ext3_reserve_window_node	rsv_window_node;
	/*
	 * was i_next_alloc_block in ext3_inode_info
	 * is the logical (file-relative) number of the
	 * most-recently-allocated block in this file.
	 * We use this for detecting linearly ascending allocation requests.
	 */
	__u32                   last_alloc_logical_block;
	/*
	 * Was i_next_alloc_goal in ext3_inode_info
	 * is the *physical* companion to i_next_alloc_block.
	 * it the physical block number of the block which was most-recentl
	 * allocated to this file.  This give us the goal (target) for the next
	 * allocation when we detect linearly ascending requests.
	 */
	ext3_fsblk_t		last_alloc_physical_block;
};

#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end

/*
 * third extended file system inode data in memory
 */
struct ext3_inode_info {
	__le32	i_data[15];	/* unconverted */
	__u32	i_flags;
#ifdef EXT3_FRAGMENTS
	__u32	i_faddr;
	__u8	i_frag_no;
	__u8	i_frag_size;
#endif
	ext3_fsblk_t	i_file_acl;
	__u32	i_dir_acl;
	__u32	i_dtime;

	/*
	 * i_block_group is the number of the block group which contains
	 * this file's inode.  Constant across the lifetime of the inode,
	 * it is ued for making block allocation decisions - we try to
	 * place a file's data blocks near its inode block, and new inodes
	 * near to their parent directory's inode.
	 */
	__u32	i_block_group;
	unsigned long	i_state_flags;	/* Dynamic state flags for ext3 */

	/* block reservation info */
	struct ext3_block_alloc_info *i_block_alloc_info;

	__u32	i_dir_start_lookup;
#ifdef CONFIG_EXT3_FS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data. Taking i_mutex even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif

	struct list_head i_orphan;	/* unlinked but open inodes */

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
	 * the VFS prior to calling ext3_truncate(), but the filesystem won't
	 * set i_disksize to 0 until the truncate is actually under way.
	 *
	 * The intent is that i_disksize always represents the blocks which
	 * are used by this file.  This allows recovery to restart truncate
	 * on orphans if we crash during truncate.  We actually write i_disksize
	 * into the on-disk inode when writing inodes out, instead of i_size.
	 *
	 * The only time when i_disksize and i_size may be different is when
	 * a truncate is in progress.  The only things which change i_disksize
	 * are ext3_get_block (growth) and ext3_truncate (shrinkth).
	 */
	loff_t	i_disksize;

	/* on-disk additional length */
	__u16 i_extra_isize;

	/*
	 * truncate_mutex is for serialising ext3_truncate() against
	 * ext3_getblock().  In the 2.4 ext2 design, great chunks of inode's
	 * data tree are chopped off during truncate. We can't do that in
	 * ext3 because whenever we perform intermediate commits during
	 * truncate, the inode and all the metadata blocks *must* be in a
	 * consistent state which allows truncation of the orphans to restart
	 * during recovery.  Hence we must fix the get_block-vs-truncate race
	 * by other means, so we have truncate_mutex.
	 */
	struct mutex truncate_mutex;

	/*
	 * Transactions that contain inode's metadata needed to complete
	 * fsync and fdatasync, respectively.
	 */
	atomic_t i_sync_tid;
	atomic_t i_datasync_tid;

	struct inode vfs_inode;
};

#endif	/* _LINUX_EXT3_FS_I */
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`/*`
			`* linux/include/linux/ext3_fs_i.h`
			`*`
			`* Copyright (C) 1992, 1993, 1994, 1995`
			`* Remy Card (card@masi.ibp.fr)`
			`* Laboratoire MASI - Institut Blaise Pascal`
			`* Universite Pierre et Marie Curie (Paris VI)`
			`*`
			`* from`
			`*`
			`* linux/include/linux/minix_fs_i.h`
			`*`
			`* Copyright (C) 1991, 1992 Linus Torvalds`
			`*/`

			`#ifndef _LINUX_EXT3_FS_I`
			`#define _LINUX_EXT3_FS_I`

			`#include <linux/rwsem.h>`
			`#include <linux/rbtree.h>`
			`#include <linux/seqlock.h>`
[PATCH] convert ext3's truncate_sem to a mutex ext3's truncate_sem is always released in the same function it's taken and it otherwise is a mutex as well.. Signed-off-by: Arjan van de Ven <arjan@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-03-23 19:00:42 +08:00			`#include <linux/mutex.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00
[PATCH] ext3_fsblk_t: filesystem, group blocks and bug fixes Some of the in-kernel ext3 block variable type are treated as signed 4 bytes int type, thus limited ext3 filesystem to 8TB (4kblock size based). While trying to fix them, it seems quite confusing in the ext3 code where some blocks are filesystem-wide blocks, some are group relative offsets that need to be signed value (as -1 has special meaning). So it seem saner to define two types of physical blocks: one is filesystem wide blocks, another is group-relative blocks. The following patches clarify these two types of blocks in the ext3 code, and fix the type bugs which limit current 32 bit ext3 filesystem limit to 8TB. With this series of patches and the percpu counter data type changes in the mm tree, we are able to extend exts filesystem limit to 16TB. This work is also a pre-request for the recent >32 bit ext3 work, and makes the kernel to able to address 48 bit ext3 block a lot easier: Simply redefine ext3_fsblk_t from unsigned long to sector_t and redefine the format string for ext3 filesystem block corresponding. Two RFC with a series patches have been posted to ext2-devel list and have been reviewed and discussed: http://marc.theaimsgroup.com/?l=ext2-devel&m=114722190816690&w=2 http://marc.theaimsgroup.com/?l=ext2-devel&m=114784919525942&w=2 Patches are tested on both 32 bit machine and 64 bit machine, <8TB ext3 and >8TB ext3 filesystem(with the latest to be released e2fsprogs-1.39). Tests includes overnight fsx, tiobench, dbench and fsstress. This patch: Defines ext3_fsblk_t and ext3_grpblk_t, and the printk format string for filesystem wide blocks. This patch classifies all block group relative blocks, and ext3_fsblk_t blocks occurs in the same function where used to be confusing before. Also include kernel bug fixes for filesystem wide in-kernel block variables. There are some fileystem wide blocks are treated as int/unsigned int type in the kernel currently, especially in ext3 block allocation and reservation code. This patch fixed those bugs by converting those variables to ext3_fsblk_t(unsigned long) type. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-06-25 20:48:06 +08:00			`/* data type for block offset of block group */`
			`typedef int ext3_grpblk_t;`

			`/* data type for filesystem-wide blocks number */`
			`typedef unsigned long ext3_fsblk_t;`

			`#define E3FSBLK "%lu"`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`struct ext3_reserve_window {`
[PATCH] ext3_fsblk_t: the rest of in-kernel filesystem blocks conversion Convert the ext3 in-kernel filesystem blocks to ext3_fsblk_t. Convert the rest of all unsigned long type in-kernel filesystem blocks to ext3_fsblk_t, and replace the printk format string respondingly. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-06-25 20:48:07 +08:00			`ext3_fsblk_t _rsv_start; /* First byte reserved */`
			`ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`};`

			`struct ext3_reserve_window_node {`
[PATCH] ext3: More whitespace cleanups More white space cleanups in preparation of cloning ext4 from ext3. Removing spaces that precede a tab. Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-27 16:49:35 +08:00			`struct rb_node rsv_node;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`__u32 rsv_goal_size;`
			`__u32 rsv_alloc_hit;`
			`struct ext3_reserve_window rsv_window;`
			`};`

			`struct ext3_block_alloc_info {`
			`/* information about reservation window */`
			`struct ext3_reserve_window_node rsv_window_node;`
			`/*`
			`* was i_next_alloc_block in ext3_inode_info`
			`* is the logical (file-relative) number of the`
			`* most-recently-allocated block in this file.`
			`* We use this for detecting linearly ascending allocation requests.`
			`*/`
			`__u32 last_alloc_logical_block;`
			`/*`
			`* Was i_next_alloc_goal in ext3_inode_info`
			`* is the physical companion to i_next_alloc_block.`
Fix occurrences of "the the " Signed-off-by: Michael Opdenacker <michael@free-electrons.com> Signed-off-by: Adrian Bunk <bunk@stusta.de> 2007-05-09 14:57:56 +08:00			`* it the physical block number of the block which was most-recentl`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`* allocated to this file. This give us the goal (target) for the next`
			`* allocation when we detect linearly ascending requests.`
			`*/`
[PATCH] ext3_fsblk_t: the rest of in-kernel filesystem blocks conversion Convert the ext3 in-kernel filesystem blocks to ext3_fsblk_t. Convert the rest of all unsigned long type in-kernel filesystem blocks to ext3_fsblk_t, and replace the printk format string respondingly. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-06-25 20:48:07 +08:00			`ext3_fsblk_t last_alloc_physical_block;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`};`

			`#define rsv_start rsv_window._rsv_start`
			`#define rsv_end rsv_window._rsv_end`

			`/*`
			`* third extended file system inode data in memory`
			`*/`
			`struct ext3_inode_info {`
			`__le32 i_data[15]; /* unconverted */`
			`__u32 i_flags;`
			`#ifdef EXT3_FRAGMENTS`
			`__u32 i_faddr;`
			`__u8 i_frag_no;`
			`__u8 i_frag_size;`
			`#endif`
[PATCH] ext3_fsblk_t: the rest of in-kernel filesystem blocks conversion Convert the ext3 in-kernel filesystem blocks to ext3_fsblk_t. Convert the rest of all unsigned long type in-kernel filesystem blocks to ext3_fsblk_t, and replace the printk format string respondingly. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-06-25 20:48:07 +08:00			`ext3_fsblk_t i_file_acl;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`__u32 i_dir_acl;`
			`__u32 i_dtime;`

			`/*`
			`* i_block_group is the number of the block group which contains`
			`* this file's inode. Constant across the lifetime of the inode,`
			`* it is ued for making block allocation decisions - we try to`
			`* place a file's data blocks near its inode block, and new inodes`
			`* near to their parent directory's inode.`
			`*/`
			`__u32 i_block_group;`
ext3: fix broken handling of EXT3_STATE_NEW In commit 9df93939b735 ("ext3: Use bitops to read/modify EXT3_I(inode)->i_state") ext3 changed its internal 'i_state' variable to use bitops for its state handling. However, unline the same ext4 change, it didn't actually change the name of the field when it changed the semantics of it. As a result, an old use of 'i_state' remained in fs/ext3/ialloc.c that initialized the field to EXT3_STATE_NEW. And that does not work _at_all_ when we're now working with individually named bits rather than values that get masked. So the code tried to mark the state to be new, but in actual fact set the field to EXT3_STATE_JDATA. Which makes no sense at all, and screws up all the code that checks whether the inode was newly allocated. In particular, it made the xattr code unhappy, and caused various random behavior, like apparently https://bugzilla.redhat.com/show_bug.cgi?id=577911 So fix the initialization, and rename the field to match ext4 so that we don't have this happen again. Cc: James Morris <jmorris@namei.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: Daniel J Walsh <dwalsh@redhat.com> Cc: Eric Paris <eparis@redhat.com> Cc: Jan Kara <jack@suse.cz> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-30 05:30:19 +08:00			`unsigned long i_state_flags; /* Dynamic state flags for ext3 */`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00
			`/* block reservation info */`
			`struct ext3_block_alloc_info *i_block_alloc_info;`

			`__u32 i_dir_start_lookup;`
			`#ifdef CONFIG_EXT3_FS_XATTR`
			`/*`
			`* Extended attributes can be read independently of the main file`
[PATCH] mutex subsystem, semaphore to mutex: VFS, ->i_sem This patch converts the inode semaphore to a mutex. I have tested it on XFS and compiled as much as one can consider on an ia64. Anyway your luck with it might be different. Modified-by: Ingo Molnar <mingo@elte.hu> (finished the conversion) Signed-off-by: Jes Sorensen <jes@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> 2006-01-10 07:59:24 +08:00			`* data. Taking i_mutex even when reading would cause contention`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`* between readers of EAs and writers of regular file data, so`
			`* instead we synchronize on xattr_sem when reading or changing`
			`* EAs.`
			`*/`
			`struct rw_semaphore xattr_sem;`
			`#endif`

			`struct list_head i_orphan; /* unlinked but open inodes */`

			`/*`
			`* i_disksize keeps track of what the inode size is ON DISK, not`
			`* in memory. During truncate, i_size is set to the new size by`
			`* the VFS prior to calling ext3_truncate(), but the filesystem won't`
			`* set i_disksize to 0 until the truncate is actually under way.`
			`*`
			`* The intent is that i_disksize always represents the blocks which`
			`* are used by this file. This allows recovery to restart truncate`
			`* on orphans if we crash during truncate. We actually write i_disksize`
			`* into the on-disk inode when writing inodes out, instead of i_size.`
			`*`
			`* The only time when i_disksize and i_size may be different is when`
			`* a truncate is in progress. The only things which change i_disksize`
			`* are ext3_get_block (growth) and ext3_truncate (shrinkth).`
			`*/`
			`loff_t i_disksize;`

			`/* on-disk additional length */`
			`__u16 i_extra_isize;`

			`/*`
[PATCH] convert ext3's truncate_sem to a mutex ext3's truncate_sem is always released in the same function it's taken and it otherwise is a mutex as well.. Signed-off-by: Arjan van de Ven <arjan@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-03-23 19:00:42 +08:00			`* truncate_mutex is for serialising ext3_truncate() against`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`* ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's`
			`* data tree are chopped off during truncate. We can't do that in`
			`* ext3 because whenever we perform intermediate commits during`
			`* truncate, the inode and all the metadata blocks must be in a`
			`* consistent state which allows truncation of the orphans to restart`
			`* during recovery. Hence we must fix the get_block-vs-truncate race`
[PATCH] convert ext3's truncate_sem to a mutex ext3's truncate_sem is always released in the same function it's taken and it otherwise is a mutex as well.. Signed-off-by: Arjan van de Ven <arjan@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-03-23 19:00:42 +08:00			`* by other means, so we have truncate_mutex.`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`*/`
[PATCH] convert ext3's truncate_sem to a mutex ext3's truncate_sem is always released in the same function it's taken and it otherwise is a mutex as well.. Signed-off-by: Arjan van de Ven <arjan@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-03-23 19:00:42 +08:00			`struct mutex truncate_mutex;`
ext3: Wait for proper transaction commit on fsync We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync. Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2009-10-17 01:26:15 +08:00
			`/*`
			`* Transactions that contain inode's metadata needed to complete`
			`* fsync and fdatasync, respectively.`
			`*/`
			`atomic_t i_sync_tid;`
			`atomic_t i_datasync_tid;`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 06:20:36 +08:00			`struct inode vfs_inode;`
			`};`

			`#endif /* _LINUX_EXT3_FS_I */`