2046 lines
53 KiB
C
2046 lines
53 KiB
C
/* -*- mode: c; c-basic-offset: 8; -*-
|
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
|
*
|
|
* alloc.c
|
|
*
|
|
* Extent allocs and frees
|
|
*
|
|
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 021110-1307, USA.
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/highmem.h>
|
|
|
|
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
|
|
#include <cluster/masklog.h>
|
|
|
|
#include "ocfs2.h"
|
|
|
|
#include "alloc.h"
|
|
#include "dlmglue.h"
|
|
#include "extent_map.h"
|
|
#include "inode.h"
|
|
#include "journal.h"
|
|
#include "localalloc.h"
|
|
#include "suballoc.h"
|
|
#include "sysfile.h"
|
|
#include "file.h"
|
|
#include "super.h"
|
|
#include "uptodate.h"
|
|
|
|
#include "buffer_head_io.h"
|
|
|
|
static int ocfs2_extent_contig(struct inode *inode,
|
|
struct ocfs2_extent_rec *ext,
|
|
u64 blkno);
|
|
|
|
static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
int wanted,
|
|
struct ocfs2_alloc_context *meta_ac,
|
|
struct buffer_head *bhs[]);
|
|
|
|
static int ocfs2_add_branch(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct buffer_head *eb_bh,
|
|
struct buffer_head *last_eb_bh,
|
|
struct ocfs2_alloc_context *meta_ac);
|
|
|
|
static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct ocfs2_alloc_context *meta_ac,
|
|
struct buffer_head **ret_new_eb_bh);
|
|
|
|
static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
u64 blkno,
|
|
u32 new_clusters);
|
|
|
|
static int ocfs2_find_branch_target(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct buffer_head **target_bh);
|
|
|
|
static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct ocfs2_dinode *fe,
|
|
unsigned int new_i_clusters,
|
|
struct buffer_head *old_last_eb,
|
|
struct buffer_head **new_last_eb);
|
|
|
|
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
|
|
|
|
static int ocfs2_extent_contig(struct inode *inode,
|
|
struct ocfs2_extent_rec *ext,
|
|
u64 blkno)
|
|
{
|
|
return blkno == (le64_to_cpu(ext->e_blkno) +
|
|
ocfs2_clusters_to_blocks(inode->i_sb,
|
|
le32_to_cpu(ext->e_clusters)));
|
|
}
|
|
|
|
/*
|
|
* How many free extents have we got before we need more meta data?
|
|
*/
|
|
int ocfs2_num_free_extents(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct ocfs2_dinode *fe)
|
|
{
|
|
int retval;
|
|
struct ocfs2_extent_list *el;
|
|
struct ocfs2_extent_block *eb;
|
|
struct buffer_head *eb_bh = NULL;
|
|
|
|
mlog_entry_void();
|
|
|
|
if (!OCFS2_IS_VALID_DINODE(fe)) {
|
|
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
|
|
retval = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
if (fe->i_last_eb_blk) {
|
|
retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
|
|
&eb_bh, OCFS2_BH_CACHED, inode);
|
|
if (retval < 0) {
|
|
mlog_errno(retval);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
|
|
el = &eb->h_list;
|
|
} else
|
|
el = &fe->id2.i_list;
|
|
|
|
BUG_ON(el->l_tree_depth != 0);
|
|
|
|
retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
|
|
bail:
|
|
if (eb_bh)
|
|
brelse(eb_bh);
|
|
|
|
mlog_exit(retval);
|
|
return retval;
|
|
}
|
|
|
|
/* expects array to already be allocated
|
|
*
|
|
* sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
|
|
* l_count for you
|
|
*/
|
|
static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
int wanted,
|
|
struct ocfs2_alloc_context *meta_ac,
|
|
struct buffer_head *bhs[])
|
|
{
|
|
int count, status, i;
|
|
u16 suballoc_bit_start;
|
|
u32 num_got;
|
|
u64 first_blkno;
|
|
struct ocfs2_extent_block *eb;
|
|
|
|
mlog_entry_void();
|
|
|
|
count = 0;
|
|
while (count < wanted) {
|
|
status = ocfs2_claim_metadata(osb,
|
|
handle,
|
|
meta_ac,
|
|
wanted - count,
|
|
&suballoc_bit_start,
|
|
&num_got,
|
|
&first_blkno);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
for(i = count; i < (num_got + count); i++) {
|
|
bhs[i] = sb_getblk(osb->sb, first_blkno);
|
|
if (bhs[i] == NULL) {
|
|
status = -EIO;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
|
|
|
|
status = ocfs2_journal_access(handle, inode, bhs[i],
|
|
OCFS2_JOURNAL_ACCESS_CREATE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
|
|
eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
|
|
/* Ok, setup the minimal stuff here. */
|
|
strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
|
|
eb->h_blkno = cpu_to_le64(first_blkno);
|
|
eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
|
|
|
|
#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
|
|
/* we always use slot zero's suballocator */
|
|
eb->h_suballoc_slot = 0;
|
|
#else
|
|
eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
|
|
#endif
|
|
eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
|
|
eb->h_list.l_count =
|
|
cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
|
|
|
|
suballoc_bit_start++;
|
|
first_blkno++;
|
|
|
|
/* We'll also be dirtied by the caller, so
|
|
* this isn't absolutely necessary. */
|
|
status = ocfs2_journal_dirty(handle, bhs[i]);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
count += num_got;
|
|
}
|
|
|
|
status = 0;
|
|
bail:
|
|
if (status < 0) {
|
|
for(i = 0; i < wanted; i++) {
|
|
if (bhs[i])
|
|
brelse(bhs[i]);
|
|
bhs[i] = NULL;
|
|
}
|
|
}
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* Add an entire tree branch to our inode. eb_bh is the extent block
|
|
* to start at, if we don't want to start the branch at the dinode
|
|
* structure.
|
|
*
|
|
* last_eb_bh is required as we have to update it's next_leaf pointer
|
|
* for the new last extent block.
|
|
*
|
|
* the new branch will be 'empty' in the sense that every block will
|
|
* contain a single record with e_clusters == 0.
|
|
*/
|
|
static int ocfs2_add_branch(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct buffer_head *eb_bh,
|
|
struct buffer_head *last_eb_bh,
|
|
struct ocfs2_alloc_context *meta_ac)
|
|
{
|
|
int status, new_blocks, i;
|
|
u64 next_blkno, new_last_eb_blk;
|
|
struct buffer_head *bh;
|
|
struct buffer_head **new_eb_bhs = NULL;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *eb_el;
|
|
struct ocfs2_extent_list *el;
|
|
|
|
mlog_entry_void();
|
|
|
|
BUG_ON(!last_eb_bh);
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
|
|
if (eb_bh) {
|
|
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
|
|
el = &eb->h_list;
|
|
} else
|
|
el = &fe->id2.i_list;
|
|
|
|
/* we never add a branch to a leaf. */
|
|
BUG_ON(!el->l_tree_depth);
|
|
|
|
new_blocks = le16_to_cpu(el->l_tree_depth);
|
|
|
|
/* allocate the number of new eb blocks we need */
|
|
new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
|
|
GFP_KERNEL);
|
|
if (!new_eb_bhs) {
|
|
status = -ENOMEM;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
|
|
meta_ac, new_eb_bhs);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
|
|
* linked with the rest of the tree.
|
|
* conversly, new_eb_bhs[0] is the new bottommost leaf.
|
|
*
|
|
* when we leave the loop, new_last_eb_blk will point to the
|
|
* newest leaf, and next_blkno will point to the topmost extent
|
|
* block. */
|
|
next_blkno = new_last_eb_blk = 0;
|
|
for(i = 0; i < new_blocks; i++) {
|
|
bh = new_eb_bhs[i];
|
|
eb = (struct ocfs2_extent_block *) bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
eb_el = &eb->h_list;
|
|
|
|
status = ocfs2_journal_access(handle, inode, bh,
|
|
OCFS2_JOURNAL_ACCESS_CREATE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
eb->h_next_leaf_blk = 0;
|
|
eb_el->l_tree_depth = cpu_to_le16(i);
|
|
eb_el->l_next_free_rec = cpu_to_le16(1);
|
|
eb_el->l_recs[0].e_cpos = fe->i_clusters;
|
|
eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
|
|
eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
|
|
if (!eb_el->l_tree_depth)
|
|
new_last_eb_blk = le64_to_cpu(eb->h_blkno);
|
|
|
|
status = ocfs2_journal_dirty(handle, bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
next_blkno = le64_to_cpu(eb->h_blkno);
|
|
}
|
|
|
|
/* This is a bit hairy. We want to update up to three blocks
|
|
* here without leaving any of them in an inconsistent state
|
|
* in case of error. We don't have to worry about
|
|
* journal_dirty erroring as it won't unless we've aborted the
|
|
* handle (in which case we would never be here) so reserving
|
|
* the write with journal_access is all we need to do. */
|
|
status = ocfs2_journal_access(handle, inode, last_eb_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
status = ocfs2_journal_access(handle, inode, fe_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
if (eb_bh) {
|
|
status = ocfs2_journal_access(handle, inode, eb_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
/* Link the new branch into the rest of the tree (el will
|
|
* either be on the fe, or the extent block passed in. */
|
|
i = le16_to_cpu(el->l_next_free_rec);
|
|
el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
|
|
el->l_recs[i].e_cpos = fe->i_clusters;
|
|
el->l_recs[i].e_clusters = 0;
|
|
le16_add_cpu(&el->l_next_free_rec, 1);
|
|
|
|
/* fe needs a new last extent block pointer, as does the
|
|
* next_leaf on the previously last-extent-block. */
|
|
fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
|
|
|
|
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
|
|
|
|
status = ocfs2_journal_dirty(handle, last_eb_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
status = ocfs2_journal_dirty(handle, fe_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
if (eb_bh) {
|
|
status = ocfs2_journal_dirty(handle, eb_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
}
|
|
|
|
status = 0;
|
|
bail:
|
|
if (new_eb_bhs) {
|
|
for (i = 0; i < new_blocks; i++)
|
|
if (new_eb_bhs[i])
|
|
brelse(new_eb_bhs[i]);
|
|
kfree(new_eb_bhs);
|
|
}
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* adds another level to the allocation tree.
|
|
* returns back the new extent block so you can add a branch to it
|
|
* after this call.
|
|
*/
|
|
static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct ocfs2_alloc_context *meta_ac,
|
|
struct buffer_head **ret_new_eb_bh)
|
|
{
|
|
int status, i;
|
|
struct buffer_head *new_eb_bh = NULL;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *fe_el;
|
|
struct ocfs2_extent_list *eb_el;
|
|
|
|
mlog_entry_void();
|
|
|
|
status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
|
|
&new_eb_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
eb_el = &eb->h_list;
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
fe_el = &fe->id2.i_list;
|
|
|
|
status = ocfs2_journal_access(handle, inode, new_eb_bh,
|
|
OCFS2_JOURNAL_ACCESS_CREATE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* copy the fe data into the new extent block */
|
|
eb_el->l_tree_depth = fe_el->l_tree_depth;
|
|
eb_el->l_next_free_rec = fe_el->l_next_free_rec;
|
|
for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
|
|
eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
|
|
eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
|
|
eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
|
|
}
|
|
|
|
status = ocfs2_journal_dirty(handle, new_eb_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_journal_access(handle, inode, fe_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* update fe now */
|
|
le16_add_cpu(&fe_el->l_tree_depth, 1);
|
|
fe_el->l_recs[0].e_cpos = 0;
|
|
fe_el->l_recs[0].e_blkno = eb->h_blkno;
|
|
fe_el->l_recs[0].e_clusters = fe->i_clusters;
|
|
for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
|
|
fe_el->l_recs[i].e_cpos = 0;
|
|
fe_el->l_recs[i].e_clusters = 0;
|
|
fe_el->l_recs[i].e_blkno = 0;
|
|
}
|
|
fe_el->l_next_free_rec = cpu_to_le16(1);
|
|
|
|
/* If this is our 1st tree depth shift, then last_eb_blk
|
|
* becomes the allocated extent block */
|
|
if (fe_el->l_tree_depth == cpu_to_le16(1))
|
|
fe->i_last_eb_blk = eb->h_blkno;
|
|
|
|
status = ocfs2_journal_dirty(handle, fe_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
*ret_new_eb_bh = new_eb_bh;
|
|
new_eb_bh = NULL;
|
|
status = 0;
|
|
bail:
|
|
if (new_eb_bh)
|
|
brelse(new_eb_bh);
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* Expects the tree to already have room in the rightmost leaf for the
|
|
* extent. Updates all the extent blocks (and the dinode) on the way
|
|
* down.
|
|
*/
|
|
static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
u64 start_blk,
|
|
u32 new_clusters)
|
|
{
|
|
int status, i, num_bhs = 0;
|
|
u64 next_blkno;
|
|
u16 next_free;
|
|
struct buffer_head **eb_bhs = NULL;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
|
|
mlog_entry_void();
|
|
|
|
status = ocfs2_journal_access(handle, inode, fe_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
el = &fe->id2.i_list;
|
|
if (el->l_tree_depth) {
|
|
/* This is another operation where we want to be
|
|
* careful about our tree updates. An error here means
|
|
* none of the previous changes we made should roll
|
|
* forward. As a result, we have to record the buffers
|
|
* for this part of the tree in an array and reserve a
|
|
* journal write to them before making any changes. */
|
|
num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
|
|
eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
|
|
GFP_KERNEL);
|
|
if (!eb_bhs) {
|
|
status = -ENOMEM;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
i = 0;
|
|
while(el->l_tree_depth) {
|
|
next_free = le16_to_cpu(el->l_next_free_rec);
|
|
if (next_free == 0) {
|
|
ocfs2_error(inode->i_sb,
|
|
"Dinode %llu has a bad extent list",
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);
|
|
|
|
BUG_ON(i >= num_bhs);
|
|
status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
|
|
OCFS2_BH_CACHED, inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
|
|
eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_journal_access(handle, inode, eb_bhs[i],
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
el = &eb->h_list;
|
|
i++;
|
|
/* When we leave this loop, eb_bhs[num_bhs - 1] will
|
|
* hold the bottom-most leaf extent block. */
|
|
}
|
|
BUG_ON(el->l_tree_depth);
|
|
|
|
el = &fe->id2.i_list;
|
|
/* If we have tree depth, then the fe update is
|
|
* trivial, and we want to switch el out for the
|
|
* bottom-most leaf in order to update it with the
|
|
* actual extent data below. */
|
|
next_free = le16_to_cpu(el->l_next_free_rec);
|
|
if (next_free == 0) {
|
|
ocfs2_error(inode->i_sb,
|
|
"Dinode %llu has a bad extent list",
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
|
|
new_clusters);
|
|
/* (num_bhs - 1) to avoid the leaf */
|
|
for(i = 0; i < (num_bhs - 1); i++) {
|
|
eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
|
|
el = &eb->h_list;
|
|
|
|
/* finally, make our actual change to the
|
|
* intermediate extent blocks. */
|
|
next_free = le16_to_cpu(el->l_next_free_rec);
|
|
le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
|
|
new_clusters);
|
|
|
|
status = ocfs2_journal_dirty(handle, eb_bhs[i]);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
}
|
|
BUG_ON(i != (num_bhs - 1));
|
|
/* note that the leaf block wasn't touched in
|
|
* the loop above */
|
|
eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
|
|
el = &eb->h_list;
|
|
BUG_ON(el->l_tree_depth);
|
|
}
|
|
|
|
/* yay, we can finally add the actual extent now! */
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
if (le16_to_cpu(el->l_next_free_rec) &&
|
|
ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
|
|
le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
|
|
} else if (le16_to_cpu(el->l_next_free_rec) &&
|
|
(le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
|
|
/* having an empty extent at eof is legal. */
|
|
if (el->l_recs[i].e_cpos != fe->i_clusters) {
|
|
ocfs2_error(inode->i_sb,
|
|
"Dinode %llu trailing extent is bad: "
|
|
"cpos (%u) != number of clusters (%u)",
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno,
|
|
le32_to_cpu(el->l_recs[i].e_cpos),
|
|
le32_to_cpu(fe->i_clusters));
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
|
|
el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
|
|
} else {
|
|
/* No contiguous record, or no empty record at eof, so
|
|
* we add a new one. */
|
|
|
|
BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
|
|
le16_to_cpu(el->l_count));
|
|
i = le16_to_cpu(el->l_next_free_rec);
|
|
|
|
el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
|
|
el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
|
|
el->l_recs[i].e_cpos = fe->i_clusters;
|
|
le16_add_cpu(&el->l_next_free_rec, 1);
|
|
}
|
|
|
|
/*
|
|
* extent_map errors are not fatal, so they are ignored outside
|
|
* of flushing the thing.
|
|
*/
|
|
status = ocfs2_extent_map_append(inode, &el->l_recs[i],
|
|
new_clusters);
|
|
if (status) {
|
|
mlog_errno(status);
|
|
ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
|
|
}
|
|
|
|
status = ocfs2_journal_dirty(handle, fe_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
if (fe->id2.i_list.l_tree_depth) {
|
|
status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
}
|
|
|
|
status = 0;
|
|
bail:
|
|
if (eb_bhs) {
|
|
for (i = 0; i < num_bhs; i++)
|
|
if (eb_bhs[i])
|
|
brelse(eb_bhs[i]);
|
|
kfree(eb_bhs);
|
|
}
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* Should only be called when there is no space left in any of the
|
|
* leaf nodes. What we want to do is find the lowest tree depth
|
|
* non-leaf extent block with room for new records. There are three
|
|
* valid results of this search:
|
|
*
|
|
* 1) a lowest extent block is found, then we pass it back in
|
|
* *lowest_eb_bh and return '0'
|
|
*
|
|
* 2) the search fails to find anything, but the dinode has room. We
|
|
* pass NULL back in *lowest_eb_bh, but still return '0'
|
|
*
|
|
* 3) the search fails to find anything AND the dinode is full, in
|
|
* which case we return > 0
|
|
*
|
|
* return status < 0 indicates an error.
|
|
*/
|
|
static int ocfs2_find_branch_target(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct buffer_head **target_bh)
|
|
{
|
|
int status = 0, i;
|
|
u64 blkno;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
struct buffer_head *bh = NULL;
|
|
struct buffer_head *lowest_bh = NULL;
|
|
|
|
mlog_entry_void();
|
|
|
|
*target_bh = NULL;
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
el = &fe->id2.i_list;
|
|
|
|
while(le16_to_cpu(el->l_tree_depth) > 1) {
|
|
if (le16_to_cpu(el->l_next_free_rec) == 0) {
|
|
ocfs2_error(inode->i_sb, "Dinode %llu has empty "
|
|
"extent list (next_free_rec == 0)",
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
blkno = le64_to_cpu(el->l_recs[i].e_blkno);
|
|
if (!blkno) {
|
|
ocfs2_error(inode->i_sb, "Dinode %llu has extent "
|
|
"list where extent # %d has no physical "
|
|
"block start",
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno, i);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
if (bh) {
|
|
brelse(bh);
|
|
bh = NULL;
|
|
}
|
|
|
|
status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
|
|
inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
eb = (struct ocfs2_extent_block *) bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
el = &eb->h_list;
|
|
|
|
if (le16_to_cpu(el->l_next_free_rec) <
|
|
le16_to_cpu(el->l_count)) {
|
|
if (lowest_bh)
|
|
brelse(lowest_bh);
|
|
lowest_bh = bh;
|
|
get_bh(lowest_bh);
|
|
}
|
|
}
|
|
|
|
/* If we didn't find one and the fe doesn't have any room,
|
|
* then return '1' */
|
|
if (!lowest_bh
|
|
&& (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
|
|
status = 1;
|
|
|
|
*target_bh = lowest_bh;
|
|
bail:
|
|
if (bh)
|
|
brelse(bh);
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/* the caller needs to update fe->i_clusters */
|
|
int ocfs2_insert_extent(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
u64 start_blk,
|
|
u32 new_clusters,
|
|
struct ocfs2_alloc_context *meta_ac)
|
|
{
|
|
int status, i, shift;
|
|
struct buffer_head *last_eb_bh = NULL;
|
|
struct buffer_head *bh = NULL;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
|
|
mlog_entry_void();
|
|
|
|
mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
|
|
new_clusters, (unsigned long long)start_blk,
|
|
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
el = &fe->id2.i_list;
|
|
|
|
if (el->l_tree_depth) {
|
|
/* jump to end of tree */
|
|
status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
|
|
&last_eb_bh, OCFS2_BH_CACHED, inode);
|
|
if (status < 0) {
|
|
mlog_exit(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
el = &eb->h_list;
|
|
}
|
|
|
|
/* Can we allocate without adding/shifting tree bits? */
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
if (le16_to_cpu(el->l_next_free_rec) == 0
|
|
|| (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
|
|
|| le32_to_cpu(el->l_recs[i].e_clusters) == 0
|
|
|| ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
|
|
goto out_add;
|
|
|
|
mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
|
|
"tree now.\n");
|
|
|
|
shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
|
|
if (shift < 0) {
|
|
status = shift;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* We traveled all the way to the bottom of the allocation tree
|
|
* and didn't find room for any more extents - we need to add
|
|
* another tree level */
|
|
if (shift) {
|
|
/* if we hit a leaf, we'd better be empty :) */
|
|
BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
|
|
le16_to_cpu(el->l_count));
|
|
BUG_ON(bh);
|
|
mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
|
|
"(current = %u)\n",
|
|
le16_to_cpu(fe->id2.i_list.l_tree_depth));
|
|
|
|
/* ocfs2_shift_tree_depth will return us a buffer with
|
|
* the new extent block (so we can pass that to
|
|
* ocfs2_add_branch). */
|
|
status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
|
|
meta_ac, &bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
/* Special case: we have room now if we shifted from
|
|
* tree_depth 0 */
|
|
if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
|
|
goto out_add;
|
|
}
|
|
|
|
/* call ocfs2_add_branch to add the final part of the tree with
|
|
* the new data. */
|
|
mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
|
|
status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
|
|
meta_ac);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
out_add:
|
|
/* Finally, we can add clusters. */
|
|
status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
|
|
start_blk, new_clusters);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
bail:
|
|
if (bh)
|
|
brelse(bh);
|
|
|
|
if (last_eb_bh)
|
|
brelse(last_eb_bh);
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
|
|
{
|
|
struct buffer_head *tl_bh = osb->osb_tl_bh;
|
|
struct ocfs2_dinode *di;
|
|
struct ocfs2_truncate_log *tl;
|
|
|
|
di = (struct ocfs2_dinode *) tl_bh->b_data;
|
|
tl = &di->id2.i_dealloc;
|
|
|
|
mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
|
|
"slot %d, invalid truncate log parameters: used = "
|
|
"%u, count = %u\n", osb->slot_num,
|
|
le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
|
|
return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
|
|
}
|
|
|
|
static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
|
|
unsigned int new_start)
|
|
{
|
|
unsigned int tail_index;
|
|
unsigned int current_tail;
|
|
|
|
/* No records, nothing to coalesce */
|
|
if (!le16_to_cpu(tl->tl_used))
|
|
return 0;
|
|
|
|
tail_index = le16_to_cpu(tl->tl_used) - 1;
|
|
current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
|
|
current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
|
|
|
|
return current_tail == new_start;
|
|
}
|
|
|
|
static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
u64 start_blk,
|
|
unsigned int num_clusters)
|
|
{
|
|
int status, index;
|
|
unsigned int start_cluster, tl_count;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
struct buffer_head *tl_bh = osb->osb_tl_bh;
|
|
struct ocfs2_dinode *di;
|
|
struct ocfs2_truncate_log *tl;
|
|
|
|
mlog_entry("start_blk = %llu, num_clusters = %u\n",
|
|
(unsigned long long)start_blk, num_clusters);
|
|
|
|
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
|
|
|
|
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
|
|
|
|
di = (struct ocfs2_dinode *) tl_bh->b_data;
|
|
tl = &di->id2.i_dealloc;
|
|
if (!OCFS2_IS_VALID_DINODE(di)) {
|
|
OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
tl_count = le16_to_cpu(tl->tl_count);
|
|
mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
|
|
tl_count == 0,
|
|
"Truncate record count on #%llu invalid "
|
|
"wanted %u, actual %u\n",
|
|
(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
|
|
ocfs2_truncate_recs_per_inode(osb->sb),
|
|
le16_to_cpu(tl->tl_count));
|
|
|
|
/* Caller should have known to flush before calling us. */
|
|
index = le16_to_cpu(tl->tl_used);
|
|
if (index >= tl_count) {
|
|
status = -ENOSPC;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_journal_access(handle, tl_inode, tl_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
mlog(0, "Log truncate of %u clusters starting at cluster %u to "
|
|
"%llu (index = %d)\n", num_clusters, start_cluster,
|
|
(unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
|
|
|
|
if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
|
|
/*
|
|
* Move index back to the record we are coalescing with.
|
|
* ocfs2_truncate_log_can_coalesce() guarantees nonzero
|
|
*/
|
|
index--;
|
|
|
|
num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
|
|
mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
|
|
index, le32_to_cpu(tl->tl_recs[index].t_start),
|
|
num_clusters);
|
|
} else {
|
|
tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
|
|
tl->tl_used = cpu_to_le16(index + 1);
|
|
}
|
|
tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
|
|
|
|
status = ocfs2_journal_dirty(handle, tl_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
bail:
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
|
|
handle_t *handle,
|
|
struct inode *data_alloc_inode,
|
|
struct buffer_head *data_alloc_bh)
|
|
{
|
|
int status = 0;
|
|
int i;
|
|
unsigned int num_clusters;
|
|
u64 start_blk;
|
|
struct ocfs2_truncate_rec rec;
|
|
struct ocfs2_dinode *di;
|
|
struct ocfs2_truncate_log *tl;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
struct buffer_head *tl_bh = osb->osb_tl_bh;
|
|
|
|
mlog_entry_void();
|
|
|
|
di = (struct ocfs2_dinode *) tl_bh->b_data;
|
|
tl = &di->id2.i_dealloc;
|
|
i = le16_to_cpu(tl->tl_used) - 1;
|
|
while (i >= 0) {
|
|
/* Caller has given us at least enough credits to
|
|
* update the truncate log dinode */
|
|
status = ocfs2_journal_access(handle, tl_inode, tl_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
tl->tl_used = cpu_to_le16(i);
|
|
|
|
status = ocfs2_journal_dirty(handle, tl_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* TODO: Perhaps we can calculate the bulk of the
|
|
* credits up front rather than extending like
|
|
* this. */
|
|
status = ocfs2_extend_trans(handle,
|
|
OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
rec = tl->tl_recs[i];
|
|
start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
|
|
le32_to_cpu(rec.t_start));
|
|
num_clusters = le32_to_cpu(rec.t_clusters);
|
|
|
|
/* if start_blk is not set, we ignore the record as
|
|
* invalid. */
|
|
if (start_blk) {
|
|
mlog(0, "free record %d, start = %u, clusters = %u\n",
|
|
i, le32_to_cpu(rec.t_start), num_clusters);
|
|
|
|
status = ocfs2_free_clusters(handle, data_alloc_inode,
|
|
data_alloc_bh, start_blk,
|
|
num_clusters);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
i--;
|
|
}
|
|
|
|
bail:
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/* Expects you to already be holding tl_inode->i_mutex */
|
|
static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
|
|
{
|
|
int status;
|
|
unsigned int num_to_flush;
|
|
handle_t *handle;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
struct inode *data_alloc_inode = NULL;
|
|
struct buffer_head *tl_bh = osb->osb_tl_bh;
|
|
struct buffer_head *data_alloc_bh = NULL;
|
|
struct ocfs2_dinode *di;
|
|
struct ocfs2_truncate_log *tl;
|
|
|
|
mlog_entry_void();
|
|
|
|
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
|
|
|
|
di = (struct ocfs2_dinode *) tl_bh->b_data;
|
|
tl = &di->id2.i_dealloc;
|
|
if (!OCFS2_IS_VALID_DINODE(di)) {
|
|
OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
|
|
status = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
num_to_flush = le16_to_cpu(tl->tl_used);
|
|
mlog(0, "Flush %u records from truncate log #%llu\n",
|
|
num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
|
|
if (!num_to_flush) {
|
|
status = 0;
|
|
goto out;
|
|
}
|
|
|
|
data_alloc_inode = ocfs2_get_system_file_inode(osb,
|
|
GLOBAL_BITMAP_SYSTEM_INODE,
|
|
OCFS2_INVALID_SLOT);
|
|
if (!data_alloc_inode) {
|
|
status = -EINVAL;
|
|
mlog(ML_ERROR, "Could not get bitmap inode!\n");
|
|
goto out;
|
|
}
|
|
|
|
mutex_lock(&data_alloc_inode->i_mutex);
|
|
|
|
status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto out_mutex;
|
|
}
|
|
|
|
handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
|
|
if (IS_ERR(handle)) {
|
|
status = PTR_ERR(handle);
|
|
mlog_errno(status);
|
|
goto out_unlock;
|
|
}
|
|
|
|
status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
|
|
data_alloc_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
ocfs2_commit_trans(osb, handle);
|
|
|
|
out_unlock:
|
|
brelse(data_alloc_bh);
|
|
ocfs2_meta_unlock(data_alloc_inode, 1);
|
|
|
|
out_mutex:
|
|
mutex_unlock(&data_alloc_inode->i_mutex);
|
|
iput(data_alloc_inode);
|
|
|
|
out:
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
|
|
{
|
|
int status;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
|
|
mutex_lock(&tl_inode->i_mutex);
|
|
status = __ocfs2_flush_truncate_log(osb);
|
|
mutex_unlock(&tl_inode->i_mutex);
|
|
|
|
return status;
|
|
}
|
|
|
|
static void ocfs2_truncate_log_worker(struct work_struct *work)
|
|
{
|
|
int status;
|
|
struct ocfs2_super *osb =
|
|
container_of(work, struct ocfs2_super,
|
|
osb_truncate_log_wq.work);
|
|
|
|
mlog_entry_void();
|
|
|
|
status = ocfs2_flush_truncate_log(osb);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
mlog_exit(status);
|
|
}
|
|
|
|
#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
|
|
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
|
|
int cancel)
|
|
{
|
|
if (osb->osb_tl_inode) {
|
|
/* We want to push off log flushes while truncates are
|
|
* still running. */
|
|
if (cancel)
|
|
cancel_delayed_work(&osb->osb_truncate_log_wq);
|
|
|
|
queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
|
|
OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
|
|
}
|
|
}
|
|
|
|
static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
|
|
int slot_num,
|
|
struct inode **tl_inode,
|
|
struct buffer_head **tl_bh)
|
|
{
|
|
int status;
|
|
struct inode *inode = NULL;
|
|
struct buffer_head *bh = NULL;
|
|
|
|
inode = ocfs2_get_system_file_inode(osb,
|
|
TRUNCATE_LOG_SYSTEM_INODE,
|
|
slot_num);
|
|
if (!inode) {
|
|
status = -EINVAL;
|
|
mlog(ML_ERROR, "Could not get load truncate log inode!\n");
|
|
goto bail;
|
|
}
|
|
|
|
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
|
|
OCFS2_BH_CACHED, inode);
|
|
if (status < 0) {
|
|
iput(inode);
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
*tl_inode = inode;
|
|
*tl_bh = bh;
|
|
bail:
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/* called during the 1st stage of node recovery. we stamp a clean
|
|
* truncate log and pass back a copy for processing later. if the
|
|
* truncate log does not require processing, a *tl_copy is set to
|
|
* NULL. */
|
|
int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
|
|
int slot_num,
|
|
struct ocfs2_dinode **tl_copy)
|
|
{
|
|
int status;
|
|
struct inode *tl_inode = NULL;
|
|
struct buffer_head *tl_bh = NULL;
|
|
struct ocfs2_dinode *di;
|
|
struct ocfs2_truncate_log *tl;
|
|
|
|
*tl_copy = NULL;
|
|
|
|
mlog(0, "recover truncate log from slot %d\n", slot_num);
|
|
|
|
status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
di = (struct ocfs2_dinode *) tl_bh->b_data;
|
|
tl = &di->id2.i_dealloc;
|
|
if (!OCFS2_IS_VALID_DINODE(di)) {
|
|
OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
if (le16_to_cpu(tl->tl_used)) {
|
|
mlog(0, "We'll have %u logs to recover\n",
|
|
le16_to_cpu(tl->tl_used));
|
|
|
|
*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
|
|
if (!(*tl_copy)) {
|
|
status = -ENOMEM;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
/* Assuming the write-out below goes well, this copy
|
|
* will be passed back to recovery for processing. */
|
|
memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
|
|
|
|
/* All we need to do to clear the truncate log is set
|
|
* tl_used. */
|
|
tl->tl_used = 0;
|
|
|
|
status = ocfs2_write_block(osb, tl_bh, tl_inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
bail:
|
|
if (tl_inode)
|
|
iput(tl_inode);
|
|
if (tl_bh)
|
|
brelse(tl_bh);
|
|
|
|
if (status < 0 && (*tl_copy)) {
|
|
kfree(*tl_copy);
|
|
*tl_copy = NULL;
|
|
}
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
|
|
struct ocfs2_dinode *tl_copy)
|
|
{
|
|
int status = 0;
|
|
int i;
|
|
unsigned int clusters, num_recs, start_cluster;
|
|
u64 start_blk;
|
|
handle_t *handle;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
struct ocfs2_truncate_log *tl;
|
|
|
|
mlog_entry_void();
|
|
|
|
if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
|
|
mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
tl = &tl_copy->id2.i_dealloc;
|
|
num_recs = le16_to_cpu(tl->tl_used);
|
|
mlog(0, "cleanup %u records from %llu\n", num_recs,
|
|
(unsigned long long)tl_copy->i_blkno);
|
|
|
|
mutex_lock(&tl_inode->i_mutex);
|
|
for(i = 0; i < num_recs; i++) {
|
|
if (ocfs2_truncate_log_needs_flush(osb)) {
|
|
status = __ocfs2_flush_truncate_log(osb);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail_up;
|
|
}
|
|
}
|
|
|
|
handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
|
|
if (IS_ERR(handle)) {
|
|
status = PTR_ERR(handle);
|
|
mlog_errno(status);
|
|
goto bail_up;
|
|
}
|
|
|
|
clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
|
|
start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
|
|
start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
|
|
|
|
status = ocfs2_truncate_log_append(osb, handle,
|
|
start_blk, clusters);
|
|
ocfs2_commit_trans(osb, handle);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail_up;
|
|
}
|
|
}
|
|
|
|
bail_up:
|
|
mutex_unlock(&tl_inode->i_mutex);
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
|
|
{
|
|
int status;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
|
|
mlog_entry_void();
|
|
|
|
if (tl_inode) {
|
|
cancel_delayed_work(&osb->osb_truncate_log_wq);
|
|
flush_workqueue(ocfs2_wq);
|
|
|
|
status = ocfs2_flush_truncate_log(osb);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
brelse(osb->osb_tl_bh);
|
|
iput(osb->osb_tl_inode);
|
|
}
|
|
|
|
mlog_exit_void();
|
|
}
|
|
|
|
int ocfs2_truncate_log_init(struct ocfs2_super *osb)
|
|
{
|
|
int status;
|
|
struct inode *tl_inode = NULL;
|
|
struct buffer_head *tl_bh = NULL;
|
|
|
|
mlog_entry_void();
|
|
|
|
status = ocfs2_get_truncate_log_info(osb,
|
|
osb->slot_num,
|
|
&tl_inode,
|
|
&tl_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
/* ocfs2_truncate_log_shutdown keys on the existence of
|
|
* osb->osb_tl_inode so we don't set any of the osb variables
|
|
* until we're sure all is well. */
|
|
INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
|
|
ocfs2_truncate_log_worker);
|
|
osb->osb_tl_bh = tl_bh;
|
|
osb->osb_tl_inode = tl_inode;
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/* This function will figure out whether the currently last extent
|
|
* block will be deleted, and if it will, what the new last extent
|
|
* block will be so we can update his h_next_leaf_blk field, as well
|
|
* as the dinodes i_last_eb_blk */
|
|
static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct ocfs2_dinode *fe,
|
|
u32 new_i_clusters,
|
|
struct buffer_head *old_last_eb,
|
|
struct buffer_head **new_last_eb)
|
|
{
|
|
int i, status = 0;
|
|
u64 block = 0;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
struct buffer_head *bh = NULL;
|
|
|
|
*new_last_eb = NULL;
|
|
|
|
if (!OCFS2_IS_VALID_DINODE(fe)) {
|
|
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
/* we have no tree, so of course, no last_eb. */
|
|
if (!fe->id2.i_list.l_tree_depth)
|
|
goto bail;
|
|
|
|
/* trunc to zero special case - this makes tree_depth = 0
|
|
* regardless of what it is. */
|
|
if (!new_i_clusters)
|
|
goto bail;
|
|
|
|
eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
|
|
el = &(eb->h_list);
|
|
BUG_ON(!el->l_next_free_rec);
|
|
|
|
/* Make sure that this guy will actually be empty after we
|
|
* clear away the data. */
|
|
if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
|
|
goto bail;
|
|
|
|
/* Ok, at this point, we know that last_eb will definitely
|
|
* change, so lets traverse the tree and find the second to
|
|
* last extent block. */
|
|
el = &(fe->id2.i_list);
|
|
/* go down the tree, */
|
|
do {
|
|
for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
|
|
if (le32_to_cpu(el->l_recs[i].e_cpos) <
|
|
new_i_clusters) {
|
|
block = le64_to_cpu(el->l_recs[i].e_blkno);
|
|
break;
|
|
}
|
|
}
|
|
BUG_ON(i < 0);
|
|
|
|
if (bh) {
|
|
brelse(bh);
|
|
bh = NULL;
|
|
}
|
|
|
|
status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
|
|
inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) bh->b_data;
|
|
el = &eb->h_list;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
} while (el->l_tree_depth);
|
|
|
|
*new_last_eb = bh;
|
|
get_bh(*new_last_eb);
|
|
mlog(0, "returning block %llu\n",
|
|
(unsigned long long)le64_to_cpu(eb->h_blkno));
|
|
bail:
|
|
if (bh)
|
|
brelse(bh);
|
|
|
|
return status;
|
|
}
|
|
|
|
static int ocfs2_do_truncate(struct ocfs2_super *osb,
|
|
unsigned int clusters_to_del,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct buffer_head *old_last_eb_bh,
|
|
handle_t *handle,
|
|
struct ocfs2_truncate_context *tc)
|
|
{
|
|
int status, i, depth;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_block *last_eb = NULL;
|
|
struct ocfs2_extent_list *el;
|
|
struct buffer_head *eb_bh = NULL;
|
|
struct buffer_head *last_eb_bh = NULL;
|
|
u64 next_eb = 0;
|
|
u64 delete_blk = 0;
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
|
|
status = ocfs2_find_new_last_ext_blk(osb,
|
|
inode,
|
|
fe,
|
|
le32_to_cpu(fe->i_clusters) -
|
|
clusters_to_del,
|
|
old_last_eb_bh,
|
|
&last_eb_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
if (last_eb_bh)
|
|
last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
|
|
status = ocfs2_journal_access(handle, inode, fe_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
el = &(fe->id2.i_list);
|
|
|
|
spin_lock(&OCFS2_I(inode)->ip_lock);
|
|
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
|
|
clusters_to_del;
|
|
spin_unlock(&OCFS2_I(inode)->ip_lock);
|
|
le32_add_cpu(&fe->i_clusters, -clusters_to_del);
|
|
fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
|
|
fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
|
|
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
|
|
BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
|
|
le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
|
|
/* tree depth zero, we can just delete the clusters, otherwise
|
|
* we need to record the offset of the next level extent block
|
|
* as we may overwrite it. */
|
|
if (!el->l_tree_depth)
|
|
delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
|
|
+ ocfs2_clusters_to_blocks(osb->sb,
|
|
le32_to_cpu(el->l_recs[i].e_clusters));
|
|
else
|
|
next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
|
|
|
|
if (!el->l_recs[i].e_clusters) {
|
|
/* if we deleted the whole extent record, then clear
|
|
* out the other fields and update the extent
|
|
* list. For depth > 0 trees, we've already recorded
|
|
* the extent block in 'next_eb' */
|
|
el->l_recs[i].e_cpos = 0;
|
|
el->l_recs[i].e_blkno = 0;
|
|
BUG_ON(!el->l_next_free_rec);
|
|
le16_add_cpu(&el->l_next_free_rec, -1);
|
|
}
|
|
|
|
depth = le16_to_cpu(el->l_tree_depth);
|
|
if (!fe->i_clusters) {
|
|
/* trunc to zero is a special case. */
|
|
el->l_tree_depth = 0;
|
|
fe->i_last_eb_blk = 0;
|
|
} else if (last_eb)
|
|
fe->i_last_eb_blk = last_eb->h_blkno;
|
|
|
|
status = ocfs2_journal_dirty(handle, fe_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
if (last_eb) {
|
|
/* If there will be a new last extent block, then by
|
|
* definition, there cannot be any leaves to the right of
|
|
* him. */
|
|
status = ocfs2_journal_access(handle, inode, last_eb_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
last_eb->h_next_leaf_blk = 0;
|
|
status = ocfs2_journal_dirty(handle, last_eb_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
/* if our tree depth > 0, update all the tree blocks below us. */
|
|
while (depth) {
|
|
mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
|
|
depth, (unsigned long long)next_eb);
|
|
status = ocfs2_read_block(osb, next_eb, &eb_bh,
|
|
OCFS2_BH_CACHED, inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *)eb_bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
el = &(eb->h_list);
|
|
|
|
status = ocfs2_journal_access(handle, inode, eb_bh,
|
|
OCFS2_JOURNAL_ACCESS_WRITE);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
|
|
BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));
|
|
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
|
|
mlog(0, "extent block %llu, before: record %d: "
|
|
"(%u, %u, %llu), next = %u\n",
|
|
(unsigned long long)le64_to_cpu(eb->h_blkno), i,
|
|
le32_to_cpu(el->l_recs[i].e_cpos),
|
|
le32_to_cpu(el->l_recs[i].e_clusters),
|
|
(unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
|
|
le16_to_cpu(el->l_next_free_rec));
|
|
|
|
BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
|
|
le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
|
|
|
|
next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
|
|
/* bottom-most block requires us to delete data.*/
|
|
if (!el->l_tree_depth)
|
|
delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
|
|
+ ocfs2_clusters_to_blocks(osb->sb,
|
|
le32_to_cpu(el->l_recs[i].e_clusters));
|
|
if (!el->l_recs[i].e_clusters) {
|
|
el->l_recs[i].e_cpos = 0;
|
|
el->l_recs[i].e_blkno = 0;
|
|
BUG_ON(!el->l_next_free_rec);
|
|
le16_add_cpu(&el->l_next_free_rec, -1);
|
|
}
|
|
mlog(0, "extent block %llu, after: record %d: "
|
|
"(%u, %u, %llu), next = %u\n",
|
|
(unsigned long long)le64_to_cpu(eb->h_blkno), i,
|
|
le32_to_cpu(el->l_recs[i].e_cpos),
|
|
le32_to_cpu(el->l_recs[i].e_clusters),
|
|
(unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
|
|
le16_to_cpu(el->l_next_free_rec));
|
|
|
|
status = ocfs2_journal_dirty(handle, eb_bh);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
if (!el->l_next_free_rec) {
|
|
mlog(0, "deleting this extent block.\n");
|
|
|
|
ocfs2_remove_from_cache(inode, eb_bh);
|
|
|
|
BUG_ON(el->l_recs[0].e_clusters);
|
|
BUG_ON(el->l_recs[0].e_cpos);
|
|
BUG_ON(el->l_recs[0].e_blkno);
|
|
if (eb->h_suballoc_slot == 0) {
|
|
/*
|
|
* This code only understands how to
|
|
* lock the suballocator in slot 0,
|
|
* which is fine because allocation is
|
|
* only ever done out of that
|
|
* suballocator too. A future version
|
|
* might change that however, so avoid
|
|
* a free if we don't know how to
|
|
* handle it. This way an fs incompat
|
|
* bit will not be necessary.
|
|
*/
|
|
status = ocfs2_free_extent_block(handle,
|
|
tc->tc_ext_alloc_inode,
|
|
tc->tc_ext_alloc_bh,
|
|
eb);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
}
|
|
brelse(eb_bh);
|
|
eb_bh = NULL;
|
|
depth--;
|
|
}
|
|
|
|
BUG_ON(!delete_blk);
|
|
status = ocfs2_truncate_log_append(osb, handle, delete_blk,
|
|
clusters_to_del);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
status = 0;
|
|
bail:
|
|
if (!status)
|
|
ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
|
|
else
|
|
ocfs2_extent_map_drop(inode, 0);
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* It is expected, that by the time you call this function,
|
|
* inode->i_size and fe->i_size have been adjusted.
|
|
*
|
|
* WARNING: This will kfree the truncate context
|
|
*/
|
|
int ocfs2_commit_truncate(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct ocfs2_truncate_context *tc)
|
|
{
|
|
int status, i, credits, tl_sem = 0;
|
|
u32 clusters_to_del, target_i_clusters;
|
|
u64 last_eb = 0;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
struct buffer_head *last_eb_bh;
|
|
handle_t *handle = NULL;
|
|
struct inode *tl_inode = osb->osb_tl_inode;
|
|
|
|
mlog_entry_void();
|
|
|
|
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
|
|
|
target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
|
|
i_size_read(inode));
|
|
|
|
last_eb_bh = tc->tc_last_eb_bh;
|
|
tc->tc_last_eb_bh = NULL;
|
|
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
|
|
if (fe->id2.i_list.l_tree_depth) {
|
|
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
el = &eb->h_list;
|
|
} else
|
|
el = &fe->id2.i_list;
|
|
last_eb = le64_to_cpu(fe->i_last_eb_blk);
|
|
start:
|
|
mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
|
|
"last_eb = %llu, fe->i_last_eb_blk = %llu, "
|
|
"fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
|
|
le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
|
|
(unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
|
|
le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
|
|
|
|
if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
|
|
mlog(0, "last_eb changed!\n");
|
|
BUG_ON(!fe->id2.i_list.l_tree_depth);
|
|
last_eb = le64_to_cpu(fe->i_last_eb_blk);
|
|
/* i_last_eb_blk may have changed, read it if
|
|
* necessary. We don't have to worry about the
|
|
* truncate to zero case here (where there becomes no
|
|
* last_eb) because we never loop back after our work
|
|
* is done. */
|
|
if (last_eb_bh) {
|
|
brelse(last_eb_bh);
|
|
last_eb_bh = NULL;
|
|
}
|
|
|
|
status = ocfs2_read_block(osb, last_eb,
|
|
&last_eb_bh, OCFS2_BH_CACHED,
|
|
inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
el = &(eb->h_list);
|
|
}
|
|
|
|
/* by now, el will point to the extent list on the bottom most
|
|
* portion of this tree. */
|
|
i = le16_to_cpu(el->l_next_free_rec) - 1;
|
|
if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
|
|
clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
|
|
else
|
|
clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
|
|
le32_to_cpu(el->l_recs[i].e_cpos)) -
|
|
target_i_clusters;
|
|
|
|
mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
|
|
|
|
mutex_lock(&tl_inode->i_mutex);
|
|
tl_sem = 1;
|
|
/* ocfs2_truncate_log_needs_flush guarantees us at least one
|
|
* record is free for use. If there isn't any, we flush to get
|
|
* an empty truncate log. */
|
|
if (ocfs2_truncate_log_needs_flush(osb)) {
|
|
status = __ocfs2_flush_truncate_log(osb);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
|
|
fe, el);
|
|
handle = ocfs2_start_trans(osb, credits);
|
|
if (IS_ERR(handle)) {
|
|
status = PTR_ERR(handle);
|
|
handle = NULL;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
|
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
|
|
if (status < 0)
|
|
mlog_errno(status);
|
|
|
|
status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
|
|
last_eb_bh, handle, tc);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
mutex_unlock(&tl_inode->i_mutex);
|
|
tl_sem = 0;
|
|
|
|
ocfs2_commit_trans(osb, handle);
|
|
handle = NULL;
|
|
|
|
BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
|
|
if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
|
|
goto start;
|
|
bail:
|
|
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
|
|
|
ocfs2_schedule_truncate_log_flush(osb, 1);
|
|
|
|
if (tl_sem)
|
|
mutex_unlock(&tl_inode->i_mutex);
|
|
|
|
if (handle)
|
|
ocfs2_commit_trans(osb, handle);
|
|
|
|
if (last_eb_bh)
|
|
brelse(last_eb_bh);
|
|
|
|
/* This will drop the ext_alloc cluster lock for us */
|
|
ocfs2_free_truncate_context(tc);
|
|
|
|
mlog_exit(status);
|
|
return status;
|
|
}
|
|
|
|
|
|
/*
|
|
* Expects the inode to already be locked. This will figure out which
|
|
* inodes need to be locked and will put them on the returned truncate
|
|
* context.
|
|
*/
|
|
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
|
|
struct inode *inode,
|
|
struct buffer_head *fe_bh,
|
|
struct ocfs2_truncate_context **tc)
|
|
{
|
|
int status, metadata_delete;
|
|
unsigned int new_i_clusters;
|
|
struct ocfs2_dinode *fe;
|
|
struct ocfs2_extent_block *eb;
|
|
struct ocfs2_extent_list *el;
|
|
struct buffer_head *last_eb_bh = NULL;
|
|
struct inode *ext_alloc_inode = NULL;
|
|
struct buffer_head *ext_alloc_bh = NULL;
|
|
|
|
mlog_entry_void();
|
|
|
|
*tc = NULL;
|
|
|
|
new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
|
|
i_size_read(inode));
|
|
fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
|
|
|
mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
|
|
"%llu\n", fe->i_clusters, new_i_clusters,
|
|
(unsigned long long)fe->i_size);
|
|
|
|
if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
|
|
ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
|
|
"%u and size %llu whereas struct inode has "
|
|
"cluster count %u and size %llu which caused an "
|
|
"invalid truncate to %u clusters.",
|
|
(unsigned long long)le64_to_cpu(fe->i_blkno),
|
|
le32_to_cpu(fe->i_clusters),
|
|
(unsigned long long)le64_to_cpu(fe->i_size),
|
|
OCFS2_I(inode)->ip_clusters, i_size_read(inode),
|
|
new_i_clusters);
|
|
mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
|
|
*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
|
|
if (!(*tc)) {
|
|
status = -ENOMEM;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
metadata_delete = 0;
|
|
if (fe->id2.i_list.l_tree_depth) {
|
|
/* If we have a tree, then the truncate may result in
|
|
* metadata deletes. Figure this out from the
|
|
* rightmost leaf block.*/
|
|
status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
|
|
&last_eb_bh, OCFS2_BH_CACHED, inode);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
|
|
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
|
|
OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
|
|
|
|
brelse(last_eb_bh);
|
|
status = -EIO;
|
|
goto bail;
|
|
}
|
|
el = &(eb->h_list);
|
|
if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
|
|
metadata_delete = 1;
|
|
}
|
|
|
|
(*tc)->tc_last_eb_bh = last_eb_bh;
|
|
|
|
if (metadata_delete) {
|
|
mlog(0, "Will have to delete metadata for this trunc. "
|
|
"locking allocator.\n");
|
|
ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
|
|
if (!ext_alloc_inode) {
|
|
status = -ENOMEM;
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
|
|
mutex_lock(&ext_alloc_inode->i_mutex);
|
|
(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
|
|
|
|
status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1);
|
|
if (status < 0) {
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
|
|
(*tc)->tc_ext_alloc_locked = 1;
|
|
}
|
|
|
|
status = 0;
|
|
bail:
|
|
if (status < 0) {
|
|
if (*tc)
|
|
ocfs2_free_truncate_context(*tc);
|
|
*tc = NULL;
|
|
}
|
|
mlog_exit_void();
|
|
return status;
|
|
}
|
|
|
|
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
|
|
{
|
|
if (tc->tc_ext_alloc_inode) {
|
|
if (tc->tc_ext_alloc_locked)
|
|
ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
|
|
|
|
mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
|
|
iput(tc->tc_ext_alloc_inode);
|
|
}
|
|
|
|
if (tc->tc_ext_alloc_bh)
|
|
brelse(tc->tc_ext_alloc_bh);
|
|
|
|
if (tc->tc_last_eb_bh)
|
|
brelse(tc->tc_last_eb_bh);
|
|
|
|
kfree(tc);
|
|
}
|