272 lines
9.0 KiB
C
272 lines
9.0 KiB
C
/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
|
|
*
|
|
* librsync -- the library for network deltas
|
|
* Id: scoop.c,v 1.24 2001/03/18 10:51:55 mbp Exp
|
|
*
|
|
* Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public License
|
|
* as published by the Free Software Foundation; either version 2.1 of
|
|
* the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
/*
|
|
* scoop.c -- This file deals with readahead from caller-supplied
|
|
* buffers.
|
|
*
|
|
* Many functions require a certain minimum amount of input to do their
|
|
* processing. For example, to calculate a strong checksum of a block
|
|
* we need at least a block of input.
|
|
*
|
|
* Since we put the buffers completely under the control of the caller,
|
|
* we can't count on ever getting this much data all in one go. We
|
|
* can't simply wait, because the caller might have a smaller buffer
|
|
* than we require and so we'll never get it. For the same reason we
|
|
* must always accept all the data we're given.
|
|
*
|
|
* So, stream input data that's required for readahead is put into a
|
|
* special buffer, from which the caller can then read. It's
|
|
* essentially like an internal pipe, which on any given read request
|
|
* may or may not be able to actually supply the data.
|
|
*
|
|
* As a future optimization, we might try to take data directly from the
|
|
* input buffer if there's already enough there.
|
|
*/
|
|
|
|
/*
|
|
* TODO: We probably know a maximum amount of data that can be scooped
|
|
* up, so we could just avoid dynamic allocation. However that can't
|
|
* be fixed at compile time, because when generating a delta it needs
|
|
* to be large enough to hold one full block. Perhaps we can set it
|
|
* up when the job is allocated? It would be kind of nice to not do
|
|
* any memory allocation after startup, as bzlib does this.
|
|
*/
|
|
|
|
|
|
/*
|
|
| To walk on water you've gotta sink
|
|
| in the ice.
|
|
| -- Shihad, `The General Electric'.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "rsync.h"
|
|
#include "job.h"
|
|
#include "stream.h"
|
|
#include "trace.h"
|
|
#include "util.h"
|
|
|
|
|
|
/* Debugging toggle, disabled by default.  Changing the condition to
 * "#if 1" redefines rs_trace to expand to nothing for the rest of this
 * file, which compiles out every trace call below.  The "s..." parameter
 * is the GNU C named-variadic-macro spelling. */
#if 0
|
|
# undef rs_trace
|
|
# define rs_trace(s...)
|
|
#endif
|
|
|
|
|
|
/**
|
|
* Try to accept a from the input buffer to get LEN bytes in the scoop.
|
|
*/
|
|
void rs_scoop_input(rs_job_t *job, size_t len)
|
|
{
|
|
rs_buffers_t *stream = job->stream;
|
|
size_t tocopy;
|
|
|
|
assert(len > job->scoop_avail);
|
|
|
|
if (job->scoop_alloc < len) {
|
|
/* need to allocate a new buffer, too */
|
|
char *newbuf;
|
|
int newsize = 2 * len;
|
|
newbuf = rs_alloc(newsize, "scoop buffer");
|
|
if (job->scoop_avail)
|
|
memcpy(newbuf, job->scoop_next, job->scoop_avail);
|
|
if (job->scoop_buf)
|
|
free(job->scoop_buf);
|
|
job->scoop_buf = job->scoop_next = newbuf;
|
|
rs_trace("resized scoop buffer to %.0f bytes from %.0f",
|
|
(double) newsize, (double) job->scoop_alloc);
|
|
job->scoop_alloc = newsize;
|
|
} else {
|
|
/* this buffer size is fine, but move the existing
|
|
* data down to the front. */
|
|
memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
|
|
job->scoop_next = job->scoop_buf;
|
|
}
|
|
|
|
/* take as much input as is available, to give up to LEN bytes
|
|
* in the scoop. */
|
|
tocopy = len - job->scoop_avail;
|
|
if (tocopy > stream->avail_in)
|
|
tocopy = stream->avail_in;
|
|
assert(tocopy + job->scoop_avail <= job->scoop_alloc);
|
|
|
|
memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
|
|
rs_trace("accepted %.0f bytes from input to scoop", (double) tocopy);
|
|
job->scoop_avail += tocopy;
|
|
stream->next_in += tocopy;
|
|
stream->avail_in -= tocopy;
|
|
}
|
|
|
|
|
|
/**
|
|
* Advance the input cursor forward \p len bytes. This is used after
|
|
* doing readahead, when you decide you want to keep it. \p len must
|
|
* be no more than the amount of available data, so you can't cheat.
|
|
*
|
|
* So when creating a delta, we require one block of readahead. But
|
|
* after examining that block, we might decide to advance over all of
|
|
* it (if there is a match), or just one byte (if not).
|
|
*/
|
|
void rs_scoop_advance(rs_job_t *job, size_t len)
|
|
{
|
|
rs_buffers_t *stream = job->stream;
|
|
|
|
/* It never makes sense to advance over a mixture of bytes from
|
|
* the scoop and input, because you couldn't possibly have looked
|
|
* at them all at the same time. */
|
|
if (job->scoop_avail) {
|
|
/* reading from the scoop buffer */
|
|
rs_trace("advance over %d bytes from scoop", len);
|
|
assert(len <= job->scoop_avail);
|
|
job->scoop_avail -= len;
|
|
job->scoop_next += len;
|
|
} else {
|
|
rs_trace("advance over %d bytes from input buffer", len);
|
|
assert(len <= stream->avail_in);
|
|
stream->avail_in -= len;
|
|
stream->next_in += len;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* \brief Read from scoop without advancing.
|
|
*
|
|
* Ask for LEN bytes of input from the stream. If that much data is
|
|
* available, then return a pointer to it in PTR, advance the stream
|
|
* input pointer over the data, and return RS_DONE. If there's not
|
|
* enough data, then accept whatever is there into a buffer, advance
|
|
* over it, and return RS_BLOCKED.
|
|
*
|
|
* The data is not actually removed from the input, so this function
|
|
* lets you do readahead. If you want to keep any of the data, you
|
|
* should also call rs_scoop_advance() to skip over it.
|
|
*/
|
|
rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
|
|
{
|
|
rs_buffers_t *stream = job->stream;
|
|
rs_job_check(job);
|
|
|
|
if (job->scoop_avail >= len) {
|
|
/* We have enough data queued to satisfy the request,
|
|
* so go straight from the scoop buffer. */
|
|
rs_trace("got %.0f bytes direct from scoop", (double) len);
|
|
*ptr = job->scoop_next;
|
|
return RS_DONE;
|
|
} else if (job->scoop_avail) {
|
|
/* We have some data in the scoop, but not enough to
|
|
* satisfy the request. */
|
|
rs_trace("data is present in the scoop and must be used");
|
|
rs_scoop_input(job, len);
|
|
|
|
if (job->scoop_avail < len) {
|
|
rs_trace("still have only %.0f bytes in scoop",
|
|
(double) job->scoop_avail);
|
|
return RS_BLOCKED;
|
|
} else {
|
|
rs_trace("scoop now has %.0f bytes, this is enough",
|
|
(double) job->scoop_avail);
|
|
*ptr = job->scoop_next;
|
|
return RS_DONE;
|
|
}
|
|
} else if (stream->avail_in >= len) {
|
|
/* There's enough data in the stream's input */
|
|
*ptr = stream->next_in;
|
|
rs_trace("got %.0f bytes from input buffer", (double) len);
|
|
return RS_DONE;
|
|
} else if (stream->avail_in > 0) {
|
|
/* Nothing was queued before, but we don't have enough
|
|
* data to satisfy the request. So queue what little
|
|
* we have, and try again next time. */
|
|
rs_trace("couldn't satisfy request for %.0f, scooping %.0f bytes",
|
|
(double) len, (double) job->scoop_avail);
|
|
rs_scoop_input(job, len);
|
|
return RS_BLOCKED;
|
|
} else if (stream->eof_in) {
|
|
/* Nothing is queued before, and nothing is in the input
|
|
* buffer at the moment. */
|
|
rs_trace("reached end of input stream");
|
|
return RS_INPUT_ENDED;
|
|
} else {
|
|
/* Nothing queued at the moment. */
|
|
rs_trace("blocked with no data in scoop or input buffer");
|
|
return RS_BLOCKED;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* Read LEN bytes if possible, and remove them from the input scoop.
|
|
* If there's not enough data yet, return RS_BLOCKED.
|
|
*
|
|
* \param ptr will be updated to point to a read-only buffer holding
|
|
* the data, if enough is available.
|
|
*
|
|
* \return RS_DONE if all the data was available, RS_BLOCKED if it's
|
|
* not there.
|
|
*/
|
|
rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
|
|
{
|
|
rs_result result;
|
|
|
|
result = rs_scoop_readahead(job, len, ptr);
|
|
if (result == RS_DONE)
|
|
rs_scoop_advance(job, len);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* Read whatever remains in the input stream, assuming that it runs up
|
|
* to the end of the file. Set LEN appropriately.
|
|
*/
|
|
rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
|
|
{
|
|
rs_buffers_t *stream = job->stream;
|
|
|
|
*len = job->scoop_avail + stream->avail_in;
|
|
|
|
return rs_scoop_read(job, *len, ptr);
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* Return the total number of bytes available including the scoop and input
|
|
* buffer.
|
|
*/
|
|
size_t rs_scoop_total_avail(rs_job_t *job)
|
|
{
|
|
return job->scoop_avail + job->stream->avail_in;
|
|
}
|