Drop the end-of-chunk dummy entries from string pool

- As pointed out by Michael Schroeder in
  http://lists.rpm.org/pipermail/rpm-maint/2013-September/003605.html,
  the dummy entries used for optimizing rpmstrPoolStrlen() are
  problematic in a number of ways:
  - Walking the ids in a pool is unreliable, and rehashing can cause
    bogus empty strings to be added to a pool where they otherwise
    do not exist
  - rpmstrPoolNumStr() is not accurate when more than one chunk is in use
- Unfortunately this means giving up the rpmstrPoolStrlen() optimization,
  for now at least.
This commit is contained in:
Panu Matilainen 2013-12-02 10:45:33 +02:00
parent 43745e88e5
commit cfe99e08ad
2 changed files with 7 additions and 11 deletions

View File

@ -299,9 +299,8 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne
size_t chunk_used;
pool->offs_size += 1;
/* need one extra for end of string */
/* and one extra to mark the end of the chunk */
if (pool->offs_alloced <= pool->offs_size + 2) {
/* Need one extra for storing the starting point of next string */
if (pool->offs_alloced <= pool->offs_size + 1) {
pool->offs_alloced += STROFFS_CHUNK;
pool->offs = xrealloc(pool->offs,
pool->offs_alloced * sizeof(*pool->offs));
@ -322,9 +321,6 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne
pool->chunk_allocated = 2 * ssize;
}
/* Dummy entry for end of last string*/
pool->offs_size += 1;
pool->offs[pool->offs_size] = xcalloc(1, pool->chunk_allocated);
pool->chunks[pool->chunks_size] = pool->offs[pool->offs_size];
}
@ -406,8 +402,8 @@ const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid)
size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid)
{
size_t slen = 0;
if (pool && sid <= pool->offs_size) {
slen = pool->offs[sid+1] - pool->offs[sid] - 1;
if (pool && sid > 0 && sid <= pool->offs_size) {
slen = strlen(pool->offs[sid]);
}
return slen;
}

View File

@ -80,11 +80,11 @@ const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid);
/** \ingroup rpmstrpool
* Return length of a string by its pool id. The result is equal to
* calling strlen() on a string retrieved through rpmstrPoolStr() but
* runs in constant time regardless of the length of the string.
* calling strlen() on a string retrieved through rpmstrPoolStr(), but
* the pool might be able to optimize the calculation.
* @param pool string pool
* @param sid pool id of a string
* @return length of the string
* @return length of the string, 0 for invalid pool or id
*/
size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid);