First cut of a libsolv-style string <-> id pool API

- The pool stores "arbitrary" number of strings in a space-efficient
  manner, with near constant (hashed) string -> id lookup/store and
  constant time id -> string and id -> string length lookups.
- Credits for the idea go to the Suse developers working on libsolv,
  the basic concept is directly lifted from there but details
  differ due to using rpm's own hash table implementation etc.
  Another minor difference is using size_t for offsets to permit over
  4GB total data size on 64bit systems, the total number of id's in
  the pool is limited to uint32 max however (like in libsolv).
- Any (re)implementation bugs by yours truly, this is almost certainly
  going to need further tuning and tweaking, API and otherwise.
This commit is contained in:
Panu Matilainen 2012-09-07 10:33:22 +03:00
parent a466227a2f
commit 9e47043b2d
5 changed files with 203 additions and 1 deletions

View File

@ -58,6 +58,7 @@ pkginclude_HEADERS += rpmio/rpmlog.h
pkginclude_HEADERS += rpmio/rpmpgp.h
pkginclude_HEADERS += rpmio/rpmsq.h
pkginclude_HEADERS += rpmio/rpmstring.h
pkginclude_HEADERS += rpmio/rpmstrpool.h
pkginclude_HEADERS += rpmio/rpmsw.h
pkginclude_HEADERS += rpmio/rpmfileutil.h
pkginclude_HEADERS += rpmio/rpmutil.h

View File

@ -30,6 +30,10 @@ include/rpm/rpmstring.h: rpmio/rpmstring.h include/rpm/$(dirstamp)
$(INSTALL_DATA) $(top_srcdir)/rpmio/rpmstring.h include/rpm/rpmstring.h
BUILT_SOURCES += include/rpm/rpmstring.h
CLEANFILES += include/rpm/rpmstring.h
include/rpm/rpmstrpool.h: rpmio/rpmstrpool.h include/rpm/$(dirstamp)
$(INSTALL_DATA) $(top_srcdir)/rpmio/rpmstrpool.h include/rpm/rpmstrpool.h
BUILT_SOURCES += include/rpm/rpmstrpool.h
CLEANFILES += include/rpm/rpmstrpool.h
include/rpm/rpmsw.h: rpmio/rpmsw.h include/rpm/$(dirstamp)
$(INSTALL_DATA) $(top_srcdir)/rpmio/rpmsw.h include/rpm/rpmsw.h
BUILT_SOURCES += include/rpm/rpmsw.h

View File

@ -15,7 +15,7 @@ librpmio_la_SOURCES = \
rpmpgp.c rpmsq.c rpmsw.c url.c \
rpmio_internal.h rpmhook.h \
rpmstring.c rpmfileutil.c rpmglob.c \
rpmkeyring.c
rpmkeyring.c rpmstrpool.c
librpmio_la_SOURCES += digest_nss.c

153
rpmio/rpmstrpool.c Normal file
View File

@ -0,0 +1,153 @@
#include "system.h"
#include <rpm/rpmstring.h>
#include <rpm/rpmstrpool.h>
#include "debug.h"
#define HASHTYPE strHash
#define HTKEYTYPE const char *
#define HTDATATYPE rpmsid
#include "lib/rpmhash.H"
#include "lib/rpmhash.C"
#undef HASHTYPE
#undef HTKEYTYPE
#undef HTDATATYPE
#define STRDATA_CHUNK 65536
#define STROFFS_CHUNK 2048
struct rpmstrPool_s {
size_t * offs; /* offsets into data area */
rpmsid offs_size; /* largest offset index */;
rpmsid offs_alloced; /* offsets allocation size */
char * data; /* string data area */
size_t data_size; /* string data area size */
size_t data_alloced; /* string data area allocation size */
strHash hash; /* string -> sid hash table */
int nrefs; /* refcount */
};
rpmstrPool rpmstrPoolCreate(void)
{
rpmstrPool pool = xcalloc(1, sizeof(*pool));
pool->hash = strHashCreate(5, rstrhash, strcmp, NULL, NULL);
pool->nrefs = 1;
return pool;
}
rpmstrPool rpmstrPoolFree(rpmstrPool pool)
{
if (pool) {
if (pool->nrefs > 1) {
pool->nrefs--;
} else {
strHashFree(pool->hash);
free(pool->offs);
free(pool->data);
free(pool);
}
}
return NULL;
}
rpmstrPool rpmstrPoolLink(rpmstrPool pool)
{
if (pool)
pool->nrefs++;
return pool;
}
void rpmstrPoolFreeze(rpmstrPool pool)
{
if (pool) {
pool->hash = strHashFree(pool->hash);
pool->data_alloced = pool->data_size;
pool->data = xrealloc(pool->data, pool->data_alloced);
pool->offs_alloced = pool->offs_size + 1;
pool->offs = xrealloc(pool->offs,
pool->offs_alloced * sizeof(*pool->offs));
}
}
void rpmstrPoolUnfreeze(rpmstrPool pool)
{
if (pool) {
pool->hash = strHashCreate((pool->offs_size / 2) - 1,
rstrhash, strcmp, NULL, NULL);
for (int i = 1; i < pool->offs_size; i++) {
strHashAddEntry(pool->hash, rpmstrPoolStr(pool, i), i);
}
}
}
static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash)
{
const char *t = NULL;
size_t ssize = slen + 1;
if (ssize > pool->data_alloced - pool->data_size) {
size_t need = pool->data_size + ssize;
size_t alloced = pool->data_alloced;
while (alloced < need)
alloced += STRDATA_CHUNK;
pool->data = xrealloc(pool->data, alloced);
pool->data_alloced = alloced;
}
pool->offs_size += 1;
if (pool->offs_alloced < pool->offs_size) {
pool->offs_alloced += STROFFS_CHUNK;
pool->offs = xrealloc(pool->offs,
pool->offs_alloced * sizeof(*pool->offs));
}
t = memcpy(pool->data + pool->data_size, s, ssize);
pool->offs[pool->offs_size] = pool->data_size;
pool->data_size += ssize;
strHashAddHEntry(pool->hash, t, hash, pool->offs_size);
return pool->offs_size;
}
rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create)
{
rpmsid sid = 0;
if (pool && s) {
unsigned int hash = strHashKeyHash(pool->hash, s);
rpmsid *sids;
if (strHashGetHEntry(pool->hash, s, hash, &sids, NULL, NULL)) {
sid = sids[0];
} else if (create && pool->hash) {
sid = rpmstrPoolPut(pool, s, slen, hash);
}
}
return sid;
}
rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create)
{
return rpmstrPoolIdn(pool, s, strlen(s), create);
}
const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid)
{
const char *s = NULL;
if (pool && sid <= pool->offs_size)
s = pool->data + pool->offs[sid];
return s;
}
size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid)
{
size_t slen = 0;
if (pool && sid <= pool->offs_size) {
size_t end = (sid < pool->offs_size) ? pool->offs[sid + 1] :
pool->offs_size;
slen = end - pool->offs[sid] - 1;
}
return slen;
}

44
rpmio/rpmstrpool.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef _RPMSTRPOOL_H
#define _RPMSTRPOOL_H
typedef uint32_t rpmsid;
typedef struct rpmstrPool_s * rpmstrPool;
#ifdef __cplusplus
extern "C" {
#endif
/* XXX TODO: properly document... */
/* create a new string pool */
rpmstrPool rpmstrPoolCreate(void);
/* destroy a string pool (refcounted) */
rpmstrPool rpmstrPoolFree(rpmstrPool sidpool);
/* reference a string pool */
rpmstrPool rpmstrPoolLink(rpmstrPool sidpool);
/* freeze pool to free memory */
void rpmstrPoolFreeze(rpmstrPool sidpool);
/* unfreeze pool (ie recreate hash table) */
void rpmstrPoolUnfreeze(rpmstrPool sidpool);
/* get the id of a string, optionally storing if not already present */
rpmsid rpmstrPoolId(rpmstrPool sidpool, const char *s, int create);
/* get the id of a string + length, optionally storing if not already present */
rpmsid rpmstrPoolIdn(rpmstrPool sidpool, const char *s, size_t slen, int create);
/* get a string by its id */
const char * rpmstrPoolStr(rpmstrPool sidpool, rpmsid sid);
/* get a strings length by its id (in constant time) */
size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid);
#ifdef __cplusplus
}
#endif
#endif /* _RPMSIDPOOL_H */