* Added Levenshtein distance algorithm in r_diff

* Added corresponding test case
This commit is contained in:
Nibble 2009-03-31 16:52:58 +02:00
parent 9b947d793c
commit 1048ed5ab0
3 changed files with 58 additions and 0 deletions

View File

@ -203,3 +203,40 @@ int r_diff_buffers(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb)
return ret;
}
/*
 * Compute the Levenshtein (edit) distance between buffers a and b.
 *
 * d          - unused by this function; kept for API symmetry with the
 *              other r_diff_* entry points
 * a, la      - first buffer and its length in bytes
 * b, lb      - second buffer and its length in bytes
 * distance   - optional out: number of single-byte insertions, deletions
 *              and substitutions needed to turn a into b
 * similarity - optional out: 1 / (1 + distance), i.e. 1.0 for identical
 *              buffers, tending towards 0 as they diverge
 *
 * Returns R_TRUE on success, R_FALSE if a buffer is NULL or empty.
 *
 * Only two rows of the dynamic-programming matrix are kept alive, each
 * holding lb + 1 cells (columns 0..lb inclusive).  The rows still live on
 * the stack (alloca), so very large lb values can exhaust it.
 */
int r_diff_buffers_distance(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb, u32 *distance, float *similarity)
{
	u32 i, j;
	int cost, tmin, *prev, *curr, *tmp;

	(void)d;
	if (a == NULL || b == NULL || la < 1 || lb < 1)
		return R_FALSE;

	/* lb + 1 cells per row; computed in size_t so that lb + 1 cannot wrap
	 * in u32.  alloca() has no failure return, so there is nothing to
	 * check here. */
	prev = alloca(sizeof(int) * lb + sizeof(int));
	curr = alloca(sizeof(int) * lb + sizeof(int));

	/* Row 0: turning an empty prefix of a into b[0..j) costs j inserts. */
	for (j = 0; j <= lb; j++)
		prev[j] = (int)j;

	for (i = 1; i <= la; i++) {
		/* Column 0: turning a[0..i) into an empty string costs i deletes. */
		curr[0] = (int)i;
		for (j = 1; j <= lb; j++) {
			cost = (a[i-1] == b[j-1]) ? 0 : 1;
			/* deletion vs. insertion ... */
			tmin = R_MIN(prev[j] + 1, curr[j-1] + 1);
			/* ... vs. substitution (or match when cost == 0) */
			curr[j] = R_MIN(tmin, prev[j-1] + cost);
		}
		/* The row just filled becomes the "previous" row. */
		tmp = prev;
		prev = curr;
		curr = tmp;
	}

	/* After the final swap, prev holds row la. */
	if (distance != NULL)
		*distance = prev[lb];
	if (similarity != NULL)
		*similarity = 1.0/(1.0+prev[lb]);
	return R_TRUE;
}

View File

@ -56,6 +56,22 @@ int test_delta()
return 1;
}
int test_distance()
{
struct r_diff_t d;
char *bufa = "hello";
char *bufb = "heprpworld";
u32 distance = 0;
float similarity = 0;
printf("Similarity: '%s' vs '%s'\n", bufa, bufb);
r_diff_buffers_distance(NULL, bufa, strlen(bufa), bufb, strlen(bufb),
&distance, &similarity);
printf("Levenshtein distance = %i\nSimilarity = %f\n",
distance, similarity);
return 1;
}
int main()
{
test_equal();
@ -65,6 +81,8 @@ int main()
test_diff();
printf("--\n");
test_delta();
printf("--\n");
test_distance();
return 0;

View File

@ -32,5 +32,8 @@ int r_diff_buffers(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb)
/*
 * Declaration only; the definition is not visible from this header.
 * NOTE(review): presumably associates `callback` and the opaque `user`
 * pointer with `d`, so the callback is invoked with each r_diff_op_t
 * produced by a diff -- confirm against the implementation.
 */
int r_diff_set_callback(struct r_diff_t *d,
int (*callback)(struct r_diff_t *d, void *user, struct r_diff_op_t *op),
void *user);
/*
 * Compute the Levenshtein (edit) distance between buffer a (la bytes)
 * and buffer b (lb bytes).
 *
 * d is not used by the computation.  distance and similarity are optional
 * outputs and may each be NULL; similarity is 1 / (1 + distance).
 *
 * Returns R_TRUE on success, R_FALSE if la or lb is 0.
 */
int r_diff_buffers_distance(struct r_diff_t *d,
const u8 *a, u32 la, const u8 *b, u32 lb, u32 *distance,
float *similarity);
#endif