* Added Levenshtein distance algorithm in r_diff
* Added corresponding test case
This commit is contained in:
parent
9b947d793c
commit
1048ed5ab0
|
@ -203,3 +203,40 @@ int r_diff_buffers(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Compute the Levenshtein (edit) distance between two byte buffers.
 *
 * Insertions, deletions and substitutions each cost 1.
 *
 * @param d          Diff context; not used by this function.
 * @param a          First buffer.
 * @param la         Length of `a` in bytes.
 * @param b          Second buffer.
 * @param lb         Length of `b` in bytes.
 * @param distance   If not NULL, receives the edit distance.
 * @param similarity If not NULL, receives 1 / (1 + distance), so identical
 *                   buffers yield 1.0 and the value shrinks towards 0 as
 *                   the buffers diverge.
 * @return R_TRUE on success; R_FALSE when either buffer is NULL or empty.
 */
int r_diff_buffers_distance(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb, u32 *distance, float *similarity)
{
	u32 *prev, *cur, *swap;
	u32 cost, best, result;
	size_t i, j, cells;

	(void)d;

	if (a == NULL || b == NULL || la < 1 || lb < 1)
		return R_FALSE;

	/* The distance is symmetric, so iterate rows over the longer buffer
	 * and keep the (stack-allocated) rows as short as possible. */
	if (lb > la) {
		const u8 *tb = a;
		u32 tl = la;
		a = b;
		la = lb;
		b = tb;
		lb = tl;
	}

	/* Each row needs lb + 1 cells: column 0 is the "empty prefix" case.
	 * Only the previous and the current row are ever read, so two rows
	 * replace the full (la + 1) x (lb + 1) matrix.
	 * NOTE(review): rows still live on the stack; very large inputs
	 * should be moved to heap allocation. */
	cells = (size_t)lb + 1;
	prev = alloca(cells * sizeof *prev);
	cur = alloca(cells * sizeof *cur);

	/* Row 0: turning an empty prefix of `a` into b[0..j) takes j inserts. */
	for (j = 0; j < cells; j++)
		prev[j] = (u32)j;

	for (i = 1; i <= la; i++) {
		/* Column 0: turning a[0..i) into an empty prefix takes i deletes. */
		cur[0] = (u32)i;
		for (j = 1; j <= lb; j++) {
			cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
			best = R_MIN(prev[j] + 1, cur[j - 1] + 1);
			cur[j] = R_MIN(best, prev[j - 1] + cost);
		}
		swap = prev;
		prev = cur;
		cur = swap;
	}

	/* After the final swap `prev` holds the last computed row. */
	result = prev[lb];

	if (distance != NULL)
		*distance = result;
	if (similarity != NULL)
		*similarity = (float)(1.0 / (1.0 + (double)result));

	return R_TRUE;
}
|
||||
|
|
|
@ -56,6 +56,22 @@ int test_delta()
|
|||
return 1;
|
||||
}
|
||||
|
||||
int test_distance()
|
||||
{
|
||||
struct r_diff_t d;
|
||||
char *bufa = "hello";
|
||||
char *bufb = "heprpworld";
|
||||
u32 distance = 0;
|
||||
float similarity = 0;
|
||||
|
||||
printf("Similarity: '%s' vs '%s'\n", bufa, bufb);
|
||||
r_diff_buffers_distance(NULL, bufa, strlen(bufa), bufb, strlen(bufb),
|
||||
&distance, &similarity);
|
||||
printf("Levenshtein distance = %i\nSimilarity = %f\n",
|
||||
distance, similarity);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
test_equal();
|
||||
|
@ -65,6 +81,8 @@ int main()
|
|||
test_diff();
|
||||
printf("--\n");
|
||||
test_delta();
|
||||
printf("--\n");
|
||||
test_distance();
|
||||
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -32,5 +32,8 @@ int r_diff_buffers(struct r_diff_t *d, const u8 *a, u32 la, const u8 *b, u32 lb)
|
|||
/*
 * Takes a diff context `d`, a callback that receives (d, user, op), and an
 * opaque `user` pointer.
 * NOTE(review): presumably registers `callback` on `d` so it is invoked for
 * each diff operation with `user` passed through -- confirm against the
 * definition, which is not visible here.
 */
int r_diff_set_callback(struct r_diff_t *d,
|
||||
int (*callback)(struct r_diff_t *d, void *user, struct r_diff_op_t *op),
|
||||
void *user);
|
||||
/*
 * Compute the Levenshtein (edit) distance between buffer `a` (`la` bytes)
 * and buffer `b` (`lb` bytes).
 *
 * distance:   if not NULL, receives the edit distance.
 * similarity: if not NULL, receives 1 / (1 + distance).
 *
 * Returns R_TRUE on success, R_FALSE when either length is zero.
 */
int r_diff_buffers_distance(struct r_diff_t *d,
|
||||
const u8 *a, u32 la, const u8 *b, u32 lb, u32 *distance,
|
||||
float *similarity);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue