udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Instead of implementing our own functions converting to and from UTF-8, use the ones provided by NLS. Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
parent
b8333ea1ad
commit
b8a41c44a4
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
#include "udf_sb.h"
|
#include "udf_sb.h"
|
||||||
|
|
||||||
|
#define UNICODE_MAX 0x10ffff
|
||||||
#define SURROGATE_MASK 0xfffff800
|
#define SURROGATE_MASK 0xfffff800
|
||||||
#define SURROGATE_PAIR 0x0000d800
|
#define SURROGATE_PAIR 0x0000d800
|
||||||
|
|
||||||
|
@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
|
||||||
if (boundlen <= 0)
|
if (boundlen <= 0)
|
||||||
return -ENAMETOOLONG;
|
return -ENAMETOOLONG;
|
||||||
|
|
||||||
if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
|
u_len = utf32_to_utf8(uni, out, boundlen);
|
||||||
|
if (u_len < 0) {
|
||||||
|
if (uni > UNICODE_MAX ||
|
||||||
|
(uni & SURROGATE_MASK) == SURROGATE_PAIR)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (uni < 0x80) {
|
|
||||||
out[u_len++] = (unsigned char)uni;
|
|
||||||
} else if (uni < 0x800) {
|
|
||||||
if (boundlen < 2)
|
|
||||||
return -ENAMETOOLONG;
|
return -ENAMETOOLONG;
|
||||||
out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
|
|
||||||
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
|
|
||||||
} else {
|
|
||||||
if (boundlen < 3)
|
|
||||||
return -ENAMETOOLONG;
|
|
||||||
out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
|
|
||||||
out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
|
|
||||||
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
|
|
||||||
}
|
}
|
||||||
return u_len;
|
return u_len;
|
||||||
}
|
}
|
||||||
|
@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
|
||||||
int boundlen,
|
int boundlen,
|
||||||
wchar_t *uni)
|
wchar_t *uni)
|
||||||
{
|
{
|
||||||
unsigned int utf_char;
|
int u_len;
|
||||||
unsigned char c;
|
unicode_t c;
|
||||||
int utf_cnt, u_len;
|
|
||||||
|
|
||||||
utf_char = 0;
|
u_len = utf8_to_utf32(in, boundlen, &c);
|
||||||
utf_cnt = 0;
|
if (u_len < 0) {
|
||||||
for (u_len = 0; u_len < boundlen;) {
|
|
||||||
c = in[u_len++];
|
|
||||||
|
|
||||||
/* Complete a multi-byte UTF-8 character */
|
|
||||||
if (utf_cnt) {
|
|
||||||
utf_char = (utf_char << 6) | (c & 0x3f);
|
|
||||||
if (--utf_cnt)
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
/* Check for a multi-byte UTF-8 character */
|
|
||||||
if (c & 0x80) {
|
|
||||||
/* Start a multi-byte UTF-8 character */
|
|
||||||
if ((c & 0xe0) == 0xc0) {
|
|
||||||
utf_char = c & 0x1f;
|
|
||||||
utf_cnt = 1;
|
|
||||||
} else if ((c & 0xf0) == 0xe0) {
|
|
||||||
utf_char = c & 0x0f;
|
|
||||||
utf_cnt = 2;
|
|
||||||
} else if ((c & 0xf8) == 0xf0) {
|
|
||||||
utf_char = c & 0x07;
|
|
||||||
utf_cnt = 3;
|
|
||||||
} else if ((c & 0xfc) == 0xf8) {
|
|
||||||
utf_char = c & 0x03;
|
|
||||||
utf_cnt = 4;
|
|
||||||
} else if ((c & 0xfe) == 0xfc) {
|
|
||||||
utf_char = c & 0x01;
|
|
||||||
utf_cnt = 5;
|
|
||||||
} else {
|
|
||||||
utf_cnt = -1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
/* Single byte UTF-8 character (most common) */
|
|
||||||
utf_char = c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*uni = utf_char;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (utf_cnt) {
|
|
||||||
*uni = '?';
|
*uni = '?';
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c > MAX_WCHAR_T)
|
||||||
|
*uni = '?';
|
||||||
|
else
|
||||||
|
*uni = c;
|
||||||
return u_len;
|
return u_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue