udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Instead of implementing our own functions converting to and from UTF-8, use the ones provided by NLS.

Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
parent
b8333ea1ad
commit
b8a41c44a4
|
@ -28,6 +28,7 @@
|
|||
|
||||
#include "udf_sb.h"
|
||||
|
||||
#define UNICODE_MAX 0x10ffff
|
||||
#define SURROGATE_MASK 0xfffff800
|
||||
#define SURROGATE_PAIR 0x0000d800
|
||||
|
||||
|
@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
|
|||
if (boundlen <= 0)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
|
||||
return -EINVAL;
|
||||
|
||||
if (uni < 0x80) {
|
||||
out[u_len++] = (unsigned char)uni;
|
||||
} else if (uni < 0x800) {
|
||||
if (boundlen < 2)
|
||||
return -ENAMETOOLONG;
|
||||
out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
|
||||
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
|
||||
} else {
|
||||
if (boundlen < 3)
|
||||
return -ENAMETOOLONG;
|
||||
out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
|
||||
out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
|
||||
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
|
||||
u_len = utf32_to_utf8(uni, out, boundlen);
|
||||
if (u_len < 0) {
|
||||
if (uni > UNICODE_MAX ||
|
||||
(uni & SURROGATE_MASK) == SURROGATE_PAIR)
|
||||
return -EINVAL;
|
||||
return -ENAMETOOLONG;
|
||||
}
|
||||
return u_len;
|
||||
}
|
||||
|
@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
|
|||
int boundlen,
|
||||
wchar_t *uni)
|
||||
{
|
||||
unsigned int utf_char;
|
||||
unsigned char c;
|
||||
int utf_cnt, u_len;
|
||||
int u_len;
|
||||
unicode_t c;
|
||||
|
||||
utf_char = 0;
|
||||
utf_cnt = 0;
|
||||
for (u_len = 0; u_len < boundlen;) {
|
||||
c = in[u_len++];
|
||||
|
||||
/* Complete a multi-byte UTF-8 character */
|
||||
if (utf_cnt) {
|
||||
utf_char = (utf_char << 6) | (c & 0x3f);
|
||||
if (--utf_cnt)
|
||||
continue;
|
||||
} else {
|
||||
/* Check for a multi-byte UTF-8 character */
|
||||
if (c & 0x80) {
|
||||
/* Start a multi-byte UTF-8 character */
|
||||
if ((c & 0xe0) == 0xc0) {
|
||||
utf_char = c & 0x1f;
|
||||
utf_cnt = 1;
|
||||
} else if ((c & 0xf0) == 0xe0) {
|
||||
utf_char = c & 0x0f;
|
||||
utf_cnt = 2;
|
||||
} else if ((c & 0xf8) == 0xf0) {
|
||||
utf_char = c & 0x07;
|
||||
utf_cnt = 3;
|
||||
} else if ((c & 0xfc) == 0xf8) {
|
||||
utf_char = c & 0x03;
|
||||
utf_cnt = 4;
|
||||
} else if ((c & 0xfe) == 0xfc) {
|
||||
utf_char = c & 0x01;
|
||||
utf_cnt = 5;
|
||||
} else {
|
||||
utf_cnt = -1;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
/* Single byte UTF-8 character (most common) */
|
||||
utf_char = c;
|
||||
}
|
||||
}
|
||||
*uni = utf_char;
|
||||
break;
|
||||
}
|
||||
if (utf_cnt) {
|
||||
u_len = utf8_to_utf32(in, boundlen, &c);
|
||||
if (u_len < 0) {
|
||||
*uni = '?';
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (c > MAX_WCHAR_T)
|
||||
*uni = '?';
|
||||
else
|
||||
*uni = c;
|
||||
return u_len;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue