[PATCH] console utf-8 mode fixes
Fix UTF-8 mode so that alternate charset modes always work according to the control sequences interpreted in the do_con_trol function, preserving backward US-ASCII and VT100 semigraphics compatibility. Malformed UTF-8 sequences are represented as sequences of replacement glyphs, original codes, or '?' as a last resort. unicode-xterm, gnome-terminal, konsole and other terminal emulators in UTF-8 mode respect the acsc, enacs and rmacs sequences. I also found that some important system programs (from the Debian distro) use acsc in UTF-8 mode - dselect, aptitude and w3m, for example. Signed-off-by: Adam Tlalka <atlka@pg.gda.pl> Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
08c67d2a5d
commit
d4328b40af
|
@ -63,6 +63,13 @@
|
|||
*
|
||||
* Removed console_lock, enabled interrupts across all console operations
|
||||
* 13 March 2001, Andrew Morton
|
||||
*
|
||||
* Fixed UTF-8 mode so alternate charset modes always work according
|
||||
* to control sequences interpreted in do_con_trol function
|
||||
* preserving backward VT100 semigraphics compatibility,
|
||||
* malformed UTF sequences represented as sequences of replacement glyphs,
|
||||
* original codes or '?' as a last resort if replacement glyph is undefined
|
||||
* by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
|
@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
|
|||
/* Do no translation at all in control states */
|
||||
if (vc->vc_state != ESnormal) {
|
||||
tc = c;
|
||||
} else if (vc->vc_utf) {
|
||||
} else if (vc->vc_utf && !vc->vc_disp_ctrl) {
|
||||
/* Combine UTF-8 into Unicode */
|
||||
/* Incomplete characters silently ignored */
|
||||
/* Malformed sequences as sequences of replacement glyphs */
|
||||
rescan_last_byte:
|
||||
if(c > 0x7f) {
|
||||
if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
|
||||
vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
|
||||
vc->vc_utf_count--;
|
||||
if (vc->vc_utf_count == 0)
|
||||
tc = c = vc->vc_utf_char;
|
||||
else continue;
|
||||
if (vc->vc_utf_count) {
|
||||
if ((c & 0xc0) == 0x80) {
|
||||
vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
|
||||
if (--vc->vc_utf_count) {
|
||||
vc->vc_npar++;
|
||||
continue;
|
||||
}
|
||||
tc = c = vc->vc_utf_char;
|
||||
} else
|
||||
goto replacement_glyph;
|
||||
} else {
|
||||
vc->vc_npar = 0;
|
||||
if ((c & 0xe0) == 0xc0) {
|
||||
vc->vc_utf_count = 1;
|
||||
vc->vc_utf_char = (c & 0x1f);
|
||||
|
@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
|
|||
vc->vc_utf_count = 5;
|
||||
vc->vc_utf_char = (c & 0x01);
|
||||
} else
|
||||
vc->vc_utf_count = 0;
|
||||
goto replacement_glyph;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (vc->vc_utf_count)
|
||||
goto replacement_glyph;
|
||||
tc = c;
|
||||
vc->vc_utf_count = 0;
|
||||
}
|
||||
} else { /* no utf */
|
||||
} else { /* no utf or alternate charset mode */
|
||||
tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
|
||||
}
|
||||
|
||||
|
@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
|
|||
* direct-to-font zone in UTF-8 mode.
|
||||
*/
|
||||
ok = tc && (c >= 32 ||
|
||||
(!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
|
||||
: CTRL_ACTION) >> c) & 1)))
|
||||
!(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
|
||||
vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
|
||||
&& (c != 127 || vc->vc_disp_ctrl)
|
||||
&& (c != 128+27);
|
||||
|
||||
if (vc->vc_state == ESnormal && ok) {
|
||||
/* Now try to find out how to display it */
|
||||
tc = conv_uni_to_pc(vc, tc);
|
||||
if ( tc == -4 ) {
|
||||
if (tc & ~charmask) {
|
||||
if ( tc == -4 ) {
|
||||
/* If we got -4 (not found) then see if we have
|
||||
defined a replacement character (U+FFFD) */
|
||||
tc = conv_uni_to_pc(vc, 0xfffd);
|
||||
|
||||
/* One reason for the -4 can be that we just
|
||||
did a clear_unimap();
|
||||
try at least to show something. */
|
||||
if (tc == -4)
|
||||
tc = c;
|
||||
} else if ( tc == -3 ) {
|
||||
/* Bad hash table -- hope for the best */
|
||||
tc = c;
|
||||
}
|
||||
if (tc & ~charmask)
|
||||
continue; /* Conversion failed */
|
||||
replacement_glyph:
|
||||
tc = conv_uni_to_pc(vc, 0xfffd);
|
||||
if (!(tc & ~charmask))
|
||||
goto display_glyph;
|
||||
} else if ( tc != -3 )
|
||||
continue; /* nothing to display */
|
||||
/* no hash table or no replacement --
|
||||
* hope for the best */
|
||||
if ( c & ~charmask )
|
||||
tc = '?';
|
||||
else
|
||||
tc = c;
|
||||
}
|
||||
|
||||
display_glyph:
|
||||
if (vc->vc_need_wrap || vc->vc_decim)
|
||||
FLUSH
|
||||
if (vc->vc_need_wrap) {
|
||||
|
@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
|
|||
vc->vc_x++;
|
||||
draw_to = (vc->vc_pos += 2);
|
||||
}
|
||||
if (vc->vc_utf_count) {
|
||||
if (vc->vc_npar) {
|
||||
vc->vc_npar--;
|
||||
goto display_glyph;
|
||||
}
|
||||
vc->vc_utf_count = 0;
|
||||
c = orig;
|
||||
goto rescan_last_byte;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
FLUSH
|
||||
|
|
Loading…
Reference in New Issue