Add Support for Creating and Deleting Unicode Files and Directories in Lit

This enables lit to work with Unicode file names via mkdir, rm, and redirection.
Lit still uses UTF-8 internally, but converts to UTF-16 on Windows and to plain
UTF-8 bytes everywhere else.

Committed on behalf of Jason Mittertreiner
Differential Revision: https://reviews.llvm.org/D56754

llvm-svn: 355122
This commit is contained in:
Serge Guelton 2019-02-28 19:16:17 +00:00
parent 34f2bee0fb
commit 54be909aa0
4 changed files with 40 additions and 7 deletions

View File

@ -23,7 +23,7 @@ from lit.ShCommands import GlobItem
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
from lit.util import to_bytes, to_string
from lit.util import to_bytes, to_string, to_unicode
from lit.BooleanExpression import BooleanExpression
class InternalShellError(Exception):
@ -344,8 +344,11 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
stderr = StringIO()
exitCode = 0
for dir in args:
cwd = cmd_shenv.cwd
dir = to_unicode(dir) if kIsWindows else to_bytes(dir)
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(dir):
dir = os.path.realpath(os.path.join(cmd_shenv.cwd, dir))
dir = os.path.realpath(os.path.join(cwd, dir))
if parent:
lit.util.mkdir_p(dir)
else:
@ -598,8 +601,11 @@ def executeBuiltinRm(cmd, cmd_shenv):
stderr = StringIO()
exitCode = 0
for path in args:
cwd = cmd_shenv.cwd
path = to_unicode(path) if kIsWindows else to_bytes(path)
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(path):
path = os.path.realpath(os.path.join(cmd_shenv.cwd, path))
path = os.path.realpath(os.path.join(cwd, path))
if force and not os.path.exists(path):
continue
try:
@ -695,6 +701,8 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
else:
# Make sure relative paths are relative to the cwd.
redir_filename = os.path.join(cmd_shenv.cwd, name)
redir_filename = to_unicode(redir_filename) \
if kIsWindows else to_bytes(redir_filename)
fd = open(redir_filename, mode)
# Workaround a Win32 and/or subprocess bug when appending.
#
@ -1096,11 +1104,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
for i, ln in enumerate(commands):
commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
if test.config.pipefail:
f.write('set -o pipefail;')
f.write(b'set -o pipefail;' if mode == 'wb' else 'set -o pipefail;')
if litConfig.echo_all_commands:
f.write('set -x;')
f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
f.write('\n')
f.write(b'set -x;' if mode == 'wb' else 'set -x;')
if sys.version_info > (3,0) and mode == 'wb':
f.write(bytes('{ ' + '; } &&\n{ '.join(commands) + '; }', 'utf-8'))
else:
f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
f.write(b'\n' if mode == 'wb' else '\n')
f.close()
if isWin32CMDEXE:

View File

@ -102,6 +102,20 @@ def to_string(b):
raise TypeError('not sure how to convert %s to %s' % (type(b), str))
def to_unicode(s):
    """Return `s` as a unicode-capable value, decoding it when necessary.

    Byte strings are decoded as UTF-8; any other value is handed back
    unchanged. The unicode-capable type is `unicode` on Python 2 and `str`
    on Python 3.
    """
    # On Python 2 both 'str' and 'bytes' count as bytes here, so both are
    # decoded; on Python 3 only real 'bytes' objects get past this guard.
    if not isinstance(s, bytes):
        return s
    return s.decode('utf-8')
def detectCPUs():
"""Detects the number of CPUs on a system.

View File

@ -0,0 +1,7 @@
# Check removing files and directories with unicode names
#
# RUN: mkdir -p Output/中文
# RUN: echo "" > Output/中文/你好.txt
# RUN: rm Output/中文/你好.txt
# RUN: echo "" > Output/中文/你好.txt
# RUN: rm -r Output/中文

View File

@ -224,6 +224,7 @@
# CHECK: Exit Code: 1
# CHECK: ***
# CHECK: PASS: shtest-shell :: rm-unicode-0.txt
# CHECK: PASS: shtest-shell :: sequencing-0.txt
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
# CHECK: PASS: shtest-shell :: valid-shell.txt