Add Support for Creating and Deleting Unicode Files and Directories in Lit

This enables lit to work with Unicode file names via mkdir, rm, and redirection. Lit still uses UTF-8 internally, but converts file names to UTF-16 on Windows, or to plain UTF-8 bytes on everything else. Committed on behalf of Jason Mittertreiner. Differential Revision: https://reviews.llvm.org/D56754; llvm-svn: 355122
2019-02-28 19:16:17 +00:00 · 2019-02-28 19:16:17 +00:00 · 54be909aa0
parent 34f2bee0fb
commit 54be909aa0
4 changed files with 40 additions and 7 deletions
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@ -23,7 +23,7 @@ from lit.ShCommands import GlobItem
 import lit.ShUtil as ShUtil
 import lit.Test as Test
 import lit.util
-from lit.util import to_bytes, to_string
+from lit.util import to_bytes, to_string, to_unicode
 from lit.BooleanExpression import BooleanExpression

 class InternalShellError(Exception):
@ -344,8 +344,11 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
    stderr = StringIO()
    exitCode = 0
    for dir in args:
+        cwd = cmd_shenv.cwd
+        dir = to_unicode(dir) if kIsWindows else to_bytes(dir)
+        cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
        if not os.path.isabs(dir):
-            dir = os.path.realpath(os.path.join(cmd_shenv.cwd, dir))
+            dir = os.path.realpath(os.path.join(cwd, dir))
        if parent:
            lit.util.mkdir_p(dir)
        else:
@ -598,8 +601,11 @@ def executeBuiltinRm(cmd, cmd_shenv):
    stderr = StringIO()
    exitCode = 0
    for path in args:
+        cwd = cmd_shenv.cwd
+        path = to_unicode(path) if kIsWindows else to_bytes(path)
+        cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
        if not os.path.isabs(path):
-            path = os.path.realpath(os.path.join(cmd_shenv.cwd, path))
+            path = os.path.realpath(os.path.join(cwd, path))
        if force and not os.path.exists(path):
            continue
        try:
@ -695,6 +701,8 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
        else:
            # Make sure relative paths are relative to the cwd.
            redir_filename = os.path.join(cmd_shenv.cwd, name)
+            redir_filename = to_unicode(redir_filename) \
+                    if kIsWindows else to_bytes(redir_filename)
            fd = open(redir_filename, mode)
        # Workaround a Win32 and/or subprocess bug when appending.
        #
@ -1096,11 +1104,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
        for i, ln in enumerate(commands):
            commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
        if test.config.pipefail:
-            f.write('set -o pipefail;')
+            f.write(b'set -o pipefail;' if mode == 'wb' else 'set -o pipefail;')
        if litConfig.echo_all_commands:
-            f.write('set -x;')
-        f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
-    f.write('\n')
+            f.write(b'set -x;' if mode == 'wb' else 'set -x;')
+        if sys.version_info > (3,0) and mode == 'wb':
+            f.write(bytes('{ ' + '; } &&\n{ '.join(commands) + '; }', 'utf-8'))
+        else:
+            f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
+    f.write(b'\n' if mode == 'wb' else '\n')
    f.close()

    if isWin32CMDEXE:
--- a/llvm/utils/lit/lit/util.py
+++ b/llvm/utils/lit/lit/util.py
@ -102,6 +102,20 @@ def to_string(b):
        raise TypeError('not sure how to convert %s to %s' % (type(b), str))


+def to_unicode(s):
+    """Return the parameter as a type which supports unicode, decoding it
+    from UTF-8 if it is a byte string.
+
+    In Python2, this is the unicode type. In Python3 it's the str type.
+
+    """
+    if isinstance(s, bytes):
+        # In Python2, this branch is taken for both 'str' and 'bytes'.
+        # In Python3, this branch is taken only for 'bytes'.
+        return s.decode('utf-8')
+    return s
+
+
 def detectCPUs():
    """Detects the number of CPUs on a system.

--- a/llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt
@ -0,0 +1,7 @@
+# Check removing unicode
+#
+# RUN: mkdir -p  Output/中文
+# RUN: echo "" > Output/中文/你好.txt
+# RUN: rm Output/中文/你好.txt
+# RUN: echo "" > Output/中文/你好.txt
+# RUN: rm -r Output/中文
--- a/llvm/utils/lit/tests/shtest-shell.py
+++ b/llvm/utils/lit/tests/shtest-shell.py
@ -224,6 +224,7 @@
 # CHECK: Exit Code: 1
 # CHECK: ***

+# CHECK: PASS: shtest-shell :: rm-unicode-0.txt
 # CHECK: PASS: shtest-shell :: sequencing-0.txt
 # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
 # CHECK: PASS: shtest-shell :: valid-shell.txt