From e4f11a31927e7ca67ff54897de37ef75e31e05ff Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Sat, 12 Oct 2019 11:57:20 +0000 Subject: [PATCH] Reland r374389: [lit] Clean up internal diff's encoding handling To avoid breaking some tests, D66574, D68664, D67643, and D68668 landed together. However, D68664 introduced an issue now addressed by D68839, with which these are now all relanding. Differential Revision: https://reviews.llvm.org/D68664 llvm-svn: 374649 --- llvm/utils/lit/lit/builtin_commands/diff.py | 51 +++++++---------- .../Inputs/shtest-shell/diff-encodings.txt | 9 +++ .../lit/tests/Inputs/shtest-shell/diff-in.bin | Bin 0 -> 26 bytes .../tests/Inputs/shtest-shell/diff-in.utf16 | Bin 0 -> 24 bytes .../tests/Inputs/shtest-shell/diff-in.utf8 | 3 + llvm/utils/lit/tests/max-failures.py | 2 +- llvm/utils/lit/tests/shtest-shell.py | 54 +++++++++++++++++- 7 files changed, 86 insertions(+), 33 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.bin create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 diff --git a/llvm/utils/lit/lit/builtin_commands/diff.py b/llvm/utils/lit/lit/builtin_commands/diff.py index 885b425c95fa..562b9ac37ac2 100644 --- a/llvm/utils/lit/lit/builtin_commands/diff.py +++ b/llvm/utils/lit/lit/builtin_commands/diff.py @@ -1,6 +1,7 @@ import difflib import functools import getopt +import locale import os import sys @@ -24,37 +25,26 @@ def getDirTree(path, basedir=""): return path, sorted(child_trees) def compareTwoFiles(flags, filepaths): - compare_bytes = False - encoding = None filelines = [] for file in filepaths: + with open(file, 'rb') as file_bin: + filelines.append(file_bin.readlines()) + + try: + return compareTwoTextFiles(flags, filepaths, filelines, + locale.getpreferredencoding(False)) + except UnicodeDecodeError: try: - with open(file, 'r') as f: - filelines.append(f.readlines()) - except UnicodeDecodeError: - try: - with io.open(file, 'r', encoding="utf-8") as f: - filelines.append(f.readlines()) - encoding = "utf-8" - except: - compare_bytes = True - - if compare_bytes: - return compareTwoBinaryFiles(flags, filepaths) - else: - return compareTwoTextFiles(flags, filepaths, encoding) - -def compareTwoBinaryFiles(flags, filepaths): - filelines = [] - for file in filepaths: - with open(file, 'rb') as f: - filelines.append(f.readlines()) + return compareTwoTextFiles(flags, filepaths, filelines, "utf-8") + except: + return compareTwoBinaryFiles(flags, filepaths, filelines) +def compareTwoBinaryFiles(flags, filepaths, filelines): exitCode = 0 if hasattr(difflib, 'diff_bytes'): # python 3.5 or newer diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode()) - diffs = [diff.decode() for diff in diffs] + diffs = [diff.decode(errors="backslashreplace") for diff in diffs] else: # python 2.7 if flags.unified_diff: @@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepaths): exitCode = 1 return exitCode -def compareTwoTextFiles(flags, filepaths, encoding): +def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding): filelines = [] - for file in filepaths: - if encoding is None: - with open(file, 'r') as f: - filelines.append(f.readlines()) - else: - with io.open(file, 'r', encoding=encoding) as f: - filelines.append(f.readlines()) + for lines_bin in filelines_bin: + lines = [] + for line_bin in lines_bin: + line = line_bin.decode(encoding=encoding) + lines.append(line) + filelines.append(lines) exitCode = 0 def compose2(f, g): diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt new file mode 100644 index 000000000000..d8b9718a0990 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt @@ -0,0 +1,9 @@ +# Check that diff falls back to binary mode if it cannot decode a file. + +# RUN: diff -u diff-in.bin diff-in.bin +# RUN: diff -u diff-in.utf16 diff-in.bin && false || true +# RUN: diff -u diff-in.utf8 diff-in.bin && false || true +# RUN: diff -u diff-in.bin diff-in.utf8 && false || true + +# Fail so lit will print output. +# RUN: false diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.bin b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.bin new file mode 100644 index 0000000000000000000000000000000000000000..06b800b707c1ade254fa995363aa211d096b534e GIT binary patch literal 26 acmZQbW5@?WE`}tAM24dO|DmiZ1}*?iD+SX4 literal 0 HcmV?d00001 diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 new file mode 100644 index 0000000000000000000000000000000000000000..d7d9feefa7da8341244deff11b6ea91b4d36b8f9 GIT binary patch literal 24 YcmZQbW5@?WE`}tAM1~?LUB$o!05LiP)c^nh literal 0 HcmV?d00001 diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 new file mode 100644 index 000000000000..86e041dad66a --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 @@ -0,0 +1,3 @@ +foo +bar +baz diff --git a/llvm/utils/lit/tests/max-failures.py b/llvm/utils/lit/tests/max-failures.py index cee06fa255dc..5149a91ec002 100644 --- a/llvm/utils/lit/tests/max-failures.py +++ b/llvm/utils/lit/tests/max-failures.py @@ -8,7 +8,7 @@ # # END. -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28) # CHECK: Failing Tests (1) # CHECK: Failing Tests (2) # CHECK: error: argument --max-failures: requires positive integer, but found '0' diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py index 3978e4470a99..6d9b1aa459c2 100644 --- a/llvm/utils/lit/tests/shtest-shell.py +++ b/llvm/utils/lit/tests/shtest-shell.py @@ -34,6 +34,58 @@ # CHECK: error: command failed with exit status: 127 # CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-encodings.txt +# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED *** + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin" +# CHECK-NOT: error + +# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^ .f.o.o.$}} +# CHECK-NEXT: {{^-.b.a.r.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^ .b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: -foo +# CHECK-NEXT: -bar +# CHECK-NEXT: -baz +# CHECK-NEXT: {{^\+.f.o.o.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^\+.b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^\-.f.o.o.$}} +# CHECK-NEXT: {{^\-.b.a.r..}} +# CHECK-NEXT: {{^\-.b.a.z.$}} +# CHECK-NEXT: +foo +# CHECK-NEXT: +bar +# CHECK-NEXT: +baz +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "false" + +# CHECK: *** + + # CHECK: FAIL: shtest-shell :: diff-error-1.txt # CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED *** # CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt" @@ -245,4 +297,4 @@ # CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt # CHECK: PASS: shtest-shell :: valid-shell.txt -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28)