[lit] Make internal diff work in pipelines

When using lit's internal shell, RUN lines like the following
accidentally execute an external `diff` instead of lit's internal
`diff`:

```
 # RUN: program | diff file -
 # RUN: not diff file1 file2 | FileCheck %s
```

Such cases exist now, in `clang/test/Analysis` for example.  We are
preparing patches to ensure lit's internal `diff` is called in such
cases, which will then fail because lit's internal `diff` cannot
currently be used in pipelines and doesn't recognize `-` as a
command-line option.

To enable pipelines, this patch moves lit's `diff` implementation into
an out-of-process script, similar to lit's `cat` implementation.  A
follow-up patch will implement `-` to mean stdin.

Reviewed By: probinson, stella.stamenova

Differential Revision: https://reviews.llvm.org/D66574

llvm-svn: 374388
This commit is contained in:
Joel E. Denny 2019-10-10 17:39:24 +00:00
parent c2cd2d40aa
commit df35ec8289
5 changed files with 276 additions and 241 deletions

View File

@ -1,7 +1,5 @@
from __future__ import absolute_import
import difflib
import errno
import functools
import io
import itertools
import getopt
@ -361,218 +359,6 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
exitCode = 1
return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
def executeBuiltinDiff(cmd, cmd_shenv):
"""executeBuiltinDiff - Compare files line by line.

Runs an in-process `diff` over the two operands found in cmd.args and
returns a ShellCommandResult holding the captured stdout, stderr and exit
code. The exit code is 0 when nothing was written as a difference and 1
when differences were reported or an IOError occurred.

Recognized options: -w, -b, -u, -r and --strip-trailing-cr.

Raises InternalShellError when getopt rejects an option or when the number
of operands is not exactly two.
"""
# Drop the command name itself; only options and operands remain.
args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
try:
opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"])
except getopt.GetoptError as err:
raise InternalShellError(cmd, "Unsupported: 'diff': %s" % str(err))
# NOTE(review): filelines created here is never read again in this function;
# the helpers below each build their own local list.
filelines, filepaths, dir_trees = ([] for i in range(3))
ignore_all_space = False
ignore_space_change = False
unified_diff = False
recursive_diff = False
strip_trailing_cr = False
# Translate each parsed option into the matching boolean switch.
for o, a in opts:
if o == "-w":
ignore_all_space = True
elif o == "-b":
ignore_space_change = True
elif o == "-u":
unified_diff = True
elif o == "-r":
recursive_diff = True
elif o == "--strip-trailing-cr":
strip_trailing_cr = True
else:
assert False, "unhandled option"
if len(args) != 2:
raise InternalShellError(cmd, "Error: missing or extra operand")
def getDirTree(path, basedir=""):
# Tree is a tuple of form (dirname, child_trees).
# An empty dir has child_trees = [], a file has child_trees = None.
child_trees = []
for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)):
for child_dir in child_dirs:
child_trees.append(getDirTree(child_dir, dirname))
for filename in files:
child_trees.append((filename, None))
# NOTE(review): indentation is not visible in this view; presumably this
# return sits inside the os.walk loop so only the top level is consumed
# and recursion handles the rest -- confirm against the real file.
return path, sorted(child_trees)
def compareTwoFiles(filepaths):
# Probe how each file can be read: default text decoding first, then
# UTF-8, and finally fall back to a byte-wise comparison.
compare_bytes = False
encoding = None
filelines = []
for file in filepaths:
try:
with open(file, 'r') as f:
filelines.append(f.readlines())
except UnicodeDecodeError:
try:
with io.open(file, 'r', encoding="utf-8") as f:
filelines.append(f.readlines())
encoding = "utf-8"
# The bare except treats any failure of the UTF-8 attempt as "binary".
except:
compare_bytes = True
# The lines read above are only a probe; the chosen helper re-reads the files.
if compare_bytes:
return compareTwoBinaryFiles(filepaths)
else:
return compareTwoTextFiles(filepaths, encoding)
def compareTwoBinaryFiles(filepaths):
# Diff the raw bytes of both files; returns 1 if any diff line was written.
filelines = []
for file in filepaths:
with open(file, 'rb') as f:
filelines.append(f.readlines())
exitCode = 0
if hasattr(difflib, 'diff_bytes'):
# python 3.5 or newer
# This branch always produces a unified diff, whatever -u says.
diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
diffs = [diff.decode() for diff in diffs]
else:
# python 2.7
func = difflib.unified_diff if unified_diff else difflib.context_diff
diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])
for diff in diffs:
stdout.write(diff)
exitCode = 1
return exitCode
def compareTwoTextFiles(filepaths, encoding):
# Diff both files as text after applying the whitespace/CR options.
# encoding is None for the default open(), otherwise a codec for io.open.
filelines = []
for file in filepaths:
if encoding is None:
with open(file, 'r') as f:
filelines.append(f.readlines())
else:
with io.open(file, 'r', encoding=encoding) as f:
filelines.append(f.readlines())
exitCode = 0
def compose2(f, g):
return lambda x: f(g(x))
# f is the per-line normalizer; it starts as identity and is extended below.
f = lambda x: x
if strip_trailing_cr:
f = compose2(lambda line: line.rstrip('\r'), f)
if ignore_all_space or ignore_space_change:
# -w joins the whitespace-separated pieces with nothing, -b with one space.
ignoreSpace = lambda line, separator: separator.join(line.split())
ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if ignore_all_space else ' ')
f = compose2(ignoreAllSpaceOrSpaceChange, f)
for idx, lines in enumerate(filelines):
filelines[idx]= [f(line) for line in lines]
func = difflib.unified_diff if unified_diff else difflib.context_diff
for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]):
stdout.write(diff)
exitCode = 1
return exitCode
def printDirVsFile(dir_path, file_path):
# Report a directory on the left against a regular file on the right.
if os.path.getsize(file_path):
msg = "File %s is a directory while file %s is a regular file"
else:
msg = "File %s is a directory while file %s is a regular empty file"
stdout.write(msg % (dir_path, file_path) + "\n")
def printFileVsDir(file_path, dir_path):
# Report a regular file on the left against a directory on the right.
if os.path.getsize(file_path):
msg = "File %s is a regular file while file %s is a directory"
else:
msg = "File %s is a regular empty file while file %s is a directory"
stdout.write(msg % (file_path, dir_path) + "\n")
def printOnlyIn(basedir, path, name):
# Report an entry that exists on one side only.
stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name))
def compareDirTrees(dir_trees, base_paths=["", ""]):
# Dirnames of the trees are not checked, it's caller's responsibility,
# as top-level dirnames are always different. Base paths are important
# for doing os.walk, but we don't put it into tree's dirname in order
# to speed up string comparison below and while sorting in getDirTree.
left_tree, right_tree = dir_trees[0], dir_trees[1]
left_base, right_base = base_paths[0], base_paths[1]
# Compare two files or report file vs. directory mismatch.
if left_tree[1] is None and right_tree[1] is None:
return compareTwoFiles([os.path.join(left_base, left_tree[0]),
os.path.join(right_base, right_tree[0])])
if left_tree[1] is None and right_tree[1] is not None:
printFileVsDir(os.path.join(left_base, left_tree[0]),
os.path.join(right_base, right_tree[0]))
return 1
if left_tree[1] is not None and right_tree[1] is None:
printDirVsFile(os.path.join(left_base, left_tree[0]),
os.path.join(right_base, right_tree[0]))
return 1
# Compare two directories via recursive use of compareDirTrees.
exitCode = 0
left_names = [node[0] for node in left_tree[1]]
right_names = [node[0] for node in right_tree[1]]
# l and r are cursors into the two sorted child-name lists.
l, r = 0, 0
while l < len(left_names) and r < len(right_names):
# Names are sorted in getDirTree, rely on that order.
if left_names[l] < right_names[r]:
exitCode = 1
printOnlyIn(left_base, left_tree[0], left_names[l])
l += 1
elif left_names[l] > right_names[r]:
exitCode = 1
printOnlyIn(right_base, right_tree[0], right_names[r])
r += 1
else:
# Same name on both sides: descend and merge the child's result.
exitCode |= compareDirTrees([left_tree[1][l], right_tree[1][r]],
[os.path.join(left_base, left_tree[0]),
os.path.join(right_base, right_tree[0])])
l += 1
r += 1
# At least one of the trees has ended. Report names from the other tree.
while l < len(left_names):
exitCode = 1
printOnlyIn(left_base, left_tree[0], left_names[l])
l += 1
while r < len(right_names):
exitCode = 1
printOnlyIn(right_base, right_tree[0], right_names[r])
r += 1
return exitCode
# Output is buffered in memory and handed back through ShellCommandResult.
stderr = StringIO()
stdout = StringIO()
exitCode = 0
try:
for file in args:
# Relative operands are resolved against the shell environment's cwd.
if not os.path.isabs(file):
file = os.path.realpath(os.path.join(cmd_shenv.cwd, file))
if recursive_diff:
dir_trees.append(getDirTree(file))
else:
filepaths.append(file)
if not recursive_diff:
exitCode = compareTwoFiles(filepaths)
else:
exitCode = compareDirTrees(dir_trees)
except IOError as err:
stderr.write("Error: 'diff' command failed, %s\n" % str(err))
exitCode = 1
return ShellCommandResult(cmd, stdout.getvalue(), stderr.getvalue(), exitCode, False)
def executeBuiltinRm(cmd, cmd_shenv):
"""executeBuiltinRm - Removes (deletes) files or directories."""
args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
@ -838,14 +624,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
results.append(cmdResult)
return cmdResult.exitCode
if cmd.commands[0].args[0] == 'diff':
if len(cmd.commands) != 1:
raise InternalShellError(cmd.commands[0], "Unsupported: 'diff' "
"cannot be part of a pipeline")
cmdResult = executeBuiltinDiff(cmd.commands[0], shenv)
results.append(cmdResult)
return cmdResult.exitCode
if cmd.commands[0].args[0] == 'rm':
if len(cmd.commands) != 1:
raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' "
@ -866,7 +644,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
stderrTempFiles = []
opened_files = []
named_temp_files = []
builtin_commands = set(['cat'])
builtin_commands = set(['cat', 'diff'])
builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
# To avoid deadlock, we use a single stderr stream for piped
# output. This is null until we have seen some output using

View File

@ -0,0 +1,228 @@
import difflib
import functools
import getopt
import io
import os
import sys
class DiffFlags():
    """Boolean switches that control how a diff is computed.

    Every switch starts out False. In this file main() turns them on from
    the command line: -w -> ignore_all_space, -b -> ignore_space_change,
    -u -> unified_diff, -r -> recursive_diff and
    --strip-trailing-cr -> strip_trailing_cr.
    """

    def __init__(self):
        # All options are disabled until explicitly requested.
        for switch in ("ignore_all_space",
                       "ignore_space_change",
                       "unified_diff",
                       "recursive_diff",
                       "strip_trailing_cr"):
            setattr(self, switch, False)
def getDirTree(path, basedir=""):
    """Build a sorted tree describing the directory at basedir/path.

    A tree is a tuple ``(name, child_trees)``: an empty directory has
    ``child_trees == []`` and a file has ``child_trees is None``.

    Returns the tree for ``path``, or None when os.walk yields nothing for
    it (for example when it is not a directory).
    """
    # Only the first os.walk() entry -- the directory itself -- is used;
    # deeper levels are reached through the recursive calls below.
    top_level = next(os.walk(os.path.join(basedir, path)), None)
    if top_level is None:
        return None
    dirname, child_dirs, files = top_level
    entries = [getDirTree(sub_dir, dirname) for sub_dir in child_dirs]
    entries.extend((filename, None) for filename in files)
    return path, sorted(entries)
def compareTwoFiles(flags, filepaths):
    """Compare two files as text when possible, otherwise as raw bytes.

    Each file is first probed with the platform default text encoding. If
    that fails with UnicodeDecodeError, UTF-8 is tried; if UTF-8 fails too,
    the pair is compared byte-wise.

    Args:
        flags: DiffFlags forwarded to the comparison helper.
        filepaths: the two paths to compare, left then right.

    Returns:
        The exit code of compareTwoBinaryFiles or compareTwoTextFiles.

    Raises:
        IOError: if a file cannot be opened or read.
    """
    compare_bytes = False
    encoding = None
    for file in filepaths:
        try:
            # The content is discarded: this read only checks decodability,
            # the chosen helper reads the files again.
            with open(file, 'r') as f:
                f.read()
        except UnicodeDecodeError:
            try:
                with io.open(file, 'r', encoding="utf-8") as f:
                    f.read()
                encoding = "utf-8"
            # Catch only decode failures. A bare except here would also
            # hide programming errors (such as a missing import) and
            # silently downgrade every such file to a binary comparison.
            except UnicodeDecodeError:
                compare_bytes = True

    if compare_bytes:
        return compareTwoBinaryFiles(flags, filepaths)
    return compareTwoTextFiles(flags, filepaths, encoding)
def compareTwoBinaryFiles(flags, filepaths):
    """Diff two files as raw bytes and write the result to sys.stdout.

    Args:
        flags: DiffFlags; only ``unified_diff`` is read, and only on
            interpreters that lack ``difflib.diff_bytes``.
        filepaths: the two paths to compare, left then right.

    Returns:
        0 if no diff lines were written, 1 otherwise.

    Raises:
        IOError: if a file cannot be opened or read.
    """
    filelines = []
    for file in filepaths:
        with open(file, 'rb') as f:
            filelines.append(f.readlines())

    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer
        # NOTE(review): this branch always emits a unified diff, regardless
        # of flags.unified_diff; kept as-is to preserve existing output.
        diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0],
                                   filelines[1], filepaths[0].encode(),
                                   filepaths[1].encode())
        # The content may contain bytes that are not valid UTF-8, so a
        # strict decode could raise UnicodeDecodeError; escape them instead.
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7
        if flags.unified_diff:
            func = difflib.unified_diff
        else:
            func = difflib.context_diff
        diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])

    exitCode = 0
    for diff in diffs:
        sys.stdout.write(diff)
        exitCode = 1
    return exitCode
def compareTwoTextFiles(flags, filepaths, encoding):
    """Diff two text files line by line and write the result to sys.stdout.

    Args:
        flags: DiffFlags; ``strip_trailing_cr``, ``ignore_all_space``,
            ``ignore_space_change`` and ``unified_diff`` are honoured.
        filepaths: the two paths to compare, left then right.
        encoding: None to read with the built-in open() default, otherwise
            the codec name handed to io.open().

    Returns:
        0 if no diff lines were written, 1 otherwise.

    Raises:
        IOError: if a file cannot be opened or read.
    """
    filelines = []
    for file in filepaths:
        if encoding is None:
            with open(file, 'r') as f:
                filelines.append(f.readlines())
        else:
            with io.open(file, 'r', encoding=encoding) as f:
                filelines.append(f.readlines())

    def stripTrailingCr(line):
        # A plain rstrip('\r') never fires on "text\r\n" because the line
        # ends with '\n'; drop the CR but keep the line terminator.
        if line.endswith('\r\n'):
            return line[:-2] + '\n'
        return line.rstrip('\r')

    def collapseSpace(line, separator):
        # str.split() also swallows the line terminator. Put one back,
        # otherwise the diff lines are written to stdout glued together.
        return separator.join(line.split()) + '\n'

    # Normalizers run in this order: CR stripping first, then whitespace.
    normalizers = []
    if flags.strip_trailing_cr:
        normalizers.append(stripTrailingCr)
    if flags.ignore_all_space or flags.ignore_space_change:
        # -w removes whitespace entirely, -b collapses runs to one space.
        separator = '' if flags.ignore_all_space else ' '
        normalizers.append(functools.partial(collapseSpace,
                                             separator=separator))
    for normalize in normalizers:
        filelines = [[normalize(line) for line in lines]
                     for lines in filelines]

    func = difflib.unified_diff if flags.unified_diff else difflib.context_diff
    exitCode = 0
    for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]):
        sys.stdout.write(diff)
        exitCode = 1
    return exitCode
def printDirVsFile(dir_path, file_path):
    """Report on stdout that the left side is a directory and the right a file.

    The wording says "regular empty file" when file_path has size zero.
    """
    file_is_empty = not os.path.getsize(file_path)
    if file_is_empty:
        msg = "File %s is a directory while file %s is a regular empty file"
    else:
        msg = "File %s is a directory while file %s is a regular file"
    report = msg % (dir_path, file_path)
    sys.stdout.write(report + "\n")
def printFileVsDir(file_path, dir_path):
    """Report on stdout that the left side is a file and the right a directory.

    The wording says "regular empty file" when file_path has size zero.
    """
    file_is_empty = not os.path.getsize(file_path)
    if file_is_empty:
        msg = "File %s is a regular empty file while file %s is a directory"
    else:
        msg = "File %s is a regular file while file %s is a directory"
    report = msg % (file_path, dir_path)
    sys.stdout.write(report + "\n")
def printOnlyIn(basedir, path, name):
    """Report on stdout that ``name`` exists only under basedir/path."""
    containing_dir = os.path.join(basedir, path)
    sys.stdout.write("Only in %s: %s\n" % (containing_dir, name))
def compareDirTrees(flags, dir_trees, base_paths=("", "")):
    """Recursively compare two directory trees and report the differences.

    Args:
        flags: DiffFlags forwarded to compareTwoFiles for file pairs.
        dir_trees: two trees ``(name, child_trees)`` as built by getDirTree;
            ``child_trees`` is None for a file and a sorted list otherwise.
        base_paths: the parent directory of each tree's root, left then
            right. Any two-item sequence is accepted.

    Returns:
        0 when no difference was reported, 1 otherwise.
    """
    # Dirnames of the trees are not checked, it's caller's responsibility,
    # as top-level dirnames are always different. Base paths are important
    # for doing os.walk, but we don't put it into tree's dirname in order
    # to speed up string comparison below and while sorting in getDirTree.
    left_tree, right_tree = dir_trees[0], dir_trees[1]
    left_base, right_base = base_paths[0], base_paths[1]
    left_path = os.path.join(left_base, left_tree[0])
    right_path = os.path.join(right_base, right_tree[0])

    # Compare two files or report file vs. directory mismatch.
    left_is_file = left_tree[1] is None
    right_is_file = right_tree[1] is None
    if left_is_file and right_is_file:
        return compareTwoFiles(flags, [left_path, right_path])
    if left_is_file:
        printFileVsDir(left_path, right_path)
        return 1
    if right_is_file:
        printDirVsFile(left_path, right_path)
        return 1

    # Compare two directories via recursive use of compareDirTrees.
    exitCode = 0
    left_names = [node[0] for node in left_tree[1]]
    right_names = [node[0] for node in right_tree[1]]
    left_idx, right_idx = 0, 0
    while left_idx < len(left_names) and right_idx < len(right_names):
        # Names are sorted in getDirTree, rely on that order.
        if left_names[left_idx] < right_names[right_idx]:
            exitCode = 1
            printOnlyIn(left_base, left_tree[0], left_names[left_idx])
            left_idx += 1
        elif left_names[left_idx] > right_names[right_idx]:
            exitCode = 1
            printOnlyIn(right_base, right_tree[0], right_names[right_idx])
            right_idx += 1
        else:
            # Same name on both sides: descend with this level as the base.
            exitCode |= compareDirTrees(flags,
                                        [left_tree[1][left_idx],
                                         right_tree[1][right_idx]],
                                        [left_path, right_path])
            left_idx += 1
            right_idx += 1

    # At least one of the trees has ended. Report names from the other tree.
    for name in left_names[left_idx:]:
        exitCode = 1
        printOnlyIn(left_base, left_tree[0], name)
    for name in right_names[right_idx:]:
        exitCode = 1
        printOnlyIn(right_base, right_tree[0], name)
    return exitCode
def main(argv):
    """Run the diff command described by argv and exit with its status.

    argv[0] is skipped; the rest is parsed for -w, -b, -u, -r and
    --strip-trailing-cr followed by exactly two operands. The process always
    terminates through sys.exit(): 1 for a usage error, an IOError or
    reported differences, 0 otherwise.
    """
    try:
        opts, args = getopt.gnu_getopt(argv[1:], "wbur",
                                       ["strip-trailing-cr"])
    except getopt.GetoptError as err:
        sys.stderr.write("Unsupported: 'diff': %s\n" % str(err))
        sys.exit(1)

    flags = DiffFlags()
    # Maps each accepted option to the DiffFlags switch it enables.
    switch_for_option = {
        "-w": "ignore_all_space",
        "-b": "ignore_space_change",
        "-u": "unified_diff",
        "-r": "recursive_diff",
        "--strip-trailing-cr": "strip_trailing_cr",
    }
    for option, _ in opts:
        assert option in switch_for_option, "unhandled option"
        setattr(flags, switch_for_option[option], True)

    if len(args) != 2:
        sys.stderr.write("Error: missing or extra operand\n")
        sys.exit(1)

    filepaths, dir_trees = [], []
    exitCode = 0
    try:
        for operand in args:
            # Relative operands are resolved against the working directory.
            if not os.path.isabs(operand):
                operand = os.path.realpath(
                    os.path.join(os.getcwd(), operand))
            if flags.recursive_diff:
                dir_trees.append(getDirTree(operand))
            else:
                filepaths.append(operand)
        if flags.recursive_diff:
            exitCode = compareDirTrees(flags, dir_trees)
        else:
            exitCode = compareTwoFiles(flags, filepaths)
    except IOError as err:
        sys.stderr.write("Error: 'diff' command failed, %s\n" % str(err))
        exitCode = 1

    sys.exit(exitCode)


if __name__ == "__main__":
    main(sys.argv)

View File

@ -1,3 +0,0 @@
# Check error on an unsupported diff (cannot be part of a pipeline).
#
# RUN: diff diff-error-0.txt diff-error-0.txt | echo Output

View File

@ -0,0 +1,15 @@
# RUN: echo foo > %t.foo
# RUN: echo bar > %t.bar
# Check output pipe.
# RUN: diff %t.foo %t.foo | FileCheck -allow-empty -check-prefix=EMPTY %s
# RUN: diff -u %t.foo %t.bar | FileCheck %s && false || true
# Fail so lit will print output.
# RUN: false
# CHECK: @@
# CHECK-NEXT: -foo
# CHECK-NEXT: +bar
# EMPTY-NOT: {{.}}

View File

@ -34,28 +34,20 @@
# CHECK: error: command failed with exit status: 127
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-0.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
# CHECK: # command stderr:
# CHECK: Unsupported: 'diff' cannot be part of a pipeline
# CHECK: error: command failed with exit status: 127
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-1.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED ***
# CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt"
# CHECK: # command stderr:
# CHECK: Unsupported: 'diff': option -B not recognized
# CHECK: error: command failed with exit status: 127
# CHECK: error: command failed with exit status: 1
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-2.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-2.txt' FAILED ***
# CHECK: $ "diff" "temp.txt"
# CHECK: # command stderr:
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 127
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 1
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-3.txt
@ -82,18 +74,43 @@
# CHECK: *** TEST 'shtest-shell :: diff-error-5.txt' FAILED ***
# CHECK: $ "diff"
# CHECK: # command stderr:
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 127
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 1
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-6.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-6.txt' FAILED ***
# CHECK: $ "diff"
# CHECK: # command stderr:
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 127
# CHECK: Error: missing or extra operand
# CHECK: error: command failed with exit status: 1
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-pipes.txt
# CHECK: *** TEST 'shtest-shell :: diff-pipes.txt' FAILED ***
# CHECK: $ "diff" "{{[^"]*}}.foo" "{{[^"]*}}.foo"
# CHECK-NOT: note
# CHECK-NOT: error
# CHECK: $ "FileCheck"
# CHECK-NOT: note
# CHECK-NOT: error
# CHECK: $ "diff" "-u" "{{[^"]*}}.foo" "{{[^"]*}}.bar"
# CHECK: note: command had no output on stdout or stderr
# CHECK: error: command failed with exit status: 1
# CHECK: $ "FileCheck"
# CHECK-NOT: note
# CHECK-NOT: error
# CHECK: $ "true"
# CHECK: $ "false"
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-r-error-0.txt
# CHECK: *** TEST 'shtest-shell :: diff-r-error-0.txt' FAILED ***
# CHECK: $ "diff" "-r"