Make check-llvm run 50% faster on macOS, 18% faster on Windows.

While looking at cycle time graphs of some of my bots, I noticed
that 327894859c made check-llvm noticeably slower on macOS and
Windows.

As it turns out, the 5 substitutions added in that change were
enough to cause lit to thrash the built-in cache in re.compile()
(re.sub() is implemented as re.compile().sub()), and apparently
applySubstitutions() is on the critical path and slow when all
regexes need to compile all the time.

(See `_MAXCACHE = 512` in cpython/Lib/re.py)

Supporting full regexes for lit substitutions seems a bit like
overkill, but for now add a simple unbounded cache to recover
the lost performance.

No intended behavior change.
This commit is contained in:
Nico Weber 2020-01-06 10:54:13 -05:00
parent 3abc2927cb
commit f6544934b9
1 changed files with 30 additions and 6 deletions

View File

@ -1112,11 +1112,16 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
s = s.replace('&', '\&') s = s.replace('&', '\&')
return s return s
substitutions.extend([ substitutions.extend([
('%{/s:regex_replacement}', regex_escape(sourcepath.replace('\\', '/'))), ('%{/s:regex_replacement}',
('%{/S:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), regex_escape(sourcepath.replace('\\', '/'))),
('%{/p:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), ('%{/S:regex_replacement}',
('%{/t:regex_replacement}', regex_escape(tmpBase.replace('\\', '/')) + '.tmp'), regex_escape(sourcedir.replace('\\', '/'))),
('%{/T:regex_replacement}', regex_escape(tmpDir.replace('\\', '/'))), ('%{/p:regex_replacement}',
regex_escape(sourcedir.replace('\\', '/'))),
('%{/t:regex_replacement}',
regex_escape(tmpBase.replace('\\', '/')) + '.tmp'),
('%{/T:regex_replacement}',
regex_escape(tmpDir.replace('\\', '/'))),
]) ])
# "%:[STpst]" are normalized paths without colons and without a leading # "%:[STpst]" are normalized paths without colons and without a leading
@ -1130,6 +1135,18 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
]) ])
return substitutions return substitutions
def _memoize(f):
cache = {} # Intentionally unbounded, see applySubstitutions()
def memoized(x):
if x not in cache:
cache[x] = f(x)
return cache[x]
return memoized
@_memoize
def _caching_re_compile(r):
    """Compile the regular expression pattern r with re.compile().

    Decorated with _memoize, so each distinct pattern is compiled once and
    the resulting pattern object is reused on later calls with the same
    pattern.
    """
    return re.compile(r)
def applySubstitutions(script, substitutions): def applySubstitutions(script, substitutions):
"""Apply substitutions to the script. Allow full regular expression syntax. """Apply substitutions to the script. Allow full regular expression syntax.
Replace each matching occurrence of regular expression pattern a with Replace each matching occurrence of regular expression pattern a with
@ -1139,7 +1156,14 @@ def applySubstitutions(script, substitutions):
for a,b in substitutions: for a,b in substitutions:
if kIsWindows: if kIsWindows:
b = b.replace("\\","\\\\") b = b.replace("\\","\\\\")
ln = re.sub(a, b, ln) # re.compile() has a built-in LRU cache with 512 entries. In some
# test suites lit ends up thrashing that cache, which made e.g.
# check-llvm run 50% slower. Use an explicit, unbounded cache
# to prevent that from happening. Since lit is fairly
# short-lived, since the set of substitutions is fairly small, and
# since thrashing has such bad consequences, not bounding the cache
# seems reasonable.
ln = _caching_re_compile(a).sub(b, ln)
# Strip the trailing newline and any extra whitespace. # Strip the trailing newline and any extra whitespace.
return ln.strip() return ln.strip()