Add initial version of updated tools from #1533

This commit is contained in:
Richard Berger 2019-11-05 14:22:17 -05:00
parent 74dade3ccb
commit 423a3bb99f
3 changed files with 303 additions and 1 deletions

View File

@ -0,0 +1,238 @@
#! /usr/bin/env python3
# LAMMPS Documentation Utilities
#
# Scan for duplicate anchor labels in documentation files
#
# Copyright (C) 2019 E. Anne Gunn
# Based largely on doc_anchor_check.py by Richard Berger
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import os
import re
import sys
# We only want to replace image lines where image is
# pulled from Eqs subfolder
image_pattern = re.compile(r'.*image:: Eqs/(.*)\.jpg')
tex_eq_pattern = re.compile(r'\$\$')
latex_begin_eq_pattern = re.compile(r'\\begin{equation}')
latex_end_eq_pattern = re.compile(r'\\end{equation}')
latex_begin_eqArray_pattern = re.compile(r'\\begin{eqnarray\*}')
latex_end_eqArray_pattern = re.compile(r'\\end{eqnarray\*}')
imageMarker = ">>>image was here"
image_marker_pattern = re.compile(r'>>>image was here')
align_pattern = re.compile(r'.*:align: center')
modifiedFileFolder = "src/modifiedRst/"
# Since this is a proof of concept implementation,
# skip any rst files that are known to cause problems
skipFileList = ["pair_tersoff_zbl.rst"]
runReport = {
}
def checkForEquationStart(texLine):
eqType = None
texMatch = tex_eq_pattern.match(texLine)
if texMatch:
eqType = "texMatch"
else:
eqMatch = latex_begin_eq_pattern.match(texLine)
if eqMatch:
eqType = "eqMatch"
else:
eqArrayMatch = latex_begin_eqArray_pattern.match(texLine)
if eqArrayMatch:
eqType = "eqArrayMatch"
return eqType
def checkForEquationEnd(texLine, eqType):
endPattern = tex_eq_pattern
if eqType == "texMatch":
endPattern = tex_eq_pattern
elif eqType == "eqMatch":
endPattern = latex_end_eq_pattern
elif eqType == "eqArrayMatch":
endPattern = latex_end_eqArray_pattern
else:
print("***error: unexpected eqType %s, will look for tex delimiter" % eqType)
endMatch = endPattern.match(texLine)
endFound = endMatch is not None
if endFound:
print("found pattern end, line: %s" % texLine)
return endFound
def startMathjax():
mathjaxLines = []
mathjaxLines.append(".. math::\n\n")
return mathjaxLines
def endMathjax(mathjaxLines):
mathjaxLines.append("\n")
mathjaxLines.append("%s\n" % imageMarker)
return mathjaxLines
def processFile(filename):
print("in processFile for filename: %s" % filename)
imageCount = 0
modifiedFileLines = []
doWriteModifiedFile = False
with open(filename, 'rt') as f:
for line_number, line in enumerate(f):
m = image_pattern.match(line)
if m:
fileroot = m.group(1)
print("fileroot: {0}".format(fileroot))
imageCount += 1
texFilename = "src/Eqs/{0}.tex".format(fileroot)
print("will try to open %s" % texFilename)
eqType = None
eqLines = []
try:
with open(texFilename, 'rt', encoding='utf-8') as t:
print("%s file opened ok" % texFilename)
eqLines = startMathjax()
try:
for dummy, texLine in enumerate(t):
#print(texLine)
if eqType == None:
eqType = checkForEquationStart(texLine)
if eqType != None:
print("equation type: {0}".format(eqType))
else:
endFound = checkForEquationEnd(texLine, eqType)
if endFound != True:
eqLines.append(texLine)
else:
eqType = None
eqLines = endMathjax(eqLines)
print("Equation lines will be:")
print("-----------------------------")
print(*eqLines, sep="\n")
print("-----------------------------")
except UnicodeDecodeError:
print("UnicodeDecodeError reading file file %s, image markup will be left in place" % texFilename)
break
except EnvironmentError:
error = "could not open source tex file {0}, line: {1}".format(texFilename, line)
print(error)
print("image markup will be left in place")
if filename not in runReport:
runReport[filename] = []
runReport[filename].append(error)
# put the image line we could not replace back into the output
eqLines.append(line)
if len(eqLines) > 0:
modifiedFileLines.extend(eqLines)
doWriteModifiedFile = True
eqLines = []
else:
# not an equation line, so simply queue it up for output as is
modifiedFileLines.append(line)
if doWriteModifiedFile:
#print(*modifiedFileLines, sep="\n")
print("modifiedFileLines has %d lines before align center cleanup" % len(modifiedFileLines))
# First, go through the file and pull out the lines where there is
# now an image file marker followed by an align center directive
deleteLines = []
for lineNumber, line in enumerate(modifiedFileLines):
m = image_marker_pattern.match(line)
if m:
print("found image marker in line %d" % lineNumber)
n = align_pattern.match(modifiedFileLines[lineNumber+1])
if n:
print("found align center")
deleteLines.append(lineNumber)
deleteLines.append(lineNumber+1)
#When deleting, always work from the back of the list to the front
for lineNumber in reversed(deleteLines):
print(lineNumber)
del modifiedFileLines[lineNumber]
print("modifiedFileLines has %d lines after align center cleanup" % len(modifiedFileLines))
# Now we can actually write out the new contents
try:
if not os.path.exists(modifiedFileFolder):
os.makedirs(modifiedFileFolder)
nameParts = filename.split("/")
filenamePos = len(nameParts) - 1
modFilePath = "{0}{1}".format(modifiedFileFolder, nameParts[filenamePos])
modRst = open(modFilePath, "w")
for rstLine in modifiedFileLines:
modRst.write(rstLine)
modRst.close()
except OSError:
print('Error: Creating directory. ' + modifiedFileFolder)
return imageCount
def main():
fileCount = 0
totalImageCount = 0
parser = argparse.ArgumentParser(description='replace image markup in rst files with inline mathjax markup from .txt source of images')
parser.add_argument('files', metavar='file', nargs='+', help='one or more files to scan')
parsed_args = parser.parse_args()
# TODO: make originalRst folder and copy src/*.rst files into it
# Because we may decide to add files to the skip list between runs,
# if we have more than one file to process,
# remove the modified file folder so we don't end up with
# zombie modifications
if len(parsed_args.files) > 1:
for outputFile in os.listdir(modifiedFileFolder):
filePath = os.path.join(modifiedFileFolder, outputFile)
try:
if os.path.isfile(filePath):
os.unlink(filePath)
except Exception as e:
print(e)
sys.exit(1)
for filename in parsed_args.files:
doSkip = False
for skipName in skipFileList:
if filename.find(skipName) != -1:
print("skipping file: %s" % filename)
doSkip = True
runReport[filename] = ["skipped based on skipFileList"]
break
if not doSkip:
fileCount += 1
ic = processFile(filename)
totalImageCount += ic
print("============================================")
print("Processed %d rst files." % fileCount)
print("Found %d image lines." % totalImageCount)
for fileKey in runReport:
print("--------------------------------------------")
print("run report for %s:" % fileKey)
print(*runReport[fileKey], sep="\n")
print("============================================")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,64 @@
#! /usr/bin/env python3
# LAMMPS Documentation Utilities
#
# Scan for duplicate anchor labels in documentation files
#
# Copyright (C) 2017 Richard Berger
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import sys
import argparse
def main():
parser = argparse.ArgumentParser(description='scan for duplicate anchor labels in documentation files')
parser.add_argument('files', metavar='file', nargs='+', help='one or more files to scan')
parsed_args = parser.parse_args()
anchor_pattern = re.compile(r'^\.\. _(.*):$')
anchors = {}
for filename in parsed_args.files:
#print("filename: %s" % filename)
with open(filename, 'rt') as f:
for line_number, line in enumerate(f):
m = anchor_pattern.match(line)
if m:
label = m.group(1)
#print("found label: %s" % label)
if label in anchors:
anchors[label].append((filename, line_number+1))
else:
anchors[label] = [(filename, line_number+1)]
print("found %d anchor labels" % len(anchors))
count = 0
for label in sorted(anchors.keys()):
if len(anchors[label]) > 1:
print(label)
count += 1
for filename, line_number in anchors[label]:
print(" - %s:%d" % (filename, line_number))
if count > 0:
print("Found %d anchor label errors." % count)
sys.exit(1)
else:
print("No anchor label errors.")
if __name__ == "__main__":
main()

View File

@ -13,6 +13,6 @@ setup(name='LAMMPS Documentation Utilities',
entry_points = {
"console_scripts": ['txt2html = lammpsdoc.txt2html:main',
'txt2rst = lammpsdoc.txt2rst:main',
'doc_anchor_check = lammpsdoc.doc_anchor_check:main ']
'rst_anchor_check = lammpsdoc.rst_anchor_check:main ']
},
)