From d26560950b6ba6454c11cd978d3e6bb4d38430e8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 9 May 2018 10:18:49 -0300 Subject: [PATCH] scripts/documentation-file-ref-check: rewrite it in perl with auto-fix mode The original shell script works, but: 1) it is too slow; 2) it is hard to exclude regex patterns. Convert it to Perl. Here, the new version is able to check the entire tree in less than a second (once cached): real 0m0,284s user 0m0,668s sys 0m0,778s The old version takes more than a minute to complete (also once cached): real 1m17,905s user 0m25,583s sys 0m55,334s It also produces fewer false positives (if any). The new script also contains an auto-fix mode. Usually, file references get lost when they're moved to some other place and/or renamed to .rst. Add an experimental mode to auto-fix those. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/documentation-file-ref-check | 123 ++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 11 deletions(-) diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check index bc1659900e89..2520bc14ffac 100755 --- a/scripts/documentation-file-ref-check +++ b/scripts/documentation-file-ref-check @@ -1,15 +1,116 @@ -#!/bin/sh +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# # Treewide grep for references to files under Documentation, and report # non-existing files in stderr. -for f in $(git ls-files); do - for ref in $(grep -ho "Documentation/[A-Za-z0-9_.,~/*+-]*" "$f"); do - # presume trailing . and , are not part of the name - ref=${ref%%[.,]} +use warnings; +use strict; +use Getopt::Long qw(:config no_auto_abbrev); - # use ls to handle wildcards - if ! 
ls $ref >/dev/null 2>&1; then - echo "$f: $ref" >&2 - fi - done -done +my $scriptname = $0; +$scriptname =~ s,.*/([^/]+/),$1,; + +# Parse arguments +my $help = 0; +my $fix = 0; + +GetOptions( + 'fix' => \$fix, + 'h|help|usage' => \$help, +); + +if ($help != 0) { + print "$scriptname [--help] [--fix-rst]\n"; + exit -1; +} + +# Step 1: find broken references +print "Finding broken references. This may take a while... " if ($fix); + +my %broken_ref; + +open IN, "git grep 'Documentation/'|" + or die "Failed to run git grep"; +while () { + next if (!m/^([^:]+):(.*)/); + + my $f = $1; + my $ln = $2; + + # Makefiles contain nasty expressions to parse docs + next if ($f =~ m/Makefile/); + # Skip this script + next if ($f eq $scriptname); + + if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*+-]*),) { + my $prefix = $1; + my $ref = $2; + my $base = $2; + + $ref =~ s/[\,\.]+$//; + + my $fulref = "$prefix$ref"; + + $fulref =~ s/^(\ 1) { + print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n"; + foreach my $j (@find) { + $j =~ s,^./,,; + print STDERR " $j\n"; + } + } else { + $f = $find[0]; + $f =~ s,^./,,; + print "INFO: Replacing $ref to $f\n"; + foreach my $j (qx(git grep -l $ref)) { + qx(sed "s\@$ref\@$f\@g" -i $j); + } + } +}