Split internal and external link checker into separate programs (#916)
Closes https://github.com/Qiskit/documentation/issues/876. This split brings several improvements: 1. Simpler internal link checker code. 2. The external link checker now deduplicates links across all files, which avoids repeating requests. 3. The external link checker logs its progress every 20 links. 4. It's possible to check external links without checking internal links. We run the external link checker in our weekly cron job over all files, other than translations and historical Qiskit versions. That change was added in https://github.com/Qiskit/documentation/pull/913.
This commit is contained in:
parent
a2edf63200
commit
b91049d822
|
@ -19,7 +19,7 @@ on:
|
|||
- "docs/api/qiskit-ibm-provider/**/*"
|
||||
- "docs/api/qiskit-ibm-runtime/**/*"
|
||||
- "public/api/**/*"
|
||||
- "scripts/checkLinks.ts"
|
||||
- "scripts/checkInternalLinks.ts"
|
||||
- "scripts/lib/links/ignores.ts"
|
||||
|
||||
jobs:
|
||||
|
@ -36,7 +36,7 @@ jobs:
|
|||
run: npm ci
|
||||
- name: Check internal links
|
||||
run: >
|
||||
npm run check:links --
|
||||
npm run check:internal-links --
|
||||
--qiskit-release-notes
|
||||
--current-apis
|
||||
--dev-apis
|
||||
|
|
|
@ -31,7 +31,7 @@ jobs:
|
|||
- name: Spellcheck
|
||||
run: npm run check:spelling
|
||||
- name: Internal link checker
|
||||
run: npm run check:links
|
||||
run: npm run check:internal-links
|
||||
- name: Formatting
|
||||
run: npm run check:fmt
|
||||
- name: Typecheck
|
||||
|
@ -54,4 +54,4 @@ jobs:
|
|||
if: steps.changed-docs-files.outputs.any_changed == 'true'
|
||||
run: |
|
||||
echo "${{ steps.changed-docs-files.outputs.all_changed_files }}" > changed.txt
|
||||
npm run check-pages-render -- --from-file changed.txt
|
||||
npm run check:pages-render -- --from-file changed.txt
|
||||
|
|
|
@ -34,7 +34,7 @@ jobs:
|
|||
sleep 20
|
||||
- name: Check all pages render
|
||||
run: >
|
||||
npm run check-pages-render --
|
||||
npm run check:pages-render --
|
||||
--non-api
|
||||
--qiskit-release-notes
|
||||
--current-apis
|
||||
|
@ -54,10 +54,6 @@ jobs:
|
|||
run: npm ci
|
||||
- name: Check external links
|
||||
run: >
|
||||
npm run check:links --
|
||||
--qiskit-release-notes
|
||||
--current-apis
|
||||
--dev-apis
|
||||
--historical-apis
|
||||
--skip-broken-historical
|
||||
--external
|
||||
npm run check:external-links --
|
||||
'docs/**/*.{md,mdx,ipynb}'
|
||||
'!docs/api/qiskit/[0-9]*/*'
|
||||
|
|
32
README.md
32
README.md
|
@ -178,27 +178,35 @@ about it.
|
|||
|
||||
## Check for broken links
|
||||
|
||||
CI will check for broken links. You can also check locally:
|
||||
We have two broken link checkers: for internal links and for external links.
|
||||
|
||||
To check internal links:
|
||||
|
||||
```bash
|
||||
# Only check for internal broken links
|
||||
npm run check:links
|
||||
# Only check non-API docs
|
||||
npm run check:internal-links
|
||||
|
||||
# Enable the validation of external links
|
||||
npm run check:links -- --external
|
||||
|
||||
# By default, only the non-API docs are checked. You can add any of the
|
||||
# below arguments to also check API docs.
|
||||
npm run check:links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes
|
||||
# You can add any of the below arguments to also check API docs.
|
||||
npm run check:internal-links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes
|
||||
|
||||
# However, `--historical-apis` currently has failing versions, so you may
|
||||
# want to add `--skip-broken-historical`.
|
||||
npm run check:links -- --historical-apis --skip-broken-historical
|
||||
npm run check:internal-links -- --historical-apis --skip-broken-historical
|
||||
|
||||
# Or, run all the checks. Although this only checks non-API docs.
|
||||
npm run check
|
||||
```
|
||||
|
||||
To check external links:
|
||||
|
||||
```bash
|
||||
# Specify the files you want after `--`
|
||||
npm run check:external-links -- docs/run/index.md docs/run/circuit-execution.mdx
|
||||
|
||||
# You can also use globs
|
||||
npm run check:external-links -- 'docs/run/*' '!docs/run/index.mdx'
|
||||
```
|
||||
|
||||
## Check file metadata
|
||||
|
||||
Every file needs to have a `title` and `description`. The `lint` job in CI will fail with instructions for any bad file.
|
||||
|
@ -260,9 +268,9 @@ It's possible to write broken pages that crash when loaded. This is usually due
|
|||
To check that all the non-API docs render:
|
||||
|
||||
1. Start up the local preview with `./start` by following the instructions at [Preview the docs locally](#preview-the-docs-locally)
|
||||
2. In a new tab, `npm run check-pages-render -- --non-api`
|
||||
2. In a new tab, `npm run check:pages-render -- --non-api`
|
||||
|
||||
You can also check that API docs and translations render by using any of these arguments: `npm run check-pages-render -- --non-api --qiskit-release-notes --current-apis --dev-apis --historical-apis --translations`. Warning that this is exponentially slower.
|
||||
You can also check that API docs and translations render by using any of these arguments: `npm run check:pages-render -- --non-api --qiskit-release-notes --current-apis --dev-apis --historical-apis --translations`. Warning that this is exponentially slower.
|
||||
|
||||
CI will check on every PR that any changed files render correctly. We also run a weekly cron job to check that every page renders correctly.
|
||||
|
||||
|
|
|
@ -5,12 +5,13 @@
|
|||
"author": "Qiskit Development Team",
|
||||
"license": "Apache-2.0",
|
||||
"scripts": {
|
||||
"check": "npm run check:metadata && npm run check:spelling && npm run check:links && npm run check:fmt",
|
||||
"check": "npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:fmt",
|
||||
"check:metadata": "node -r esbuild-register scripts/commands/checkMetadata.ts",
|
||||
"check:links": "node -r esbuild-register scripts/commands/checkLinks.ts",
|
||||
"check:spelling": "cspell --relative --no-progress docs/**/*.md* docs/api/**/*.md* --config cspell/cSpell.json",
|
||||
"check:fmt": "prettier --check .",
|
||||
"check-pages-render": "node -r esbuild-register scripts/commands/checkPagesRender.ts",
|
||||
"check:internal-links": "node -r esbuild-register scripts/commands/checkInternalLinks.ts",
|
||||
"check:external-links": "node -r esbuild-register scripts/commands/checkExternalLinks.ts",
|
||||
"check:pages-render": "node -r esbuild-register scripts/commands/checkPagesRender.ts",
|
||||
"fmt": "prettier --write .",
|
||||
"test": "jest",
|
||||
"typecheck": "tsc",
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
// This code is a Qiskit project.
|
||||
//
|
||||
// (C) Copyright IBM 2024.
|
||||
//
|
||||
// This code is licensed under the Apache License, Version 2.0. You may
|
||||
// obtain a copy of this license in the LICENSE file in the root directory
|
||||
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
//
|
||||
// Any modifications or derivative works of this code must retain this
|
||||
// copyright notice, and modified files need to carry a notice indicating
|
||||
// that they have been altered from the originals.
|
||||
|
||||
import { globby } from "globby";
|
||||
import yargs from "yargs/yargs";
|
||||
import { hideBin } from "yargs/helpers";
|
||||
|
||||
import { ExternalLink } from "../lib/links/ExternalLink";
|
||||
import { parseFile } from "../lib/links/extractLinks";
|
||||
import { addLinksToMap } from "../lib/links/FileBatch";
|
||||
|
||||
interface Arguments {
|
||||
[x: string]: unknown;
|
||||
globs?: string[];
|
||||
}
|
||||
|
||||
const readArgs = (): Arguments => {
|
||||
return yargs(hideBin(process.argv))
|
||||
.version(false)
|
||||
.command("$0 [globs..]", "")
|
||||
.parseSync();
|
||||
};
|
||||
|
||||
async function main() {
|
||||
const args = readArgs();
|
||||
const filePaths = await globby(args.globs || []);
|
||||
|
||||
const links = await getLinks(filePaths);
|
||||
|
||||
// We use a for loop to reduce the risk of rate limiting.
|
||||
let allGood = true;
|
||||
let numLinksChecked = 1;
|
||||
for (const link of links) {
|
||||
const result = await link.check();
|
||||
|
||||
// This script can be slow, so log progress every 20 links.
|
||||
if (numLinksChecked % 20 == 0) {
|
||||
console.log(`Checked ${numLinksChecked} / ${links.length} links`);
|
||||
}
|
||||
numLinksChecked++;
|
||||
|
||||
if (result === undefined) continue;
|
||||
console.error(result);
|
||||
allGood = false;
|
||||
}
|
||||
|
||||
if (!allGood) {
|
||||
console.error(
|
||||
`\n\n💔 Some external links appear broken, although it's possible they are flakes. Checked ${links.length} links.`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(
|
||||
`\n\n✅ No external links are broken. Checked ${links.length} links.`,
|
||||
);
|
||||
}
|
||||
|
||||
async function getLinks(filePaths: string[]): Promise<ExternalLink[]> {
|
||||
const linksToOriginFiles = new Map<string, string[]>();
|
||||
for (const filePath of filePaths) {
|
||||
const parsed = await parseFile(filePath);
|
||||
addLinksToMap(filePath, parsed.externalLinks, linksToOriginFiles);
|
||||
}
|
||||
return Array.from(linksToOriginFiles.entries()).map(
|
||||
([link, originFiles]) => new ExternalLink(link, originFiles),
|
||||
);
|
||||
}
|
||||
|
||||
main().then(() => process.exit());
|
|
@ -29,7 +29,6 @@ const SYNTHETIC_FILES: string[] = [
|
|||
|
||||
interface Arguments {
|
||||
[x: string]: unknown;
|
||||
external: boolean;
|
||||
currentApis: boolean;
|
||||
devApis: boolean;
|
||||
historicalApis: boolean;
|
||||
|
@ -40,12 +39,6 @@ interface Arguments {
|
|||
const readArgs = (): Arguments => {
|
||||
return yargs(hideBin(process.argv))
|
||||
.version(false)
|
||||
.option("external", {
|
||||
type: "boolean",
|
||||
default: false,
|
||||
description:
|
||||
"Should external links be checked? This slows down the script, but is useful to check.",
|
||||
})
|
||||
.option("current-apis", {
|
||||
type: "boolean",
|
||||
default: false,
|
||||
|
@ -91,7 +84,7 @@ async function main() {
|
|||
|
||||
let allGood = true;
|
||||
for (const fileBatch of fileBatches) {
|
||||
const allValidLinks = await fileBatch.check(args.external, otherFiles);
|
||||
const allValidLinks = await fileBatch.checkInternalLinks(otherFiles);
|
||||
if (!allValidLinks) {
|
||||
allGood = false;
|
||||
}
|
|
@ -13,7 +13,6 @@
|
|||
import { globby } from "globby";
|
||||
|
||||
import { InternalLink, File } from "./InternalLink";
|
||||
import { ExternalLink } from "./ExternalLink";
|
||||
import {
|
||||
ALWAYS_IGNORED_URLS,
|
||||
FILES_TO_IGNORES,
|
||||
|
@ -59,75 +58,50 @@ export class FileBatch {
|
|||
/**
|
||||
* Load and process the file batch.
|
||||
*
|
||||
* Returns a triplet:
|
||||
* Returns a pair:
|
||||
* 1. A list of `File` objects with their anchors. These represent
|
||||
* the universe of valid internal links for this batch, other
|
||||
* than any additional we may add at check-time, e.g. images.
|
||||
* 2. A list of InternalLink objects to validate.
|
||||
* 3. A list of ExternalLink objects to validate.
|
||||
*/
|
||||
async load(
|
||||
loadExternalLinks: boolean,
|
||||
): Promise<[File[], InternalLink[], ExternalLink[]]> {
|
||||
async load(): Promise<[File[], InternalLink[]]> {
|
||||
const files: File[] = [];
|
||||
for (let filePath of this.toLoad) {
|
||||
const parsed = await parseFile(filePath);
|
||||
files.push(new File(filePath, parsed.anchors));
|
||||
}
|
||||
|
||||
const internalLinksToOriginFiles = new Map<string, string[]>();
|
||||
const externalLinksToOriginFiles = new Map<string, string[]>();
|
||||
const linksToOriginFiles = new Map<string, string[]>();
|
||||
for (const filePath of this.toCheck) {
|
||||
const parsed = await parseFile(filePath);
|
||||
files.push(new File(filePath, parsed.anchors));
|
||||
addLinksToMap(filePath, parsed.internalLinks, internalLinksToOriginFiles);
|
||||
if (loadExternalLinks) {
|
||||
addLinksToMap(
|
||||
filePath,
|
||||
parsed.externalLinks,
|
||||
externalLinksToOriginFiles,
|
||||
);
|
||||
}
|
||||
addLinksToMap(filePath, parsed.internalLinks, linksToOriginFiles);
|
||||
}
|
||||
|
||||
const internalLinks = Array.from(internalLinksToOriginFiles.entries()).map(
|
||||
const links = Array.from(linksToOriginFiles.entries()).map(
|
||||
([link, originFiles]) => new InternalLink(link, originFiles),
|
||||
);
|
||||
const externalLinks = Array.from(externalLinksToOriginFiles.entries()).map(
|
||||
([link, originFiles]) => new ExternalLink(link, originFiles),
|
||||
);
|
||||
return [files, internalLinks, externalLinks];
|
||||
return [files, links];
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that all links in this file batch are valid.
|
||||
* Check that all internal links in this file batch are valid.
|
||||
*
|
||||
* Logs the results to the console and returns `true` if there were no issues.
|
||||
*/
|
||||
async check(
|
||||
checkExternalLinks: boolean,
|
||||
otherFiles: File[],
|
||||
): Promise<boolean> {
|
||||
console.log(`\n\nChecking links for ${this.description}`);
|
||||
async checkInternalLinks(otherFiles: File[]): Promise<boolean> {
|
||||
console.log(`\n\nChecking internal links for ${this.description}`);
|
||||
|
||||
const [docsFiles, internalLinkList, externalLinkList] =
|
||||
await this.load(checkExternalLinks);
|
||||
const [docsFiles, links] = await this.load();
|
||||
const existingFiles = docsFiles.concat(otherFiles);
|
||||
|
||||
const results = internalLinkList.map((link) => link.check(existingFiles));
|
||||
|
||||
// For loop reduces the risk of rate-limiting.
|
||||
for (let link of externalLinkList) {
|
||||
results.push(await link.check());
|
||||
}
|
||||
|
||||
let allGood = true;
|
||||
results
|
||||
.filter((res) => res !== undefined)
|
||||
.forEach((linkError) => {
|
||||
console.error(linkError);
|
||||
allGood = false;
|
||||
});
|
||||
links.forEach((link) => {
|
||||
const result = link.check(existingFiles);
|
||||
if (result === undefined) return;
|
||||
console.error(result);
|
||||
allGood = false;
|
||||
});
|
||||
return allGood;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue