Add Image checker for images' alt text and HTML tags (#2349)

This pull request adds an image checker to ensure that all images have
alt text defined, which is crucial for accessibility, and that no `<img>`
HTML tags are used. The output of the check shows the name of each file
that contains invalid images, along with the path of each image. Ex:
```
Error in file 'docs/guides/custom-transpiler-pass.ipynb':
	- The image '/images/guides/custom-transpiler-pass/DAG.png' does not have alt text.

💔 Some images have problems. See https://github.com/Qiskit/documentation#images for instructions.
```
The PR builds on the work done by @shraddha-aangiras on
https://github.com/Qiskit/documentation/pull/1800. I have made some
changes to simplify the code and incorporated @eric-arellano's feedback.
Thank you both for the work you've done!

Closes https://github.com/Qiskit/documentation/issues/1651

---------

Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com>
Co-authored-by: Shraddha Aangiras <63237790+shraddha-aangiras@users.noreply.github.com>
This commit is contained in:
Arnau Casau 2024-11-21 16:39:36 +01:00 committed by GitHub
parent 3eadfe10b7
commit 7a59fa9cde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 156 additions and 3 deletions

View File

@ -28,6 +28,8 @@ jobs:
- name: File metadata
run: npm run check:metadata
- name: Check images
run: npm run check:images
- name: Spellcheck
run: npm run check:spelling
- name: Check Qiskit bot config

View File

@ -289,6 +289,20 @@ npm run check:metadata -- --apis
npm run check
```
## Check images
Every image needs to have alt text for accessibility and must use markdown syntax. To avoid changing the styling of the images, the use of the `<img>` HTML tag is not allowed. The `lint` job in CI will fail if any image does not have alt text defined or if an `<img>` tag is found.
You can run this check locally with:
```bash
# Only check images
npm run check:images
# Or, run all the checks
npm run check
```
## Spellcheck
We use [cSpell](https://cspell.org) to check for spelling. The `lint` job in CI will fail if there are spelling issues.

View File

@ -72,7 +72,7 @@
" qc.draw(output='mpl')\n",
"\n",
"```\n",
"![The circuit's DAG consists of nodes that are connected by directional edges. It is a visual way to represent qubits or classical bits, the operations, and the way that data flows. ](/images/guides/custom-transpiler-pass/DAG_circ.png \"DAG\")\n",
"![Circuit preparing a Bell state and applying an $R_Z$ rotation depending on the measurement outcome.](/images/guides/custom-transpiler-pass/DAG_circ.png \"Circuit\")\n",
"\n",
"Use the `qiskit.tools.visualization.dag_drawer()` function to view this circuit's DAG. There are three kinds of graph nodes: qubit/clbit nodes (green), operation nodes (blue), and output nodes (red). Each edge indicates data flow (or dependency) between two nodes.\n",
"\n",
@ -83,7 +83,7 @@
"dag = circuit_to_dag(qc)\n",
"dag_drawer(dag)\n",
"```\n",
"![](/images/guides/custom-transpiler-pass/DAG.png)\n",
"![The circuit's DAG consists of nodes that are connected by directional edges. It is a visual way to represent qubits or classical bits, the operations, and the way that data flows.](/images/guides/custom-transpiler-pass/DAG.png \"DAG\")\n",
"</details>"
]
},

View File

@ -15,7 +15,8 @@
"fmt": "prettier --write .",
"test": "playwright test",
"typecheck": "tsc",
"check": "npm run check:qiskit-bot && npm run check:patterns-index && npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:orphan-pages && npm run check:fmt",
"check": "npm run check:qiskit-bot && npm run check:patterns-index && npm run check:images && npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:orphan-pages && npm run check:fmt",
"check:images": "tsx scripts/js/commands/checkImages.ts",
"check:metadata": "tsx scripts/js/commands/checkMetadata.ts",
"check:spelling": "tsx scripts/js/commands/checkSpelling.ts",
"check:fmt": "prettier --check .",

View File

@ -0,0 +1,42 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { globby } from "globby";
import { collectInvalidImageErrors } from "../lib/markdownImages.js";
import { readMarkdown } from "../lib/markdownReader.js";
/**
 * Checks the images of every `.ipynb` and `.mdx` file under `docs/`, skipping
 * the `.mdx` files under `docs/api/`.
 *
 * Prints one report per file that has invalid images and exits with code 1
 * when at least one report was produced; otherwise prints a success message.
 */
async function main() {
  const docPaths = await globby([
    "docs/**/*.{ipynb,mdx}",
    "!docs/api/**/*.mdx",
  ]);
  const reports: string[] = [];

  for (const docPath of docPaths) {
    const content = await readMarkdown(docPath);
    const problems = await collectInvalidImageErrors(content);
    if (problems.size === 0) {
      continue;
    }
    // One bullet per distinct problem found in this file.
    const bullets = Array.from(problems).join("\n\t- ");
    reports.push(`Error in file '${docPath}':\n\t- ${bullets}`);
  }

  if (reports.length === 0) {
    console.log("✅ All images are valid.\n");
    return;
  }

  for (const report of reports) {
    console.log(report);
  }
  console.error(
    "\n💔 Some images have problems. See https://github.com/Qiskit/documentation#images for instructions.\n",
  );
  process.exit(1);
}

main().then(() => {
  process.exit();
});

View File

@ -0,0 +1,46 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2023.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { expect, test } from "@playwright/test";
import { collectInvalidImageErrors } from "./markdownImages.js";
test("Test the finding of invalid images", async () => {
  // The fixture mixes images that have alt text, images without alt text (one
  // of them repeated), and HTML `<img>` tags with and without an `alt` value.
  const input = `
# A header
![Our first image with alt text](/images/img1.png)
![](/images/invalid_img1.png)
![Here's another valid image](/images/img2.png)
![](/images/invalid_img2.png)
![](/images/invalid_img2.png)
<img src="/images/HTMLexample1.jpg" alt="" width="200"/>
<img src="/images/HTMLexample2.jpg" alt="Example" width="200"/>
![And now, our last link](https://ibm.com)
`;

  // The result is a set, so the repeated invalid image is expected only once.
  const expectedErrors = new Set<string>([
    "The image '/images/HTMLexample1.jpg' uses an HTML <img> tag instead of markdown syntax.",
    "The image '/images/HTMLexample2.jpg' uses an HTML <img> tag instead of markdown syntax.",
    "The image '/images/invalid_img1.png' does not have alt text.",
    "The image '/images/invalid_img2.png' does not have alt text.",
  ]);

  const foundErrors = await collectInvalidImageErrors(input);

  expect(foundErrors).toEqual(expectedErrors);
});

View File

@ -0,0 +1,48 @@
// This code is a Qiskit project.
//
// (C) Copyright IBM 2024.
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.
import { load } from "cheerio";
import { unified } from "unified";
import { Root } from "remark-mdx";
import { visit } from "unist-util-visit";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import remarkStringify from "remark-stringify";
/**
 * Collects an error message for every invalid image found in the markdown.
 *
 * An image is reported when it is written in markdown syntax without alt
 * text, or when it is written with an HTML `<img>` tag.
 *
 * @param markdown - The markdown content to check.
 * @returns The set of error messages. Identical messages are kept only once.
 */
export async function collectInvalidImageErrors(
  markdown: string,
): Promise<Set<string>> {
  const imagesErrors = new Set<string>();

  await unified()
    .use(remarkParse)
    .use(remarkGfm)
    .use(() => (tree: Root) => {
      visit(tree, "image", (node) => {
        if (!node.alt) {
          imagesErrors.add(`The image '${node.url}' does not have alt text.`);
        }
      });
      visit(tree, "html", (node) => {
        // A single HTML node can hold several `<img>` tags (e.g. consecutive
        // tags in one HTML block), so report each of them instead of only
        // the first match.
        const $ = load(node.value);
        $("img").each((_index, img) => {
          imagesErrors.add(
            `The image '${$(img).attr("src")}' uses an HTML <img> tag instead of markdown syntax.`,
          );
        });
      });
    })
    .use(remarkStringify)
    .process(markdown);

  return imagesErrors;
}