From 7a59fa9cde886123eb0f814fd5cb6f96b5797842 Mon Sep 17 00:00:00 2001
From: Arnau Casau <47946624+arnaucasau@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:39:36 +0100
Subject: [PATCH] Add Image checker for images' alt text and HTML tags (#2349)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This pull request adds an image checker to ensure all images have an alt
text defined, which is crucial for accessibility, and that we don't have
any `<img>` HTML tag. The output of the check shows the file name that
contains invalid images and the images' names. Ex:
```
Error in file 'docs/guides/custom-transpiler-pass.ipynb':
- The image '/images/guides/custom-transpiler-pass/DAG.png' does not have alt text.
Invalid images found 💔 See https://github.com/Qiskit/documentation#images for instructions.
```
The PR builds on the work done by @shraddha-aangiras on
https://github.com/Qiskit/documentation/pull/1800. I have made some
changes to simplify the code and incorporated @eric-arellano's feedback.
Thank you both for the work you've done!
Closes https://github.com/Qiskit/documentation/issues/1651
---------
Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com>
Co-authored-by: Shraddha Aangiras <63237790+shraddha-aangiras@users.noreply.github.com>
---
.github/workflows/main.yml | 2 +
README.md | 14 +++++++
docs/guides/custom-transpiler-pass.ipynb | 4 +-
package.json | 3 +-
scripts/js/commands/checkImages.ts | 42 +++++++++++++++++++++
scripts/js/lib/markdownImages.test.ts | 46 +++++++++++++++++++++++
scripts/js/lib/markdownImages.ts | 48 ++++++++++++++++++++++++
7 files changed, 156 insertions(+), 3 deletions(-)
create mode 100644 scripts/js/commands/checkImages.ts
create mode 100644 scripts/js/lib/markdownImages.test.ts
create mode 100644 scripts/js/lib/markdownImages.ts
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ed9fc9b2c4..7bdd0d52ac 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -28,6 +28,8 @@ jobs:
- name: File metadata
run: npm run check:metadata
+ - name: Check images
+ run: npm run check:images
- name: Spellcheck
run: npm run check:spelling
- name: Check Qiskit bot config
diff --git a/README.md b/README.md
index c5072a316b..e5b5bc4b10 100644
--- a/README.md
+++ b/README.md
@@ -289,6 +289,20 @@ npm run check:metadata -- --apis
npm run check
```
+## Check images
+
+Every image needs to have alt text for accessibility and must use markdown syntax. To avoid changing the styling of the images, the use of the `<img>` HTML tag is not allowed. The lint job in CI will fail if images do not have alt text defined or if an `<img>` tag is found.
+
+You can check it locally by running:
+
+```bash
+# Only check images
+npm run check:images
+
+# Or, run all the checks
+npm run check
+```
+
## Spellcheck
We use [cSpell](https://cspell.org) to check for spelling. The `lint` job in CI will fail if there are spelling issues.
diff --git a/docs/guides/custom-transpiler-pass.ipynb b/docs/guides/custom-transpiler-pass.ipynb
index abfa353609..ed4fb3fb5e 100644
--- a/docs/guides/custom-transpiler-pass.ipynb
+++ b/docs/guides/custom-transpiler-pass.ipynb
@@ -72,7 +72,7 @@
" qc.draw(output='mpl')\n",
"\n",
"```\n",
- "data:image/s3,"s3://crabby-images/c42c8/c42c88faccb85180edc222e2b3057c22396df2f4" alt="The circuit's DAG consists of nodes that are connected by directional edges. It is a visual way to represent qubits or classical bits, the operations, and the way that data flows. "\n",
+ "data:image/s3,"s3://crabby-images/c5ea5/c5ea580ea0a09c46ffd79385a921e9b0aaaa53a9" alt="Circuit preparing a Bell state and applying an $R_Z$ rotation depending on the measurement outcome."\n",
"\n",
"Use the `qiskit.tools.visualization.dag_drawer()` function to view this circuit's DAG. There are three kinds of graph nodes: qubit/clbit nodes (green), operation nodes (blue), and output nodes (red). Each edge indicates data flow (or dependency) between two nodes.\n",
"\n",
@@ -83,7 +83,7 @@
"dag = circuit_to_dag(qc)\n",
"dag_drawer(dag)\n",
"```\n",
- "![](/images/guides/custom-transpiler-pass/DAG.png)\n",
+ "![The circuit's DAG consists of nodes that are connected by directional edges. It is a visual way to represent qubits or classical bits, the operations, and the way that data flows.](/images/guides/custom-transpiler-pass/DAG.png)\n",
""
]
},
diff --git a/package.json b/package.json
index f67d649c9f..2e18ac8f50 100644
--- a/package.json
+++ b/package.json
@@ -15,7 +15,8 @@
"fmt": "prettier --write .",
"test": "playwright test",
"typecheck": "tsc",
- "check": "npm run check:qiskit-bot && npm run check:patterns-index && npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:orphan-pages && npm run check:fmt",
+ "check": "npm run check:qiskit-bot && npm run check:patterns-index && npm run check:images && npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:orphan-pages && npm run check:fmt",
+ "check:images": "tsx scripts/js/commands/checkImages.ts",
"check:metadata": "tsx scripts/js/commands/checkMetadata.ts",
"check:spelling": "tsx scripts/js/commands/checkSpelling.ts",
"check:fmt": "prettier --check .",
diff --git a/scripts/js/commands/checkImages.ts b/scripts/js/commands/checkImages.ts
new file mode 100644
index 0000000000..3b410600e4
--- /dev/null
+++ b/scripts/js/commands/checkImages.ts
@@ -0,0 +1,42 @@
+// This code is a Qiskit project.
+//
+// (C) Copyright IBM 2024.
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+import { globby } from "globby";
+import { collectInvalidImageErrors } from "../lib/markdownImages.js";
+import { readMarkdown } from "../lib/markdownReader.js";
+
+async function main() {
+  const files = await globby(["docs/**/*.{ipynb,mdx}", "!docs/api/**/*.mdx"]);
+  const fileErrors: string[] = [];
+
+  for (const file of files) {
+    const markdown = await readMarkdown(file);
+    const imageErrors = await collectInvalidImageErrors(markdown);
+
+    if (imageErrors.size) {
+      fileErrors.push(
+        `Error in file '${file}':\n\t- ${[...imageErrors].join("\n\t- ")}`,
+      );
+    }
+  }
+
+  if (fileErrors.length) {
+    fileErrors.forEach((error) => console.log(error));
+    console.error(
+      "\n💔 Some images have problems. See https://github.com/Qiskit/documentation#images for instructions.\n",
+    );
+    process.exit(1);
+  }
+  console.log("✅ All images are valid.\n");
+}
+
+main().then(() => process.exit());
diff --git a/scripts/js/lib/markdownImages.test.ts b/scripts/js/lib/markdownImages.test.ts
new file mode 100644
index 0000000000..b885425805
--- /dev/null
+++ b/scripts/js/lib/markdownImages.test.ts
@@ -0,0 +1,46 @@
+// This code is a Qiskit project.
+//
+// (C) Copyright IBM 2023.
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+import { expect, test } from "@playwright/test";
+
+import { collectInvalidImageErrors } from "./markdownImages.js";
+
+test("Test the finding of invalid images", async () => {
+  const markdown = `
+# A header
+
+![Our first image with alt text](/images/img1.png)
+
+![](/images/invalid_img1.png)
+
+![Here's another valid image](/images/img2.png)
+
+![](/images/invalid_img2.png)
+
+![](/images/invalid_img2.png)
+
+<img src="/images/HTMLexample1.jpg" alt="" width="200"/>
+
+<img src="/images/HTMLexample2.jpg" alt="Example" width="200"/>
+
+![And now, our last link](/images/img3.png)
+  `;
+  const images = await collectInvalidImageErrors(markdown);
+  const correct_images = new Set([
+    "The image '/images/HTMLexample1.jpg' uses an HTML <img> tag instead of markdown syntax.",
+    "The image '/images/HTMLexample2.jpg' uses an HTML <img> tag instead of markdown syntax.",
+    "The image '/images/invalid_img1.png' does not have alt text.",
+    "The image '/images/invalid_img2.png' does not have alt text.",
+  ]);
+
+  expect(images).toEqual(correct_images);
+});
diff --git a/scripts/js/lib/markdownImages.ts b/scripts/js/lib/markdownImages.ts
new file mode 100644
index 0000000000..590db9b167
--- /dev/null
+++ b/scripts/js/lib/markdownImages.ts
@@ -0,0 +1,48 @@
+// This code is a Qiskit project.
+//
+// (C) Copyright IBM 2024.
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+import { load } from "cheerio";
+import { unified } from "unified";
+import { Root } from "remark-mdx";
+import { visit } from "unist-util-visit";
+import remarkParse from "remark-parse";
+import remarkGfm from "remark-gfm";
+import remarkStringify from "remark-stringify";
+
+export async function collectInvalidImageErrors(
+  markdown: string,
+): Promise<Set<string>> {
+  const imagesErrors = new Set<string>();
+
+  await unified()
+    .use(remarkParse)
+    .use(remarkGfm)
+    .use(() => (tree: Root) => {
+      visit(tree, "image", (node) => {
+        if (!node.alt) {
+          imagesErrors.add(`The image '${node.url}' does not have alt text.`);
+        }
+      });
+      visit(tree, "html", (node) => {
+        const $ = load(node.value);
+        if ($("img").length) {
+          imagesErrors.add(
+            `The image '${$("img").attr("src")}' uses an HTML <img> tag instead of markdown syntax.`,
+          );
+        }
+      });
+    })
+    .use(remarkStringify)
+    .process(markdown);
+
+  return imagesErrors;
+}