From 5b0859eb4cc7aafcc6bd85b2b758f3f7137c9c11 Mon Sep 17 00:00:00 2001 From: Arnau Casau <47946624+arnaucasau@users.noreply.github.com> Date: Thu, 29 Feb 2024 19:22:24 +0100 Subject: [PATCH] Refactor htmlToMd.ts (#934) Part of #845 This is the first refactor of `htmlToMd.ts`. It brings the following changes: - New function named `buildMathExpression` to build math expressions whether they are inlined or not - New function named `removeEmphasisSpaces` to remove the initial and trailing spaces in an emphasis node - The Unified plugin and the creation of the handlers for it were separated into two functions. The unified plugin was inlined into `generateMarkdownFile` --------- Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> --- scripts/lib/api/htmlToMd.ts | 152 ++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 75 deletions(-) diff --git a/scripts/lib/api/htmlToMd.ts b/scripts/lib/api/htmlToMd.ts index 927c780645..11015dd2f0 100644 --- a/scripts/lib/api/htmlToMd.ts +++ b/scripts/lib/api/htmlToMd.ts @@ -22,7 +22,7 @@ import remarkMath from "remark-math"; import remarkMdx from "remark-mdx"; import { MdxJsxFlowElement } from "mdast-util-mdx-jsx"; import { visit } from "unist-util-visit"; -import { Root } from "mdast"; +import { Emphasis, Root } from "mdast"; import { processHtml } from "./processHtml"; import { HtmlToMdResult } from "./HtmlToMdResult"; @@ -54,6 +54,39 @@ async function generateMarkdownFile( mainHtml: string, meta: Metadata, ): Promise { + const handlers = prepareHandlers(meta); + const mdFile = await unified() + .use(rehypeParse) + .use(remarkGfm) + .use(remarkMath) + .use(remarkMdx) + .use(rehypeRemark, { + handlers, + }) + .use(remarkStringify, remarkStringifyOptions) + .use(() => (root: Root) => { + // merge contiguous emphasis + visit(root, "emphasis", (node, index, parent) => { + if (index === null || parent === null) return; + let nextIndex = index + 1; + while (parent.children[nextIndex]?.type === "emphasis") { + node.children.push( + ...((parent.children[nextIndex] as any).children ?? []), + ); + nextIndex++; + } + parent.children.splice(index + 1, nextIndex - (index + 1)); + + removeEmphasisSpaces(node, index, parent, "initial"); + removeEmphasisSpaces(node, index, parent, "tail"); + }); + }) + .process(mainHtml); + + return mdFile.toString().replaceAll(``, ""); +} + +function prepareHandlers(meta: Metadata): Record { const handlers: Record = { br(h, node: any) { return all(h, node); @@ -66,13 +99,7 @@ async function generateMarkdownFile( }, span(h, node: any) { if (node.properties.className?.includes("math")) { - let value = node.children[0].value; - const prefix = "\\("; - const sufix = "\\)"; - if (value.startsWith(prefix) && value.endsWith(sufix)) { - value = value.substring(prefix.length, value.length - sufix.length); - } - return { type: "inlineMath", value }; + return buildMathExpression(node, "inlineMath"); } if (node.properties.id && node.properties.className?.includes("target")) { @@ -87,13 +114,7 @@ async function generateMarkdownFile( }, pre(h, node: any) { if (node.properties.className?.includes("math")) { - let value = node.children[0].value; - const prefix = "\\["; - const sufix = "\\]"; - if (value.startsWith(prefix) && value.endsWith(sufix)) { - value = value.substring(prefix.length, value.length - sufix.length); - } - return { type: "math", value }; + return buildMathExpression(node, "math"); } return defaultHandlers.pre(h, node); }, @@ -149,10 +170,7 @@ async function generateMarkdownFile( (child: any) => child.properties.className?.includes("versionmodified"), ); - let title = toText(titleNode).trim(); - if (title.endsWith(":")) { - title = title.slice(0, -1); - } + const title = toText(titleNode).trim().replace(/:$/, ""); const otherChildren = without(root.children, titleNode); return buildAdmonition({ title, @@ -172,64 +190,38 @@ async function generateMarkdownFile( }, }; - const mdFile = await unified() - .use(rehypeParse) - .use(remarkGfm) - .use(remarkMath) - .use(remarkMdx) - .use(rehypeRemark, { - handlers, - }) - .use(remarkStringify, remarkStringifyOptions) - .use(() => (root: Root) => { - // merge contiguous emphasis - visit(root, "emphasis", (node, index, parent) => { - if (index === null || parent === null) return; - let nextIndex = index + 1; - while (parent.children[nextIndex]?.type === "emphasis") { - node.children.push( - ...((parent.children[nextIndex] as any).children ?? []), - ); - nextIndex++; - } - parent.children.splice(index + 1, nextIndex - (index + 1)); + return handlers; +} - // remove initial and trailing spaces from emphasis - const firstChild = first(node.children); - if (firstChild?.type === "text") { - const match = firstChild.value.match(/^\s+/); - if (match) { - if (match[0] === firstChild.value) { - node.children = tail(node.children); - } else { - firstChild.value = removePrefix(firstChild.value, match[0]); - } - parent.children.splice(index, 0, { - type: "text", - value: match[0], - }); - } - } - const lastChild = last(node.children); - if (lastChild?.type === "text") { - const match = lastChild.value.match(/\s+$/); - if (match) { - if (match[0] === lastChild.value) { - node.children = initial(node.children); - } else { - lastChild.value = removeSuffix(lastChild.value, match[0]); - } - parent.children.splice(index + 1, 0, { - type: "text", - value: match[0], - }); - } - } +function removeEmphasisSpaces( + node: Emphasis, + index: number, + parent: any, + position: "initial" | "tail", +): void { + const child = + position == "initial" ? first(node.children) : last(node.children); + const reg = position == "initial" ? /^\s+/ : /\s+$/; + const idx = position == "initial" ? index : index + 1; + + if (child?.type === "text") { + const match = child.value.match(reg); + if (match) { + if (match[0] === child.value) { + node.children = + position == "initial" ? tail(node.children) : initial(node.children); + } else { + child.value = + position == "initial" + ? removePrefix(child.value, match[0]) + : removeSuffix(child.value, match[0]); + } + parent.children.splice(idx, 0, { + type: "text", + value: match[0], }); - }) - .process(mainHtml); - - return mdFile.toString().replaceAll(``, ""); + } + } } function buildAdmonition(options: { @@ -271,3 +263,13 @@ function buildSpanId(id: string): MdxJsxFlowElement { children: [], }; } + +function buildMathExpression(node: any, type: "math" | "inlineMath"): any { + let value = node.children[0].value; + const prefix = type == "math" ? "\\[" : "\\("; + const sufix = type == "math" ? "\\]" : "\\)"; + if (value.startsWith(prefix) && value.endsWith(sufix)) { + value = value.substring(prefix.length, value.length - sufix.length); + } + return { type: type, value }; +}