refactor(parser): paragraph, table and fence
This commit is contained in:
parent
0e3561add3
commit
7df26af941
|
@ -1,6 +1,6 @@
|
|||
# @md-report/parser
|
||||
|
||||
Transfer plain markdown text to md-report data structure.
|
||||
Transform plain Markdown text into markdown tokens.
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
"prepublishOnly": "nr build"
|
||||
},
|
||||
"dependencies": {
|
||||
"docx": "^7.3.0",
|
||||
"js-yaml": "^4.1.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
// MarkdownIt token types.

/** Token type string of a markdown-it opening heading token. */
export const HEADING_OPEN = 'heading_open'

/** Token type string of a markdown-it opening paragraph token. */
export const PARAGRAPH_OPEN = 'paragraph_open'

// Style

/** Font pair with separate faces for ASCII and East Asian glyphs. */
export const KAI_TI_FIRA_CODE_FONTS = {
  ascii: 'Fira Code',
  eastAsia: 'KaiTi',
}
|
|
@ -1,128 +0,0 @@
|
|||
import YAML = require('js-yaml')
|
||||
import { isObject } from '@antfu/utils'
|
||||
import MarkdownIt = require('markdown-it')
|
||||
import type { ReportConfig, ReportMarkdown, ReportMarkdownParagraph, ReportMarkdownParagraphChild, ReportMarkdownSection } from '@md-report/types'
|
||||
import Token = require('markdown-it/lib/token')
|
||||
import type { IRunOptions } from 'docx'
|
||||
import { HEADING_OPEN, PARAGRAPH_OPEN } from './constants'
|
||||
import { getParagraphChildConfig, getParagraphChildType } from './utils'
|
||||
|
||||
const md = MarkdownIt({ html: true })
|
||||
|
||||
/**
 * Splits a leading `---` frontmatter block off a markdown document.
 *
 * @param code - Raw markdown source.
 * @returns `data`: the YAML-loaded frontmatter (an empty object when there is
 * no frontmatter or it does not load to an object); `content`: the source with
 * the frontmatter block removed.
 */
export function matter(code: string): { data: ReportConfig; content: string } {
  let data: any = {}
  let content = code

  // The pattern is anchored at the start of the input, so a match always begins at index 0.
  const found = /^---.*\r?\n([\s\S]*?)---/.exec(code)
  if (found) {
    const loaded = YAML.load(found[1])
    data = isObject(loaded) ? loaded : {}
    content = code.slice(found[0].length)
  }

  return { data, content }
}
|
||||
|
||||
/**
 * Builds one paragraph child from a group of inline tokens.
 *
 * Leading closing tokens (left over from the previous group) are skipped, then
 * the child type is taken from the first remaining token and the run
 * configuration from that token onwards.
 *
 * @param tokens - Inline tokens of one group; the last one is expected to be
 * the `text` / `code_inline` token that carries the content.
 * @returns The child type together with its run configuration.
 */
export function parseParagraphChild(tokens: Token[]): ReportMarkdownParagraphChild {
  // Only real closing markers count: `*_close` tokens and raw closing HTML tags.
  // Checking the content of every token would also skip inline code such as `</div>`.
  const isClosing = (token: Token): boolean =>
    token.type.includes('close')
    || (token.type === 'html_inline' && /^<\/[^>]*>/.test(token.content))

  // Get rid of closing tags, but never step past the last token of the group.
  let i = 0
  while (i < tokens.length - 1 && isClosing(tokens[i]))
    i++

  const type = getParagraphChildType(tokens[i])
  const config: IRunOptions = getParagraphChildConfig(tokens.slice(i))

  return {
    type,
    config,
  }
}
|
||||
|
||||
/**
 * Converts the tokens of one heading or paragraph into a report paragraph.
 *
 * @param tokens - Block tokens; the first is the opening token and the second
 * holds the inline children.
 * @returns The paragraph kind, its level (length of the opening token's
 * markup) and its parsed children.
 */
export function parseParagraph(tokens: Token[]): ReportMarkdownParagraph {
  const [opening, inline] = tokens
  const inlineTokens = inline.children
  const children: ReportMarkdownParagraphChild[] = []

  // Each `text` / `code_inline` token terminates a group of inline tokens.
  let groupStart = 0
  inlineTokens.forEach((child, index) => {
    if (child.type !== 'code_inline' && child.type !== 'text')
      return
    children.push(parseParagraphChild(inlineTokens.slice(groupStart, index + 1)))
    groupStart = index + 1
  })

  return {
    type: opening.type === HEADING_OPEN ? 'heading' : 'paragraph',
    level: opening.markup.length,
    children,
  }
}
|
||||
|
||||
/**
 * Splits a section's tokens at every heading or paragraph opening and parses
 * each piece as a paragraph.
 *
 * @param tokens - Tokens belonging to one section.
 * @returns A section node holding the parsed paragraphs.
 */
export function parseSection(tokens: Token[]): ReportMarkdownSection {
  const startsBlock = (token: Token): boolean =>
    token.type === HEADING_OPEN || token.type === PARAGRAPH_OPEN

  const children: ReportMarkdownParagraph[] = []
  let blockStart = 0

  tokens.forEach((token, index) => {
    if (index > blockStart && startsBlock(token)) {
      children.push(parseParagraph(tokens.slice(blockStart, index)))
      blockStart = index
    }
  })
  // Whatever follows the last boundary forms the final paragraph.
  children.push(parseParagraph(tokens.slice(blockStart)))

  return { type: 'section', children }
}
|
||||
|
||||
/**
 * Splits the document tokens into sections, starting a new section at every
 * heading whose markup is a single character (heading 1).
 *
 * @param tokens - All content tokens of the document.
 * @returns The parsed sections in document order.
 */
export function parseContent(tokens: Token[]): ReportMarkdownSection[] {
  const startsSection = (token: Token): boolean =>
    token.type === HEADING_OPEN && token.markup.length === 1

  const sections: ReportMarkdownSection[] = []
  let sectionStart = 0

  tokens.forEach((token, index) => {
    if (startsSection(token) && index > sectionStart) {
      sections.push(parseSection(tokens.slice(sectionStart, index)))
      sectionStart = index
    }
  })
  // The tail after the last heading 1 is the final section.
  sections.push(parseSection(tokens.slice(sectionStart)))

  return sections
}
|
||||
|
||||
/**
 * Parses a markdown document into its frontmatter and sectioned content.
 *
 * @param markdown - Raw markdown source, optionally starting with frontmatter.
 * @returns The raw source, the frontmatter data and the parsed sections.
 */
export function parse(markdown: string): ReportMarkdown {
  const separated = matter(markdown)
  const contentTokens = md.parse(separated.content, {})

  return {
    raw: markdown,
    frontmatter: separated.data,
    content: parseContent(contentTokens),
  }
}
|
||||
|
||||
// Ad-hoc debug snippet: parses the inline sample document below and logs the
// children of the second block of its first section. It is top-level code, so
// it runs whenever this module is loaded.
const src = `# 111
|
||||
|
||||
this is a paragraph with **\`strong\`**, *italic*, \`inline code\`, [hyperlink](https://syy11.cn), ~~delete~~, ==highlight==, $1 + 1 = 2$, a<sup>sup</sup><sub>sub</sub>, ![image](https://image.cn), footnotes[^foot][^note]
|
||||
|
||||
\`\`\`javascript
|
||||
const a = 0
|
||||
\`\`\`
|
||||
|
||||
$$
|
||||
1+2=3
|
||||
$$
|
||||
|
||||
# Refs
|
||||
|
||||
[^foot]: ref1
|
||||
[^note]: ref2`
|
||||
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(parse(src).content[0].children[1].children)
|
|
@ -1,8 +0,0 @@
|
|||
import { promises as fs } from 'fs'
|
||||
import type { ReportMarkdown } from '@md-report/types'
|
||||
import { parse } from './core'
|
||||
|
||||
/**
 * Loads and parses a markdown report.
 *
 * @param filepath - Path of the markdown file; only read when `content` is not given.
 * @param content - Markdown source to parse instead of reading `filepath`.
 * @returns The parsed report.
 */
export async function load(filepath: string, content?: string): Promise<ReportMarkdown> {
  if (content != null)
    return parse(content)

  const fromDisk = await fs.readFile(filepath, 'utf8')
  return parse(fromDisk)
}
|
|
@ -1,2 +1,49 @@
|
|||
export * from './core'
|
||||
export * from './fs'
|
||||
import type Token from 'markdown-it/lib/token'
|
||||
import MarkdownIt from 'markdown-it'
|
||||
import type { ISectionOptions, IStylesOptions, Paragraph, Table, TableOfContents } from 'docx'
|
||||
import { Document } from 'docx'
|
||||
import { sliceParagraph, sliceSection } from './utils'
|
||||
import { paragraphParser } from './paragraph'
|
||||
|
||||
const md = new MarkdownIt()
|
||||
|
||||
/**
 * Converts markdown source into a docx document.
 *
 * @param props - `markdown` is the source text; `config.meta` is handed to
 * markdown-it as the parse environment and `config.styles` becomes the
 * document styles.
 * @returns The generated document.
 */
export function parse(props: { markdown: string; config: { meta: Record<string, any>; styles: IStylesOptions } }): Document {
  const { meta, styles } = props.config
  // Tokenize the source; `meta` is passed through as the markdown-it env object.
  const tokens: Token[] = md.parse(props.markdown, meta)
  return parseDocument(tokens, styles)
}
|
||||
|
||||
/**
 * Splits the token stream into sections and assembles the docx document.
 *
 * @param tokens - All markdown-it tokens of the document.
 * @param styles - Styles forwarded to the document.
 * @returns A document containing one section per slice returned by `sliceSection`.
 */
export function parseDocument(tokens: Token[], styles: IStylesOptions): Document {
  const sections: ISectionOptions[] = []
  let pos = 0

  // Split and parse sections.
  while (pos < tokens.length) {
    const { tokens: section, offset } = sliceSection(tokens.slice(pos))
    if (section.length > 0)
      sections.push(parseSection(section))
    // `offset` is relative to the slice that was passed in, so it has to be
    // added to `pos`; always advance by at least one token so the loop ends.
    pos += Math.max(offset, 1)
  }

  return new Document({
    styles,
    sections,
  })
}
|
||||
|
||||
/**
 * Splits a section's tokens into block-level pieces and converts each piece
 * with the parser registered for its tag in `paragraphParser`.
 *
 * @param tokens - Tokens of one section.
 * @returns Section options whose `children` are the converted blocks. Blocks
 * whose tag has no registered parser are skipped.
 */
export function parseSection(tokens: Token[]): ISectionOptions {
  const children: (Paragraph | Table | TableOfContents)[] = []
  let pos = 0

  // Split and parse paragraphs.
  while (pos < tokens.length) {
    const { tokens: paragraph, offset } = sliceParagraph(tokens.slice(pos))
    // `offset` is relative to the slice, so accumulate it; always make progress.
    pos += Math.max(offset, 1)

    if (paragraph.length === 0)
      continue
    // Dispatch on the block that was just sliced, not on the section's first token.
    const parser = paragraphParser[paragraph[0].tag]
    // No parser is registered for some tags (e.g. lists); skip instead of calling `undefined`.
    if (typeof parser === 'function')
      children.push(parser(paragraph))
  }

  return {
    children,
  }
}
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
import { readFileSync } from 'fs'
|
||||
import type { IImageOptions, IRunOptions, ParagraphChild } from 'docx'
|
||||
import { ImageRun, Paragraph, TextRun } from 'docx'
|
||||
import type Token from 'markdown-it/lib/token'
|
||||
import { sliceInlineText } from './utils'
|
||||
|
||||
/**
 * Converts the inline children of a block into a docx paragraph.
 *
 * @param props - `tokens[0]` is the `inline` token whose `children` are
 * converted; `style` is the paragraph style id and defaults to `'normal'`.
 * @returns A paragraph with one run per inline group. The paragraph has no
 * children when `tokens` is empty or the inline token has no children.
 */
export function parseInline(props: { tokens: Token[]; style?: string }): Paragraph {
  const { tokens, style = 'normal' } = props
  // A block without an inline token (or with `children: null`) yields an empty paragraph.
  const childrenTokens: Token[] = tokens[0]?.children ?? []
  const children: ParagraphChild[] = []
  let pos = 0

  // Parse inline children.
  while (pos < childrenTokens.length) {
    // Slice the inline children; `tokens` itself only holds the block-level inline token.
    const { tokens: paragraphChild, offset } = sliceInlineText(childrenTokens.slice(pos))
    // `offset` is relative to the slice, so accumulate it; always make progress.
    pos += Math.max(offset, 1)

    if (paragraphChild.length === 0)
      continue
    // Decide per group whether it is an image or a text run.
    if (paragraphChild[0].tag === 'img')
      children.push(parseImage(paragraphChild))
    else
      children.push(parseText(paragraphChild))
  }

  return new Paragraph({
    style,
    children,
  })
}
|
||||
|
||||
/**
 * Folds a group of inline tokens into a single text run.
 *
 * Closing tokens are ignored. Formatting tags switch on the matching run
 * option, `code` tokens set an (empty) font plus the text, and every other
 * token supplies the run text from its content.
 *
 * @param tokens - Inline tokens that make up one run.
 * @returns The text run built from the accumulated options.
 */
export function parseText(tokens: Token[]): TextRun {
  // Run options switched on by each formatting tag.
  const formatting = new Map<string, IRunOptions>([
    ['strong', { bold: true }],
    ['em', { italics: true }],
    ['sub', { subScript: true }],
    ['sup', { superScript: true }],
    ['s', { strike: true }],
    // TODO: Replace highlight color with env data.
    ['mark', { highlight: 'yellow' }],
  ])

  const options = tokens
    // Only deal with opening and text/code tokens.
    .filter(token => token.nesting >= 0)
    .reduce<IRunOptions>((acc, token) => {
      // TODO: Replace code font with env data.
      if (token.tag === 'code')
        return { ...acc, font: {}, text: token.content }

      const format = formatting.get(token.tag)
      if (format !== undefined)
        return { ...acc, ...format }

      // Normal text.
      return { ...acc, text: token.content }
    }, {})

  return new TextRun(options)
}
|
||||
|
||||
/**
 * Converts an image token into a docx image run.
 *
 * @param tokens - Token group whose first element is the image token.
 * @returns An image run with the file contents, or a highlighted placeholder
 * text run when the token has no `src` or the file cannot be read.
 */
export function parseImage(tokens: Token[]): ImageRun | TextRun {
  const [image] = tokens
  // Call `attrGet` on the token itself: pulling the method off the token by
  // destructuring detaches `this` and makes the call throw.
  const src = image.attrGet('src')

  // Visible placeholder so a missing image is noticed in the generated document.
  const missingImage = (): TextRun => new TextRun({
    text: `[MD Report]: Image ${image.content} is not found.`,
    bold: true,
    color: 'red',
    highlight: 'yellow',
  })

  if (!src)
    return missingImage()

  let data: string
  try {
    data = readFileSync(src).toString('base64')
  }
  catch {
    // Unreadable or non-local path: report it in the document instead of aborting.
    return missingImage()
  }

  const options: IImageOptions = {
    data,
    // TODO: Replace width and height with config in image url.
    transformation: {
      width: 100,
      height: 100,
    },
  }
  return new ImageRun(options)
}
|
|
@ -0,0 +1,72 @@
|
|||
import type Token from 'markdown-it/lib/token'
|
||||
import { Paragraph, Table, TableCell, TableRow } from 'docx'
|
||||
import { sliceTableRow } from './utils'
|
||||
import { parseInline } from './inline'
|
||||
|
||||
/**
 * Converts a fenced code block into a paragraph styled as `fence`.
 *
 * @param tokens - Token group whose first element carries the code in `content`.
 * @returns A paragraph containing the code text.
 */
export function parseFence(tokens: Token[]): Paragraph {
  const [fence] = tokens
  return new Paragraph({ style: 'fence', text: fence.content })
}
|
||||
|
||||
/**
 * Converts the tokens of a markdown table into a docx table.
 *
 * @param tokens - Tokens of one table, including its row and cell tokens.
 * @returns A table styled as `table` with one row per `tr_open` … `tr_close` pair.
 */
export function parseTable(tokens: Token[]): Table {
  const rows: TableRow[] = []

  // Rows are grouped here by their opening and closing tokens: advancing a
  // cursor by assigning a slice-relative offset never gets past the first row.
  let rowStart = -1
  tokens.forEach((token, index) => {
    if (token.type === 'tr_open') {
      rowStart = index
    }
    else if (token.type === 'tr_close' && rowStart >= 0) {
      rows.push(parseTableRow(tokens.slice(rowStart, index + 1)))
      rowStart = -1
    }
  })

  return new Table({
    style: 'table',
    rows,
  })
}
|
||||
|
||||
/**
 * Converts the tokens of one table row into a docx table row.
 *
 * @param tokens - Tokens of a single row.
 * @returns A row with one cell per `inline` token, each holding a paragraph
 * styled as `table`.
 */
export function parseTableRow(tokens: Token[]): TableRow {
  const children: TableCell[] = []

  for (const token of tokens) {
    // Only the inline tokens carry cell content.
    if (token.type !== 'inline')
      continue
    const paragraph = parseInline({ tokens: [token], style: 'table' })
    children.push(new TableCell({ children: [paragraph] }))
  }

  return new TableRow({ children })
}
|
||||
|
||||
/**
 * Converts the tokens of a plain paragraph into a docx paragraph styled as `normal`.
 *
 * @param tokens - Tokens of one paragraph block.
 * @returns The paragraph built from the block's `inline` tokens.
 */
export function parseParagraph(tokens: Token[]): Paragraph {
  const inlineTokens: Token[] = []
  for (const token of tokens) {
    if (token.type === 'inline')
      inlineTokens.push(token)
  }

  return parseInline({ tokens: inlineTokens, style: 'normal' })
}
|
||||
|
||||
/**
 * Converts the tokens of a heading into a docx paragraph styled `heading<level>`.
 *
 * @param tokens - Tokens of one heading block; the first is the opening token.
 * @returns The heading paragraph built from the block's `inline` tokens.
 */
export function parseHeading(tokens: Token[]): Paragraph {
  // Inline token.
  const inline = tokens.filter(token => token.type === 'inline')

  // Heading level. Read it from the tag (`h1`…`h6`): the markup of a setext
  // heading is a single `=` or `-`, so its length does not give the level.
  const [opening] = tokens
  const tagLevel = /^h([1-6])$/.exec(opening.tag)
  const level = tagLevel ? Number(tagLevel[1]) : opening.markup.length

  return parseInline({
    tokens: inline,
    style: `heading${level}`,
  })
}
|
||||
|
||||
/**
 * Block parsers keyed by the tag of a block's first token.
 * All six heading tags share the heading parser.
 */
export const paragraphParser: Record<string, (tokens: Token[]) => (Paragraph | Table)> = {
  code: parseFence,
  table: parseTable,
  p: parseParagraph,
  ...Object.fromEntries(
    ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].map(tag => [tag, parseHeading] as const),
  ),
}
|
|
@ -1,55 +1,53 @@
|
|||
import type { IRunOptions } from 'docx'
|
||||
import type { MarkdownItTokenType } from '@md-report/types'
|
||||
import Token = require('markdown-it/lib/token')
|
||||
import { KAI_TI_FIRA_CODE_FONTS } from './constants'
|
||||
import type Token from 'markdown-it/lib/token'
|
||||
|
||||
export function getParagraphChildType(token: Token): 'image' | 'text' {
|
||||
switch (token.type) {
|
||||
case 'image':
|
||||
return 'image'
|
||||
default:
|
||||
return 'text'
|
||||
/** Result of cutting a leading group off a token list. */
export interface SliceResult {
  /** The tokens that make up the group. */
  tokens: Token[]
  /** Number reported by the slicer for where the next group starts, relative to the input list. */
  offset: number
}
|
||||
|
||||
/**
 * Cuts the leading section off a token list.
 *
 * A section runs from the first token up to, but not including, the next
 * top-level `h1` opening token, or to the end of the list when there is none.
 *
 * @param tokens - Tokens starting at the beginning of a section.
 * @returns The section's tokens and the number of tokens consumed.
 */
export function sliceSection(tokens: Token[]): SliceResult {
  // Look for the next h1 opening after the first token; the first token may
  // itself be the h1 that starts this section.
  const nextHeading = tokens.findIndex((token, index) =>
    index > 0 && token.tag === 'h1' && token.nesting === 1 && token.level === 0)
  const offset = nextHeading === -1 ? tokens.length : nextHeading

  return {
    tokens: tokens.slice(0, offset),
    offset,
  }
}
|
||||
|
||||
export function getParagraphChildConfig(tokens: Token[]): IRunOptions {
|
||||
let config: IRunOptions = {}
|
||||
/**
 * Cuts the leading block off a token list.
 *
 * A block that opens with a container token extends through the first
 * closing token at level 0. A fence, or any other level-0 token that neither
 * opens nor closes, is a block on its own.
 *
 * @param tokens - Tokens starting at the beginning of a block.
 * @returns The block's tokens and the number of tokens consumed. An
 * unterminated block consumes the rest of the list.
 */
export function sliceParagraph(tokens: Token[]): SliceResult {
  if (tokens.length === 0)
    return { tokens: [], offset: 0 }

  const [first] = tokens
  const lastIndex = tokens.length - 1
  let offset = 0

  // Code block and other self-contained level-0 tokens: exactly one token.
  const selfContained = first.type === 'fence' || (first.nesting === 0 && first.level === 0)
  if (!selfContained) {
    // Normal paragraphs: walk to the closing token at level 0, never past the end of the list.
    while (offset < lastIndex && (tokens[offset].level > 0 || tokens[offset].nesting >= 0))
      offset++
  }

  // Return paragraph tokens.
  return {
    tokens: tokens.slice(0, offset + 1),
    offset: offset + 1,
  }
}
|
||||
|
||||
for (let i = 0; i < tokens.length; i++) {
|
||||
const token = tokens[i]
|
||||
switch (token.type as MarkdownItTokenType) {
|
||||
case 'em_open': {
|
||||
config = { ...config, italics: true }
|
||||
break
|
||||
}
|
||||
case 'strong_open': {
|
||||
config = { ...config, bold: true }
|
||||
break
|
||||
}
|
||||
case 'mark_open': {
|
||||
config = { ...config, shading: { fill: '#bbbbbb' }, style: 'mark' }
|
||||
break
|
||||
}
|
||||
case 'html_inline': {
|
||||
if (token.content === '<sup>')
|
||||
config = { ...config, superScript: true }
|
||||
if (token.content === '<sub>')
|
||||
config = { ...config, subScript: true }
|
||||
break
|
||||
}
|
||||
case 's_open': {
|
||||
config = { ...config, strike: true }
|
||||
break
|
||||
}
|
||||
case 'code_inline': {
|
||||
config = { ...config, font: KAI_TI_FIRA_CODE_FONTS, style: 'code' }
|
||||
break
|
||||
}
|
||||
case 'text': {
|
||||
config = { ...config, text: token.content }
|
||||
}
|
||||
/**
 * Cuts the next table row off a token list.
 *
 * Tokens before the first `tr_open` (table and section wrappers) are skipped.
 * The row runs from that `tr_open` through the following `tr_close`.
 *
 * @param tokens - Remaining tokens of a table.
 * @returns The row's tokens and the number of tokens consumed, including the
 * skipped ones. When no row is left, `tokens` is empty and `offset` is the
 * input length.
 */
export function sliceTableRow(tokens: Token[]): SliceResult {
  const start = tokens.findIndex(token => token.type === 'tr_open')
  // No further row: consume everything so that callers terminate.
  if (start === -1)
    return { tokens: [], offset: tokens.length }

  const close = tokens.findIndex((token, index) => index > start && token.type === 'tr_close')
  // An unterminated row extends to the end of the list.
  const end = close === -1 ? tokens.length : close + 1

  return {
    tokens: tokens.slice(start, end),
    offset: end,
  }
}
|
||||
|
||||
/**
 * Cuts the leading run off a list of inline tokens.
 *
 * Images, inline code and any other token that neither opens nor closes a
 * span (plain text, for example) form a run on their own, so plain text is not
 * merged into the styled span that follows it. A run that starts with an
 * opening token is delimited by `sliceParagraph`.
 *
 * @param tokens - Remaining inline tokens.
 * @returns The run's tokens and the number of tokens consumed.
 */
export function sliceInlineText(tokens: Token[]): SliceResult {
  if (tokens.length === 0)
    return { tokens: [], offset: 0 }

  const [first] = tokens
  if (first.tag === 'img' || first.tag === 'code' || first.nesting === 0) {
    return {
      tokens: tokens.slice(0, 1),
      offset: 1,
    }
  }

  return sliceParagraph(tokens)
}
|
||||
|
|
Loading…
Reference in New Issue