diff --git a/packages/parser/README.md b/packages/parser/README.md
index ee30ef3..0a3de96 100644
--- a/packages/parser/README.md
+++ b/packages/parser/README.md
@@ -1,6 +1,6 @@
# @md-report/parser
-Transfer plain markdown text to md-report data structure.
+Transform plain markdown text into markdown tokens.
## License
diff --git a/packages/parser/package.json b/packages/parser/package.json
index 0445d89..bf7231d 100644
--- a/packages/parser/package.json
+++ b/packages/parser/package.json
@@ -20,6 +20,7 @@
"prepublishOnly": "nr build"
},
"dependencies": {
+ "docx": "^7.3.0",
"js-yaml": "^4.1.0"
}
}
diff --git a/packages/parser/src/constants.ts b/packages/parser/src/constants.ts
deleted file mode 100644
index 9a8ceb8..0000000
--- a/packages/parser/src/constants.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-// MarkdownIt token types.
-export const HEADING_OPEN = 'heading_open'
-export const PARAGRAPH_OPEN = 'paragraph_open'
-
-// Style
-export const KAI_TI_FIRA_CODE_FONTS = {
- ascii: 'Fira Code',
- eastAsia: 'KaiTi',
-}
diff --git a/packages/parser/src/core.ts b/packages/parser/src/core.ts
deleted file mode 100644
index b21bec8..0000000
--- a/packages/parser/src/core.ts
+++ /dev/null
@@ -1,128 +0,0 @@
-import YAML = require('js-yaml')
-import { isObject } from '@antfu/utils'
-import MarkdownIt = require('markdown-it')
-import type { ReportConfig, ReportMarkdown, ReportMarkdownParagraph, ReportMarkdownParagraphChild, ReportMarkdownSection } from '@md-report/types'
-import Token = require('markdown-it/lib/token')
-import type { IRunOptions } from 'docx'
-import { HEADING_OPEN, PARAGRAPH_OPEN } from './constants'
-import { getParagraphChildConfig, getParagraphChildType } from './utils'
-
-const md = MarkdownIt({ html: true })
-
-export function matter(code: string): { data: ReportConfig; content: string } {
- let data: any = {}
- const content = code.replace(/^---.*\r?\n([\s\S]*?)---/,
- (_, d) => {
- data = YAML.load(d)
- if (!isObject(data))
- data = {}
- return ''
- })
- return { data, content }
-}
-
-export function parseParagraphChild(tokens: Token[]): ReportMarkdownParagraphChild {
- // Get rid of closing tags.
- let i = 0
- while (tokens[i].type.includes('close') || tokens[i].content.match(/\<\/[^]*?\>/))
- i++
-
- const type = getParagraphChildType(tokens[i])
- const config: IRunOptions = getParagraphChildConfig(tokens.slice(i))
-
- return {
- type,
- config,
- }
-}
-
-export function parseParagraph(tokens: Token[]): ReportMarkdownParagraph {
- const type = tokens[0].type === HEADING_OPEN ? 'heading' : 'paragraph'
- const level = tokens[0].markup.length
- const _content = tokens[1].children
- const children: ReportMarkdownParagraphChild[] = []
-
- let start = 0
- for (let i = 0; i < _content.length; i++) {
- const _token = _content[i]
- if (_token.type === 'code_inline' || _token.type === 'text') {
- children.push(parseParagraphChild(_content.slice(start, i + 1)))
- start = i + 1
- }
- }
-
- return {
- type,
- level,
- children,
- }
-}
-
-export function parseSection(tokens: Token[]): ReportMarkdownSection {
- const children: ReportMarkdownParagraph[] = []
-
- let start = 0
- for (let i = 0; i < tokens.length; i++) {
- const token = tokens[i]
- if ((token.type === HEADING_OPEN || token.type === PARAGRAPH_OPEN) && i > start) {
- children.push(parseParagraph(tokens.slice(start, i)))
- start = i
- }
- }
- children.push(parseParagraph(tokens.slice(start)))
-
- return {
- type: 'section',
- children,
- }
-}
-
-export function parseContent(tokens: Token[]): ReportMarkdownSection[] {
- const sections: ReportMarkdownSection[] = []
-
- let start = 0
- for (let i = 0; i < tokens.length; i++) {
- const token = tokens[i]
- // If heading 1.
- if (token.type === HEADING_OPEN && token.markup.length === 1 && i > start) {
- sections.push(parseSection(tokens.slice(start, i)))
- start = i
- }
- }
- sections.push(parseSection(tokens.slice(start)))
-
- return sections
-}
-
-export function parse(markdown: string): ReportMarkdown {
- const { data: frontmatter, content: rawContent } = matter(markdown)
- const contentTokens = md.parse(rawContent, {})
-
- const content = parseContent(contentTokens)
-
- return {
- raw: markdown,
- frontmatter,
- content,
- }
-}
-
-const src = `# 111
-
-this is a paragraph with **\`strong\`**, *italic*, \`inline code\`, [hyperlink](https://syy11.cn), ~~delete~~, ==highlight==, $1 + 1 = 2$, asupsub, ![image](https://image.cn), footnotes[^foot][^note]
-
-\`\`\`javascript
-const a = 0
-\`\`\`
-
-$$
-1+2=3
-$$
-
-# Refs
-
-[^foot]: ref1
-[^note]: ref2`
-
-// eslint-disable-next-line no-console
-console.log(parse(src).content[0].children[1].children)
diff --git a/packages/parser/src/fs.ts b/packages/parser/src/fs.ts
deleted file mode 100644
index 85568fb..0000000
--- a/packages/parser/src/fs.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import { promises as fs } from 'fs'
-import type { ReportMarkdown } from '@md-report/types'
-import { parse } from './core'
-
-export async function load(filepath: string, content?: string): Promise {
- const markdown = content ?? await fs.readFile(filepath, 'utf8')
- return parse(markdown)
-}
diff --git a/packages/parser/src/index.ts b/packages/parser/src/index.ts
index e4137d3..b39a9e8 100644
--- a/packages/parser/src/index.ts
+++ b/packages/parser/src/index.ts
@@ -1,2 +1,49 @@
-export * from './core'
-export * from './fs'
+import type Token from 'markdown-it/lib/token'
+import MarkdownIt from 'markdown-it'
+import type { ISectionOptions, IStylesOptions, Paragraph, Table, TableOfContents } from 'docx'
+import { Document } from 'docx'
+import { sliceParagraph, sliceSection } from './utils'
+import { paragraphParser } from './paragraph'
+
+// Shared markdown-it instance, created once with default options and used by parse() to tokenize input.
+const md = new MarkdownIt()
+
+/**
+ * Convert a markdown string into a docx `Document`.
+ *
+ * @param props.markdown Raw markdown text to tokenize.
+ * @param props.config.meta Passed to markdown-it as the `env` argument of `md.parse`.
+ * @param props.config.styles Style definitions forwarded unchanged to the resulting document.
+ * @returns The document produced by `parseDocument` from the markdown tokens.
+ */
+export function parse(props: { markdown: string; config: { meta: Record<string, unknown>; styles: IStylesOptions } }): Document {
+  const { markdown, config } = props
+  const { meta, styles } = config
+  // Frontmatter is not read from the markdown in this function; `meta` comes from the caller.
+  const tokens: Token[] = md.parse(markdown, meta)
+  return parseDocument(tokens, styles)
+}
+
+/**
+ * Build a docx `Document` from a flat markdown-it token stream.
+ *
+ * The stream is cut into sections with `sliceSection`, and every slice is
+ * converted with `parseSection`.
+ *
+ * @param tokens Block-level tokens of the whole document.
+ * @param styles Style definitions passed through to the `Document` constructor.
+ * @returns A document holding one section per slice.
+ */
+export function parseDocument(tokens: Token[], styles: IStylesOptions): Document {
+  let pos = 0
+  const sections: ISectionOptions[] = []
+  while (pos < tokens.length) {
+    const { tokens: section, offset } = sliceSection(tokens.slice(pos))
+    sections.push(parseSection(section))
+    // `offset` is relative to the slice handed to sliceSection, so it is added to
+    // the absolute position rather than replacing it.
+    pos += offset
+  }
+  return new Document({
+    styles,
+    sections,
+  })
+}
+
+/**
+ * Convert the tokens of one section into docx section options.
+ *
+ * The tokens are cut into block-level units with `sliceParagraph`; each unit is
+ * converted by the parser registered in `paragraphParser` for the tag of the
+ * unit's first token. Units whose tag has no registered parser are skipped.
+ *
+ * @param tokens Tokens belonging to a single section.
+ * @returns Section options whose `children` are the converted blocks, in order.
+ */
+export function parseSection(tokens: Token[]): ISectionOptions {
+  let pos = 0
+  const children: (Paragraph | Table | TableOfContents)[] = []
+  while (pos < tokens.length) {
+    const { tokens: paragraph, offset } = sliceParagraph(tokens.slice(pos))
+    // Dispatch on the current block's own first token, not on the first token of the section.
+    const parser = paragraphParser[paragraph[0].tag]
+    if (parser)
+      children.push(parser(paragraph))
+    // `offset` is relative to the slice handed to sliceParagraph.
+    pos += offset
+  }
+  return {
+    children,
+  }
+}
diff --git a/packages/parser/src/inline.ts b/packages/parser/src/inline.ts
new file mode 100644
index 0000000..b53cac1
--- /dev/null
+++ b/packages/parser/src/inline.ts
@@ -0,0 +1,94 @@
+import { readFileSync } from 'fs'
+import type { IImageOptions, IRunOptions, ParagraphChild } from 'docx'
+import { ImageRun, Paragraph, TextRun } from 'docx'
+import type Token from 'markdown-it/lib/token'
+import { sliceInlineText } from './utils'
+
+/**
+ * Convert an inline token into a docx `Paragraph`.
+ *
+ * The children of the first token in `props.tokens` are cut into runs with
+ * `sliceInlineText`; a run that starts with an `img` token becomes an image,
+ * every other run becomes text.
+ *
+ * @param props.tokens Inline tokens; only the first one is read. An empty list
+ *   or a token without children yields a paragraph with no children.
+ * @param props.style Paragraph style id, `'normal'` when omitted.
+ * @returns The paragraph holding the converted runs.
+ */
+export function parseInline(props: { tokens: Token[]; style?: string }): Paragraph {
+  const { tokens, style = 'normal' } = props
+  const inlineChildren: Token[] = tokens[0]?.children ?? []
+  const children: ParagraphChild[] = []
+  let pos = 0
+  while (pos < inlineChildren.length) {
+    // Slice the inline children, not the outer token list.
+    const { tokens: paragraphChild, offset } = sliceInlineText(inlineChildren.slice(pos))
+    // Decide image vs. text from the run that was just sliced off.
+    if (paragraphChild[0].tag === 'img')
+      children.push(parseImage(paragraphChild))
+    else
+      children.push(parseText(paragraphChild))
+    // `offset` is relative to the slice handed to sliceInlineText.
+    pos += offset
+  }
+  return new Paragraph({
+    style,
+    children,
+  })
+}
+
+/**
+ * Convert the tokens of one inline run into a docx `TextRun`.
+ *
+ * Opening tokens switch on the matching run option (bold, italics, ...);
+ * closing tokens are ignored. The content of every other token is appended to
+ * the run's text, so a run holding several text-bearing tokens keeps all of
+ * their content instead of only the last one.
+ *
+ * @param tokens Tokens of a single run.
+ * @returns A text run carrying the collected formatting and text.
+ */
+export function parseText(tokens: Token[]): TextRun {
+  let options: IRunOptions = {}
+  // Stays undefined until a text-bearing token is seen, so a run without one
+  // is built without a `text` option.
+  let text: string | undefined
+  for (const token of tokens) {
+    // Only opening and text/code tokens matter; closing tokens are skipped.
+    if (token.nesting < 0)
+      continue
+    switch (token.tag) {
+      // Bold.
+      case 'strong':
+        options = { ...options, bold: true }
+        break
+      // Italics.
+      case 'em':
+        options = { ...options, italics: true }
+        break
+      // Subscript.
+      case 'sub':
+        options = { ...options, subScript: true }
+        break
+      // Superscript.
+      case 'sup':
+        options = { ...options, superScript: true }
+        break
+      // Strikethrough.
+      case 's':
+        options = { ...options, strike: true }
+        break
+      // Highlight.
+      case 'mark':
+        // TODO: Replace highlight color with env data.
+        options = { ...options, highlight: 'yellow' }
+        break
+      // Inline code.
+      case 'code':
+        // TODO: Replace code font with env data.
+        options = { ...options, font: {} }
+        text = (text ?? '') + token.content
+        break
+      // Normal text.
+      default:
+        text = (text ?? '') + token.content
+    }
+  }
+  return new TextRun(text === undefined ? options : { ...options, text })
+}
+
+/**
+ * Convert an image token into a docx `ImageRun`.
+ *
+ * The file named by the token's `src` attribute is read synchronously and
+ * embedded as base64. When the token has no `src`, a highlighted warning
+ * `TextRun` is returned instead.
+ *
+ * @param tokens Run tokens; only the first one (the image token) is read.
+ * @returns The image run, or a warning text run when `src` is missing.
+ * @throws Whatever `readFileSync` throws when the file cannot be read.
+ */
+export function parseImage(tokens: Token[]): ImageRun | TextRun {
+  const image = tokens[0]
+  // attrGet must be called on the token: a destructured reference loses its
+  // receiver and fails when the method reads the token's attributes.
+  const src = image.attrGet('src')
+  if (!src) {
+    return new TextRun({
+      text: `[MD Report]: Image ${image.content} is not found.`,
+      bold: true,
+      color: 'red',
+      highlight: 'yellow',
+    })
+  }
+  const options: IImageOptions = {
+    data: readFileSync(src).toString('base64'),
+    // TODO: Replace width and height with config in image url.
+    transformation: {
+      width: 100,
+      height: 100,
+    },
+  }
+  return new ImageRun(options)
+}
diff --git a/packages/parser/src/paragraph.ts b/packages/parser/src/paragraph.ts
new file mode 100644
index 0000000..e14b7ab
--- /dev/null
+++ b/packages/parser/src/paragraph.ts
@@ -0,0 +1,72 @@
+import type Token from 'markdown-it/lib/token'
+import { Paragraph, Table, TableCell, TableRow } from 'docx'
+import { sliceTableRow } from './utils'
+import { parseInline } from './inline'
+
+/**
+ * Turn a code token into a paragraph styled as `'fence'`.
+ *
+ * @param tokens Block tokens; the content of the first one becomes the paragraph text.
+ * @returns The paragraph carrying the code text.
+ */
+export function parseFence(tokens: Token[]): Paragraph {
+  const [fence] = tokens
+  return new Paragraph({ style: 'fence', text: fence.content })
+}
+
+/**
+ * Convert the tokens of one markdown table into a docx `Table`.
+ *
+ * The tokens are cut into row slices with `sliceTableRow`. A slice that holds
+ * no inline token (for example only table/thead/tbody wrapper tokens) has no
+ * cells and produces no row.
+ *
+ * @param tokens All tokens of the table block.
+ * @returns A table styled `'table'` with one row per slice that holds cells.
+ */
+export function parseTable(tokens: Token[]): Table {
+  let pos = 0
+  const rows: TableRow[] = []
+  while (pos < tokens.length) {
+    const { tokens: tableRow, offset } = sliceTableRow(tokens.slice(pos))
+    if (tableRow.some(token => token.type === 'inline'))
+      rows.push(parseTableRow(tableRow))
+    // `offset` is relative to the slice handed to sliceTableRow. Always move on
+    // by at least one token so a zero-length slice cannot stall the loop.
+    pos += Math.max(offset, 1)
+  }
+  return new Table({
+    style: 'table',
+    rows,
+  })
+}
+
+/**
+ * Build a docx table row from the tokens of one markdown table row.
+ *
+ * Each token of type `'inline'` becomes one cell holding a single paragraph
+ * produced by `parseInline` with the `'table'` style; all other tokens are ignored.
+ *
+ * @param tokens Tokens of a single row.
+ * @returns The row with one cell per inline token, in token order.
+ */
+export function parseTableRow(tokens: Token[]): TableRow {
+  const children: TableCell[] = []
+  for (const token of tokens) {
+    if (token.type !== 'inline')
+      continue
+    const content = parseInline({ tokens: [token], style: 'table' })
+    children.push(new TableCell({ children: [content] }))
+  }
+  return new TableRow({ children })
+}
+
+/**
+ * Convert a paragraph block into a docx paragraph with the `'normal'` style.
+ *
+ * @param tokens Tokens of the paragraph block; only those of type `'inline'` are passed on.
+ * @returns The paragraph produced by `parseInline`.
+ */
+export function parseParagraph(tokens: Token[]): Paragraph {
+  return parseInline({
+    style: 'normal',
+    tokens: tokens.filter(({ type }) => type === 'inline'),
+  })
+}
+
+/**
+ * Convert a heading block into a docx paragraph styled `heading<level>`.
+ *
+ * @param tokens Tokens of the heading block; the first one is the opening
+ *   token whose tag (`h1` ... `h6`) gives the level.
+ * @returns The paragraph produced by `parseInline` from the block's inline tokens.
+ */
+export function parseHeading(tokens: Token[]): Paragraph {
+  // Inline token.
+  const inline = tokens.filter(token => token.type === 'inline')
+  // Take the level from the tag rather than from `markup.length`: markup is the
+  // run of '#' only for ATX headings, while a setext heading carries a single
+  // '=' or '-' regardless of its level.
+  const level = Number(tokens[0].tag.slice(1))
+  return parseInline({
+    tokens: inline,
+    style: `heading${level}`,
+  })
+}
+
+/**
+ * Block parsers keyed by the tag of a block's first token.
+ *
+ * Each parser receives the tokens of one block and returns the docx element for it.
+ */
+export const paragraphParser: Record<string, (tokens: Token[]) => Paragraph | Table> = {
+  code: parseFence,
+  table: parseTable,
+  p: parseParagraph,
+  h1: parseHeading,
+  h2: parseHeading,
+  h3: parseHeading,
+  h4: parseHeading,
+  h5: parseHeading,
+  h6: parseHeading,
+}
diff --git a/packages/parser/src/utils.ts b/packages/parser/src/utils.ts
index 1d95b51..9bf1e6c 100644
--- a/packages/parser/src/utils.ts
+++ b/packages/parser/src/utils.ts
@@ -1,55 +1,53 @@
-import type { IRunOptions } from 'docx'
-import type { MarkdownItTokenType } from '@md-report/types'
-import Token = require('markdown-it/lib/token')
-import { KAI_TI_FIRA_CODE_FONTS } from './constants'
+import type Token from 'markdown-it/lib/token'
-export function getParagraphChildType(token: Token): 'image' | 'text' {
- switch (token.type) {
- case 'image':
- return 'image'
- default:
- return 'text'
+// Result returned by the slice* helpers in this file.
+export interface SliceResult {
+ // The tokens that were cut off the front of the input.
+ tokens: Token[]
+ // Position reported by the helper for where the next slice starts, counted within the list it was given.
+ offset: number
+}
+
+/**
+ * Cut the first section off the front of a token list.
+ *
+ * A section runs from the first token up to, but not including, the next
+ * opening `h1` token, or to the end of the list when there is none. A leading
+ * `h1` therefore starts the section it heads.
+ *
+ * @param tokens Remaining block-level tokens.
+ * @returns The section's tokens and their count as `offset` (relative to `tokens`).
+ */
+export function sliceSection(tokens: Token[]): SliceResult {
+  // Start at 1 so an h1 at index 0 opens this section instead of ending it.
+  let offset = Math.min(1, tokens.length)
+  while (offset < tokens.length && !(tokens[offset].tag === 'h1' && tokens[offset].nesting === 1))
+    offset++
+  return {
+    tokens: tokens.slice(0, offset),
+    offset,
   }
 }
-export function getParagraphChildConfig(tokens: Token[]): IRunOptions {
- let config: IRunOptions = {}
+/**
+ * Cut the first complete unit off the front of a token list.
+ *
+ * A first token with `nesting` 0 (a fence, or any other self-contained token)
+ * is a unit on its own. Otherwise the unit extends through the first closing
+ * token (`nesting` < 0) at `level` 0, or to the end of the list when no such
+ * token exists.
+ *
+ * @param tokens Remaining tokens; may be block tokens or the children of an inline token.
+ * @returns The unit's tokens and their count as `offset` (relative to `tokens`).
+ */
+export function sliceParagraph(tokens: Token[]): SliceResult {
+  let offset = 0
+  // Only an opening token needs a scan for its matching closer.
+  if (tokens.length > 0 && tokens[0].nesting !== 0) {
+    const last = tokens.length - 1
+    // Stop at the last token at the latest so the scan never reads past the end.
+    while (offset < last && (tokens[offset].level > 0 || tokens[offset].nesting >= 0))
+      offset++
+  }
+  // Return paragraph tokens.
+  return {
+    tokens: tokens.slice(0, offset + 1),
+    offset: offset + 1,
+  }
+}
- for (let i = 0; i < tokens.length; i++) {
- const token = tokens[i]
- switch (token.type as MarkdownItTokenType) {
- case 'em_open': {
- config = { ...config, italics: true }
- break
- }
- case 'strong_open': {
- config = { ...config, bold: true }
- break
- }
- case 'mark_open': {
- config = { ...config, shading: { fill: '#bbbbbb' }, style: 'mark' }
- break
- }
- case 'html_inline': {
- if (token.content === '')
- config = { ...config, superScript: true }
- if (token.content === '')
- config = { ...config, subScript: true }
- break
- }
- case 's_open': {
- config = { ...config, strike: true }
- break
- }
- case 'code_inline': {
- config = { ...config, font: KAI_TI_FIRA_CODE_FONTS, style: 'code' }
- break
- }
- case 'text': {
- config = { ...config, text: token.content }
- }
+/**
+ * Cut the next table row off the front of a token list.
+ *
+ * Tokens before the first `tr_open` are skipped. The row runs from that
+ * `tr_open` through the following `tr_close`, or to the end of the list when
+ * the row is not closed. When no `tr_open` is left, an empty slice is returned
+ * and `offset` covers the whole input.
+ *
+ * @param tokens Remaining tokens of a table.
+ * @returns The row's tokens and the index just past the row (relative to `tokens`).
+ */
+export function sliceTableRow(tokens: Token[]): SliceResult {
+  const start = tokens.findIndex(token => token.type === 'tr_open')
+  // No row left: consume everything so the caller's loop can finish.
+  if (start < 0) {
+    return {
+      tokens: [],
+      offset: tokens.length,
+    }
+  }
+  const close = tokens.findIndex((token, index) => index > start && token.type === 'tr_close')
+  const end = close < 0 ? tokens.length : close + 1
+  return {
+    tokens: tokens.slice(start, end),
+    offset: end,
+  }
+}
+
+// Cut the first run off the front of a list of inline child tokens.
+// Returns the run's tokens together with the offset reported for the next run.
+export function sliceInlineText(tokens: Token[]): SliceResult {
+ // A token tagged 'img' or 'code' is returned as a one-token run.
+ if (tokens[0].tag === 'img' || tokens[0].tag === 'code') {
+ return {
+ tokens: tokens.slice(0, 1),
+ offset: 1,
}
}
-
- return config
+ // Every other run is delimited by the same rule sliceParagraph applies.
+ return sliceParagraph(tokens)
}