wip: save

This commit is contained in:
Evan You 2023-11-14 01:14:33 +08:00
parent 19bd714239
commit 6311ebbf32
2 changed files with 91 additions and 54 deletions

View File

@ -171,6 +171,11 @@ export default class Tokenizer {
private readonly decodeEntities: boolean
private readonly entityDecoder: EntityDecoder
public line = 1
public column = 1
public startLine = 1
public startColumn = 1
constructor(
{ decodeEntities = true }: { decodeEntities?: boolean },
private readonly cbs: Callbacks
@ -184,12 +189,22 @@ export default class Tokenizer {
public reset(): void {
this.state = State.Text
this.buffer = ''
this.sectionStart = 0
this.recordStart(0)
this.index = 0
this.line = 1
this.column = 1
this.startLine = 1
this.startColumn = 1
this.baseState = State.Text
this.currentSequence = undefined!
}
private recordStart(start = this.index) {
this.sectionStart = start
this.startLine = this.line
this.startColumn = this.column + (start - this.index)
}
private stateText(c: number): void {
if (
c === CharCodes.Lt ||
@ -199,7 +214,7 @@ export default class Tokenizer {
this.cbs.ontext(this.sectionStart, this.index)
}
this.state = State.BeforeTagName
this.sectionStart = this.index
this.recordStart()
} else if (this.decodeEntities && c === CharCodes.Amp) {
this.startEntity()
}
@ -242,7 +257,7 @@ export default class Tokenizer {
}
this.isSpecial = false
this.sectionStart = endOfText + 2 // Skip over the `</`
this.recordStart(endOfText + 2) // Skip over the `</`
this.stateInClosingTagName(c)
return // We are done; skip the rest of the function.
}
@ -274,7 +289,7 @@ export default class Tokenizer {
this.state = State.InCommentLike
this.currentSequence = Sequences.CdataEnd
this.sequenceIndex = 0
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
}
} else {
this.sequenceIndex = 0
@ -325,7 +340,7 @@ export default class Tokenizer {
}
this.sequenceIndex = 0
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
this.state = State.Text
}
} else if (this.sequenceIndex === 0) {
@ -359,13 +374,13 @@ export default class Tokenizer {
private stateBeforeTagName(c: number): void {
if (c === CharCodes.ExclamationMark) {
this.state = State.BeforeDeclaration
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else if (c === CharCodes.Questionmark) {
this.state = State.InProcessingInstruction
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else if (this.isTagStartChar(c)) {
const lower = c | 0x20
this.sectionStart = this.index
this.recordStart()
if (lower === Sequences.TitleEnd[2]) {
this.startSpecial(Sequences.TitleEnd, 3)
} else {
@ -384,7 +399,7 @@ export default class Tokenizer {
private stateInTagName(c: number): void {
if (isEndOfTagSection(c)) {
this.cbs.onopentagname(this.sectionStart, this.index)
this.sectionStart = -1
this.recordStart(-1)
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
}
@ -398,13 +413,13 @@ export default class Tokenizer {
this.state = this.isTagStartChar(c)
? State.InClosingTagName
: State.InSpecialComment
this.sectionStart = this.index
this.recordStart()
}
}
private stateInClosingTagName(c: number): void {
if (c === CharCodes.Gt || isWhitespace(c)) {
this.cbs.onclosetag(this.sectionStart, this.index)
this.sectionStart = -1
this.recordStart(-1)
this.state = State.AfterClosingTagName
this.stateAfterClosingTagName(c)
}
@ -413,7 +428,7 @@ export default class Tokenizer {
// Skip everything until ">"
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.state = State.Text
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
}
}
private stateBeforeAttributeName(c: number): void {
@ -425,19 +440,19 @@ export default class Tokenizer {
} else {
this.state = State.Text
}
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag
} else if (!isWhitespace(c)) {
this.state = State.InAttributeName
this.sectionStart = this.index
this.recordStart()
}
}
private stateInSelfClosingTag(c: number): void {
if (c === CharCodes.Gt) {
this.cbs.onselfclosingtag(this.index)
this.state = State.Text
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
this.isSpecial = false // Reset special state, in case of self-closing special tags
} else if (!isWhitespace(c)) {
this.state = State.BeforeAttributeName
@ -447,7 +462,7 @@ export default class Tokenizer {
private stateInAttributeName(c: number): void {
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.sectionStart, this.index)
this.sectionStart = this.index
this.recordStart()
this.state = State.AfterAttributeName
this.stateAfterAttributeName(c)
}
@ -457,24 +472,24 @@ export default class Tokenizer {
this.state = State.BeforeAttributeValue
} else if (c === CharCodes.Slash || c === CharCodes.Gt) {
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
this.sectionStart = -1
this.recordStart(-1)
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
} else if (!isWhitespace(c)) {
this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
this.state = State.InAttributeName
this.sectionStart = this.index
this.recordStart()
}
}
private stateBeforeAttributeValue(c: number): void {
if (c === CharCodes.DoubleQuote) {
this.state = State.InAttributeValueDq
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else if (c === CharCodes.SingleQuote) {
this.state = State.InAttributeValueSq
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else if (!isWhitespace(c)) {
this.sectionStart = this.index
this.recordStart()
this.state = State.InAttributeValueNq
this.stateInAttributeValueNoQuotes(c) // Reconsume token
}
@ -482,7 +497,7 @@ export default class Tokenizer {
private handleInAttributeValue(c: number, quote: number) {
if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
this.cbs.onattribdata(this.sectionStart, this.index)
this.sectionStart = -1
this.recordStart(-1)
this.cbs.onattribend(
quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
this.index + 1
@ -501,7 +516,7 @@ export default class Tokenizer {
private stateInAttributeValueNoQuotes(c: number): void {
if (isWhitespace(c) || c === CharCodes.Gt) {
this.cbs.onattribdata(this.sectionStart, this.index)
this.sectionStart = -1
this.recordStart(-1)
this.cbs.onattribend(QuoteType.Unquoted, this.index)
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
@ -522,14 +537,14 @@ export default class Tokenizer {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.ondeclaration(this.sectionStart, this.index)
this.state = State.Text
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
}
}
private stateInProcessingInstruction(c: number): void {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.onprocessinginstruction(this.sectionStart, this.index)
this.state = State.Text
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
}
}
private stateBeforeComment(c: number): void {
@ -538,7 +553,7 @@ export default class Tokenizer {
this.currentSequence = Sequences.CommentEnd
// Allow short comments (eg. <!-->)
this.sequenceIndex = 2
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
} else {
this.state = State.InDeclaration
}
@ -547,7 +562,7 @@ export default class Tokenizer {
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
this.cbs.oncomment(this.sectionStart, this.index, 0)
this.state = State.Text
this.sectionStart = this.index + 1
this.recordStart(this.index + 1)
}
}
private stateBeforeSpecialS(c: number): void {
@ -701,6 +716,13 @@ export default class Tokenizer {
}
}
this.index++
// line / column handling
if (c === CharCodes.NewLine) {
this.line++
this.column = 1
} else {
this.column++
}
}
this.cleanup()
this.finish()
@ -717,14 +739,14 @@ export default class Tokenizer {
(this.state === State.InSpecialTag && this.sequenceIndex === 0)
) {
this.cbs.ontext(this.sectionStart, this.index)
this.sectionStart = this.index
this.recordStart()
} else if (
this.state === State.InAttributeValueDq ||
this.state === State.InAttributeValueSq ||
this.state === State.InAttributeValueNq
) {
this.cbs.onattribdata(this.sectionStart, this.index)
this.sectionStart = this.index
this.recordStart()
}
}
}
@ -783,7 +805,7 @@ export default class Tokenizer {
if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart)
}
this.sectionStart = this.entityStart + consumed
this.recordStart(this.entityStart + consumed)
this.index = this.sectionStart - 1
this.cbs.onattribentity(cp)
@ -791,7 +813,7 @@ export default class Tokenizer {
if (this.sectionStart < this.entityStart) {
this.cbs.ontext(this.sectionStart, this.entityStart)
}
this.sectionStart = this.entityStart + consumed
this.recordStart(this.entityStart + consumed)
this.index = this.sectionStart - 1
this.cbs.ontextentity(cp, this.sectionStart)

View File

@ -135,16 +135,11 @@ const tokenizer = new Tokenizer(
{ decodeEntities: true },
{
ontext(start, end) {
const content = getSlice(start, end)
endIndex = end - 1
onText(content)
startIndex = end
onText(getSlice(start, end), start, end)
},
ontextentity(cp, end) {
endIndex = end - 1
onText(fromCodePoint(cp))
startIndex = end
onText(fromCodePoint(cp), end - 1, end)
},
onopentagname(start, end) {
@ -206,7 +201,7 @@ const tokenizer = new Tokenizer(
onattribentity(codepoint) {
attribvalue += fromCodePoint(codepoint)
},
onattribend(quote, end) {
onattribend(_quote, end) {
endIndex = end
if (attribs && !hasOwn(attribs, attribname)) {
// TODO gen attributes AST nodes
@ -299,7 +294,7 @@ function endOpenTag(isImplied: boolean) {
tagname = ''
}
function onText(content: string) {
function onText(content: string, start: number, end: number) {
const parent = getParent()
const lastNode = parent.children[parent.children.length - 1]
if (lastNode?.type === NodeTypes.TEXT) {
@ -310,8 +305,15 @@ function onText(content: string) {
parent.children.push({
type: NodeTypes.TEXT,
content,
// @ts-ignore TODO
loc: {}
loc: {
start: {
offset: start,
line: tokenizer.startLine,
column: tokenizer.startColumn
},
end: { offset: end, line: tokenizer.line, column: tokenizer.column },
source: content
}
})
}
}
@ -327,8 +329,13 @@ function onOpenTag(tag: string) {
// TODO props
props: [],
children: [],
// @ts-ignore TODO
loc: {},
loc: {
// @ts-expect-error TODO
start: {},
// @ts-expect-error TODO
end: { offset: endIndex },
source: ''
},
codegenNode: undefined
}
addNode(el)
@ -338,14 +345,25 @@ function onOpenTag(tag: string) {
function onCloseTag() {
const el = elementStack.pop()!
// whitepsace management
const nodes = el.children
el.children = condenseWhitespace(el.children)
}
const windowsNewlineRE = /\r\n/g
const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
const nonWhitespaceRE = /[^\t\r\n\f ]/
function isEmptyText(content: string) {
return !nonWhitespaceRE.test(content)
}
function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
const shouldCondense = currentOptions.whitespace !== 'preserve'
let removedWhitespace = false
for (let i = 0; i < nodes.length; i++) {
const node = nodes[i]
if (node.type === NodeTypes.TEXT) {
if (!inPre) {
if (!/[^\t\r\n\f ]/.test(node.content)) {
if (isEmptyText(node.content)) {
const prev = nodes[i - 1]
const next = nodes[i + 1]
// Remove if:
@ -376,19 +394,17 @@ function onCloseTag() {
} else if (shouldCondense) {
// in condense mode, consecutive whitespaces in text are condensed
// down to a single space.
node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
}
} else {
// #6410 normalize windows newlines in <pre>:
// in SSR, browsers normalize server-rendered \r\n into a single \n
// in the DOM
node.content = node.content.replace(/\r\n/g, '\n')
node.content = node.content.replace(windowsNewlineRE, '\n')
}
}
}
if (removedWhitespace) {
el.children = nodes.filter(Boolean)
}
return removedWhitespace ? nodes.filter(Boolean) : nodes
}
function addNode(node: TemplateChildNode) {
@ -418,12 +434,11 @@ export function baseParse(
options: ParserOptions = {}
): RootNode {
reset()
currentInput = input.trim()
currentInput = input
currentOptions = options
htmlMode = !!options.htmlMode
const root = (currentRoot = createRoot([]))
tokenizer.parse(currentInput)
// temp hack for ts
console.log(endIndex)
root.children = condenseWhitespace(root.children)
return root
}