wip: save

parent 19bd714239
commit 6311ebbf32

@@ -171,6 +171,11 @@ export default class Tokenizer {
   private readonly decodeEntities: boolean
   private readonly entityDecoder: EntityDecoder

+  public line = 1
+  public column = 1
+  public startLine = 1
+  public startColumn = 1
+
   constructor(
     { decodeEntities = true }: { decodeEntities?: boolean },
     private readonly cbs: Callbacks
@@ -184,12 +189,22 @@ export default class Tokenizer {
   public reset(): void {
     this.state = State.Text
     this.buffer = ''
-    this.sectionStart = 0
+    this.recordStart(0)
     this.index = 0
+    this.line = 1
+    this.column = 1
+    this.startLine = 1
+    this.startColumn = 1
     this.baseState = State.Text
     this.currentSequence = undefined!
   }

+  private recordStart(start = this.index) {
+    this.sectionStart = start
+    this.startLine = this.line
+    this.startColumn = this.column + (start - this.index)
+  }
+
   private stateText(c: number): void {
     if (
       c === CharCodes.Lt ||
@@ -199,7 +214,7 @@ export default class Tokenizer {
         this.cbs.ontext(this.sectionStart, this.index)
       }
       this.state = State.BeforeTagName
-      this.sectionStart = this.index
+      this.recordStart()
     } else if (this.decodeEntities && c === CharCodes.Amp) {
       this.startEntity()
     }
@@ -242,7 +257,7 @@ export default class Tokenizer {
         }

         this.isSpecial = false
-        this.sectionStart = endOfText + 2 // Skip over the `</`
+        this.recordStart(endOfText + 2) // Skip over the `</`
         this.stateInClosingTagName(c)
         return // We are done; skip the rest of the function.
       }
@@ -274,7 +289,7 @@ export default class Tokenizer {
         this.state = State.InCommentLike
         this.currentSequence = Sequences.CdataEnd
         this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
       }
     } else {
       this.sequenceIndex = 0
@@ -325,7 +340,7 @@ export default class Tokenizer {
         }

         this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
         this.state = State.Text
       }
     } else if (this.sequenceIndex === 0) {
@@ -359,13 +374,13 @@ export default class Tokenizer {
   private stateBeforeTagName(c: number): void {
     if (c === CharCodes.ExclamationMark) {
       this.state = State.BeforeDeclaration
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (c === CharCodes.Questionmark) {
       this.state = State.InProcessingInstruction
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (this.isTagStartChar(c)) {
       const lower = c | 0x20
-      this.sectionStart = this.index
+      this.recordStart()
       if (lower === Sequences.TitleEnd[2]) {
         this.startSpecial(Sequences.TitleEnd, 3)
       } else {
@@ -384,7 +399,7 @@ export default class Tokenizer {
   private stateInTagName(c: number): void {
     if (isEndOfTagSection(c)) {
       this.cbs.onopentagname(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.state = State.BeforeAttributeName
       this.stateBeforeAttributeName(c)
     }
@@ -398,13 +413,13 @@ export default class Tokenizer {
       this.state = this.isTagStartChar(c)
         ? State.InClosingTagName
         : State.InSpecialComment
-      this.sectionStart = this.index
+      this.recordStart()
     }
   }
   private stateInClosingTagName(c: number): void {
     if (c === CharCodes.Gt || isWhitespace(c)) {
       this.cbs.onclosetag(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.state = State.AfterClosingTagName
       this.stateAfterClosingTagName(c)
     }
@@ -413,7 +428,7 @@ export default class Tokenizer {
     // Skip everything until ">"
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeAttributeName(c: number): void {
@@ -425,19 +440,19 @@ export default class Tokenizer {
       } else {
         this.state = State.Text
       }
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (c === CharCodes.Slash) {
       this.state = State.InSelfClosingTag
     } else if (!isWhitespace(c)) {
       this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
     }
   }
   private stateInSelfClosingTag(c: number): void {
     if (c === CharCodes.Gt) {
       this.cbs.onselfclosingtag(this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
       this.isSpecial = false // Reset special state, in case of self-closing special tags
     } else if (!isWhitespace(c)) {
       this.state = State.BeforeAttributeName
@@ -447,7 +462,7 @@ export default class Tokenizer {
   private stateInAttributeName(c: number): void {
     if (c === CharCodes.Eq || isEndOfTagSection(c)) {
       this.cbs.onattribname(this.sectionStart, this.index)
-      this.sectionStart = this.index
+      this.recordStart()
       this.state = State.AfterAttributeName
       this.stateAfterAttributeName(c)
     }
@@ -457,24 +472,24 @@ export default class Tokenizer {
       this.state = State.BeforeAttributeValue
     } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
       this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.state = State.BeforeAttributeName
       this.stateBeforeAttributeName(c)
     } else if (!isWhitespace(c)) {
       this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
       this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
     }
   }
   private stateBeforeAttributeValue(c: number): void {
     if (c === CharCodes.DoubleQuote) {
       this.state = State.InAttributeValueDq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (c === CharCodes.SingleQuote) {
       this.state = State.InAttributeValueSq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (!isWhitespace(c)) {
-      this.sectionStart = this.index
+      this.recordStart()
       this.state = State.InAttributeValueNq
       this.stateInAttributeValueNoQuotes(c) // Reconsume token
     }
@@ -482,7 +497,7 @@ export default class Tokenizer {
   private handleInAttributeValue(c: number, quote: number) {
     if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
       this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.cbs.onattribend(
         quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
         this.index + 1
@@ -501,7 +516,7 @@ export default class Tokenizer {
   private stateInAttributeValueNoQuotes(c: number): void {
     if (isWhitespace(c) || c === CharCodes.Gt) {
       this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.cbs.onattribend(QuoteType.Unquoted, this.index)
       this.state = State.BeforeAttributeName
       this.stateBeforeAttributeName(c)
@@ -522,14 +537,14 @@ export default class Tokenizer {
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.ondeclaration(this.sectionStart, this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateInProcessingInstruction(c: number): void {
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.onprocessinginstruction(this.sectionStart, this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeComment(c: number): void {
@@ -538,7 +553,7 @@ export default class Tokenizer {
       this.currentSequence = Sequences.CommentEnd
       // Allow short comments (eg. <!-->)
       this.sequenceIndex = 2
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else {
       this.state = State.InDeclaration
     }
@@ -547,7 +562,7 @@ export default class Tokenizer {
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.oncomment(this.sectionStart, this.index, 0)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeSpecialS(c: number): void {
@@ -701,6 +716,13 @@ export default class Tokenizer {
         }
       }
       this.index++
+      // line / column handling
+      if (c === CharCodes.NewLine) {
+        this.line++
+        this.column = 1
+      } else {
+        this.column++
+      }
     }
     this.cleanup()
     this.finish()
@@ -717,14 +739,14 @@ export default class Tokenizer {
         (this.state === State.InSpecialTag && this.sequenceIndex === 0)
       ) {
         this.cbs.ontext(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
       } else if (
         this.state === State.InAttributeValueDq ||
         this.state === State.InAttributeValueSq ||
        this.state === State.InAttributeValueNq
       ) {
         this.cbs.onattribdata(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
       }
     }
   }
@@ -783,7 +805,7 @@ export default class Tokenizer {
       if (this.sectionStart < this.entityStart) {
         this.cbs.onattribdata(this.sectionStart, this.entityStart)
       }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
      this.index = this.sectionStart - 1

      this.cbs.onattribentity(cp)
@@ -791,7 +813,7 @@ export default class Tokenizer {
      if (this.sectionStart < this.entityStart) {
        this.cbs.ontext(this.sectionStart, this.entityStart)
      }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
      this.index = this.sectionStart - 1

      this.cbs.ontextentity(cp, this.sectionStart)
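
A note on the startColumn arithmetic in recordStart above: the start index passed in can point a few characters past the one currently being read (endOfText + 2, this.index + 1), so the snapshot has to project the current column forward by the same delta. A minimal worked sketch of that bookkeeping, not part of the commit; it assumes the skipped span (here the two characters of "</") contains no newline, which holds for every recordStart call in this diff:

    // Suppose the tokenizer is reading index 40 at column 7 and a state handler
    // skips over "</" before recording the start of the closing tag name.
    const index = 40
    const column = 7
    const start = index + 2 // skip over "</"
    const startLine = 1 // unchanged: no newline inside the skipped span
    const startColumn = column + (start - index)
    console.log(startLine, startColumn) // 1 9 — position of the first name character
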
@@ -135,16 +135,11 @@ const tokenizer = new Tokenizer(
   { decodeEntities: true },
   {
     ontext(start, end) {
-      const content = getSlice(start, end)
-      endIndex = end - 1
-      onText(content)
-      startIndex = end
+      onText(getSlice(start, end), start, end)
     },

     ontextentity(cp, end) {
-      endIndex = end - 1
-      onText(fromCodePoint(cp))
-      startIndex = end
+      onText(fromCodePoint(cp), end - 1, end)
     },

     onopentagname(start, end) {
@@ -206,7 +201,7 @@ const tokenizer = new Tokenizer(
     onattribentity(codepoint) {
       attribvalue += fromCodePoint(codepoint)
     },
-    onattribend(quote, end) {
+    onattribend(_quote, end) {
       endIndex = end
       if (attribs && !hasOwn(attribs, attribname)) {
         // TODO gen attributes AST nodes
@@ -299,7 +294,7 @@ function endOpenTag(isImplied: boolean) {
   tagname = ''
 }

-function onText(content: string) {
+function onText(content: string, start: number, end: number) {
   const parent = getParent()
   const lastNode = parent.children[parent.children.length - 1]
   if (lastNode?.type === NodeTypes.TEXT) {
@@ -310,8 +305,15 @@ function onText(content: string) {
     parent.children.push({
       type: NodeTypes.TEXT,
       content,
-      // @ts-ignore TODO
-      loc: {}
+      loc: {
+        start: {
+          offset: start,
+          line: tokenizer.startLine,
+          column: tokenizer.startColumn
+        },
+        end: { offset: end, line: tokenizer.line, column: tokenizer.column },
+        source: content
+      }
     })
   }
 }
@@ -327,8 +329,13 @@ function onOpenTag(tag: string) {
     // TODO props
     props: [],
     children: [],
-    // @ts-ignore TODO
-    loc: {},
+    loc: {
+      // @ts-expect-error TODO
+      start: {},
+      // @ts-expect-error TODO
+      end: { offset: endIndex },
+      source: ''
+    },
     codegenNode: undefined
   }
   addNode(el)
@@ -338,14 +345,25 @@ function onOpenTag(tag: string) {
 function onCloseTag() {
   const el = elementStack.pop()!
   // whitepsace management
-  const nodes = el.children
+  el.children = condenseWhitespace(el.children)
+}
+
+const windowsNewlineRE = /\r\n/g
+const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
+const nonWhitespaceRE = /[^\t\r\n\f ]/
+
+function isEmptyText(content: string) {
+  return !nonWhitespaceRE.test(content)
+}
+
+function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
   const shouldCondense = currentOptions.whitespace !== 'preserve'
   let removedWhitespace = false
   for (let i = 0; i < nodes.length; i++) {
     const node = nodes[i]
     if (node.type === NodeTypes.TEXT) {
       if (!inPre) {
-        if (!/[^\t\r\n\f ]/.test(node.content)) {
+        if (isEmptyText(node.content)) {
           const prev = nodes[i - 1]
           const next = nodes[i + 1]
           // Remove if:
@@ -376,19 +394,17 @@ function onCloseTag() {
         } else if (shouldCondense) {
           // in condense mode, consecutive whitespaces in text are condensed
           // down to a single space.
-          node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
+          node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
         }
       } else {
         // #6410 normalize windows newlines in <pre>:
         // in SSR, browsers normalize server-rendered \r\n into a single \n
         // in the DOM
-        node.content = node.content.replace(/\r\n/g, '\n')
+        node.content = node.content.replace(windowsNewlineRE, '\n')
       }
     }
   }
-  if (removedWhitespace) {
-    el.children = nodes.filter(Boolean)
-  }
+  return removedWhitespace ? nodes.filter(Boolean) : nodes
 }

 function addNode(node: TemplateChildNode) {
@@ -418,12 +434,11 @@ export function baseParse(
   options: ParserOptions = {}
 ): RootNode {
   reset()
-  currentInput = input.trim()
+  currentInput = input
   currentOptions = options
   htmlMode = !!options.htmlMode
   const root = (currentRoot = createRoot([]))
   tokenizer.parse(currentInput)
-  // temp hack for ts
-  console.log(endIndex)
+  root.children = condenseWhitespace(root.children)
   return root
 }
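
Taken together, the two files above give plain text tokens a populated loc instead of the empty placeholder object. A usage sketch against this branch's baseParse; the import path and the exact offsets and columns are assumptions based on the 1-based line/column bookkeeping added in the tokenizer, not verified output of this WIP commit:

    import { baseParse } from './parse' // hypothetical path to the module in this diff

    const root = baseParse('<div>hello</div>')
    const div = root.children[0] as any // ElementNode
    const text = div.children[0]
    // text.content    -> 'hello'
    // text.loc.start  -> { offset: 5, line: 1, column: 6 }
    // text.loc.end    -> { offset: 10, line: 1, column: 11 }
    // text.loc.source -> 'hello'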