diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts
index cece336fc..4dedf9a46 100644
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -171,6 +171,11 @@ export default class Tokenizer {
   private readonly decodeEntities: boolean
   private readonly entityDecoder: EntityDecoder
 
+  public line = 1
+  public column = 1
+  public startLine = 1
+  public startColumn = 1
+
   constructor(
     { decodeEntities = true }: { decodeEntities?: boolean },
     private readonly cbs: Callbacks
@@ -184,12 +189,22 @@ export default class Tokenizer {
   public reset(): void {
     this.state = State.Text
     this.buffer = ''
-    this.sectionStart = 0
+    this.recordStart(0)
     this.index = 0
+    this.line = 1
+    this.column = 1
+    this.startLine = 1
+    this.startColumn = 1
     this.baseState = State.Text
     this.currentSequence = undefined!
   }
 
+  private recordStart(start = this.index) {
+    this.sectionStart = start
+    this.startLine = this.line
+    this.startColumn = this.column + (start - this.index)
+  }
+
   private stateText(c: number): void {
     if (
       c === CharCodes.Lt ||
@@ -199,7 +214,7 @@ export default class Tokenizer {
         this.cbs.ontext(this.sectionStart, this.index)
       }
       this.state = State.BeforeTagName
-      this.sectionStart = this.index
+      this.recordStart()
     } else if (this.decodeEntities && c === CharCodes.Amp) {
       this.startEntity()
     }
@@ -242,7 +257,7 @@
         }
 
         this.isSpecial = false
-        this.sectionStart = endOfText + 2 // Skip over the `</`
+        this.recordStart(endOfText + 2) // Skip over the `</`
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeAttributeName(c: number): void {
@@ -425,19 +440,19 @@
       } else {
         this.state = State.Text
       }
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (c === CharCodes.Slash) {
       this.state = State.InSelfClosingTag
     } else if (!isWhitespace(c)) {
       this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
     }
   }
   private stateInSelfClosingTag(c: number): void {
     if (c === CharCodes.Gt) {
       this.cbs.onselfclosingtag(this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
       this.isSpecial = false // Reset special state, in case of self-closing special tags
     } else if (!isWhitespace(c)) {
       this.state = State.BeforeAttributeName
@@ -447,7 +462,7 @@
   private stateInAttributeName(c: number): void {
     if (c === CharCodes.Eq || isEndOfTagSection(c)) {
       this.cbs.onattribname(this.sectionStart, this.index)
-      this.sectionStart = this.index
+      this.recordStart()
       this.state = State.AfterAttributeName
       this.stateAfterAttributeName(c)
     }
@@ -457,24 +472,24 @@
       this.state = State.BeforeAttributeValue
     } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
       this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.state = State.BeforeAttributeName
       this.stateBeforeAttributeName(c)
     } else if (!isWhitespace(c)) {
       this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
       this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
     }
   }
   private stateBeforeAttributeValue(c: number): void {
     if (c === CharCodes.DoubleQuote) {
       this.state = State.InAttributeValueDq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (c === CharCodes.SingleQuote) {
       this.state = State.InAttributeValueSq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else if (!isWhitespace(c)) {
-      this.sectionStart = this.index
+      this.recordStart()
       this.state = State.InAttributeValueNq
       this.stateInAttributeValueNoQuotes(c) // Reconsume token
     }
   }
@@ -482,7 +497,7 @@
   private handleInAttributeValue(c: number, quote: number) {
     if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
       this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.cbs.onattribend(
         quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
         this.index + 1
@@ -501,7 +516,7 @@
   private stateInAttributeValueNoQuotes(c: number): void {
     if (isWhitespace(c) || c === CharCodes.Gt) {
       this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
       this.cbs.onattribend(QuoteType.Unquoted, this.index)
       this.state = State.BeforeAttributeName
       this.stateBeforeAttributeName(c)
@@ -522,14 +537,14 @@
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.ondeclaration(this.sectionStart, this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateInProcessingInstruction(c: number): void {
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.onprocessinginstruction(this.sectionStart, this.index)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeComment(c: number): void {
@@ -538,7 +553,7 @@
       this.currentSequence = Sequences.CommentEnd // Allow short comments (eg. <!-->)
       this.sequenceIndex = 2
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     } else {
       this.state = State.InDeclaration
     }
   }
@@ -547,7 +562,7 @@
     if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
       this.cbs.oncomment(this.sectionStart, this.index, 0)
       this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
     }
   }
   private stateBeforeSpecialS(c: number): void {
@@ -701,6 +716,13 @@
         }
       }
       this.index++
+      // line / column handling
+      if (c === CharCodes.NewLine) {
+        this.line++
+        this.column = 1
+      } else {
+        this.column++
+      }
     }
     this.cleanup()
     this.finish()
@@ -717,14 +739,14 @@
         (this.state === State.InSpecialTag && this.sequenceIndex === 0)
       ) {
         this.cbs.ontext(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
       } else if (
         this.state === State.InAttributeValueDq ||
         this.state === State.InAttributeValueSq ||
         this.state === State.InAttributeValueNq
       ) {
         this.cbs.onattribdata(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
       }
     }
   }
@@ -783,7 +805,7 @@
       if (this.sectionStart < this.entityStart) {
         this.cbs.onattribdata(this.sectionStart, this.entityStart)
       }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
       this.index = this.sectionStart - 1
 
       this.cbs.onattribentity(cp)
@@ -791,7 +813,7 @@
       if (this.sectionStart < this.entityStart) {
         this.cbs.ontext(this.sectionStart, this.entityStart)
       }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
       this.index = this.sectionStart - 1
 
       this.cbs.ontextentity(cp, this.sectionStart)
diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts
index d12af24e3..2c7cf540d 100644
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -135,16 +135,11 @@ const tokenizer = new Tokenizer(
   { decodeEntities: true },
   {
     ontext(start, end) {
-      const content = getSlice(start, end)
-      endIndex = end - 1
-      onText(content)
-      startIndex = end
+      onText(getSlice(start, end), start, end)
     },
 
     ontextentity(cp, end) {
-      endIndex = end - 1
-      onText(fromCodePoint(cp))
-      startIndex = end
+      onText(fromCodePoint(cp), end - 1, end)
     },
 
     onopentagname(start, end) {
@@ -206,7 +201,7 @@
     onattribentity(codepoint) {
       attribvalue += fromCodePoint(codepoint)
     },
-    onattribend(quote, end) {
+    onattribend(_quote, end) {
       endIndex = end
       if (attribs && !hasOwn(attribs, attribname)) {
         // TODO gen attributes AST nodes
@@ -299,7 +294,7 @@ function endOpenTag(isImplied: boolean) {
   tagname = ''
 }
 
-function onText(content: string) {
+function onText(content: string, start: number, end: number) {
   const parent = getParent()
   const lastNode = parent.children[parent.children.length - 1]
   if (lastNode?.type === NodeTypes.TEXT) {
@@ -310,8 +305,15 @@
     parent.children.push({
       type: NodeTypes.TEXT,
       content,
-      // @ts-ignore TODO
-      loc: {}
+      loc: {
+        start: {
+          offset: start,
+          line: tokenizer.startLine,
+          column: tokenizer.startColumn
+        },
+        end: { offset: end, line: tokenizer.line, column: tokenizer.column },
+        source: content
+      }
     })
   }
 }
@@ -327,8 +329,13 @@
     // TODO props
     props: [],
     children: [],
-    // @ts-ignore TODO
-    loc: {},
+    loc: {
+      // @ts-expect-error TODO
+      start: {},
+      // @ts-expect-error TODO
+      end: { offset: endIndex },
+      source: ''
+    },
     codegenNode: undefined
   }
   addNode(el)
@@ -338,14 +345,25 @@
 function onCloseTag() {
   const el = elementStack.pop()!
   // whitepsace management
-  const nodes = el.children
+  el.children = condenseWhitespace(el.children)
+}
+
+const windowsNewlineRE = /\r\n/g
+const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
+const nonWhitespaceRE = /[^\t\r\n\f ]/
+
+function isEmptyText(content: string) {
+  return !nonWhitespaceRE.test(content)
+}
+
+function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
   const shouldCondense = currentOptions.whitespace !== 'preserve'
   let removedWhitespace = false
   for (let i = 0; i < nodes.length; i++) {
     const node = nodes[i]
     if (node.type === NodeTypes.TEXT) {
       if (!inPre) {
-        if (!/[^\t\r\n\f ]/.test(node.content)) {
+        if (isEmptyText(node.content)) {
           const prev = nodes[i - 1]
           const next = nodes[i + 1]
           // Remove if:
@@ -376,19 +394,17 @@
         } else if (shouldCondense) {
           // in condense mode, consecutive whitespaces in text are condensed
           // down to a single space.
-          node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
+          node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
         }
       } else {
         // #6410 normalize windows newlines in <pre>:
         // in SSR, browsers normalize server-rendered \r\n into a single \n
         // in the DOM
-        node.content = node.content.replace(/\r\n/g, '\n')
+        node.content = node.content.replace(windowsNewlineRE, '\n')
       }
     }
   }
-  if (removedWhitespace) {
-    el.children = nodes.filter(Boolean)
-  }
+  return removedWhitespace ? nodes.filter(Boolean) : nodes
 }
 
 function addNode(node: TemplateChildNode) {
@@ -418,12 +434,11 @@ export function baseParse(
   options: ParserOptions = {}
 ): RootNode {
   reset()
-  currentInput = input.trim()
+  currentInput = input
   currentOptions = options
   htmlMode = !!options.htmlMode
   const root = (currentRoot = createRoot([]))
   tokenizer.parse(currentInput)
-  // temp hack for ts
-  console.log(endIndex)
+  root.children = condenseWhitespace(root.children)
   return root
 }
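
For context, here is a minimal standalone sketch of how the new line/column bookkeeping in Tokenizer fits together: the per-character counting added at the end of the parse() loop, and recordStart() snapshotting the start of the current section so that onText() can later read back loc.start from tokenizer.startLine/startColumn and loc.end from tokenizer.line/column. The PositionTracker class, advance() helper, and the sample input below are made up for illustration only and are not part of the patch.

// Illustrative sketch only — mirrors the counters added to Tokenizer above.
class PositionTracker {
  index = 0
  line = 1
  column = 1
  sectionStart = 0
  startLine = 1
  startColumn = 1

  // Same idea as Tokenizer.recordStart(): remember where the current section
  // begins. `start` may differ from `index` (e.g. `index + 1` to skip a quote),
  // so the recorded column is shifted by the same delta.
  recordStart(start = this.index) {
    this.sectionStart = start
    this.startLine = this.line
    this.startColumn = this.column + (start - this.index)
  }

  // Same idea as the block added at the end of the parse() loop: every
  // consumed character advances the column; a newline advances the line.
  advance(charCode: number) {
    this.index++
    if (charCode === 10 /* \n */) {
      this.line++
      this.column = 1
    } else {
      this.column++
    }
  }
}

// Feeding "ab\ncd" ends at line 2, column 3, while the section start stays at
// line 1, column 1 — the kind of values onText() turns into loc.start / loc.end.
const tracker = new PositionTracker()
tracker.recordStart()
for (const ch of 'ab\ncd') tracker.advance(ch.charCodeAt(0))
console.log(tracker.startLine, tracker.startColumn, tracker.line, tracker.column) // 1 1 2 3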