wip: save

2023-11-14 01:14:33 +08:00 · 2023-11-14 01:14:33 +08:00 · 6311ebbf32
parent 19bd714239
commit 6311ebbf32
2 changed files with 91 additions and 54 deletions
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@@ -171,6 +171,11 @@ export default class Tokenizer {
  private readonly decodeEntities: boolean
  private readonly entityDecoder: EntityDecoder

+  public line = 1
+  public column = 1
+  public startLine = 1
+  public startColumn = 1
+
  constructor(
    { decodeEntities = true }: { decodeEntities?: boolean },
    private readonly cbs: Callbacks
@@ -184,12 +189,22 @@ export default class Tokenizer {
  public reset(): void {
    this.state = State.Text
    this.buffer = ''
-    this.sectionStart = 0
+    this.recordStart(0)
    this.index = 0
+    this.line = 1
+    this.column = 1
+    this.startLine = 1
+    this.startColumn = 1
    this.baseState = State.Text
    this.currentSequence = undefined!
  }

+  private recordStart(start = this.index) {
+    this.sectionStart = start
+    this.startLine = this.line
+    this.startColumn = this.column + (start - this.index)
+  }
+
  private stateText(c: number): void {
    if (
      c === CharCodes.Lt ||
@@ -199,7 +214,7 @@ export default class Tokenizer {
        this.cbs.ontext(this.sectionStart, this.index)
      }
      this.state = State.BeforeTagName
-      this.sectionStart = this.index
+      this.recordStart()
    } else if (this.decodeEntities && c === CharCodes.Amp) {
      this.startEntity()
    }
@@ -242,7 +257,7 @@ export default class Tokenizer {
        }

        this.isSpecial = false
-        this.sectionStart = endOfText + 2 // Skip over the `</`
+        this.recordStart(endOfText + 2) // Skip over the `</`
        this.stateInClosingTagName(c)
        return // We are done; skip the rest of the function.
      }
@@ -274,7 +289,7 @@ export default class Tokenizer {
        this.state = State.InCommentLike
        this.currentSequence = Sequences.CdataEnd
        this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
      }
    } else {
      this.sequenceIndex = 0
@@ -325,7 +340,7 @@ export default class Tokenizer {
        }

        this.sequenceIndex = 0
-        this.sectionStart = this.index + 1
+        this.recordStart(this.index + 1)
        this.state = State.Text
      }
    } else if (this.sequenceIndex === 0) {
@@ -359,13 +374,13 @@ export default class Tokenizer {
  private stateBeforeTagName(c: number): void {
    if (c === CharCodes.ExclamationMark) {
      this.state = State.BeforeDeclaration
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else if (c === CharCodes.Questionmark) {
      this.state = State.InProcessingInstruction
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else if (this.isTagStartChar(c)) {
      const lower = c | 0x20
-      this.sectionStart = this.index
+      this.recordStart()
      if (lower === Sequences.TitleEnd[2]) {
        this.startSpecial(Sequences.TitleEnd, 3)
      } else {
@@ -384,7 +399,7 @@ export default class Tokenizer {
  private stateInTagName(c: number): void {
    if (isEndOfTagSection(c)) {
      this.cbs.onopentagname(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
      this.state = State.BeforeAttributeName
      this.stateBeforeAttributeName(c)
    }
@@ -398,13 +413,13 @@ export default class Tokenizer {
      this.state = this.isTagStartChar(c)
        ? State.InClosingTagName
        : State.InSpecialComment
-      this.sectionStart = this.index
+      this.recordStart()
    }
  }
  private stateInClosingTagName(c: number): void {
    if (c === CharCodes.Gt || isWhitespace(c)) {
      this.cbs.onclosetag(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
      this.state = State.AfterClosingTagName
      this.stateAfterClosingTagName(c)
    }
@@ -413,7 +428,7 @@ export default class Tokenizer {
    // Skip everything until ">"
    if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
      this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    }
  }
  private stateBeforeAttributeName(c: number): void {
@@ -425,19 +440,19 @@ export default class Tokenizer {
      } else {
        this.state = State.Text
      }
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else if (c === CharCodes.Slash) {
      this.state = State.InSelfClosingTag
    } else if (!isWhitespace(c)) {
      this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
    }
  }
  private stateInSelfClosingTag(c: number): void {
    if (c === CharCodes.Gt) {
      this.cbs.onselfclosingtag(this.index)
      this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
      this.isSpecial = false // Reset special state, in case of self-closing special tags
    } else if (!isWhitespace(c)) {
      this.state = State.BeforeAttributeName
@@ -447,7 +462,7 @@ export default class Tokenizer {
  private stateInAttributeName(c: number): void {
    if (c === CharCodes.Eq || isEndOfTagSection(c)) {
      this.cbs.onattribname(this.sectionStart, this.index)
-      this.sectionStart = this.index
+      this.recordStart()
      this.state = State.AfterAttributeName
      this.stateAfterAttributeName(c)
    }
@@ -457,24 +472,24 @@ export default class Tokenizer {
      this.state = State.BeforeAttributeValue
    } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
      this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
-      this.sectionStart = -1
+      this.recordStart(-1)
      this.state = State.BeforeAttributeName
      this.stateBeforeAttributeName(c)
    } else if (!isWhitespace(c)) {
      this.cbs.onattribend(QuoteType.NoValue, this.sectionStart)
      this.state = State.InAttributeName
-      this.sectionStart = this.index
+      this.recordStart()
    }
  }
  private stateBeforeAttributeValue(c: number): void {
    if (c === CharCodes.DoubleQuote) {
      this.state = State.InAttributeValueDq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else if (c === CharCodes.SingleQuote) {
      this.state = State.InAttributeValueSq
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else if (!isWhitespace(c)) {
-      this.sectionStart = this.index
+      this.recordStart()
      this.state = State.InAttributeValueNq
      this.stateInAttributeValueNoQuotes(c) // Reconsume token
    }
@@ -482,7 +497,7 @@ export default class Tokenizer {
  private handleInAttributeValue(c: number, quote: number) {
    if (c === quote || (!this.decodeEntities && this.fastForwardTo(quote))) {
      this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
      this.cbs.onattribend(
        quote === CharCodes.DoubleQuote ? QuoteType.Double : QuoteType.Single,
        this.index + 1
@@ -501,7 +516,7 @@ export default class Tokenizer {
  private stateInAttributeValueNoQuotes(c: number): void {
    if (isWhitespace(c) || c === CharCodes.Gt) {
      this.cbs.onattribdata(this.sectionStart, this.index)
-      this.sectionStart = -1
+      this.recordStart(-1)
      this.cbs.onattribend(QuoteType.Unquoted, this.index)
      this.state = State.BeforeAttributeName
      this.stateBeforeAttributeName(c)
@@ -522,14 +537,14 @@ export default class Tokenizer {
    if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
      this.cbs.ondeclaration(this.sectionStart, this.index)
      this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    }
  }
  private stateInProcessingInstruction(c: number): void {
    if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
      this.cbs.onprocessinginstruction(this.sectionStart, this.index)
      this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    }
  }
  private stateBeforeComment(c: number): void {
@@ -538,7 +553,7 @@ export default class Tokenizer {
      this.currentSequence = Sequences.CommentEnd
      // Allow short comments (eg. <!-->)
      this.sequenceIndex = 2
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    } else {
      this.state = State.InDeclaration
    }
@@ -547,7 +562,7 @@ export default class Tokenizer {
    if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
      this.cbs.oncomment(this.sectionStart, this.index, 0)
      this.state = State.Text
-      this.sectionStart = this.index + 1
+      this.recordStart(this.index + 1)
    }
  }
  private stateBeforeSpecialS(c: number): void {
@@ -701,6 +716,13 @@ export default class Tokenizer {
        }
      }
      this.index++
+      // line / column handling
+      if (c === CharCodes.NewLine) {
+        this.line++
+        this.column = 1
+      } else {
+        this.column++
+      }
    }
    this.cleanup()
    this.finish()
@@ -717,14 +739,14 @@ export default class Tokenizer {
        (this.state === State.InSpecialTag && this.sequenceIndex === 0)
      ) {
        this.cbs.ontext(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
      } else if (
        this.state === State.InAttributeValueDq ||
        this.state === State.InAttributeValueSq ||
        this.state === State.InAttributeValueNq
      ) {
        this.cbs.onattribdata(this.sectionStart, this.index)
-        this.sectionStart = this.index
+        this.recordStart()
      }
    }
  }
@@ -783,7 +805,7 @@ export default class Tokenizer {
      if (this.sectionStart < this.entityStart) {
        this.cbs.onattribdata(this.sectionStart, this.entityStart)
      }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
      this.index = this.sectionStart - 1

      this.cbs.onattribentity(cp)
@@ -791,7 +813,7 @@ export default class Tokenizer {
      if (this.sectionStart < this.entityStart) {
        this.cbs.ontext(this.sectionStart, this.entityStart)
      }
-      this.sectionStart = this.entityStart + consumed
+      this.recordStart(this.entityStart + consumed)
      this.index = this.sectionStart - 1

      this.cbs.ontextentity(cp, this.sectionStart)
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@@ -135,16 +135,11 @@ const tokenizer = new Tokenizer(
  { decodeEntities: true },
  {
    ontext(start, end) {
-      const content = getSlice(start, end)
-      endIndex = end - 1
-      onText(content)
-      startIndex = end
+      onText(getSlice(start, end), start, end)
    },

    ontextentity(cp, end) {
-      endIndex = end - 1
-      onText(fromCodePoint(cp))
-      startIndex = end
+      onText(fromCodePoint(cp), end - 1, end)
    },

    onopentagname(start, end) {
@@ -206,7 +201,7 @@ const tokenizer = new Tokenizer(
    onattribentity(codepoint) {
      attribvalue += fromCodePoint(codepoint)
    },
-    onattribend(quote, end) {
+    onattribend(_quote, end) {
      endIndex = end
      if (attribs && !hasOwn(attribs, attribname)) {
        // TODO gen attributes AST nodes
@@ -299,7 +294,7 @@ function endOpenTag(isImplied: boolean) {
  tagname = ''
 }

-function onText(content: string) {
+function onText(content: string, start: number, end: number) {
  const parent = getParent()
  const lastNode = parent.children[parent.children.length - 1]
  if (lastNode?.type === NodeTypes.TEXT) {
@@ -310,8 +305,15 @@ function onText(content: string) {
    parent.children.push({
      type: NodeTypes.TEXT,
      content,
-      // @ts-ignore TODO
-      loc: {}
+      loc: {
+        start: {
+          offset: start,
+          line: tokenizer.startLine,
+          column: tokenizer.startColumn
+        },
+        end: { offset: end, line: tokenizer.line, column: tokenizer.column },
+        source: content
+      }
    })
  }
 }
@@ -327,8 +329,13 @@ function onOpenTag(tag: string) {
    // TODO props
    props: [],
    children: [],
-    // @ts-ignore TODO
-    loc: {},
+    loc: {
+      // @ts-expect-error TODO
+      start: {},
+      // @ts-expect-error TODO
+      end: { offset: endIndex },
+      source: ''
+    },
    codegenNode: undefined
  }
  addNode(el)
@@ -338,14 +345,25 @@ function onOpenTag(tag: string) {
 function onCloseTag() {
  const el = elementStack.pop()!
  // whitepsace management
-  const nodes = el.children
+  el.children = condenseWhitespace(el.children)
+}
+
+const windowsNewlineRE = /\r\n/g
+const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
+const nonWhitespaceRE = /[^\t\r\n\f ]/
+
+function isEmptyText(content: string) {
+  return !nonWhitespaceRE.test(content)
+}
+
+function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
  const shouldCondense = currentOptions.whitespace !== 'preserve'
  let removedWhitespace = false
  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i]
    if (node.type === NodeTypes.TEXT) {
      if (!inPre) {
-        if (!/[^\t\r\n\f ]/.test(node.content)) {
+        if (isEmptyText(node.content)) {
          const prev = nodes[i - 1]
          const next = nodes[i + 1]
          // Remove if:
@@ -376,19 +394,17 @@ function onCloseTag() {
        } else if (shouldCondense) {
          // in condense mode, consecutive whitespaces in text are condensed
          // down to a single space.
-          node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
+          node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
        }
      } else {
        // #6410 normalize windows newlines in <pre>:
        // in SSR, browsers normalize server-rendered \r\n into a single \n
        // in the DOM
-        node.content = node.content.replace(/\r\n/g, '\n')
+        node.content = node.content.replace(windowsNewlineRE, '\n')
      }
    }
  }
-  if (removedWhitespace) {
-    el.children = nodes.filter(Boolean)
-  }
+  return removedWhitespace ? nodes.filter(Boolean) : nodes
 }

 function addNode(node: TemplateChildNode) {
@@ -418,12 +434,11 @@ export function baseParse(
  options: ParserOptions = {}
 ): RootNode {
  reset()
-  currentInput = input.trim()
+  currentInput = input
  currentOptions = options
  htmlMode = !!options.htmlMode
  const root = (currentRoot = createRoot([]))
  tokenizer.parse(currentInput)
-  // temp hack for ts
-  console.log(endIndex)
+  root.children = condenseWhitespace(root.children)
  return root
 }