|
@@ -1,275 +0,0 @@
|
|
|
-import { startsWith, endsWith } from 'lodash'
|
|
|
-import type { Token } from './types'
|
|
|
-import { childlessTags } from './tags'
|
|
|
-
|
|
|
/**
 * Mutable lexing state threaded through every lexing routine in this module.
 */
interface State {
  /** Complete input text being tokenized. */
  str: string
  /** Index into `str` of the next character to consume. */
  position: number
  /** Tokens emitted so far, in the order they were produced. */
  tokens: Token[]
}
|
|
|
-
|
|
|
/**
 * Moves the cursor of `state` to the absolute index `end`.
 *
 * The jump is expressed as a relative move so that every cursor change goes
 * through `movePositopn`.
 *
 * @param state - Lexing state whose `position` is updated in place.
 * @param end - Target index.
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position)
}
|
|
|
-
|
|
|
/**
 * Shifts the cursor of `state` by `len` characters.
 *
 * @param state - Lexing state whose `position` is updated in place.
 * @param len - Number of characters to add to the current position.
 */
const movePositopn = (state: State, len: number) => {
  state.position += len
}
|
|
|
-
|
|
|
/**
 * Finds the next `<` at or after `index` that can begin markup.
 *
 * A `<` qualifies only when the character immediately after it is `/`, `!`,
 * or an ASCII letter or digit. Any other `<` (for example the one in `a < b`)
 * is treated as ordinary text and skipped.
 *
 * @param str - Text to search.
 * @param index - Index at which the search starts.
 * @returns The index of the qualifying `<`, or `-1` when there is none.
 */
const findTextEnd = (str: string, index: number) => {
  // Built once per call instead of once per candidate `<`.
  const markupLead = /[A-Za-z0-9\/!]/

  let candidate = str.indexOf('<', index)
  while (candidate !== -1) {
    // charAt past the end yields '', which the pattern rejects.
    if (markupLead.test(str.charAt(candidate + 1))) return candidate
    candidate = str.indexOf('<', candidate + 1)
  }
  return -1
}
|
|
|
-
|
|
|
/**
 * Emits a `text` token covering everything from the cursor up to the next
 * markup start (or the end of the input) and moves the cursor past it.
 *
 * Returns without emitting anything when markup begins exactly at the cursor.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 */
const lexText = (state: State) => {
  const source = state.str
  const begin = state.position

  const found = findTextEnd(source, begin)
  if (found === begin) return

  // No further markup: the text runs to the end of the input.
  const stop = found === -1 ? source.length : found

  jumpPosition(state, stop)
  state.tokens.push({
    type: 'text',
    content: source.slice(begin, stop),
  })
}
|
|
|
-
|
|
|
/**
 * Emits a `comment` token for the comment starting at the cursor and moves
 * the cursor past its closing `-->`.
 *
 * When no `-->` follows, the comment body extends to the end of the input.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 */
const lexComment = (state: State) => {
  const source = state.str

  // Step over the four-character opener before looking for the terminator.
  movePositopn(state, 4)
  const bodyStart = state.position

  const terminator = source.indexOf('-->', bodyStart)
  const unterminated = terminator === -1
  const bodyEnd = unterminated ? source.length : terminator
  const resumeAt = unterminated ? source.length : terminator + 3

  jumpPosition(state, resumeAt)
  state.tokens.push({
    type: 'comment',
    content: source.slice(bodyStart, bodyEnd),
  })
}
|
|
|
-
|
|
|
/**
 * Reads a tag name starting at the cursor, emits it as a `tag` token and
 * leaves the cursor on the first character after the name.
 *
 * Leading whitespace, `/` and `>` characters are skipped before the name
 * starts; the name then runs until the next whitespace, `/` or `>`.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 * @returns The tag name as written in the input (empty when the input ends
 *   before any name character is found).
 */
const lexTagName = (state: State) => {
  const { str } = state
  const len = str.length
  // Characters that can never be part of a tag name.
  const delimiter = /[\s\/>]/

  let start = state.position
  while (start < len && delimiter.test(str.charAt(start))) start++

  // Only step past the first name character when one exists, so the cursor
  // is never pushed beyond the end of the input.
  let end = start < len ? start + 1 : start
  while (end < len && !delimiter.test(str.charAt(end))) end++

  jumpPosition(state, end)
  const tagName = str.slice(start, end)
  state.tokens.push({
    type: 'tag',
    content: tagName,
  })
  return tagName
}
|
|
|
-
|
|
|
/**
 * Reads the attribute section of a tag, from the cursor up to (but not
 * including) the `/` or `>` that ends the tag, and emits one `attribute`
 * token per attribute.
 *
 * The section is first split into whitespace-separated words; whitespace,
 * `/` and `>` inside single or double quotes do not split or terminate.
 * Words are then stitched back together so that `name = value`,
 * `name =value` and `name= value` each yield a single `name=value` token.
 * A trailing `=` with no value is dropped from the name.
 *
 * If the input ends before a terminating `/` or `>` is seen, the word that
 * was still being scanned is not emitted.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 */
const lexTagAttributes = (state: State) => {
  const { str, tokens } = state
  const total = str.length
  const parts: string[] = []

  let openQuote: string | null = null
  let wordStart = state.position
  let pos = state.position

  // Pass 1: split into words, honouring quotes.
  for (; pos < total; pos++) {
    const ch = str.charAt(pos)

    if (openQuote) {
      if (ch === openQuote) openQuote = null
      continue
    }

    if (ch === '/' || ch === '>') {
      if (pos !== wordStart) parts.push(str.slice(wordStart, pos))
      break
    }

    if (/\s/.test(ch)) {
      if (pos !== wordStart) parts.push(str.slice(wordStart, pos))
      wordStart = pos + 1
      continue
    }

    if (ch === '\'' || ch === '"') openQuote = ch
  }
  jumpPosition(state, pos)

  const emit = (content: string) => {
    tokens.push({ type: 'attribute', content })
  }

  // Pass 2: merge words that belong to the same `name=value` pair.
  let i = 0
  while (i < parts.length) {
    const current = parts[i]
    const next = parts[i + 1]

    if (current.indexOf('=') === -1) {
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `name` followed by `=value`
          emit(current + next)
          i += 2
          continue
        }
        const afterEquals = parts[i + 2]
        if (afterEquals) {
          // `name`, `=`, `value`
          emit(current + '=' + afterEquals)
          i += 3
          continue
        }
        // `name` followed by a dangling `=`: keep the name, skip the `=`.
        emit(current)
        i += 2
        continue
      }
      emit(current)
      i += 1
      continue
    }

    if (endsWith(current, '=')) {
      if (next && next.indexOf('=') === -1) {
        // `name=` followed by `value`
        emit(current + next)
        i += 2
        continue
      }
      // `name=` with nothing usable after it: strip the trailing `=`.
      emit(current.slice(0, -1))
      i += 1
      continue
    }

    emit(current)
    i += 1
  }
}
|
|
|
-
|
|
|
/**
 * Consumes the raw content that follows an opening `tagName` tag, up to and
 * including its matching closing tag, without lexing that content as markup.
 *
 * Each `</` after the cursor is lexed on a scratch state. The first one
 * whose name equals `tagName` (case-insensitively) ends the search: the text
 * in between is emitted as a single `text` token, followed by the tokens of
 * the closing tag. When no `</` remains, the rest is handed to `lexText`.
 * When `</` sequences exist but none matches, nothing is emitted and the
 * cursor is left where it was.
 *
 * @param tagName - Name of the tag whose content is being skipped.
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 */
const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state
  const wanted = tagName.toLowerCase()
  const total = str.length
  let searchFrom = state.position

  while (searchFrom < total) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate closing tag on a scratch state so that a mismatch
    // leaves the real state untouched.
    const probe: State = { str, position: closeAt, tokens: [] }
    const closingName = lexTag(probe)
    if (closingName.toLowerCase() !== wanted) {
      searchFrom = probe.position
      continue
    }

    if (closeAt !== state.position) {
      tokens.push({
        type: 'text',
        content: str.slice(state.position, closeAt),
      })
    }
    tokens.push(...probe.tokens)
    jumpPosition(state, probe.position)
    return
  }
}
|
|
|
-
|
|
|
/**
 * Lexes one tag starting at the `<` under the cursor.
 *
 * Emits, in order: a `tag-start` token (`close` is true for `</`), the tag
 * name, any attributes, and a `tag-end` token (`close` is true when the tag
 * ends with a `/`). The cursor ends up after the tag.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 * @returns The tag name as written in the input.
 */
const lexTag = (state: State) => {
  const source = state.str

  const opensWithSlash = source.charAt(state.position + 1) === '/'
  // Skip `<` or `</`.
  movePositopn(state, opensWithSlash ? 2 : 1)
  state.tokens.push({ type: 'tag-start', close: opensWithSlash })

  const name = lexTagName(state)
  lexTagAttributes(state)

  const endsWithSlash = source.charAt(state.position) === '/'
  // Skip `>` or the two characters starting at `/`.
  movePositopn(state, endsWithSlash ? 2 : 1)
  state.tokens.push({ type: 'tag-end', close: endsWithSlash })

  return name
}
|
|
|
-
|
|
|
/**
 * Main lexing loop: alternates between text, comments and tags until the
 * cursor reaches the end of the input.
 *
 * After a tag whose lower-cased name is listed in `childlessTags`, the
 * following content is consumed by `lexSkipTag` instead of being lexed as
 * markup.
 *
 * @param state - Lexing state; `position` and `tokens` are updated in place.
 */
const lex = (state: State) => {
  const { str } = state
  const total = str.length

  while (state.position < total) {
    const before = state.position
    lexText(state)

    // Text was consumed; go round again to look at what follows it.
    if (state.position !== before) continue

    // The cursor sits on a `<`: decide between a comment and a tag.
    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}
|
|
|
-
|
|
|
/**
 * Tokenizes `str` and returns the resulting tokens in input order.
 *
 * @param str - Markup text to tokenize.
 * @returns The tokens produced by lexing the whole input.
 */
export const lexer = (str: string): Token[] => {
  const state: State = { str, position: 0, tokens: [] }
  lex(state)
  return state.tokens
}
|