diff options
| author | Shipwreckt <me@shipwreckt.co.uk> | 2025-10-31 20:02:14 +0000 |
|---|---|---|
| committer | Shipwreckt <me@shipwreckt.co.uk> | 2025-10-31 20:02:14 +0000 |
| commit | 7a52ddeba2a68388b544f529d2d92104420f77b0 (patch) | |
| tree | 15ddd47457a2cb4a96060747437d36474e4f6b4e /node_modules/htmlparser2/lib/Tokenizer.js | |
| parent | 53d6ae2b5568437afa5e4995580a3fb679b7b91b (diff) | |
Changed from static to 11ty!
Diffstat (limited to 'node_modules/htmlparser2/lib/Tokenizer.js')
| -rw-r--r-- | node_modules/htmlparser2/lib/Tokenizer.js | 821 |
1 files changed, 821 insertions, 0 deletions
diff --git a/node_modules/htmlparser2/lib/Tokenizer.js b/node_modules/htmlparser2/lib/Tokenizer.js new file mode 100644 index 0000000..6b3579b --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.js @@ -0,0 +1,821 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint")); +var decode_1 = require("entities/lib/decode"); +function isWhitespace(c) { + return (c === 32 /* Space */ || + c === 10 /* NewLine */ || + c === 9 /* Tab */ || + c === 12 /* FormFeed */ || + c === 13 /* CarriageReturn */); +} +function isEndOfTagSection(c) { + return c === 47 /* Slash */ || c === 62 /* Gt */ || isWhitespace(c); +} +function isNumber(c) { + return c >= 48 /* Zero */ && c <= 57 /* Nine */; +} +function isASCIIAlpha(c) { + return ((c >= 97 /* LowerA */ && c <= 122 /* LowerZ */) || + (c >= 65 /* UpperA */ && c <= 90 /* UpperZ */)); +} +/** + * Sequences used to match longer strings. + * + * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End + * sequences with an increased offset. + */ +var Sequences = { + Cdata: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), + CdataEnd: new Uint16Array([0x5d, 0x5d, 0x3e]), + CommentEnd: new Uint16Array([0x2d, 0x2d, 0x3e]), + ScriptEnd: new Uint16Array([ + 0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + ]), + StyleEnd: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), + TitleEnd: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title` +}; +var Tokenizer = /** @class */ (function () { + function Tokenizer(_a, cbs) { + var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c; + this.cbs = cbs; + /** The current state the tokenizer is in. */ + this._state = 1 /* Text */; + /** The read buffer. */ + this.buffer = ""; + /** The beginning of the section that is currently being read. */ + this.sectionStart = 0; + /** The index within the buffer that we are currently looking at. */ + this._index = 0; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + this.bufferOffset = 0; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ + this.baseState = 1 /* Text */; + /** For special parsing behavior inside of script and style tags. */ + this.isSpecial = false; + /** Indicates whether the tokenizer has been paused. */ + this.running = true; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + this.ended = false; + this.sequenceIndex = 0; + this.trieIndex = 0; + this.trieCurrent = 0; + this.trieResult = null; + this.entityExcess = 0; + this.xmlMode = xmlMode; + this.decodeEntities = decodeEntities; + this.entityTrie = xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree; + } + Tokenizer.prototype.reset = function () { + this._state = 1 /* Text */; + this.buffer = ""; + this.sectionStart = 0; + this._index = 0; + this.bufferOffset = 0; + this.baseState = 1 /* Text */; + this.currentSequence = undefined; + this.running = true; + this.ended = false; + }; + Tokenizer.prototype.write = function (chunk) { + if (this.ended) + return this.cbs.onerror(Error(".write() after done!")); + this.buffer += chunk; + this.parse(); + }; + Tokenizer.prototype.end = function (chunk) { + if (this.ended) + return this.cbs.onerror(Error(".end() after done!")); + if (chunk) + this.write(chunk); + this.ended = true; + if (this.running) + this.finish(); + }; + Tokenizer.prototype.pause = function () { + this.running = false; + }; + Tokenizer.prototype.resume = function () { + this.running = true; + if (this._index < this.buffer.length) { + this.parse(); + } + if (this.ended) { + this.finish(); + } + }; + /** + * The start of the current section. + */ + Tokenizer.prototype.getAbsoluteSectionStart = function () { + return this.sectionStart + this.bufferOffset; + }; + /** + * The current index within all of the written data. + */ + Tokenizer.prototype.getAbsoluteIndex = function () { + return this.bufferOffset + this._index; + }; + Tokenizer.prototype.stateText = function (c) { + if (c === 60 /* Lt */ || + (!this.decodeEntities && this.fastForwardTo(60 /* Lt */))) { + if (this._index > this.sectionStart) { + this.cbs.ontext(this.getSection()); + } + this._state = 2 /* BeforeTagName */; + this.sectionStart = this._index; + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateSpecialStartSequence = function (c) { + var isEnd = this.sequenceIndex === this.currentSequence.length; + var isMatch = isEnd + ? // If we are at the end of the sequence, make sure the tag name has ended + isEndOfTagSection(c) + : // Otherwise, do a case-insensitive comparison + (c | 0x20) === this.currentSequence[this.sequenceIndex]; + if (!isMatch) { + this.isSpecial = false; + } + else if (!isEnd) { + this.sequenceIndex++; + return; + } + this.sequenceIndex = 0; + this._state = 3 /* InTagName */; + this.stateInTagName(c); + }; + /** Look for an end tag. For <title> tags, also decode entities. */ + Tokenizer.prototype.stateInSpecialTag = function (c) { + if (this.sequenceIndex === this.currentSequence.length) { + if (c === 62 /* Gt */ || isWhitespace(c)) { + var endOfText = this._index - this.currentSequence.length; + if (this.sectionStart < endOfText) { + // Spoof the index so that reported locations match up. + var actualIndex = this._index; + this._index = endOfText; + this.cbs.ontext(this.getSection()); + this._index = actualIndex; + } + this.isSpecial = false; + this.sectionStart = endOfText + 2; // Skip over the `</` + this.stateInClosingTagName(c); + return; // We are done; skip the rest of the function. + } + this.sequenceIndex = 0; + } + if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) { + this.sequenceIndex += 1; + } + else if (this.sequenceIndex === 0) { + if (this.currentSequence === Sequences.TitleEnd) { + // We have to parse entities in <title> tags. + if (this.decodeEntities && c === 38 /* Amp */) { + this._state = 25 /* BeforeEntity */; + } + } + else if (this.fastForwardTo(60 /* Lt */)) { + // Outside of <title> tags, we can fast-forward. + this.sequenceIndex = 1; + } + } + else { + // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`. + this.sequenceIndex = Number(c === 60 /* Lt */); + } + }; + Tokenizer.prototype.stateCDATASequence = function (c) { + if (c === Sequences.Cdata[this.sequenceIndex]) { + if (++this.sequenceIndex === Sequences.Cdata.length) { + this._state = 21 /* InCommentLike */; + this.currentSequence = Sequences.CdataEnd; + this.sequenceIndex = 0; + this.sectionStart = this._index + 1; + } + } + else { + this.sequenceIndex = 0; + this._state = 16 /* InDeclaration */; + this.stateInDeclaration(c); // Reconsume the character + } + }; + /** + * When we wait for one specific character, we can speed things up + * by skipping through the buffer until we find it. + * + * @returns Whether the character was found. + */ + Tokenizer.prototype.fastForwardTo = function (c) { + while (++this._index < this.buffer.length) { + if (this.buffer.charCodeAt(this._index) === c) { + return true; + } + } + /* + * We increment the index at the end of the `parse` loop, + * so set it to `buffer.length - 1` here. + * + * TODO: Refactor `parse` to increment index before calling states. + */ + this._index = this.buffer.length - 1; + return false; + }; + /** + * Comments and CDATA end with `-->` and `]]>`. + * + * Their common qualities are: + * - Their end sequences have a distinct character they start with. + * - That character is then repeated, so we have to check multiple repeats. + * - All characters but the start character of the sequence can be skipped. + */ + Tokenizer.prototype.stateInCommentLike = function (c) { + if (c === this.currentSequence[this.sequenceIndex]) { + if (++this.sequenceIndex === this.currentSequence.length) { + // Remove 2 trailing chars + var section = this.buffer.slice(this.sectionStart, this._index - 2); + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(section); + } + else { + this.cbs.oncomment(section); + } + this.sequenceIndex = 0; + this.sectionStart = this._index + 1; + this._state = 1 /* Text */; + } + } + else if (this.sequenceIndex === 0) { + // Fast-forward to the first character of the sequence + if (this.fastForwardTo(this.currentSequence[0])) { + this.sequenceIndex = 1; + } + } + else if (c !== this.currentSequence[this.sequenceIndex - 1]) { + // Allow long sequences, eg. --->, ]]]> + this.sequenceIndex = 0; + } + }; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. + */ + Tokenizer.prototype.isTagStartChar = function (c) { + return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c); + }; + Tokenizer.prototype.startSpecial = function (sequence, offset) { + this.isSpecial = true; + this.currentSequence = sequence; + this.sequenceIndex = offset; + this._state = 23 /* SpecialStartSequence */; + }; + Tokenizer.prototype.stateBeforeTagName = function (c) { + if (c === 33 /* ExclamationMark */) { + this._state = 15 /* BeforeDeclaration */; + this.sectionStart = this._index + 1; + } + else if (c === 63 /* Questionmark */) { + this._state = 17 /* InProcessingInstruction */; + this.sectionStart = this._index + 1; + } + else if (this.isTagStartChar(c)) { + var lower = c | 0x20; + this.sectionStart = this._index; + if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { + this.startSpecial(Sequences.TitleEnd, 3); + } + else { + this._state = + !this.xmlMode && lower === Sequences.ScriptEnd[2] + ? 22 /* BeforeSpecialS */ + : 3 /* InTagName */; + } + } + else if (c === 47 /* Slash */) { + this._state = 5 /* BeforeClosingTagName */; + } + else { + this._state = 1 /* Text */; + this.stateText(c); + } + }; + Tokenizer.prototype.stateInTagName = function (c) { + if (isEndOfTagSection(c)) { + this.cbs.onopentagname(this.getSection()); + this.sectionStart = -1; + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + }; + Tokenizer.prototype.stateBeforeClosingTagName = function (c) { + if (isWhitespace(c)) { + // Ignore + } + else if (c === 62 /* Gt */) { + this._state = 1 /* Text */; + } + else { + this._state = this.isTagStartChar(c) + ? 6 /* InClosingTagName */ + : 20 /* InSpecialComment */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInClosingTagName = function (c) { + if (c === 62 /* Gt */ || isWhitespace(c)) { + this.cbs.onclosetag(this.getSection()); + this.sectionStart = -1; + this._state = 7 /* AfterClosingTagName */; + this.stateAfterClosingTagName(c); + } + }; + Tokenizer.prototype.stateAfterClosingTagName = function (c) { + // Skip everything until ">" + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeAttributeName = function (c) { + if (c === 62 /* Gt */) { + this.cbs.onopentagend(); + if (this.isSpecial) { + this._state = 24 /* InSpecialTag */; + this.sequenceIndex = 0; + } + else { + this._state = 1 /* Text */; + } + this.baseState = this._state; + this.sectionStart = this._index + 1; + } + else if (c === 47 /* Slash */) { + this._state = 4 /* InSelfClosingTag */; + } + else if (!isWhitespace(c)) { + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInSelfClosingTag = function (c) { + if (c === 62 /* Gt */) { + this.cbs.onselfclosingtag(); + this._state = 1 /* Text */; + this.baseState = 1 /* Text */; + this.sectionStart = this._index + 1; + this.isSpecial = false; // Reset special state, in case of self-closing special tags + } + else if (!isWhitespace(c)) { + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + }; + Tokenizer.prototype.stateInAttributeName = function (c) { + if (c === 61 /* Eq */ || isEndOfTagSection(c)) { + this.cbs.onattribname(this.getSection()); + this.sectionStart = -1; + this._state = 10 /* AfterAttributeName */; + this.stateAfterAttributeName(c); + } + }; + Tokenizer.prototype.stateAfterAttributeName = function (c) { + if (c === 61 /* Eq */) { + this._state = 11 /* BeforeAttributeValue */; + } + else if (c === 47 /* Slash */ || c === 62 /* Gt */) { + this.cbs.onattribend(undefined); + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + else if (!isWhitespace(c)) { + this.cbs.onattribend(undefined); + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateBeforeAttributeValue = function (c) { + if (c === 34 /* DoubleQuote */) { + this._state = 12 /* InAttributeValueDq */; + this.sectionStart = this._index + 1; + } + else if (c === 39 /* SingleQuote */) { + this._state = 13 /* InAttributeValueSq */; + this.sectionStart = this._index + 1; + } + else if (!isWhitespace(c)) { + this.sectionStart = this._index; + this._state = 14 /* InAttributeValueNq */; + this.stateInAttributeValueNoQuotes(c); // Reconsume token + } + }; + Tokenizer.prototype.handleInAttributeValue = function (c, quote) { + if (c === quote || + (!this.decodeEntities && this.fastForwardTo(quote))) { + this.cbs.onattribdata(this.getSection()); + this.sectionStart = -1; + this.cbs.onattribend(String.fromCharCode(quote)); + this._state = 8 /* BeforeAttributeName */; + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this.baseState = this._state; + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) { + this.handleInAttributeValue(c, 34 /* DoubleQuote */); + }; + Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) { + this.handleInAttributeValue(c, 39 /* SingleQuote */); + }; + Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) { + if (isWhitespace(c) || c === 62 /* Gt */) { + this.cbs.onattribdata(this.getSection()); + this.sectionStart = -1; + this.cbs.onattribend(null); + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this.baseState = this._state; + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateBeforeDeclaration = function (c) { + if (c === 91 /* OpeningSquareBracket */) { + this._state = 19 /* CDATASequence */; + this.sequenceIndex = 0; + } + else { + this._state = + c === 45 /* Dash */ + ? 18 /* BeforeComment */ + : 16 /* InDeclaration */; + } + }; + Tokenizer.prototype.stateInDeclaration = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.ondeclaration(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateInProcessingInstruction = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.onprocessinginstruction(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeComment = function (c) { + if (c === 45 /* Dash */) { + this._state = 21 /* InCommentLike */; + this.currentSequence = Sequences.CommentEnd; + // Allow short comments (eg. <!-->) + this.sequenceIndex = 2; + this.sectionStart = this._index + 1; + } + else { + this._state = 16 /* InDeclaration */; + } + }; + Tokenizer.prototype.stateInSpecialComment = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.oncomment(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeSpecialS = function (c) { + var lower = c | 0x20; + if (lower === Sequences.ScriptEnd[3]) { + this.startSpecial(Sequences.ScriptEnd, 4); + } + else if (lower === Sequences.StyleEnd[3]) { + this.startSpecial(Sequences.StyleEnd, 4); + } + else { + this._state = 3 /* InTagName */; + this.stateInTagName(c); // Consume the token again + } + }; + Tokenizer.prototype.stateBeforeEntity = function (c) { + // Start excess with 1 to include the '&' + this.entityExcess = 1; + if (c === 35 /* Num */) { + this._state = 26 /* BeforeNumericEntity */; + } + else if (c === 38 /* Amp */) { + // We have two `&` characters in a row. Stay in the current state. + } + else { + this.trieIndex = 0; + this.trieCurrent = this.entityTrie[0]; + this.trieResult = null; + this._state = 27 /* InNamedEntity */; + this.stateInNamedEntity(c); + } + }; + Tokenizer.prototype.stateInNamedEntity = function (c) { + this.entityExcess += 1; + this.trieIndex = (0, decode_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c); + if (this.trieIndex < 0) { + this.emitNamedEntity(); + this._index--; + return; + } + this.trieCurrent = this.entityTrie[this.trieIndex]; + // If the branch is a value, store it and continue + if (this.trieCurrent & decode_1.BinTrieFlags.HAS_VALUE) { + // If we have a legacy entity while parsing strictly, just skip the number of bytes + if (!this.allowLegacyEntity() && c !== 59 /* Semi */) { + // No need to consider multi-byte values, as the legacy entity is always a single byte + this.trieIndex += 1; + } + else { + // Add 1 as we have already incremented the excess + var entityStart = this._index - this.entityExcess + 1; + if (entityStart > this.sectionStart) { + this.emitPartial(this.buffer.substring(this.sectionStart, entityStart)); + } + // If this is a surrogate pair, combine the higher bits from the node with the next byte + this.trieResult = + this.trieCurrent & decode_1.BinTrieFlags.MULTI_BYTE + ? String.fromCharCode(this.entityTrie[++this.trieIndex], this.entityTrie[++this.trieIndex]) + : String.fromCharCode(this.entityTrie[++this.trieIndex]); + this.entityExcess = 0; + this.sectionStart = this._index + 1; + } + } + }; + Tokenizer.prototype.emitNamedEntity = function () { + if (this.trieResult) { + this.emitPartial(this.trieResult); + } + this._state = this.baseState; + }; + Tokenizer.prototype.stateBeforeNumericEntity = function (c) { + if ((c | 0x20) === 120 /* LowerX */) { + this.entityExcess++; + this._state = 29 /* InHexEntity */; + } + else { + this._state = 28 /* InNumericEntity */; + this.stateInNumericEntity(c); + } + }; + Tokenizer.prototype.decodeNumericEntity = function (base, strict) { + var entityStart = this._index - this.entityExcess - 1; + var numberStart = entityStart + 2 + (base >> 4); + if (numberStart !== this._index) { + // Emit leading data if any + if (entityStart > this.sectionStart) { + this.emitPartial(this.buffer.substring(this.sectionStart, entityStart)); + } + // Parse entity + var entity = this.buffer.substring(numberStart, this._index); + var parsed = parseInt(entity, base); + this.emitPartial((0, decode_codepoint_1.default)(parsed)); + this.sectionStart = this._index + Number(strict); + } + this._state = this.baseState; + }; + Tokenizer.prototype.stateInNumericEntity = function (c) { + if (c === 59 /* Semi */) { + this.decodeNumericEntity(10, true); + } + else if (!isNumber(c)) { + if (this.allowLegacyEntity()) { + this.decodeNumericEntity(10, false); + } + else { + this._state = this.baseState; + } + this._index--; + } + else { + this.entityExcess++; + } + }; + Tokenizer.prototype.stateInHexEntity = function (c) { + if (c === 59 /* Semi */) { + this.decodeNumericEntity(16, true); + } + else if ((c < 97 /* LowerA */ || c > 102 /* LowerF */) && + (c < 65 /* UpperA */ || c > 70 /* UpperF */) && + !isNumber(c)) { + if (this.allowLegacyEntity()) { + this.decodeNumericEntity(16, false); + } + else { + this._state = this.baseState; + } + this._index--; + } + else { + this.entityExcess++; + } + }; + Tokenizer.prototype.allowLegacyEntity = function () { + return (!this.xmlMode && + (this.baseState === 1 /* Text */ || + this.baseState === 24 /* InSpecialTag */)); + }; + /** + * Remove data that has already been consumed from the buffer. + */ + Tokenizer.prototype.cleanup = function () { + // If we are inside of text, emit what we already have. + if (this.running && + this.sectionStart !== this._index && + (this._state === 1 /* Text */ || + (this._state === 24 /* InSpecialTag */ && + this.sequenceIndex === 0))) { + // TODO: We could emit attribute data here as well. + this.cbs.ontext(this.buffer.substr(this.sectionStart)); + this.sectionStart = this._index; + } + var start = this.sectionStart < 0 ? this._index : this.sectionStart; + this.buffer = + start === this.buffer.length ? "" : this.buffer.substr(start); + this._index -= start; + this.bufferOffset += start; + if (this.sectionStart > 0) { + this.sectionStart = 0; + } + }; + Tokenizer.prototype.shouldContinue = function () { + return this._index < this.buffer.length && this.running; + }; + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + Tokenizer.prototype.parse = function () { + while (this.shouldContinue()) { + var c = this.buffer.charCodeAt(this._index); + if (this._state === 1 /* Text */) { + this.stateText(c); + } + else if (this._state === 23 /* SpecialStartSequence */) { + this.stateSpecialStartSequence(c); + } + else if (this._state === 24 /* InSpecialTag */) { + this.stateInSpecialTag(c); + } + else if (this._state === 19 /* CDATASequence */) { + this.stateCDATASequence(c); + } + else if (this._state === 12 /* InAttributeValueDq */) { + this.stateInAttributeValueDoubleQuotes(c); + } + else if (this._state === 9 /* InAttributeName */) { + this.stateInAttributeName(c); + } + else if (this._state === 21 /* InCommentLike */) { + this.stateInCommentLike(c); + } + else if (this._state === 20 /* InSpecialComment */) { + this.stateInSpecialComment(c); + } + else if (this._state === 8 /* BeforeAttributeName */) { + this.stateBeforeAttributeName(c); + } + else if (this._state === 3 /* InTagName */) { + this.stateInTagName(c); + } + else if (this._state === 6 /* InClosingTagName */) { + this.stateInClosingTagName(c); + } + else if (this._state === 2 /* BeforeTagName */) { + this.stateBeforeTagName(c); + } + else if (this._state === 10 /* AfterAttributeName */) { + this.stateAfterAttributeName(c); + } + else if (this._state === 13 /* InAttributeValueSq */) { + this.stateInAttributeValueSingleQuotes(c); + } + else if (this._state === 11 /* BeforeAttributeValue */) { + this.stateBeforeAttributeValue(c); + } + else if (this._state === 5 /* BeforeClosingTagName */) { + this.stateBeforeClosingTagName(c); + } + else if (this._state === 7 /* AfterClosingTagName */) { + this.stateAfterClosingTagName(c); + } + else if (this._state === 22 /* BeforeSpecialS */) { + this.stateBeforeSpecialS(c); + } + else if (this._state === 14 /* InAttributeValueNq */) { + this.stateInAttributeValueNoQuotes(c); + } + else if (this._state === 4 /* InSelfClosingTag */) { + this.stateInSelfClosingTag(c); + } + else if (this._state === 16 /* InDeclaration */) { + this.stateInDeclaration(c); + } + else if (this._state === 15 /* BeforeDeclaration */) { + this.stateBeforeDeclaration(c); + } + else if (this._state === 18 /* BeforeComment */) { + this.stateBeforeComment(c); + } + else if (this._state === 17 /* InProcessingInstruction */) { + this.stateInProcessingInstruction(c); + } + else if (this._state === 27 /* InNamedEntity */) { + this.stateInNamedEntity(c); + } + else if (this._state === 25 /* BeforeEntity */) { + this.stateBeforeEntity(c); + } + else if (this._state === 29 /* InHexEntity */) { + this.stateInHexEntity(c); + } + else if (this._state === 28 /* InNumericEntity */) { + this.stateInNumericEntity(c); + } + else { + // `this._state === State.BeforeNumericEntity` + this.stateBeforeNumericEntity(c); + } + this._index++; + } + this.cleanup(); + }; + Tokenizer.prototype.finish = function () { + if (this._state === 27 /* InNamedEntity */) { + this.emitNamedEntity(); + } + // If there is remaining data, emit it in a reasonable way + if (this.sectionStart < this._index) { + this.handleTrailingData(); + } + this.cbs.onend(); + }; + /** Handle any trailing data. */ + Tokenizer.prototype.handleTrailingData = function () { + var data = this.buffer.substr(this.sectionStart); + if (this._state === 21 /* InCommentLike */) { + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(data); + } + else { + this.cbs.oncomment(data); + } + } + else if (this._state === 28 /* InNumericEntity */ && + this.allowLegacyEntity()) { + this.decodeNumericEntity(10, false); + // All trailing data will have been consumed + } + else if (this._state === 29 /* InHexEntity */ && + this.allowLegacyEntity()) { + this.decodeNumericEntity(16, false); + // All trailing data will have been consumed + } + else if (this._state === 3 /* InTagName */ || + this._state === 8 /* BeforeAttributeName */ || + this._state === 11 /* BeforeAttributeValue */ || + this._state === 10 /* AfterAttributeName */ || + this._state === 9 /* InAttributeName */ || + this._state === 13 /* InAttributeValueSq */ || + this._state === 12 /* InAttributeValueDq */ || + this._state === 14 /* InAttributeValueNq */ || + this._state === 6 /* InClosingTagName */) { + /* + * If we are currently in an opening or closing tag, us not calling the + * respective callback signals that the tag should be ignored. + */ + } + else { + this.cbs.ontext(data); + } + }; + Tokenizer.prototype.getSection = function () { + return this.buffer.substring(this.sectionStart, this._index); + }; + Tokenizer.prototype.emitPartial = function (value) { + if (this.baseState !== 1 /* Text */ && + this.baseState !== 24 /* InSpecialTag */) { + this.cbs.onattribdata(value); + } + else { + this.cbs.ontext(value); + } + }; + return Tokenizer; +}()); +exports.default = Tokenizer; |
