From 7a52ddeba2a68388b544f529d2d92104420f77b0 Mon Sep 17 00:00:00 2001 From: Shipwreckt Date: Fri, 31 Oct 2025 20:02:14 +0000 Subject: Changed from static to 11ty! --- node_modules/htmlparser2/lib/Tokenizer.d.ts | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 node_modules/htmlparser2/lib/Tokenizer.d.ts (limited to 'node_modules/htmlparser2/lib/Tokenizer.d.ts') diff --git a/node_modules/htmlparser2/lib/Tokenizer.d.ts b/node_modules/htmlparser2/lib/Tokenizer.d.ts new file mode 100644 index 0000000..3138a6f --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.d.ts @@ -0,0 +1,173 @@ +/** All the states the tokenizer can be in. */ +declare const enum State { + Text = 1, + BeforeTagName = 2, + InTagName = 3, + InSelfClosingTag = 4, + BeforeClosingTagName = 5, + InClosingTagName = 6, + AfterClosingTagName = 7, + BeforeAttributeName = 8, + InAttributeName = 9, + AfterAttributeName = 10, + BeforeAttributeValue = 11, + InAttributeValueDq = 12, + InAttributeValueSq = 13, + InAttributeValueNq = 14, + BeforeDeclaration = 15, + InDeclaration = 16, + InProcessingInstruction = 17, + BeforeComment = 18, + CDATASequence = 19, + InSpecialComment = 20, + InCommentLike = 21, + BeforeSpecialS = 22, + SpecialStartSequence = 23, + InSpecialTag = 24, + BeforeEntity = 25, + BeforeNumericEntity = 26, + InNamedEntity = 27, + InNumericEntity = 28, + InHexEntity = 29 +} +export interface Callbacks { + onattribdata(value: string): void; + onattribend(quote: string | undefined | null): void; + onattribname(name: string): void; + oncdata(data: string): void; + onclosetag(name: string): void; + oncomment(data: string): void; + ondeclaration(content: string): void; + onend(): void; + onerror(error: Error, state?: State): void; + onopentagend(): void; + onopentagname(name: string): void; + onprocessinginstruction(instruction: string): void; + onselfclosingtag(): void; + ontext(value: string): void; +} +export default class Tokenizer { + private readonly cbs; + /** The current state the tokenizer is in. */ + private _state; + /** The read buffer. */ + private buffer; + /** The beginning of the section that is currently being read. */ + sectionStart: number; + /** The index within the buffer that we are currently looking at. */ + private _index; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + private bufferOffset; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ + private baseState; + /** For special parsing behavior inside of script and style tags. */ + private isSpecial; + /** Indicates whether the tokenizer has been paused. */ + private running; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + private ended; + private readonly xmlMode; + private readonly decodeEntities; + private readonly entityTrie; + constructor({ xmlMode, decodeEntities, }: { + xmlMode?: boolean; + decodeEntities?: boolean; + }, cbs: Callbacks); + reset(): void; + write(chunk: string): void; + end(chunk?: string): void; + pause(): void; + resume(): void; + /** + * The start of the current section. + */ + getAbsoluteSectionStart(): number; + /** + * The current index within all of the written data. + */ + getAbsoluteIndex(): number; + private stateText; + private currentSequence; + private sequenceIndex; + private stateSpecialStartSequence; + /** Look for an end tag. For tags, also decode entities. */ + private stateInSpecialTag; + private stateCDATASequence; + /** + * When we wait for one specific character, we can speed things up + * by skipping through the buffer until we find it. + * + * @returns Whether the character was found. + */ + private fastForwardTo; + /** + * Comments and CDATA end with `-->` and `]]>`. + * + * Their common qualities are: + * - Their end sequences have a distinct character they start with. + * - That character is then repeated, so we have to check multiple repeats. + * - All characters but the start character of the sequence can be skipped. + */ + private stateInCommentLike; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. + */ + private isTagStartChar; + private startSpecial; + private stateBeforeTagName; + private stateInTagName; + private stateBeforeClosingTagName; + private stateInClosingTagName; + private stateAfterClosingTagName; + private stateBeforeAttributeName; + private stateInSelfClosingTag; + private stateInAttributeName; + private stateAfterAttributeName; + private stateBeforeAttributeValue; + private handleInAttributeValue; + private stateInAttributeValueDoubleQuotes; + private stateInAttributeValueSingleQuotes; + private stateInAttributeValueNoQuotes; + private stateBeforeDeclaration; + private stateInDeclaration; + private stateInProcessingInstruction; + private stateBeforeComment; + private stateInSpecialComment; + private stateBeforeSpecialS; + private trieIndex; + private trieCurrent; + private trieResult; + private entityExcess; + private stateBeforeEntity; + private stateInNamedEntity; + private emitNamedEntity; + private stateBeforeNumericEntity; + private decodeNumericEntity; + private stateInNumericEntity; + private stateInHexEntity; + private allowLegacyEntity; + /** + * Remove data that has already been consumed from the buffer. + */ + private cleanup; + private shouldContinue; + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + private parse; + private finish; + /** Handle any trailing data. */ + private handleTrailingData; + private getSection; + private emitPartial; +} +export {}; +//# sourceMappingURL=Tokenizer.d.ts.map \ No newline at end of file -- cgit v1.2.3