diff options
Diffstat (limited to 'node_modules/htmlparser2/lib')
| -rw-r--r-- | node_modules/htmlparser2/lib/FeedHandler.d.ts | 23 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/FeedHandler.d.ts.map | 1 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/FeedHandler.js | 68 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Parser.d.ts | 180 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Parser.d.ts.map | 1 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Parser.js | 424 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Tokenizer.d.ts | 173 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Tokenizer.d.ts.map | 1 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/Tokenizer.js | 821 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/WritableStream.d.ts | 16 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/WritableStream.d.ts.map | 1 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/WritableStream.js | 53 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/index.d.ts | 39 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/index.d.ts.map | 1 | ||||
| -rw-r--r-- | node_modules/htmlparser2/lib/index.js | 84 |
15 files changed, 1886 insertions, 0 deletions
diff --git a/node_modules/htmlparser2/lib/FeedHandler.d.ts b/node_modules/htmlparser2/lib/FeedHandler.d.ts new file mode 100644 index 0000000..0491715 --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.d.ts @@ -0,0 +1,23 @@ +import DomHandler, { DomHandlerOptions } from "domhandler"; +import { getFeed, Feed } from "domutils"; +import { ParserOptions } from "./Parser"; +export { getFeed }; +/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. */ +export declare class FeedHandler extends DomHandler { + feed?: Feed; + /** + * + * @param callback + * @param options + */ + constructor(callback?: ((error: Error | null) => void) | DomHandlerOptions, options?: DomHandlerOptions); + onend(): void; +} +/** + * Parse a feed. + * + * @param feed The feed that should be parsed, as a string. + * @param options Optionally, options for parsing. When using this, you should set `xmlMode` to `true`. + */ +export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | null; +//# sourceMappingURL=FeedHandler.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/FeedHandler.d.ts.map b/node_modules/htmlparser2/lib/FeedHandler.d.ts.map new file mode 100644 index 0000000..34780db --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"FeedHandler.d.ts","sourceRoot":"","sources":["../src/FeedHandler.ts"],"names":[],"mappings":"AAAA,OAAO,UAAU,EAAE,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC3D,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAU,aAAa,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,OAAO,EAAE,CAAC;AAEnB,wFAAwF;AACxF,qBAAa,WAAY,SAAQ,UAAU;IACvC,IAAI,CAAC,EAAE,IAAI,CAAC;IAEZ;;;;OAIG;gBAEC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,KAAK,IAAI,CAAC,GAAG,iBAAiB,EAC9D,OAAO,CAAC,EAAE,iBAAiB;IAS/B,KAAK,IAAI,IAAI;CAUhB;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CACrB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,aAAa,GAAG,iBAAqC,GAC/D,IAAI,GAAG,IAAI,CAIb"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/FeedHandler.js b/node_modules/htmlparser2/lib/FeedHandler.js new file mode 100644 index 0000000..c85370b --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.js @@ -0,0 +1,68 @@ +"use strict"; +var __extends = (this && this.__extends) || (function () { + var extendStatics = function (d, b) { + extendStatics = Object.setPrototypeOf || + ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || + function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; + return extendStatics(d, b); + }; + return function (d, b) { + if (typeof b !== "function" && b !== null) + throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); + extendStatics(d, b); + function __() { this.constructor = d; } + d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); + }; +})(); +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.parseFeed = exports.FeedHandler = exports.getFeed = void 0; +var domhandler_1 = __importDefault(require("domhandler")); +var domutils_1 = require("domutils"); +Object.defineProperty(exports, "getFeed", { enumerable: true, get: function () { return domutils_1.getFeed; } }); +var Parser_1 = require("./Parser"); +/** @deprecated Handler is no longer necessary; use `getFeed` or `parseFeed` instead. 
*/ +var FeedHandler = /** @class */ (function (_super) { + __extends(FeedHandler, _super); + /** + * + * @param callback + * @param options + */ + function FeedHandler(callback, options) { + var _this = this; + if (typeof callback === "object") { + callback = undefined; + options = callback; + } + _this = _super.call(this, callback, options) || this; + return _this; + } + FeedHandler.prototype.onend = function () { + var feed = (0, domutils_1.getFeed)(this.dom); + if (feed) { + this.feed = feed; + this.handleCallback(null); + } + else { + this.handleCallback(new Error("couldn't find root of feed")); + } + }; + return FeedHandler; +}(domhandler_1.default)); +exports.FeedHandler = FeedHandler; +/** + * Parse a feed. + * + * @param feed The feed that should be parsed, as a string. + * @param options Optionally, options for parsing. When using this, you should set `xmlMode` to `true`. + */ +function parseFeed(feed, options) { + if (options === void 0) { options = { xmlMode: true }; } + var handler = new domhandler_1.default(null, options); + new Parser_1.Parser(handler, options).end(feed); + return (0, domutils_1.getFeed)(handler.dom); +} +exports.parseFeed = parseFeed; diff --git a/node_modules/htmlparser2/lib/Parser.d.ts b/node_modules/htmlparser2/lib/Parser.d.ts new file mode 100644 index 0000000..dee6755 --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.d.ts @@ -0,0 +1,180 @@ +import Tokenizer from "./Tokenizer"; +export interface ParserOptions { + /** + * Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment + * and if "empty" tags (eg. `<br>`) can have children. If `false`, the content of special tags + * will be text only. For feeds and other XML content (documents that don't consist of HTML), + * set this to `true`. + * + * @default false + */ + xmlMode?: boolean; + /** + * Decode entities within the document. 
+ * + * @default true + */ + decodeEntities?: boolean; + /** + * If set to true, all tags will be lowercased. + * + * @default !xmlMode + */ + lowerCaseTags?: boolean; + /** + * If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed. + * + * @default !xmlMode + */ + lowerCaseAttributeNames?: boolean; + /** + * If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled. + * NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text. + * + * @default xmlMode + */ + recognizeCDATA?: boolean; + /** + * If set to `true`, self-closing tags will trigger the onclosetag event even if xmlMode is not set to `true`. + * NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized. + * + * @default xmlMode + */ + recognizeSelfClosing?: boolean; + /** + * Allows the default tokenizer to be overwritten. + */ + Tokenizer?: typeof Tokenizer; +} +export interface Handler { + onparserinit(parser: Parser): void; + /** + * Resets the handler back to starting state + */ + onreset(): void; + /** + * Signals the handler that parsing is done + */ + onend(): void; + onerror(error: Error): void; + onclosetag(name: string, isImplied: boolean): void; + onopentagname(name: string): void; + /** + * + * @param name Name of the attribute + * @param value Value of the attribute. + * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value. 
+ */ + onattribute(name: string, value: string, quote?: string | undefined | null): void; + onopentag(name: string, attribs: { + [s: string]: string; + }, isImplied: boolean): void; + ontext(data: string): void; + oncomment(data: string): void; + oncdatastart(): void; + oncdataend(): void; + oncommentend(): void; + onprocessinginstruction(name: string, data: string): void; +} +export declare class Parser { + private readonly options; + /** The start index of the last event. */ + startIndex: number; + /** The end index of the last event. */ + endIndex: number; + /** + * Store the start index of the current open tag, + * so we can update the start index for attributes. + */ + private openTagStart; + private tagname; + private attribname; + private attribvalue; + private attribs; + private stack; + private readonly foreignContext; + private readonly cbs; + private readonly lowerCaseTagNames; + private readonly lowerCaseAttributeNames; + private readonly tokenizer; + constructor(cbs?: Partial<Handler> | null, options?: ParserOptions); + /** @internal */ + ontext(data: string): void; + protected isVoidElement(name: string): boolean; + /** @internal */ + onopentagname(name: string): void; + private emitOpenTag; + private endOpenTag; + /** @internal */ + onopentagend(): void; + /** @internal */ + onclosetag(name: string): void; + /** @internal */ + onselfclosingtag(): void; + private closeCurrentTag; + /** @internal */ + onattribname(name: string): void; + /** @internal */ + onattribdata(value: string): void; + /** @internal */ + onattribend(quote: string | undefined | null): void; + private getInstructionName; + /** @internal */ + ondeclaration(value: string): void; + /** @internal */ + onprocessinginstruction(value: string): void; + /** @internal */ + oncomment(value: string): void; + /** @internal */ + oncdata(value: string): void; + /** @internal */ + onerror(err: Error): void; + /** @internal */ + onend(): void; + /** + * Resets the parser to a blank state, ready to 
parse a new HTML document + */ + reset(): void; + /** + * Resets the parser, then parses a complete document and + * pushes it to the handler. + * + * @param data Document to parse. + */ + parseComplete(data: string): void; + /** + * Parses a chunk of data and calls the corresponding callbacks. + * + * @param chunk Chunk to parse. + */ + write(chunk: string): void; + /** + * Parses the end of the buffer and clears the stack, calls onend. + * + * @param chunk Optional final chunk to parse. + */ + end(chunk?: string): void; + /** + * Pauses parsing. The parser won't emit events until `resume` is called. + */ + pause(): void; + /** + * Resumes parsing after `pause` was called. + */ + resume(): void; + /** + * Alias of `write`, for backwards compatibility. + * + * @param chunk Chunk to parse. + * @deprecated + */ + parseChunk(chunk: string): void; + /** + * Alias of `end`, for backwards compatibility. + * + * @param chunk Optional final chunk to parse. + * @deprecated + */ + done(chunk?: string): void; +} +//# sourceMappingURL=Parser.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Parser.d.ts.map b/node_modules/htmlparser2/lib/Parser.d.ts.map new file mode 100644 index 0000000..62a110c --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"Parser.d.ts","sourceRoot":"","sources":["../src/Parser.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,aAAa,CAAC;AAsGpC,MAAM,WAAW,aAAa;IAC1B;;;;;;;OAOG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAElC;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,OAAO;IACpB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC;;OAEG;IACH,OAAO,IAAI,IAAI,CAAC;IAEhB;;OAEG;IACH,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,GAAG,IAAI,CAAC;IACnD,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,WAAW,CACP,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAClC,IAAI,CAAC;IACR,SAAS,CACL,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE;QAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,EAChC,SAAS,EAAE,OAAO,GACnB,IAAI,CAAC;IACR,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,YAAY,IAAI,IAAI,CAAC;IACrB,UAAU,IAAI,IAAI,CAAC;IACnB,YAAY,IAAI,IAAI,CAAC;IACrB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7D;AAID,qBAAa,MAAM;IAwBX,OAAO,CAAC,QAAQ,CAAC,OAAO;IAvB5B,yCAAyC;IAClC,UAAU,SAAK;IACtB,uCAAuC;IAChC,QAAQ,SAAK;IACpB;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,UAAU,CAAM;IACxB,OAAO,CAAC,WAAW,CAAM;IACzB,OAAO,CAAC,OAAO,CAA0C;IACzD,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAmB;IACvC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAU;IAC5C,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAY;gBAGlC
,GAAG,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,EACZ,OAAO,GAAE,aAAkB;IAehD,gBAAgB;IAChB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAO1B,SAAS,CAAC,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI9C,gBAAgB;IAChB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAUjC,OAAO,CAAC,WAAW;IA4BnB,OAAO,CAAC,UAAU;IAelB,gBAAgB;IAChB,YAAY,IAAI,IAAI;IAOpB,gBAAgB;IAChB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAuC9B,gBAAgB;IAChB,gBAAgB,IAAI,IAAI;IAgBxB,OAAO,CAAC,eAAe;IAYvB,gBAAgB;IAChB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAShC,gBAAgB;IAChB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAIjC,gBAAgB;IAChB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,IAAI;IAcnD,OAAO,CAAC,kBAAkB;IAW1B,gBAAgB;IAChB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAYlC,gBAAgB;IAChB,uBAAuB,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAY5C,gBAAgB;IAChB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAU9B,gBAAgB;IAChB,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAgB5B,gBAAgB;IAChB,OAAO,CAAC,GAAG,EAAE,KAAK,GAAG,IAAI;IAIzB,gBAAgB;IAChB,KAAK,IAAI,IAAI;IAab;;OAEG;IACI,KAAK,IAAI,IAAI;IAYpB;;;;;OAKG;IACI,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKxC;;;;OAIG;IACI,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAIjC;;;;OAIG;IACI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAIhC;;OAEG;IACI,KAAK,IAAI,IAAI;IAIpB;;OAEG;IACI,MAAM,IAAI,IAAI;IAIrB;;;;;OAKG;IACI,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAGtC;;;;;OAKG;IACI,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;CAGpC"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Parser.js b/node_modules/htmlparser2/lib/Parser.js new file mode 100644 index 0000000..ad3897d --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.js @@ -0,0 +1,424 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Parser = void 0; +var Tokenizer_1 = __importDefault(require("./Tokenizer")); +var formTags = new Set([ + "input", + "option", + "optgroup", + "select", + "button", + "datalist", + "textarea", +]); +var pTag = new Set(["p"]); +var tableSectionTags = new Set(["thead", "tbody"]); +var ddtTags = new Set(["dd", "dt"]); +var rtpTags = new Set(["rt", "rp"]); +var openImpliesClose = new Map([ + ["tr", new Set(["tr", "th", "td"])], + ["th", new Set(["th"])], + ["td", new Set(["thead", "th", "td"])], + ["body", new Set(["head", "link", "script"])], + ["li", new Set(["li"])], + ["p", pTag], + ["h1", pTag], + ["h2", pTag], + ["h3", pTag], + ["h4", pTag], + ["h5", pTag], + ["h6", pTag], + ["select", formTags], + ["input", formTags], + ["output", formTags], + ["button", formTags], + ["datalist", formTags], + ["textarea", formTags], + ["option", new Set(["option"])], + ["optgroup", new Set(["optgroup", "option"])], + ["dd", ddtTags], + ["dt", ddtTags], + ["address", pTag], + ["article", pTag], + ["aside", pTag], + ["blockquote", pTag], + ["details", pTag], + ["div", pTag], + ["dl", pTag], + ["fieldset", pTag], + ["figcaption", pTag], + ["figure", pTag], + ["footer", pTag], + ["form", pTag], + ["header", pTag], + ["hr", pTag], + ["main", pTag], + ["nav", pTag], + ["ol", pTag], + ["pre", pTag], + ["section", pTag], + ["table", pTag], + ["ul", pTag], + ["rt", rtpTags], + ["rp", rtpTags], + ["tbody", tableSectionTags], + ["tfoot", tableSectionTags], +]); +var voidElements = new Set([ + "area", + "base", + "basefont", + 
"br", + "col", + "command", + "embed", + "frame", + "hr", + "img", + "input", + "isindex", + "keygen", + "link", + "meta", + "param", + "source", + "track", + "wbr", +]); +var foreignContextElements = new Set(["math", "svg"]); +var htmlIntegrationElements = new Set([ + "mi", + "mo", + "mn", + "ms", + "mtext", + "annotation-xml", + "foreignobject", + "desc", + "title", +]); +var reNameEnd = /\s|\//; +var Parser = /** @class */ (function () { + function Parser(cbs, options) { + if (options === void 0) { options = {}; } + var _a, _b, _c, _d, _e; + this.options = options; + /** The start index of the last event. */ + this.startIndex = 0; + /** The end index of the last event. */ + this.endIndex = 0; + /** + * Store the start index of the current open tag, + * so we can update the start index for attributes. + */ + this.openTagStart = 0; + this.tagname = ""; + this.attribname = ""; + this.attribvalue = ""; + this.attribs = null; + this.stack = []; + this.foreignContext = []; + this.cbs = cbs !== null && cbs !== void 0 ? cbs : {}; + this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode; + this.lowerCaseAttributeNames = + (_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode; + this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_1.default)(this.options, this); + (_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this); + } + // Tokenizer event handlers + /** @internal */ + Parser.prototype.ontext = function (data) { + var _a, _b; + var idx = this.tokenizer.getAbsoluteIndex(); + this.endIndex = idx - 1; + (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? 
void 0 : _b.call(_a, data); + this.startIndex = idx; + }; + Parser.prototype.isVoidElement = function (name) { + return !this.options.xmlMode && voidElements.has(name); + }; + /** @internal */ + Parser.prototype.onopentagname = function (name) { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + this.emitOpenTag(name); + }; + Parser.prototype.emitOpenTag = function (name) { + var _a, _b, _c, _d; + this.openTagStart = this.startIndex; + this.tagname = name; + var impliesClose = !this.options.xmlMode && openImpliesClose.get(name); + if (impliesClose) { + while (this.stack.length > 0 && + impliesClose.has(this.stack[this.stack.length - 1])) { + var el = this.stack.pop(); + (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, el, true); + } + } + if (!this.isVoidElement(name)) { + this.stack.push(name); + if (foreignContextElements.has(name)) { + this.foreignContext.push(true); + } + else if (htmlIntegrationElements.has(name)) { + this.foreignContext.push(false); + } + } + (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name); + if (this.cbs.onopentag) + this.attribs = {}; + }; + Parser.prototype.endOpenTag = function (isImplied) { + var _a, _b; + this.startIndex = this.openTagStart; + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.attribs) { + (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? 
void 0 : _b.call(_a, this.tagname, this.attribs, isImplied); + this.attribs = null; + } + if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) { + this.cbs.onclosetag(this.tagname, true); + } + this.tagname = ""; + }; + /** @internal */ + Parser.prototype.onopentagend = function () { + this.endOpenTag(false); + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.onclosetag = function (name) { + var _a, _b, _c, _d, _e, _f; + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + if (foreignContextElements.has(name) || + htmlIntegrationElements.has(name)) { + this.foreignContext.pop(); + } + if (!this.isVoidElement(name)) { + var pos = this.stack.lastIndexOf(name); + if (pos !== -1) { + if (this.cbs.onclosetag) { + var count = this.stack.length - pos; + while (count--) { + // We know the stack has sufficient elements. + this.cbs.onclosetag(this.stack.pop(), count !== 0); + } + } + else + this.stack.length = pos; + } + else if (!this.options.xmlMode && name === "p") { + this.emitOpenTag(name); + this.closeCurrentTag(true); + } + } + else if (!this.options.xmlMode && name === "br") { + // We can't go through `emitOpenTag` here, as `br` would be implicitly closed. + (_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name); + (_d = (_c = this.cbs).onopentag) === null || _d === void 0 ? void 0 : _d.call(_c, name, {}, true); + (_f = (_e = this.cbs).onclosetag) === null || _f === void 0 ? 
void 0 : _f.call(_e, name, false); + } + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.onselfclosingtag = function () { + if (this.options.xmlMode || + this.options.recognizeSelfClosing || + this.foreignContext[this.foreignContext.length - 1]) { + this.closeCurrentTag(false); + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + } + else { + // Ignore the fact that the tag is self-closing. + this.onopentagend(); + } + }; + Parser.prototype.closeCurrentTag = function (isOpenImplied) { + var _a, _b; + var name = this.tagname; + this.endOpenTag(isOpenImplied); + // Self-closing tags will be on the top of the stack + if (this.stack[this.stack.length - 1] === name) { + // If the opening tag isn't implied, the closing tag has to be implied. + (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied); + this.stack.pop(); + } + }; + /** @internal */ + Parser.prototype.onattribname = function (name) { + this.startIndex = this.tokenizer.getAbsoluteSectionStart(); + if (this.lowerCaseAttributeNames) { + name = name.toLowerCase(); + } + this.attribname = name; + }; + /** @internal */ + Parser.prototype.onattribdata = function (value) { + this.attribvalue += value; + }; + /** @internal */ + Parser.prototype.onattribend = function (quote) { + var _a, _b; + this.endIndex = this.tokenizer.getAbsoluteIndex(); + (_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote); + if (this.attribs && + !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) { + this.attribs[this.attribname] = this.attribvalue; + } + this.attribname = ""; + this.attribvalue = ""; + }; + Parser.prototype.getInstructionName = function (value) { + var idx = value.search(reNameEnd); + var name = idx < 0 ? 
value : value.substr(0, idx); + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + return name; + }; + /** @internal */ + Parser.prototype.ondeclaration = function (value) { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.cbs.onprocessinginstruction) { + var name_1 = this.getInstructionName(value); + this.cbs.onprocessinginstruction("!" + name_1, "!" + value); + } + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.onprocessinginstruction = function (value) { + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.cbs.onprocessinginstruction) { + var name_2 = this.getInstructionName(value); + this.cbs.onprocessinginstruction("?" + name_2, "?" + value); + } + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.oncomment = function (value) { + var _a, _b, _c, _d; + this.endIndex = this.tokenizer.getAbsoluteIndex(); + (_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, value); + (_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c); + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.oncdata = function (value) { + var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k; + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.options.xmlMode || this.options.recognizeCDATA) { + (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a); + (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value); + (_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e); + } + else { + (_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[" + value + "]]"); + (_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? 
void 0 : _k.call(_j); + } + // Set `startIndex` for next node + this.startIndex = this.endIndex + 1; + }; + /** @internal */ + Parser.prototype.onerror = function (err) { + var _a, _b; + (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, err); + }; + /** @internal */ + Parser.prototype.onend = function () { + var _a, _b; + if (this.cbs.onclosetag) { + // Set the end index for all remaining tags + this.endIndex = this.startIndex; + for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i], true)) + ; + } + (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a); + }; + /** + * Resets the parser to a blank state, ready to parse a new HTML document + */ + Parser.prototype.reset = function () { + var _a, _b, _c, _d; + (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a); + this.tokenizer.reset(); + this.tagname = ""; + this.attribname = ""; + this.attribs = null; + this.stack = []; + this.startIndex = 0; + this.endIndex = 0; + (_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this); + }; + /** + * Resets the parser, then parses a complete document and + * pushes it to the handler. + * + * @param data Document to parse. + */ + Parser.prototype.parseComplete = function (data) { + this.reset(); + this.end(data); + }; + /** + * Parses a chunk of data and calls the corresponding callbacks. + * + * @param chunk Chunk to parse. + */ + Parser.prototype.write = function (chunk) { + this.tokenizer.write(chunk); + }; + /** + * Parses the end of the buffer and clears the stack, calls onend. + * + * @param chunk Optional final chunk to parse. + */ + Parser.prototype.end = function (chunk) { + this.tokenizer.end(chunk); + }; + /** + * Pauses parsing. The parser won't emit events until `resume` is called. + */ + Parser.prototype.pause = function () { + this.tokenizer.pause(); + }; + /** + * Resumes parsing after `pause` was called. 
+ */ + Parser.prototype.resume = function () { + this.tokenizer.resume(); + }; + /** + * Alias of `write`, for backwards compatibility. + * + * @param chunk Chunk to parse. + * @deprecated + */ + Parser.prototype.parseChunk = function (chunk) { + this.write(chunk); + }; + /** + * Alias of `end`, for backwards compatibility. + * + * @param chunk Optional final chunk to parse. + * @deprecated + */ + Parser.prototype.done = function (chunk) { + this.end(chunk); + }; + return Parser; +}()); +exports.Parser = Parser; diff --git a/node_modules/htmlparser2/lib/Tokenizer.d.ts b/node_modules/htmlparser2/lib/Tokenizer.d.ts new file mode 100644 index 0000000..3138a6f --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.d.ts @@ -0,0 +1,173 @@ +/** All the states the tokenizer can be in. */ +declare const enum State { + Text = 1, + BeforeTagName = 2, + InTagName = 3, + InSelfClosingTag = 4, + BeforeClosingTagName = 5, + InClosingTagName = 6, + AfterClosingTagName = 7, + BeforeAttributeName = 8, + InAttributeName = 9, + AfterAttributeName = 10, + BeforeAttributeValue = 11, + InAttributeValueDq = 12, + InAttributeValueSq = 13, + InAttributeValueNq = 14, + BeforeDeclaration = 15, + InDeclaration = 16, + InProcessingInstruction = 17, + BeforeComment = 18, + CDATASequence = 19, + InSpecialComment = 20, + InCommentLike = 21, + BeforeSpecialS = 22, + SpecialStartSequence = 23, + InSpecialTag = 24, + BeforeEntity = 25, + BeforeNumericEntity = 26, + InNamedEntity = 27, + InNumericEntity = 28, + InHexEntity = 29 +} +export interface Callbacks { + onattribdata(value: string): void; + onattribend(quote: string | undefined | null): void; + onattribname(name: string): void; + oncdata(data: string): void; + onclosetag(name: string): void; + oncomment(data: string): void; + ondeclaration(content: string): void; + onend(): void; + onerror(error: Error, state?: State): void; + onopentagend(): void; + onopentagname(name: string): void; + onprocessinginstruction(instruction: string): void; + 
onselfclosingtag(): void; + ontext(value: string): void; +} +export default class Tokenizer { + private readonly cbs; + /** The current state the tokenizer is in. */ + private _state; + /** The read buffer. */ + private buffer; + /** The beginning of the section that is currently being read, as an index into `buffer`. Several states set it to -1 once their section has been emitted. */ + sectionStart: number; + /** The index within the buffer that we are currently looking at. */ + private _index; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + private bufferOffset; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of that other state, which is restored once the entity has been handled. */ + private baseState; + /** For special parsing behavior inside of script, style and title tags. */ + private isSpecial; + /** Indicates whether the tokenizer is currently running: `pause()` sets this to `false` and `resume()` sets it back to `true`. */ + private running; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + private ended; + private readonly xmlMode; + private readonly decodeEntities; + private readonly entityTrie; + constructor({ xmlMode, decodeEntities, }: { + xmlMode?: boolean; + decodeEntities?: boolean; + }, cbs: Callbacks); + reset(): void; + write(chunk: string): void; + end(chunk?: string): void; + pause(): void; + resume(): void; + /** + * The start of the current section within all of the written data (`sectionStart + bufferOffset`). + */ + getAbsoluteSectionStart(): number; + /** + * The current index within all of the written data (`bufferOffset + _index`). + */ + getAbsoluteIndex(): number; + private stateText; + private currentSequence; + private sequenceIndex; + private stateSpecialStartSequence; + /** Look for an end tag. 
For <title> tags, also decode entities. */ + private stateInSpecialTag; + private stateCDATASequence; + /** + * When we wait for one specific character, we can speed things up + * by skipping through the buffer until we find it. + * + * @returns Whether the character was found. If it was not, `_index` is left at the last position of the buffer. 
+ */ + private fastForwardTo; + /** + * Comments and CDATA end with `-->` and `]]>`. + * + * Their common qualities are: + * - Their end sequences have a distinct character they start with. + * - That character is then repeated, so we have to check multiple repeats. + * - All characters but the start character of the sequence can be skipped. + */ + private stateInCommentLike; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. + */ + private isTagStartChar; + private startSpecial; + private stateBeforeTagName; + private stateInTagName; + private stateBeforeClosingTagName; + private stateInClosingTagName; + private stateAfterClosingTagName; + private stateBeforeAttributeName; + private stateInSelfClosingTag; + private stateInAttributeName; + private stateAfterAttributeName; + private stateBeforeAttributeValue; + private handleInAttributeValue; + private stateInAttributeValueDoubleQuotes; + private stateInAttributeValueSingleQuotes; + private stateInAttributeValueNoQuotes; + private stateBeforeDeclaration; + private stateInDeclaration; + private stateInProcessingInstruction; + private stateBeforeComment; + private stateInSpecialComment; + private stateBeforeSpecialS; + private trieIndex; + private trieCurrent; + private trieResult; + private entityExcess; + private stateBeforeEntity; + private stateInNamedEntity; + private emitNamedEntity; + private stateBeforeNumericEntity; + private decodeNumericEntity; + private stateInNumericEntity; + private stateInHexEntity; + private allowLegacyEntity; + /** + * Remove data that has already been consumed from the buffer. + */ + private cleanup; + private shouldContinue; + /** + * Iterates through the buffer, calling the function corresponding to the current state. 
+ * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + private parse; + private finish; + /** Handle any trailing data. */ + private handleTrailingData; + private getSection; + private emitPartial; +} +export {}; +//# sourceMappingURL=Tokenizer.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Tokenizer.d.ts.map b/node_modules/htmlparser2/lib/Tokenizer.d.ts.map new file mode 100644 index 0000000..221b9e2 --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"Tokenizer.d.ts","sourceRoot":"","sources":["../src/Tokenizer.ts"],"names":[],"mappings":"AAsCA,8CAA8C;AAC9C,mBAAW,KAAK;IACZ,IAAI,IAAI;IACR,aAAa,IAAA;IACb,SAAS,IAAA;IACT,gBAAgB,IAAA;IAChB,oBAAoB,IAAA;IACpB,gBAAgB,IAAA;IAChB,mBAAmB,IAAA;IAGnB,mBAAmB,IAAA;IACnB,eAAe,IAAA;IACf,kBAAkB,KAAA;IAClB,oBAAoB,KAAA;IACpB,kBAAkB,KAAA;IAClB,kBAAkB,KAAA;IAClB,kBAAkB,KAAA;IAGlB,iBAAiB,KAAA;IACjB,aAAa,KAAA;IAGb,uBAAuB,KAAA;IAGvB,aAAa,KAAA;IACb,aAAa,KAAA;IACb,gBAAgB,KAAA;IAChB,aAAa,KAAA;IAGb,cAAc,KAAA;IACd,oBAAoB,KAAA;IACpB,YAAY,KAAA;IAEZ,YAAY,KAAA;IACZ,mBAAmB,KAAA;IACnB,aAAa,KAAA;IACb,eAAe,KAAA;IACf,WAAW,KAAA;CACd;AA2BD,MAAM,WAAW,SAAS;IACtB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IACpD,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC;IAC3C,YAAY,IAAI,IAAI,CAAC;IACrB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,gBAAgB,IAAI,IAAI,CAAC;IACzB,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAmBD,MAAM,CAAC,OAAO,OAAO,SAAS;IAgCtB,OAAO,CAAC,QAAQ,CAAC,GAAG;IA/BxB,6CAA6C;IAC7C,OAAO,CAAC,MAAM,CAAc;IAC5B,uBAAuB;IACvB,OAAO,CAAC,MAAM,CAAM;IACpB,iEAAiE;IAC1D,YAAY,SAAK;IACxB,oEAAoE;IACpE,OAAO,CAAC,MAAM,CAAK;IACnB;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAK;IACzB,kIAAkI;IAClI,OAAO,CAAC,SAAS,CAAc;IAC/B,oEAAoE;IACpE,OAAO,CAAC,SAAS,CAAS;IAC1B,uDAAuD;IACvD,OAAO,CAAC,OAAO,CAAQ;IACvB,qFAAqF;IACrF,OAAO,CAAC,KAAK,CAAS;IAEtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO
,CAAC,QAAQ,CAAC,cAAc,CAAU;IACzC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAc;gBAGrC,EACI,OAAe,EACf,cAAqB,GACxB,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,OAAO,CAAA;KAAE,EACjC,GAAG,EAAE,SAAS;IAO5B,KAAK,IAAI,IAAI;IAYb,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAM1B,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAOzB,KAAK,IAAI,IAAI;IAIb,MAAM,IAAI,IAAI;IAUrB;;OAEG;IACI,uBAAuB,IAAI,MAAM;IAIxC;;OAEG;IACI,gBAAgB,IAAI,MAAM;IAIjC,OAAO,CAAC,SAAS;IAejB,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,yBAAyB;IAoBjC,mEAAmE;IACnE,OAAO,CAAC,iBAAiB;IAwCzB,OAAO,CAAC,kBAAkB;IAe1B;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAkBrB;;;;;;;OAOG;IACH,OAAO,CAAC,kBAAkB;IA8B1B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,cAAc;IAQtB,OAAO,CAAC,yBAAyB;IAYjC,OAAO,CAAC,qBAAqB;IAQ7B,OAAO,CAAC,wBAAwB;IAOhC,OAAO,CAAC,wBAAwB;IAkBhC,OAAO,CAAC,qBAAqB;IAY7B,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,yBAAyB;IAajC,OAAO,CAAC,sBAAsB;IAc9B,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,6BAA6B;IAYrC,OAAO,CAAC,sBAAsB;IAW9B,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,4BAA4B;IAOpC,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,mBAAmB;IAY3B,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,UAAU,CAAuB;IACzC,OAAO,CAAC,YAAY,CAAK;IAEzB,OAAO,CAAC,iBAAiB;IAiBzB,OAAO,CAAC,kBAAkB;IAkD1B,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,wBAAwB;IAUhC,OAAO,CAAC,mBAAmB;IAoB3B,OAAO,CAAC,oBAAoB;IAc5B,OAAO,CAAC,gBAAgB;IAmBxB,OAAO,CAAC,iBAAiB;IAQzB;;OAEG;IACH,OAAO,CAAC,OAAO;IAyBf,OAAO,CAAC,cAAc;IAItB;;;;OAIG;IACH,OAAO,CAAC,KAAK;IAoEb,OAAO,CAAC,MAAM;IAYd,gCAAgC;IAChC,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,UAAU;IAGlB,OAAO,CAAC,WAAW;CAUtB"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Tokenizer.js b/node_modules/htmlparser2/lib/Tokenizer.js new file mode 100644 index 0000000..6b3579b --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.js @@ -0,0 +1,821 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint")); +var decode_1 = require("entities/lib/decode"); +function isWhitespace(c) { + return (c === 32 /* Space */ || + c === 10 /* NewLine */ || + c === 9 /* Tab */ || + c === 12 /* FormFeed */ || + c === 13 /* CarriageReturn */); +} +function isEndOfTagSection(c) { + return c === 47 /* Slash */ || c === 62 /* Gt */ || isWhitespace(c); +} +function isNumber(c) { + return c >= 48 /* Zero */ && c <= 57 /* Nine */; +} +function isASCIIAlpha(c) { + return ((c >= 97 /* LowerA */ && c <= 122 /* LowerZ */) || + (c >= 65 /* UpperA */ && c <= 90 /* UpperZ */)); +} +/** + * Sequences used to match longer strings. + * + * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End + * sequences with an increased offset. + */ +var Sequences = { + Cdata: new Uint16Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), + CdataEnd: new Uint16Array([0x5d, 0x5d, 0x3e]), + CommentEnd: new Uint16Array([0x2d, 0x2d, 0x3e]), + ScriptEnd: new Uint16Array([ + 0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + ]), + StyleEnd: new Uint16Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), + TitleEnd: new Uint16Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title` +}; +var Tokenizer = /** @class */ (function () { + function Tokenizer(_a, cbs) { + var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c; + this.cbs = cbs; + /** The current state the tokenizer is in. 
*/ + this._state = 1 /* Text */; + /** The read buffer. */ + this.buffer = ""; + /** The beginning of the section that is currently being read. */ + this.sectionStart = 0; + /** The index within the buffer that we are currently looking at. */ + this._index = 0; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + this.bufferOffset = 0; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ + this.baseState = 1 /* Text */; + /** For special parsing behavior inside of script and style tags. */ + this.isSpecial = false; + /** Indicates whether the tokenizer has been paused. */ + this.running = true; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + this.ended = false; + this.sequenceIndex = 0; + this.trieIndex = 0; + this.trieCurrent = 0; + this.trieResult = null; + this.entityExcess = 0; + this.xmlMode = xmlMode; + this.decodeEntities = decodeEntities; + this.entityTrie = xmlMode ? 
decode_1.xmlDecodeTree : decode_1.htmlDecodeTree; + } + Tokenizer.prototype.reset = function () { + this._state = 1 /* Text */; + this.buffer = ""; + this.sectionStart = 0; + this._index = 0; + this.bufferOffset = 0; + this.baseState = 1 /* Text */; + this.currentSequence = undefined; + this.running = true; + this.ended = false; + }; + Tokenizer.prototype.write = function (chunk) { + if (this.ended) + return this.cbs.onerror(Error(".write() after done!")); + this.buffer += chunk; + this.parse(); + }; + Tokenizer.prototype.end = function (chunk) { + if (this.ended) + return this.cbs.onerror(Error(".end() after done!")); + if (chunk) + this.write(chunk); + this.ended = true; + if (this.running) + this.finish(); + }; + Tokenizer.prototype.pause = function () { + this.running = false; + }; + Tokenizer.prototype.resume = function () { + this.running = true; + if (this._index < this.buffer.length) { + this.parse(); + } + if (this.ended) { + this.finish(); + } + }; + /** + * The start of the current section. + */ + Tokenizer.prototype.getAbsoluteSectionStart = function () { + return this.sectionStart + this.bufferOffset; + }; + /** + * The current index within all of the written data. + */ + Tokenizer.prototype.getAbsoluteIndex = function () { + return this.bufferOffset + this._index; + }; + Tokenizer.prototype.stateText = function (c) { + if (c === 60 /* Lt */ || + (!this.decodeEntities && this.fastForwardTo(60 /* Lt */))) { + if (this._index > this.sectionStart) { + this.cbs.ontext(this.getSection()); + } + this._state = 2 /* BeforeTagName */; + this.sectionStart = this._index; + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateSpecialStartSequence = function (c) { + var isEnd = this.sequenceIndex === this.currentSequence.length; + var isMatch = isEnd + ? 
// If we are at the end of the sequence, make sure the tag name has ended + isEndOfTagSection(c) + : // Otherwise, do a case-insensitive comparison + (c | 0x20) === this.currentSequence[this.sequenceIndex]; + if (!isMatch) { + this.isSpecial = false; + } + else if (!isEnd) { + this.sequenceIndex++; + return; + } + this.sequenceIndex = 0; + this._state = 3 /* InTagName */; + this.stateInTagName(c); + }; + /** Look for an end tag. For <title> tags, also decode entities. */ + Tokenizer.prototype.stateInSpecialTag = function (c) { + if (this.sequenceIndex === this.currentSequence.length) { + if (c === 62 /* Gt */ || isWhitespace(c)) { + var endOfText = this._index - this.currentSequence.length; + if (this.sectionStart < endOfText) { + // Spoof the index so that reported locations match up. + var actualIndex = this._index; + this._index = endOfText; + this.cbs.ontext(this.getSection()); + this._index = actualIndex; + } + this.isSpecial = false; + this.sectionStart = endOfText + 2; // Skip over the `</` + this.stateInClosingTagName(c); + return; // We are done; skip the rest of the function. + } + this.sequenceIndex = 0; + } + if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) { + this.sequenceIndex += 1; + } + else if (this.sequenceIndex === 0) { + if (this.currentSequence === Sequences.TitleEnd) { + // We have to parse entities in <title> tags. + if (this.decodeEntities && c === 38 /* Amp */) { + this._state = 25 /* BeforeEntity */; + } + } + else if (this.fastForwardTo(60 /* Lt */)) { + // Outside of <title> tags, we can fast-forward. + this.sequenceIndex = 1; + } + } + else { + // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`. 
+ this.sequenceIndex = Number(c === 60 /* Lt */); + } + }; + Tokenizer.prototype.stateCDATASequence = function (c) { + if (c === Sequences.Cdata[this.sequenceIndex]) { + if (++this.sequenceIndex === Sequences.Cdata.length) { + this._state = 21 /* InCommentLike */; + this.currentSequence = Sequences.CdataEnd; + this.sequenceIndex = 0; + this.sectionStart = this._index + 1; + } + } + else { + this.sequenceIndex = 0; + this._state = 16 /* InDeclaration */; + this.stateInDeclaration(c); // Reconsume the character + } + }; + /** + * When we wait for one specific character, we can speed things up + * by skipping through the buffer until we find it. + * + * @returns Whether the character was found. + */ + Tokenizer.prototype.fastForwardTo = function (c) { + while (++this._index < this.buffer.length) { + if (this.buffer.charCodeAt(this._index) === c) { + return true; + } + } + /* + * We increment the index at the end of the `parse` loop, + * so set it to `buffer.length - 1` here. + * + * TODO: Refactor `parse` to increment index before calling states. + */ + this._index = this.buffer.length - 1; + return false; + }; + /** + * Comments and CDATA end with `-->` and `]]>`. + * + * Their common qualities are: + * - Their end sequences have a distinct character they start with. + * - That character is then repeated, so we have to check multiple repeats. + * - All characters but the start character of the sequence can be skipped. 
+ */ + Tokenizer.prototype.stateInCommentLike = function (c) { + if (c === this.currentSequence[this.sequenceIndex]) { + if (++this.sequenceIndex === this.currentSequence.length) { + // Remove 2 trailing chars + var section = this.buffer.slice(this.sectionStart, this._index - 2); + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(section); + } + else { + this.cbs.oncomment(section); + } + this.sequenceIndex = 0; + this.sectionStart = this._index + 1; + this._state = 1 /* Text */; + } + } + else if (this.sequenceIndex === 0) { + // Fast-forward to the first character of the sequence + if (this.fastForwardTo(this.currentSequence[0])) { + this.sequenceIndex = 1; + } + } + else if (c !== this.currentSequence[this.sequenceIndex - 1]) { + // Allow long sequences, eg. --->, ]]]> + this.sequenceIndex = 0; + } + }; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. + */ + Tokenizer.prototype.isTagStartChar = function (c) { + return this.xmlMode ? 
!isEndOfTagSection(c) : isASCIIAlpha(c); + }; + Tokenizer.prototype.startSpecial = function (sequence, offset) { + this.isSpecial = true; + this.currentSequence = sequence; + this.sequenceIndex = offset; + this._state = 23 /* SpecialStartSequence */; + }; + Tokenizer.prototype.stateBeforeTagName = function (c) { + if (c === 33 /* ExclamationMark */) { + this._state = 15 /* BeforeDeclaration */; + this.sectionStart = this._index + 1; + } + else if (c === 63 /* Questionmark */) { + this._state = 17 /* InProcessingInstruction */; + this.sectionStart = this._index + 1; + } + else if (this.isTagStartChar(c)) { + var lower = c | 0x20; + this.sectionStart = this._index; + if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { + this.startSpecial(Sequences.TitleEnd, 3); + } + else { + this._state = + !this.xmlMode && lower === Sequences.ScriptEnd[2] + ? 22 /* BeforeSpecialS */ + : 3 /* InTagName */; + } + } + else if (c === 47 /* Slash */) { + this._state = 5 /* BeforeClosingTagName */; + } + else { + this._state = 1 /* Text */; + this.stateText(c); + } + }; + Tokenizer.prototype.stateInTagName = function (c) { + if (isEndOfTagSection(c)) { + this.cbs.onopentagname(this.getSection()); + this.sectionStart = -1; + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + }; + Tokenizer.prototype.stateBeforeClosingTagName = function (c) { + if (isWhitespace(c)) { + // Ignore + } + else if (c === 62 /* Gt */) { + this._state = 1 /* Text */; + } + else { + this._state = this.isTagStartChar(c) + ? 
6 /* InClosingTagName */ + : 20 /* InSpecialComment */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInClosingTagName = function (c) { + if (c === 62 /* Gt */ || isWhitespace(c)) { + this.cbs.onclosetag(this.getSection()); + this.sectionStart = -1; + this._state = 7 /* AfterClosingTagName */; + this.stateAfterClosingTagName(c); + } + }; + Tokenizer.prototype.stateAfterClosingTagName = function (c) { + // Skip everything until ">" + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeAttributeName = function (c) { + if (c === 62 /* Gt */) { + this.cbs.onopentagend(); + if (this.isSpecial) { + this._state = 24 /* InSpecialTag */; + this.sequenceIndex = 0; + } + else { + this._state = 1 /* Text */; + } + this.baseState = this._state; + this.sectionStart = this._index + 1; + } + else if (c === 47 /* Slash */) { + this._state = 4 /* InSelfClosingTag */; + } + else if (!isWhitespace(c)) { + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInSelfClosingTag = function (c) { + if (c === 62 /* Gt */) { + this.cbs.onselfclosingtag(); + this._state = 1 /* Text */; + this.baseState = 1 /* Text */; + this.sectionStart = this._index + 1; + this.isSpecial = false; // Reset special state, in case of self-closing special tags + } + else if (!isWhitespace(c)) { + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + }; + Tokenizer.prototype.stateInAttributeName = function (c) { + if (c === 61 /* Eq */ || isEndOfTagSection(c)) { + this.cbs.onattribname(this.getSection()); + this.sectionStart = -1; + this._state = 10 /* AfterAttributeName */; + this.stateAfterAttributeName(c); + } + }; + Tokenizer.prototype.stateAfterAttributeName = function (c) { + if (c === 61 /* Eq */) { + this._state = 11 /* BeforeAttributeValue */; + } + else if (c === 47 /* Slash 
*/ || c === 62 /* Gt */) { + this.cbs.onattribend(undefined); + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + else if (!isWhitespace(c)) { + this.cbs.onattribend(undefined); + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateBeforeAttributeValue = function (c) { + if (c === 34 /* DoubleQuote */) { + this._state = 12 /* InAttributeValueDq */; + this.sectionStart = this._index + 1; + } + else if (c === 39 /* SingleQuote */) { + this._state = 13 /* InAttributeValueSq */; + this.sectionStart = this._index + 1; + } + else if (!isWhitespace(c)) { + this.sectionStart = this._index; + this._state = 14 /* InAttributeValueNq */; + this.stateInAttributeValueNoQuotes(c); // Reconsume token + } + }; + Tokenizer.prototype.handleInAttributeValue = function (c, quote) { + if (c === quote || + (!this.decodeEntities && this.fastForwardTo(quote))) { + this.cbs.onattribdata(this.getSection()); + this.sectionStart = -1; + this.cbs.onattribend(String.fromCharCode(quote)); + this._state = 8 /* BeforeAttributeName */; + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this.baseState = this._state; + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) { + this.handleInAttributeValue(c, 34 /* DoubleQuote */); + }; + Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) { + this.handleInAttributeValue(c, 39 /* SingleQuote */); + }; + Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) { + if (isWhitespace(c) || c === 62 /* Gt */) { + this.cbs.onattribdata(this.getSection()); + this.sectionStart = -1; + this.cbs.onattribend(null); + this._state = 8 /* BeforeAttributeName */; + this.stateBeforeAttributeName(c); + } + else if (this.decodeEntities && c === 38 /* Amp */) { + this.baseState = this._state; + this._state = 25 /* BeforeEntity */; + } + }; + Tokenizer.prototype.stateBeforeDeclaration 
= function (c) { + if (c === 91 /* OpeningSquareBracket */) { + this._state = 19 /* CDATASequence */; + this.sequenceIndex = 0; + } + else { + this._state = + c === 45 /* Dash */ + ? 18 /* BeforeComment */ + : 16 /* InDeclaration */; + } + }; + Tokenizer.prototype.stateInDeclaration = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.ondeclaration(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateInProcessingInstruction = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.onprocessinginstruction(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeComment = function (c) { + if (c === 45 /* Dash */) { + this._state = 21 /* InCommentLike */; + this.currentSequence = Sequences.CommentEnd; + // Allow short comments (eg. <!-->) + this.sequenceIndex = 2; + this.sectionStart = this._index + 1; + } + else { + this._state = 16 /* InDeclaration */; + } + }; + Tokenizer.prototype.stateInSpecialComment = function (c) { + if (c === 62 /* Gt */ || this.fastForwardTo(62 /* Gt */)) { + this.cbs.oncomment(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeSpecialS = function (c) { + var lower = c | 0x20; + if (lower === Sequences.ScriptEnd[3]) { + this.startSpecial(Sequences.ScriptEnd, 4); + } + else if (lower === Sequences.StyleEnd[3]) { + this.startSpecial(Sequences.StyleEnd, 4); + } + else { + this._state = 3 /* InTagName */; + this.stateInTagName(c); // Consume the token again + } + }; + Tokenizer.prototype.stateBeforeEntity = function (c) { + // Start excess with 1 to include the '&' + this.entityExcess = 1; + if (c === 35 /* Num */) { + this._state = 26 /* BeforeNumericEntity */; + } + else if (c === 38 /* Amp */) { + // We have two `&` characters in a row. 
Stay in the current state. + } + else { + this.trieIndex = 0; + this.trieCurrent = this.entityTrie[0]; + this.trieResult = null; + this._state = 27 /* InNamedEntity */; + this.stateInNamedEntity(c); + } + }; + Tokenizer.prototype.stateInNamedEntity = function (c) { + this.entityExcess += 1; + this.trieIndex = (0, decode_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c); + if (this.trieIndex < 0) { + this.emitNamedEntity(); + this._index--; + return; + } + this.trieCurrent = this.entityTrie[this.trieIndex]; + // If the branch is a value, store it and continue + if (this.trieCurrent & decode_1.BinTrieFlags.HAS_VALUE) { + // If we have a legacy entity while parsing strictly, just skip the number of bytes + if (!this.allowLegacyEntity() && c !== 59 /* Semi */) { + // No need to consider multi-byte values, as the legacy entity is always a single byte + this.trieIndex += 1; + } + else { + // Add 1 as we have already incremented the excess + var entityStart = this._index - this.entityExcess + 1; + if (entityStart > this.sectionStart) { + this.emitPartial(this.buffer.substring(this.sectionStart, entityStart)); + } + // If this is a surrogate pair, combine the higher bits from the node with the next byte + this.trieResult = + this.trieCurrent & decode_1.BinTrieFlags.MULTI_BYTE + ? 
String.fromCharCode(this.entityTrie[++this.trieIndex], this.entityTrie[++this.trieIndex]) + : String.fromCharCode(this.entityTrie[++this.trieIndex]); + this.entityExcess = 0; + this.sectionStart = this._index + 1; + } + } + }; + Tokenizer.prototype.emitNamedEntity = function () { + if (this.trieResult) { + this.emitPartial(this.trieResult); + } + this._state = this.baseState; + }; + Tokenizer.prototype.stateBeforeNumericEntity = function (c) { + if ((c | 0x20) === 120 /* LowerX */) { + this.entityExcess++; + this._state = 29 /* InHexEntity */; + } + else { + this._state = 28 /* InNumericEntity */; + this.stateInNumericEntity(c); + } + }; + Tokenizer.prototype.decodeNumericEntity = function (base, strict) { + var entityStart = this._index - this.entityExcess - 1; + var numberStart = entityStart + 2 + (base >> 4); + if (numberStart !== this._index) { + // Emit leading data if any + if (entityStart > this.sectionStart) { + this.emitPartial(this.buffer.substring(this.sectionStart, entityStart)); + } + // Parse entity + var entity = this.buffer.substring(numberStart, this._index); + var parsed = parseInt(entity, base); + this.emitPartial((0, decode_codepoint_1.default)(parsed)); + this.sectionStart = this._index + Number(strict); + } + this._state = this.baseState; + }; + Tokenizer.prototype.stateInNumericEntity = function (c) { + if (c === 59 /* Semi */) { + this.decodeNumericEntity(10, true); + } + else if (!isNumber(c)) { + if (this.allowLegacyEntity()) { + this.decodeNumericEntity(10, false); + } + else { + this._state = this.baseState; + } + this._index--; + } + else { + this.entityExcess++; + } + }; + Tokenizer.prototype.stateInHexEntity = function (c) { + if (c === 59 /* Semi */) { + this.decodeNumericEntity(16, true); + } + else if ((c < 97 /* LowerA */ || c > 102 /* LowerF */) && + (c < 65 /* UpperA */ || c > 70 /* UpperF */) && + !isNumber(c)) { + if (this.allowLegacyEntity()) { + this.decodeNumericEntity(16, false); + } + else { + this._state = 
this.baseState; + } + this._index--; + } + else { + this.entityExcess++; + } + }; + Tokenizer.prototype.allowLegacyEntity = function () { + return (!this.xmlMode && + (this.baseState === 1 /* Text */ || + this.baseState === 24 /* InSpecialTag */)); + }; + /** + * Remove data that has already been consumed from the buffer. + */ + Tokenizer.prototype.cleanup = function () { + // If we are inside of text, emit what we already have. + if (this.running && + this.sectionStart !== this._index && + (this._state === 1 /* Text */ || + (this._state === 24 /* InSpecialTag */ && + this.sequenceIndex === 0))) { + // TODO: We could emit attribute data here as well. + this.cbs.ontext(this.buffer.substr(this.sectionStart)); + this.sectionStart = this._index; + } + var start = this.sectionStart < 0 ? this._index : this.sectionStart; + this.buffer = + start === this.buffer.length ? "" : this.buffer.substr(start); + this._index -= start; + this.bufferOffset += start; + if (this.sectionStart > 0) { + this.sectionStart = 0; + } + }; + Tokenizer.prototype.shouldContinue = function () { + return this._index < this.buffer.length && this.running; + }; + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. 
+ */ + Tokenizer.prototype.parse = function () { + while (this.shouldContinue()) { + var c = this.buffer.charCodeAt(this._index); + if (this._state === 1 /* Text */) { + this.stateText(c); + } + else if (this._state === 23 /* SpecialStartSequence */) { + this.stateSpecialStartSequence(c); + } + else if (this._state === 24 /* InSpecialTag */) { + this.stateInSpecialTag(c); + } + else if (this._state === 19 /* CDATASequence */) { + this.stateCDATASequence(c); + } + else if (this._state === 12 /* InAttributeValueDq */) { + this.stateInAttributeValueDoubleQuotes(c); + } + else if (this._state === 9 /* InAttributeName */) { + this.stateInAttributeName(c); + } + else if (this._state === 21 /* InCommentLike */) { + this.stateInCommentLike(c); + } + else if (this._state === 20 /* InSpecialComment */) { + this.stateInSpecialComment(c); + } + else if (this._state === 8 /* BeforeAttributeName */) { + this.stateBeforeAttributeName(c); + } + else if (this._state === 3 /* InTagName */) { + this.stateInTagName(c); + } + else if (this._state === 6 /* InClosingTagName */) { + this.stateInClosingTagName(c); + } + else if (this._state === 2 /* BeforeTagName */) { + this.stateBeforeTagName(c); + } + else if (this._state === 10 /* AfterAttributeName */) { + this.stateAfterAttributeName(c); + } + else if (this._state === 13 /* InAttributeValueSq */) { + this.stateInAttributeValueSingleQuotes(c); + } + else if (this._state === 11 /* BeforeAttributeValue */) { + this.stateBeforeAttributeValue(c); + } + else if (this._state === 5 /* BeforeClosingTagName */) { + this.stateBeforeClosingTagName(c); + } + else if (this._state === 7 /* AfterClosingTagName */) { + this.stateAfterClosingTagName(c); + } + else if (this._state === 22 /* BeforeSpecialS */) { + this.stateBeforeSpecialS(c); + } + else if (this._state === 14 /* InAttributeValueNq */) { + this.stateInAttributeValueNoQuotes(c); + } + else if (this._state === 4 /* InSelfClosingTag */) { + this.stateInSelfClosingTag(c); + } + else if 
(this._state === 16 /* InDeclaration */) { + this.stateInDeclaration(c); + } + else if (this._state === 15 /* BeforeDeclaration */) { + this.stateBeforeDeclaration(c); + } + else if (this._state === 18 /* BeforeComment */) { + this.stateBeforeComment(c); + } + else if (this._state === 17 /* InProcessingInstruction */) { + this.stateInProcessingInstruction(c); + } + else if (this._state === 27 /* InNamedEntity */) { + this.stateInNamedEntity(c); + } + else if (this._state === 25 /* BeforeEntity */) { + this.stateBeforeEntity(c); + } + else if (this._state === 29 /* InHexEntity */) { + this.stateInHexEntity(c); + } + else if (this._state === 28 /* InNumericEntity */) { + this.stateInNumericEntity(c); + } + else { + // `this._state === State.BeforeNumericEntity` + this.stateBeforeNumericEntity(c); + } + this._index++; + } + this.cleanup(); + }; + Tokenizer.prototype.finish = function () { + if (this._state === 27 /* InNamedEntity */) { + this.emitNamedEntity(); + } + // If there is remaining data, emit it in a reasonable way + if (this.sectionStart < this._index) { + this.handleTrailingData(); + } + this.cbs.onend(); + }; + /** Handle any trailing data. 
*/ + Tokenizer.prototype.handleTrailingData = function () { + var data = this.buffer.substr(this.sectionStart); + if (this._state === 21 /* InCommentLike */) { + if (this.currentSequence === Sequences.CdataEnd) { + this.cbs.oncdata(data); + } + else { + this.cbs.oncomment(data); + } + } + else if (this._state === 28 /* InNumericEntity */ && + this.allowLegacyEntity()) { + this.decodeNumericEntity(10, false); + // All trailing data will have been consumed + } + else if (this._state === 29 /* InHexEntity */ && + this.allowLegacyEntity()) { + this.decodeNumericEntity(16, false); + // All trailing data will have been consumed + } + else if (this._state === 3 /* InTagName */ || + this._state === 8 /* BeforeAttributeName */ || + this._state === 11 /* BeforeAttributeValue */ || + this._state === 10 /* AfterAttributeName */ || + this._state === 9 /* InAttributeName */ || + this._state === 13 /* InAttributeValueSq */ || + this._state === 12 /* InAttributeValueDq */ || + this._state === 14 /* InAttributeValueNq */ || + this._state === 6 /* InClosingTagName */) { + /* + * If we are currently in an opening or closing tag, us not calling the + * respective callback signals that the tag should be ignored. 
+ */ + } + else { + this.cbs.ontext(data); + } + }; + Tokenizer.prototype.getSection = function () { + return this.buffer.substring(this.sectionStart, this._index); + }; + Tokenizer.prototype.emitPartial = function (value) { + if (this.baseState !== 1 /* Text */ && + this.baseState !== 24 /* InSpecialTag */) { + this.cbs.onattribdata(value); + } + else { + this.cbs.ontext(value); + } + }; + return Tokenizer; +}()); +exports.default = Tokenizer; diff --git a/node_modules/htmlparser2/lib/WritableStream.d.ts b/node_modules/htmlparser2/lib/WritableStream.d.ts new file mode 100644 index 0000000..0755f99 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.d.ts @@ -0,0 +1,16 @@ +/// <reference types="node" /> +import { Handler, ParserOptions } from "./Parser"; +import { Writable } from "stream"; +/** + * WritableStream makes the `Parser` interface available as a NodeJS stream. + * + * @see Parser + */ +export declare class WritableStream extends Writable { + private readonly _parser; + private readonly _decoder; + constructor(cbs: Partial<Handler>, options?: ParserOptions); + _write(chunk: string | Buffer, encoding: string, cb: () => void): void; + _final(cb: () => void): void; +} +//# sourceMappingURL=WritableStream.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/WritableStream.d.ts.map b/node_modules/htmlparser2/lib/WritableStream.d.ts.map new file mode 100644 index 0000000..31295a2 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"WritableStream.d.ts","sourceRoot":"","sources":["../src/WritableStream.ts"],"names":[],"mappings":";AAAA,OAAO,EAAU,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAK1D,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAQlC;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,QAAQ;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;gBAEpC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa;IAK1D,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI;IAOtE,MAAM,CAAC,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI;CAI/B"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/WritableStream.js b/node_modules/htmlparser2/lib/WritableStream.js new file mode 100644 index 0000000..ff87995 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.js @@ -0,0 +1,53 @@ +"use strict"; +var __extends = (this && this.__extends) || (function () { + var extendStatics = function (d, b) { + extendStatics = Object.setPrototypeOf || + ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || + function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; + return extendStatics(d, b); + }; + return function (d, b) { + if (typeof b !== "function" && b !== null) + throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); + extendStatics(d, b); + function __() { this.constructor = d; } + d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); + }; +})(); +Object.defineProperty(exports, "__esModule", { value: true }); +exports.WritableStream = void 0; +var Parser_1 = require("./Parser"); +/* + * NOTE: If either of these two imports produces a type error, + * please update your @types/node dependency! + */ +var stream_1 = require("stream"); +var string_decoder_1 = require("string_decoder"); +// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream +function isBuffer(_chunk, encoding) { + return encoding === "buffer"; +} +/** + * WritableStream makes the `Parser` interface available as a NodeJS stream. 
+ * + * @see Parser + */ +var WritableStream = /** @class */ (function (_super) { + __extends(WritableStream, _super); + function WritableStream(cbs, options) { + var _this = _super.call(this, { decodeStrings: false }) || this; + _this._decoder = new string_decoder_1.StringDecoder(); + _this._parser = new Parser_1.Parser(cbs, options); + return _this; + } + WritableStream.prototype._write = function (chunk, encoding, cb) { + this._parser.write(isBuffer(chunk, encoding) ? this._decoder.write(chunk) : chunk); + cb(); + }; + WritableStream.prototype._final = function (cb) { + this._parser.end(this._decoder.end()); + cb(); + }; + return WritableStream; +}(stream_1.Writable)); +exports.WritableStream = WritableStream; diff --git a/node_modules/htmlparser2/lib/index.d.ts b/node_modules/htmlparser2/lib/index.d.ts new file mode 100644 index 0000000..997c6d9 --- /dev/null +++ b/node_modules/htmlparser2/lib/index.d.ts @@ -0,0 +1,39 @@ +import { Parser, ParserOptions } from "./Parser"; +export { Parser, ParserOptions }; +import { DomHandler, DomHandlerOptions, Node, Element, Document } from "domhandler"; +export { DomHandler, DomHandlerOptions }; +declare type Options = ParserOptions & DomHandlerOptions; +/** + * Parses the data, returns the resulting document. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + */ +export declare function parseDocument(data: string, options?: Options): Document; +/** + * Parses data, returns an array of the root nodes. + * + * Note that the root nodes still have a `Document` node as their parent. + * Use `parseDocument` to get the `Document` node instead. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + * @deprecated Use `parseDocument` instead. + */ +export declare function parseDOM(data: string, options?: Options): Node[]; +/** + * Creates a parser instance, with an attached DOM handler. 
+ * + * @param cb A callback that will be called once parsing has been completed. + * @param options Optional options for the parser and DOM builder. + * @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM. + */ +export declare function createDomStream(cb: (error: Error | null, dom: Node[]) => void, options?: Options, elementCb?: (element: Element) => void): Parser; +export { default as Tokenizer, Callbacks as TokenizerCallbacks, } from "./Tokenizer"; +import * as ElementType from "domelementtype"; +export { ElementType }; +export * from "./FeedHandler"; +export * as DomUtils from "domutils"; +export { DomHandler as DefaultHandler }; +export { FeedHandler as RssHandler } from "./FeedHandler"; +//# sourceMappingURL=index.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/index.d.ts.map b/node_modules/htmlparser2/lib/index.d.ts.map new file mode 100644 index 0000000..058fcba --- /dev/null +++ b/node_modules/htmlparser2/lib/index.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AAEjC,OAAO,EACH,UAAU,EACV,iBAAiB,EACjB,IAAI,EACJ,OAAO,EACP,QAAQ,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,CAAC;AAEzC,aAAK,OAAO,GAAG,aAAa,GAAG,iBAAiB,CAAC;AAIjD;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,QAAQ,CAIvE;AACD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,IAAI,EAAE,CAEhE;AACD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC3B,EAAE,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,IAAI,EAC9C,OAAO,CAAC,EAAE,OAAO,EACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,GACvC,MAAM,CAGR;AAED,OAAO,EACH,OAAO,IAAI,SAAS,EACpB,SAAS,IAAI,kBAAkB,GAClC,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,WAAW,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,CAAC;AAOvB,cAAc,eAAe,CAAC;AAC9B,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC;AAGrC,OAAO,EAAE,UAAU,IAAI,cAAc,EAAE,CAAC;AACxC,OAAO,EAAE,WAAW,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/index.js b/node_modules/htmlparser2/lib/index.js new file mode 100644 index 0000000..3e461fa --- /dev/null +++ b/node_modules/htmlparser2/lib/index.js @@ -0,0 +1,84 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __exportStar = (this && this.__exportStar) || function(m, exports) { + for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.RssHandler = exports.DefaultHandler = exports.DomUtils = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DomHandler = exports.Parser = void 0; +var Parser_1 = require("./Parser"); +Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_1.Parser; } }); +var domhandler_1 = require("domhandler"); +Object.defineProperty(exports, "DomHandler", { enumerable: true, get: function () { return domhandler_1.DomHandler; } }); +Object.defineProperty(exports, "DefaultHandler", { enumerable: true, get: function () { return domhandler_1.DomHandler; } }); +// Helper methods +/** + * Parses the data, returns the resulting document. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + */ +function parseDocument(data, options) { + var handler = new domhandler_1.DomHandler(undefined, options); + new Parser_1.Parser(handler, options).end(data); + return handler.root; +} +exports.parseDocument = parseDocument; +/** + * Parses data, returns an array of the root nodes. + * + * Note that the root nodes still have a `Document` node as their parent. + * Use `parseDocument` to get the `Document` node instead. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + * @deprecated Use `parseDocument` instead. + */ +function parseDOM(data, options) { + return parseDocument(data, options).children; +} +exports.parseDOM = parseDOM; +/** + * Creates a parser instance, with an attached DOM handler. + * + * @param cb A callback that will be called once parsing has been completed. + * @param options Optional options for the parser and DOM builder. + * @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM. 
+ */ +function createDomStream(cb, options, elementCb) { + var handler = new domhandler_1.DomHandler(cb, options, elementCb); + return new Parser_1.Parser(handler, options); +} +exports.createDomStream = createDomStream; +var Tokenizer_1 = require("./Tokenizer"); +Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_1).default; } }); +var ElementType = __importStar(require("domelementtype")); +exports.ElementType = ElementType; +/* + * All of the following exports exist for backwards-compatibility. + * They should probably be removed eventually. + */ +__exportStar(require("./FeedHandler"), exports); +exports.DomUtils = __importStar(require("domutils")); +var FeedHandler_1 = require("./FeedHandler"); +Object.defineProperty(exports, "RssHandler", { enumerable: true, get: function () { return FeedHandler_1.FeedHandler; } }); |
