summaryrefslogtreecommitdiff
path: root/node_modules/htmlparser2/node_modules/entities/lib/decode.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/htmlparser2/node_modules/entities/lib/decode.js')
-rw-r--r--node_modules/htmlparser2/node_modules/entities/lib/decode.js145
1 files changed, 145 insertions, 0 deletions
diff --git a/node_modules/htmlparser2/node_modules/entities/lib/decode.js b/node_modules/htmlparser2/node_modules/entities/lib/decode.js
new file mode 100644
index 0000000..7074d06
--- /dev/null
+++ b/node_modules/htmlparser2/node_modules/entities/lib/decode.js
@@ -0,0 +1,145 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.JUMP_OFFSET_BASE = exports.BinTrieFlags = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0;
+var decode_data_html_1 = __importDefault(require("./generated/decode-data-html"));
+exports.htmlDecodeTree = decode_data_html_1.default;
+var decode_data_xml_1 = __importDefault(require("./generated/decode-data-xml"));
+exports.xmlDecodeTree = decode_data_xml_1.default;
+var decode_codepoint_1 = __importDefault(require("./decode_codepoint"));
+var BinTrieFlags;
+(function (BinTrieFlags) {
+ BinTrieFlags[BinTrieFlags["HAS_VALUE"] = 32768] = "HAS_VALUE";
+ BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 32512] = "BRANCH_LENGTH";
+ BinTrieFlags[BinTrieFlags["MULTI_BYTE"] = 128] = "MULTI_BYTE";
+ BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
+})(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {}));
+exports.JUMP_OFFSET_BASE = 48 /* ZERO */ - 1;
+function getDecoder(decodeTree) {
+ return function decodeHTMLBinary(str, strict) {
+ var ret = "";
+ var lastIdx = 0;
+ var strIdx = 0;
+ while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
+ ret += str.slice(lastIdx, strIdx);
+ lastIdx = strIdx;
+ // Skip the "&"
+ strIdx += 1;
+ // If we have a numeric entity, handle this separately.
+ if (str.charCodeAt(strIdx) === 35 /* NUM */) {
+ // Skip the leading "&#". For hex entities, also skip the leading "x".
+ var start = strIdx + 1;
+ var base = 10;
+ var cp = str.charCodeAt(start);
+ if ((cp | 32 /* To_LOWER_BIT */) === 120 /* LOWER_X */) {
+ base = 16;
+ strIdx += 1;
+ start += 1;
+ }
+ while (((cp = str.charCodeAt(++strIdx)) >= 48 /* ZERO */ &&
+ cp <= 57 /* NINE */) ||
+ (base === 16 &&
+ (cp | 32 /* To_LOWER_BIT */) >= 97 /* LOWER_A */ &&
+ (cp | 32 /* To_LOWER_BIT */) <= 102 /* LOWER_F */))
+ ;
+ if (start !== strIdx) {
+ var entity = str.substring(start, strIdx);
+ var parsed = parseInt(entity, base);
+ if (str.charCodeAt(strIdx) === 59 /* SEMI */) {
+ strIdx += 1;
+ }
+ else if (strict) {
+ continue;
+ }
+ ret += decode_codepoint_1.default(parsed);
+ lastIdx = strIdx;
+ }
+ continue;
+ }
+ var result = null;
+ var excess = 1;
+ var treeIdx = 0;
+ var current = decodeTree[treeIdx];
+ for (; strIdx < str.length; strIdx++, excess++) {
+ treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
+ if (treeIdx < 0)
+ break;
+ current = decodeTree[treeIdx];
+ // If the branch is a value, store it and continue
+ if (current & BinTrieFlags.HAS_VALUE) {
+ // If we have a legacy entity while parsing strictly, just skip the number of bytes
+ if (strict && str.charCodeAt(strIdx) !== 59 /* SEMI */) {
+ // No need to consider multi-byte values, as the legacy entity is always a single byte
+ treeIdx += 1;
+ }
+ else {
+ // If this is a surrogate pair, combine the higher bits from the node with the next byte
+ result =
+ current & BinTrieFlags.MULTI_BYTE
+ ? String.fromCharCode(decodeTree[++treeIdx], decodeTree[++treeIdx])
+ : String.fromCharCode(decodeTree[++treeIdx]);
+ excess = 0;
+ }
+ }
+ }
+ if (result != null) {
+ ret += result;
+ lastIdx = strIdx - excess + 1;
+ }
+ }
+ return ret + str.slice(lastIdx);
+ };
+}
+function determineBranch(decodeTree, current, nodeIdx, char) {
+ if (current <= 128) {
+ return char === current ? nodeIdx : -1;
+ }
+ var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 8;
+ if (branchCount === 0) {
+ return -1;
+ }
+ if (branchCount === 1) {
+ return char === decodeTree[nodeIdx] ? nodeIdx + 1 : -1;
+ }
+ var jumpOffset = current & BinTrieFlags.JUMP_TABLE;
+ if (jumpOffset) {
+ var value = char - exports.JUMP_OFFSET_BASE - jumpOffset;
+ return value < 0 || value > branchCount
+ ? -1
+ : decodeTree[nodeIdx + value] - 1;
+ }
+ // Binary search for the character.
+ var lo = nodeIdx;
+ var hi = lo + branchCount - 1;
+ while (lo <= hi) {
+ var mid = (lo + hi) >>> 1;
+ var midVal = decodeTree[mid];
+ if (midVal < char) {
+ lo = mid + 1;
+ }
+ else if (midVal > char) {
+ hi = mid - 1;
+ }
+ else {
+ return decodeTree[mid + branchCount];
+ }
+ }
+ return -1;
+}
+exports.determineBranch = determineBranch;
+var htmlDecoder = getDecoder(decode_data_html_1.default);
+var xmlDecoder = getDecoder(decode_data_xml_1.default);
+function decodeHTML(str) {
+ return htmlDecoder(str, false);
+}
+exports.decodeHTML = decodeHTML;
+function decodeHTMLStrict(str) {
+ return htmlDecoder(str, true);
+}
+exports.decodeHTMLStrict = decodeHTMLStrict;
+function decodeXML(str) {
+ return xmlDecoder(str, true);
+}
+exports.decodeXML = decodeXML;