diff options
Diffstat (limited to 'node_modules/htmlparser2/node_modules/entities/lib/decode.js')
| -rw-r--r-- | node_modules/htmlparser2/node_modules/entities/lib/decode.js | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/node_modules/htmlparser2/node_modules/entities/lib/decode.js b/node_modules/htmlparser2/node_modules/entities/lib/decode.js new file mode 100644 index 0000000..7074d06 --- /dev/null +++ b/node_modules/htmlparser2/node_modules/entities/lib/decode.js @@ -0,0 +1,145 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.decodeXML = exports.decodeHTMLStrict = exports.decodeHTML = exports.determineBranch = exports.JUMP_OFFSET_BASE = exports.BinTrieFlags = exports.xmlDecodeTree = exports.htmlDecodeTree = void 0; +var decode_data_html_1 = __importDefault(require("./generated/decode-data-html")); +exports.htmlDecodeTree = decode_data_html_1.default; +var decode_data_xml_1 = __importDefault(require("./generated/decode-data-xml")); +exports.xmlDecodeTree = decode_data_xml_1.default; +var decode_codepoint_1 = __importDefault(require("./decode_codepoint")); +var BinTrieFlags; +(function (BinTrieFlags) { + BinTrieFlags[BinTrieFlags["HAS_VALUE"] = 32768] = "HAS_VALUE"; + BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 32512] = "BRANCH_LENGTH"; + BinTrieFlags[BinTrieFlags["MULTI_BYTE"] = 128] = "MULTI_BYTE"; + BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE"; +})(BinTrieFlags = exports.BinTrieFlags || (exports.BinTrieFlags = {})); +exports.JUMP_OFFSET_BASE = 48 /* ZERO */ - 1; +function getDecoder(decodeTree) { + return function decodeHTMLBinary(str, strict) { + var ret = ""; + var lastIdx = 0; + var strIdx = 0; + while ((strIdx = str.indexOf("&", strIdx)) >= 0) { + ret += str.slice(lastIdx, strIdx); + lastIdx = strIdx; + // Skip the "&" + strIdx += 1; + // If we have a numeric entity, handle this separately. + if (str.charCodeAt(strIdx) === 35 /* NUM */) { + // Skip the leading "&#". For hex entities, also skip the leading "x". + var start = strIdx + 1; + var base = 10; + var cp = str.charCodeAt(start); + if ((cp | 32 /* To_LOWER_BIT */) === 120 /* LOWER_X */) { + base = 16; + strIdx += 1; + start += 1; + } + while (((cp = str.charCodeAt(++strIdx)) >= 48 /* ZERO */ && + cp <= 57 /* NINE */) || + (base === 16 && + (cp | 32 /* To_LOWER_BIT */) >= 97 /* LOWER_A */ && + (cp | 32 /* To_LOWER_BIT */) <= 102 /* LOWER_F */)) + ; + if (start !== strIdx) { + var entity = str.substring(start, strIdx); + var parsed = parseInt(entity, base); + if (str.charCodeAt(strIdx) === 59 /* SEMI */) { + strIdx += 1; + } + else if (strict) { + continue; + } + ret += decode_codepoint_1.default(parsed); + lastIdx = strIdx; + } + continue; + } + var result = null; + var excess = 1; + var treeIdx = 0; + var current = decodeTree[treeIdx]; + for (; strIdx < str.length; strIdx++, excess++) { + treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx)); + if (treeIdx < 0) + break; + current = decodeTree[treeIdx]; + // If the branch is a value, store it and continue + if (current & BinTrieFlags.HAS_VALUE) { + // If we have a legacy entity while parsing strictly, just skip the number of bytes + if (strict && str.charCodeAt(strIdx) !== 59 /* SEMI */) { + // No need to consider multi-byte values, as the legacy entity is always a single byte + treeIdx += 1; + } + else { + // If this is a surrogate pair, combine the higher bits from the node with the next byte + result = + current & BinTrieFlags.MULTI_BYTE + ? String.fromCharCode(decodeTree[++treeIdx], decodeTree[++treeIdx]) + : String.fromCharCode(decodeTree[++treeIdx]); + excess = 0; + } + } + } + if (result != null) { + ret += result; + lastIdx = strIdx - excess + 1; + } + } + return ret + str.slice(lastIdx); + }; +} +function determineBranch(decodeTree, current, nodeIdx, char) { + if (current <= 128) { + return char === current ? nodeIdx : -1; + } + var branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 8; + if (branchCount === 0) { + return -1; + } + if (branchCount === 1) { + return char === decodeTree[nodeIdx] ? nodeIdx + 1 : -1; + } + var jumpOffset = current & BinTrieFlags.JUMP_TABLE; + if (jumpOffset) { + var value = char - exports.JUMP_OFFSET_BASE - jumpOffset; + return value < 0 || value > branchCount + ? -1 + : decodeTree[nodeIdx + value] - 1; + } + // Binary search for the character. + var lo = nodeIdx; + var hi = lo + branchCount - 1; + while (lo <= hi) { + var mid = (lo + hi) >>> 1; + var midVal = decodeTree[mid]; + if (midVal < char) { + lo = mid + 1; + } + else if (midVal > char) { + hi = mid - 1; + } + else { + return decodeTree[mid + branchCount]; + } + } + return -1; +} +exports.determineBranch = determineBranch; +var htmlDecoder = getDecoder(decode_data_html_1.default); +var xmlDecoder = getDecoder(decode_data_xml_1.default); +function decodeHTML(str) { + return htmlDecoder(str, false); +} +exports.decodeHTML = decodeHTML; +function decodeHTMLStrict(str) { + return htmlDecoder(str, true); +} +exports.decodeHTMLStrict = decodeHTMLStrict; +function decodeXML(str) { + return xmlDecoder(str, true); +} +exports.decodeXML = decodeXML; |
