summary refs log tree commit diff
path: root/node_modules/entities/src/encode.ts
diff options
context:
space:
mode:
author Shipwreckt <me@shipwreckt.co.uk> 2025-10-31 20:02:14 +0000
committer Shipwreckt <me@shipwreckt.co.uk> 2025-10-31 20:02:14 +0000
commit 7a52ddeba2a68388b544f529d2d92104420f77b0 (patch)
tree 15ddd47457a2cb4a96060747437d36474e4f6b4e /node_modules/entities/src/encode.ts
parent 53d6ae2b5568437afa5e4995580a3fb679b7b91b (diff)
Changed from static to 11ty!
Diffstat (limited to 'node_modules/entities/src/encode.ts')
-rw-r--r-- node_modules/entities/src/encode.ts 77
1 files changed, 77 insertions, 0 deletions
diff --git a/node_modules/entities/src/encode.ts b/node_modules/entities/src/encode.ts
new file mode 100644
index 0000000..5bb40a6
--- /dev/null
+++ b/node_modules/entities/src/encode.ts
@@ -0,0 +1,77 @@
+import { htmlTrie } from "./generated/encode-html.js";
+import { xmlReplacer, getCodePoint } from "./escape.js";
+
+// Characters replaced by `encodeHTML`: tab, newline, form feed, the ASCII
+// punctuation ranges `!` to `,`, `.`, `/`, `:` to `@`, `[` to the backtick and
+// `{` to `}`, plus every UTF-16 code unit from U+0080 through U+FFFF.
+// Space, carriage return, `-`, `~`, digits and ASCII letters are not matched.
+// The pattern is global (`g`), so it carries `lastIndex` state between calls
+// to `exec`.
+const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
+
+/**
+ * Encodes `input` with HTML entities, replacing every character selected by
+ * the `htmlReplacer` pattern. That pattern also covers ASCII punctuation that
+ * is perfectly valid in HTML documents, such as `#`.
+ *
+ * When a more compact result is wanted, `encodeNonAsciiHTML` is the better
+ * choice: it leaves characters that are valid in HTML documents untouched.
+ *
+ * Characters without a named entity are written as a hexadecimal numeric
+ * reference (eg. `&#xfc;`).
+ *
+ * @param input The string to encode.
+ * @returns The encoded string.
+ */
+export function encodeHTML(input: string): string {
+    const encoded = encodeHTMLTrieRe(htmlReplacer, input);
+    return encoded;
+}
+/**
+ * Encodes `input` with HTML entities, replacing only the characters selected
+ * by `xmlReplacer` (imported from `./escape.js`): non-ASCII characters and
+ * characters that are not valid in HTML documents. Characters that are valid
+ * in HTML documents, such as `#`, are left as they are.
+ *
+ * Characters without a named entity are written as a hexadecimal numeric
+ * reference (eg. `&#xfc;`).
+ *
+ * @param input The string to encode.
+ * @returns The encoded string.
+ */
+export function encodeNonAsciiHTML(input: string): string {
+    const encoded = encodeHTMLTrieRe(xmlReplacer, input);
+    return encoded;
+}
+
+/**
+ * Replaces every match of `regExp` in `input` with an HTML entity.
+ *
+ * For each match, the UTF-16 code unit at the match index is looked up in
+ * `htmlTrie`. If the lookup yields a branch node, the following code unit is
+ * tried first, so that a two-unit sequence with its own entity is emitted as
+ * a single replacement; otherwise the node's own value (`v`) is used. When no
+ * entity is found, a hexadecimal numeric reference (`&#x…;`) is emitted for
+ * the code point returned by `getCodePoint`.
+ *
+ * @param regExp Pattern selecting the characters to encode. The loop drives
+ *   the scan through `regExp.exec` and `regExp.lastIndex`, so the pattern
+ *   must be global (`g`), as `htmlReplacer` is.
+ * @param input The string to encode.
+ * @returns `input` with every matched character replaced by an entity.
+ */
+function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
+    let returnValue = "";
+    let lastIndex = 0;
+    let match: RegExpExecArray | null;
+
+    while ((match = regExp.exec(input)) !== null) {
+        const { index } = match;
+        // Copy the untouched text between the previous match and this one.
+        returnValue += input.substring(lastIndex, index);
+        const char = input.charCodeAt(index);
+        let next = htmlTrie.get(char);
+
+        if (typeof next === "object") {
+            // We are in a branch. Try to match the next char.
+            if (index + 1 < input.length) {
+                const nextChar = input.charCodeAt(index + 1);
+                // `n` is either a single expected code unit (with its entity
+                // in `o`) or a lookup keyed by the next code unit.
+                const value =
+                    typeof next.n === "number"
+                        ? next.n === nextChar
+                            ? next.o
+                            : undefined
+                        : next.n.get(nextChar);
+
+                if (value !== undefined) {
+                    returnValue += value;
+                    // Two code units were consumed; skip the second one.
+                    lastIndex = regExp.lastIndex += 1;
+                    continue;
+                }
+            }
+
+            // No two-unit match: fall back to the branch node's own value.
+            next = next.v;
+        }
+
+        // We might have a tree node without a value; skip and use a numeric entity.
+        if (next === undefined) {
+            const cp = getCodePoint(input, index);
+            returnValue += `&#x${cp.toString(16)};`;
+            // Increase by 1 if we have a surrogate pair
+            lastIndex = regExp.lastIndex += Number(cp !== char);
+        } else {
+            returnValue += next;
+            lastIndex = index + 1;
+        }
+    }
+
+    // Append the tail after the last match. `substring` replaces the
+    // deprecated `substr`; both agree for a non-negative start index.
+    return returnValue + input.substring(lastIndex);
+}