// MIT License, Copyright (c) 2024 Marvin Borner // high-quality syntax highlighter and intelligence console.warn( "bruijnIntelligence is still WIP, warnings are not representative", ); // i hate web dev const decodeHTML = (html) => { const txt = document.createElement("textarea"); txt.innerHTML = html; return txt.value; }; const encodeHTML = (str) => str.replace(/[\u00A0-\u9999<>\&\#]/g, (i) => "&#" + i.charCodeAt(0) + ";"); // just basic token parsing const parseTerm = (str) => { const tokens = []; const matchers = { whitespace: /^(\s+)(.*)/, type: /^(⧗ .*)(.*)$/, string: /^("[^\"]*")(.*)/, char: /^('[^\']*')(.*)/, complex: /^(\([+-][0-9]+\.[0-9]+[+-][0-9]+\.[0-9]+i\))(.*)/, float: /^(\([+-][0-9]+\.[0-9]+[qr]?\))(.*)/, number: /^(\([+-][0-9]+[ubtd]?\))(.*)/, index: /^([0-9])(.*)/, abstraction: /^([\[\]])(.*)/, application: /^([\(\)])(.*)/, namespace: /^([Α-ΩA-Z]+[^\s\[\]\(\)\.]*)(.*)/, identifier: /^([α-ωa-z]+[^\s\[\]\(\)\.]*)(.*)/, special: /^([^0-9Α-ΩA-Zα-ωa-z\s\(\)\[\]"']+)(.*)/, }; while (str) { let matches; for ([kind, matcher] of Object.entries(matchers)) { if ((matches = str.match(matcher))) { tokens.push({ kind, string: matches[1] }); break; } } if (!matches) { console.warn("unknown term", str.slice(0, 10)); break; } str = matches[2]; } return tokens; }; const parseFile = (str) => { const tree = []; str.split("\n").forEach((line) => { let matches; if ((matches = line.match(/^:input (.*)/))) { tree.push({ kind: "input", path: matches[1] }); } else if ((matches = line.match(/^:import (.*) (.*)$/))) { tree.push({ kind: "import", path: matches[1], namespace: matches[2] }); } else if ((matches = line.match(/^:import (.*)$/))) { const namespace = matches[1].split("/").slice(-1)[0]; tree.push({ kind: "import", path: matches[1], namespace: namespace }); } else if ((matches = line.match(/^:test (\(.*\)) (\(.*\))$/))) { tree.push({ kind: "test", left: parseTerm(matches[1]), right: parseTerm(matches[2]), }); } else if ((matches = line.match(/^:time (.*)$/))) { tree.push({ kind: "time", term: parseTerm(matches[1]) }); } else if ( (matches = line.match(/^([ \t]*(?:#[^ ]|[^:\n#])[^ ]*) (.*)$/)) ) { tree.push({ kind: "definition", name: matches[1], term: parseTerm(matches[2]), }); } else if ((matches = line.match(/^# (.*)$/))) { tree.push({ kind: "comment", string: matches[1] }); } else if ((matches = line.match(/(\s)+/))) { tree.push({ kind: "whitespace", string: matches[1] }); } else if (line === "") { tree.push({ kind: "whitespace", string: line }); } else { console.warn("unknown line", line); } }); return tree; }; const dumpTree = (tree) => { const dumpTerm = (tokens) => tokens .map( (token) => `${encodeHTML(token.string)}`, ) .join(""); return tree .map((line) => { switch (line.kind) { case "input": return `:input ${line.path}`; case "import": return `:import ${line.path} ${line.namespace}`; case "test": return `:test ${dumpTerm(line.left)} ${dumpTerm(line.right)}`; case "time": return `:time ${dumpTerm(line.term)}`; case "definition": return `${encodeHTML(line.name)} ${dumpTerm(line.term)}`; case "comment": return `# ${encodeHTML(line.string)}`; case "whitespace": return line.string; case "verbatim": return dumpTerm(line.tokens); default: console.error("unknown kind", line.kind); return ""; } }) .join("\n"); }; // TODO: this is fine for medium files but not large programs const highlight = (elem) => { const tree = parseFile(decodeHTML(elem.innerHTML)); elem.innerHTML = dumpTree(tree); }; const highlightTerm = (elem) => { const tokens = parseTerm(decodeHTML(elem.innerHTML)); elem.innerHTML = dumpTree([{ kind: "verbatim", tokens }]); }; /** * It might seem dumb that we parse everything again from DOM. * You're right. However, we use a very similar structure in blog * so it's much easier to recycle the code here. * TODO: eventually merge this with blog */ const bruijnScroll = (elem) => (f) => elem.scrollIntoView(); const bruijnParse = (stdMap) => { const tokens = [ ...document.querySelectorAll("code.language-bruijn span"), ].filter((t) => !!t.classList.length); const root = { indent: -1, children: [], nodes: [], name: "" }; const stack = [root]; for (let i = 0; i < tokens.length; i++) { const token = tokens[i]; const current = stack[stack.length - 1]; const inner = token.innerHTML; const tabIndent = inner[0] === "\t"; const indent = token.classList[0] === "definition" ? tabIndent ? (inner.match(/[\t]/g) ?? "").length : (inner.match(/[\s]/g) ?? "").length / 4 : 0; const fresh = { kind: "definition", elem: token, indent, children: [], nodes: [], name: inner.trim(), action: bruijnScroll(token), }; if (token.classList[0] === "definition") { // definition if (indent < current.indent) { // indent left for (let j = 0; j < current.indent - indent + 1; j++) stack.pop(); stack[stack.length - 1].children.push(fresh); stack.push(fresh); } else if (indent > current.indent) { // indent right current.children.push(fresh); stack.push(fresh); } else { // same indent stack.pop(); stack[stack.length - 1].children.push(fresh); stack.push(fresh); } } else if (token.classList[0] === "pp") { // preprocessor const instr = inner.trim(); const isImport = instr.startsWith(":import"); const isInput = instr.startsWith(":input"); if (isImport || isInput) { const parts = instr.split(" "); const path = parts[1].replace("std/", "") + ".bruijn"; if (!(path in stdMap)) { console.warn("invalid import", path); continue; } stdMap[path].forEach((d) => { const name = isInput ? d.name : parts[2] == "." ? d.name : parts[2] + "." + d.name; const link = d.source.replace("/", "_"); const original = path.replace("/", "_"); if (isInput || (isImport && d.kind !== "import")) root.children.push({ kind: "import", elem: token, indent: 0, action: (f) => window.open( `https://bruijn.marvinborner.de/std/${f == "" ? original : link}.html#${f}`, "_blank", ), children: [], nodes: [], name, }); }); } else if (instr.startsWith(":test") || instr.startsWith(":time")) { root.children.push(fresh); } } // just before expression: definition/preprocessor if (["definition", "pp"].includes(token.classList[0])) { const bruijnStack = []; for ( let j = i + 1; j < tokens.length && !["definition", "pp"].includes(tokens[j].classList[0]); j++ ) { if ( j > 0 && tokens[j - 1].classList[0] === "namespace" && tokens[j].classList[0] === "special" && tokens[j].innerHTML.trim().startsWith(".") && tokens[j].innerHTML.trim().length !== 1 // && something next != space TODO ) { // namespaced prefix const decoded = decodeHTML(tokens[j].innerHTML.trim()); const prefixed = decoded[decoded.length - 1] === "‣" ? decoded : decoded + "‣"; fresh.nodes.push({ kind: "prefix", elem: tokens[j], name: tokens[j - 1].innerHTML.trim() + prefixed, bruijnStack: [...bruijnStack], // for subs canonical: prefixed.slice(1), }); } else if ( tokens[j].classList[0] === "special" && tokens[j].innerHTML.trim() !== "." // && something next != space TODO ) { // normal prefix const decoded = decodeHTML(tokens[j].innerHTML.trim()); const prefixed = decoded[decoded.length - 1] === "‣" ? decoded : decoded + "‣"; fresh.nodes.push({ kind: "prefix", elem: tokens[j], name: prefixed, bruijnStack: [...bruijnStack], // for subs canonical: prefixed, }); // normal mixfix TODO: this is a ugly hack and only works if lucky and binary const mixfixed = decoded[decoded.length - 1].includes("…") ? decoded : "…" + decoded + "…"; fresh.nodes.push({ kind: "mixfix", elem: tokens[j], name: mixfixed, bruijnStack: [...bruijnStack], // for subs canonical: mixfixed, }); } else if ( j > 1 && tokens[j - 2].classList[0] === "namespace" && tokens[j - 1].classList[0] === "special" && tokens[j - 1].innerHTML.trim() === "." && tokens[j].classList[0] === "identifier" ) { // namespaced identifier fresh.nodes.push({ kind: "identifier", elem: tokens[j], name: tokens[j - 2].innerHTML.trim() + "." + tokens[j].innerHTML.trim(), bruijnStack: [...bruijnStack], // for subs canonical: tokens[j].innerHTML.trim(), }); } else if (tokens[j].classList[0] === "identifier") { // normal identifier fresh.nodes.push({ kind: "identifier", elem: tokens[j], name: tokens[j].innerHTML.trim(), bruijnStack: [...bruijnStack], // for subs canonical: tokens[j].innerHTML.trim(), }); } else if (tokens[j].classList[0] === "abstraction") { // square bracket / abstraction const inner = tokens[j].innerHTML.trim(); if (inner === "[") bruijnStack.push({ kind: "abstraction", elem: tokens[j], indent: 0, action: (f) => {}, children: [], nodes: [], }); else if (inner === "]") { const left = bruijnStack.pop(); left.right = tokens[j]; } } else if (tokens[j].classList[0] === "index") { // de Bruijn index const inner = tokens[j].innerHTML.trim(); if (inner.length !== 1 || !(inner[0] >= "0" && inner[0] <= "9")) { console.warn("weird de Bruijn index", inner); continue; } const left = bruijnStack[bruijnStack.length - 1 - +inner]; fresh.nodes.push({ kind: "index", elem: tokens[j], target: left, right: left ? left.right : left, }); // also highlight abstractions in sup terms const walkUp = [...stack]; walkUp.shift(); // without root walkUp.pop(); // without current let metaStacks = {}; metaStacks[stack[stack.length - 1].name] = [ [...bruijnStack].reverse(), ]; while (walkUp.length > 0) { const sup = walkUp.pop(); sup.nodes .filter((node) => node.name in metaStacks) .forEach((node) => { const metaStack = []; metaStacks[node.name].forEach((meta) => { metaStack.push( [...meta].concat([...node.bruijnStack].reverse()), ); }); metaStack.forEach((meta) => { const upperLeft = meta[+inner]; if (!upperLeft) { // console.warn(sup.name, inner, sup.elem, node.elem); // TODO: fix?? } else fresh.nodes.push({ kind: "index", elem: tokens[j], target: upperLeft, right: upperLeft ? upperLeft.right : upperLeft, }); }); metaStacks[sup.name] = metaStack; }); } // TODO: case is missing: // foo [bar] // baz [0 1 2] # won't get highlighted // bar [baz] } } } } return root; }; const bruijnApply = (root) => { const crumbs = [0]; // unintended identity while (crumbs.length > 0) { // TODO: this makes it really slow and isn't necessary const index = (indices, obj) => { const head = indices[0]; const tail = indices.slice(1); if (!obj || head >= obj.children.length) return undefined; else if (tail.length > 0) return index(tail, obj.children[head]); else return obj.children[head]; }; const current = index(crumbs, root); if (!current) { crumbs.pop(); if (crumbs.length > 0) ++crumbs[crumbs.length - 1]; continue; } current.nodes.forEach((node) => { let res; if (node.kind === "index") res = node.target; // first search in sub // this only nests 1 deep, see previous versions for DFS // e.g. β* was buggy with this though, we'd need BFS let _crumbs = [...crumbs, 0]; while (!res && _crumbs.length > 0) { const found = index(_crumbs, root); if (!found) break; if (found.name === node.name) { res = found; break; } ++_crumbs[_crumbs.length - 1]; } // ...then in sup _crumbs = [...crumbs]; // skip itself _crumbs[_crumbs.length - 1] -= _crumbs[_crumbs.length - 1] == 0 ? 0 : 1; while (!res && _crumbs.length > 0) { const found = index(_crumbs, root); if (!found) { console.error("wtf lol what happened"); break; } if (found.name === node.name) { res = found; break; } if (_crumbs[_crumbs.length - 1] === 0) _crumbs.pop(); else --_crumbs[_crumbs.length - 1]; } if (res) { // densely connected listeners node.elem.addEventListener( "click", (ev) => { res.action(node.canonical); ev.stopImmediatePropagation(); }, true, ); res.elem.addEventListener( "click", (ev) => { res.action(""); ev.stopImmediatePropagation(); }, true, ); const connectors = [node.elem, res.elem]; if ("right" in res) connectors.push(res.right); // right abstraction connectors.forEach((elem) => { elem.addEventListener("mouseenter", () => { node.elem.classList.add("code-highlight"); connectors.forEach((elem2) => elem2.classList.add("code-highlight"), ); }); elem.addEventListener("mouseout", () => { node.elem.classList.remove("code-highlight"); connectors.forEach((elem2) => elem2.classList.remove("code-highlight"), ); }); elem.classList.add("code-clickable"); }); if (["definition", "import"].includes(res.kind)) { node.elem.title = `Jump to ${res.kind} source`; } } else if (node.kind === "identifier") { // TODO: also include prefix/mixfix once better console.warn("unbound", node.kind, node.name); } }); // try to nest deeper crumbs.push(0); } }; const becomeIntelligent = () => { fetch("https://bruijn.marvinborner.de/std_map.json") .then((resp) => { return resp.json(); }) .then((data) => { [...document.querySelectorAll("code.language-bruijn")].forEach(highlight); [...document.querySelectorAll("code.bruijn")].forEach(highlightTerm); const tree = bruijnParse(data); bruijnApply(tree); // jump to given function const hash = document.location.hash; if (hash) { const definitions = document.querySelectorAll( "code.language-bruijn span.definition", ); definitions.forEach((definition) => { if (definition.innerText === decodeURI(hash).slice(1)) { bruijnScroll(definition)(); // trigger hover/highlight effect const hover = new Event("mouseenter"); definition.dispatchEvent(hover); } }); } }) .catch((err) => { console.error(err); }); }; // onload needed for wiki window.onload = () => { if (document.querySelector("code.language-bruijn, code.bruijn")) becomeIntelligent(); };