import { cloneEmojiRegexItem, createOptionalEmojiRegexItem, createSequenceEmojiRegexItem, createSetEmojiRegexItem } from "./base.js";
import { optimiseNumbersSet } from "./numbers.js";

/**
 * Exhaustiveness marker left over from the TypeScript source.
 *
 * Does nothing at runtime.
 */
function assertNever(v) {}

/**
 * Look for regex fragments shared by several items.
 *
 * Every item is registered under its full regex. Items of type "sequence"
 * are additionally registered under the regex of every proper prefix
 * (candidates for a common start) and every proper suffix (candidates for
 * a common end). A fragment's score is `regex.length` multiplied by the
 * number of occurrences after the first one, so a fragment seen only once
 * scores 0 and is ignored.
 *
 * Only ONE of the returned sequences should be applied to the items. When
 * several are returned (equal score), the caller is expected to try each
 * of them on a copy of the list and keep whichever gives the better result.
 *
 * @param items List of regex items; none of them may have type "set"
 * @returns `{ score, sequences }` holding the best-scoring candidate(s),
 *          or `undefined` when no fragment occurs more than once
 * @throws {Error} If an item of type "set" is found in the list
 */
function findSimilarRegexItemSequences(items) {
  // Null-prototype objects used as dictionaries keyed by regex source
  const prefixes = Object.create(null);
  const suffixes = Object.create(null);

  // Registers one occurrence of `regex`, found in items[index]
  const register = (dictionary, index, regex, slice) => {
    const known = dictionary[regex];
    if (known) {
      // Repeated occurrence: each one is worth the length of the regex
      known.score += regex.length;
      known.slices.push({ index, slice });
      return;
    }
    // First occurrence scores nothing
    dictionary[regex] = {
      score: 0,
      slices: [{ index, slice }],
    };
  };

  for (const [index, baseItem] of items.entries()) {
    const itemType = baseItem.type;
    if (itemType === "set") {
      throw new Error("Unexpected set within a set");
    }
    if (itemType !== "optional" && itemType !== "utf16" && itemType !== "sequence") {
      assertNever(baseItem);
      continue;
    }

    // The whole item is a candidate for both a common start and a common end
    register(prefixes, index, baseItem.regex, "full");
    register(suffixes, index, baseItem.regex, "full");
    if (itemType !== "sequence") {
      continue;
    }

    // Split the sequence at every inner position: head goes to the
    // prefixes dictionary, tail goes to the suffixes dictionary
    const parts = baseItem.items;
    for (let cut = 1; cut < parts.length; cut++) {
      const head = createSequenceEmojiRegexItem(parts.slice(0, cut));
      register(prefixes, index, head.regex, cut);
      const tail = createSequenceEmojiRegexItem(parts.slice(cut));
      register(suffixes, index, tail.regex, cut);
    }
  }

  let result;

  // Folds one dictionary into `result`: a higher score replaces the
  // current best, an equal score is appended as an alternative
  const collect = (dictionary, type) => {
    for (const { score, slices } of Object.values(dictionary)) {
      if (!score) {
        continue;
      }
      if (result === undefined || score > result.score) {
        result = {
          score,
          sequences: [{ type, slices }],
        };
      } else if (score === result.score) {
        result.sequences.push({ type, slices });
      }
    }
  };

  collect(prefixes, "start");
  collect(suffixes, "end");
  return result;
}

/**
 * Merge items that share a common start or end into one sequence.
 *
 * The shared fragment is kept once; the parts that differ are collected
 * into a set placed after it (type "start") or before it (type "end").
 * If at least one merged item consisted of the shared fragment alone, the
 * differing part is wrapped with `createOptionalEmojiRegexItem()`.
 *
 * @param items List of items that `merge` refers to by index
 * @param merge One entry of `sequences` returned by
 *              `findSimilarRegexItemSequences()`: `{ type, slices }`
 * @param optimise Optional callback run on the set of differing parts
 *                 when that set holds more than one item
 * @returns New list: merged sequence first, followed by the items that
 *          were not part of the merge, in their original order
 * @throws {Error} If a partial slice points to an item that is not a
 *                 sequence, or if no common fragment can be determined
 */
function mergeSimilarRegexItemSequences(items, merge, optimise) {
  const { type, slices } = merge;
  const fromStart = type === "start";

  const mergedIndexes = /* @__PURE__ */ new Set();
  const remainders = [];
  let includesWholeItem = false;
  let bestLength = 0;
  let bestIndex = -1;

  for (const { index, slice } of slices) {
    const item = items[index];
    let length;
    if (slice === "full") {
      // Whole item equals the common fragment: nothing is left over
      includesWholeItem = true;
      length = item.type === "sequence" ? item.items.length : 1;
    } else {
      if (item.type !== "sequence") {
        throw new Error(`Unexpected partial match for type "${item.type}"`);
      }
      length = fromStart ? slice : item.items.length - slice;
      // Keep the part of the sequence that is NOT shared
      remainders.push(fromStart ? item.items.slice(slice) : item.items.slice(0, slice));
    }

    if (length > bestLength) {
      bestIndex = index;
      bestLength = length;
    }
    mergedIndexes.add(index);
  }

  if (bestLength < 1 || bestIndex < 0) {
    throw new Error("Cannot find common sequence");
  }

  // Take the shared fragment from the item with the longest match
  const commonItem = items[bestIndex];
  let sequence;
  if (commonItem.type === "sequence") {
    const parts = commonItem.items;
    sequence = fromStart ? parts.slice(0, bestLength) : parts.slice(parts.length - bestLength);
  } else {
    if (bestLength !== 1) {
      throw new Error("Something went wrong. Cannot have long match in non-sequence");
    }
    sequence = [commonItem];
  }

  // Build a set of alternatives from the parts that differ
  const setItems = remainders.map((list) => list.length === 1 ? list[0] : createSequenceEmojiRegexItem(list));
  const set = createSetEmojiRegexItem(setItems);

  let mergedChunk;
  if (set.sets.length === 1) {
    mergedChunk = set.sets[0];
  } else if (optimise) {
    mergedChunk = optimise(set);
  } else {
    mergedChunk = set;
  }
  if (includesWholeItem) {
    mergedChunk = createOptionalEmojiRegexItem(mergedChunk);
  }

  // Differing part goes after a common start, before a common end
  if (fromStart) {
    sequence.push(mergedChunk);
  } else {
    sequence.unshift(mergedChunk);
  }

  const mergedSequence = createSequenceEmojiRegexItem(sequence);
  const untouched = items.filter((item, index) => !mergedIndexes.has(index));
  return [mergedSequence, ...untouched];
}

/**
 * Merge similar items in a set.
 *
 * Runs `optimiseNumbersSet()` first, then keeps merging the best common
 * fragment reported by `findSimilarRegexItemSequences()` until nothing
 * more can be merged or the set collapses into a single item. When
 * several merges score equally, each one is tried on a cloned list and
 * the one producing the shortest regex is kept.
 *
 * @param set Set item to optimise
 * @returns Optimised item; it may no longer be of type "set"
 * @throws {Error} If a search result contains an empty list of sequences
 */
function mergeSimilarItemsInSet(set) {
  const numbersOptimised = optimiseNumbersSet(set);
  if (numbersOptimised.type !== "set") {
    return numbersOptimised;
  }

  // Applies one candidate to a cloned copy of the list, so that competing
  // candidates never see each other's changes
  const applyMerge = (source, candidate) => mergeSimilarRegexItemSequences(source.sets.map((item) => cloneEmojiRegexItem(item, true)), candidate, mergeSimilarItemsInSet);

  let current = numbersOptimised;
  for (;;) {
    const merges = findSimilarRegexItemSequences(current.sets);
    if (!merges) {
      return current;
    }

    const candidates = merges.sequences;
    if (candidates.length === 1) {
      // Only one option: apply it directly
      const merged = applyMerge(current, candidates[0]);
      if (merged.length === 1) {
        return merged[0];
      }
      current = createSetEmojiRegexItem(merged);
      continue;
    }

    // Several equally scored options: keep the shortest resulting regex.
    // On equal length the earlier candidate wins.
    let shortest;
    for (const candidate of candidates) {
      const merged = applyMerge(current, candidate);
      const mergedItem = merged.length === 1 ? merged[0] : createSetEmojiRegexItem(merged);
      if (!shortest || mergedItem.regex.length < shortest.regex.length) {
        shortest = mergedItem;
      }
    }
    if (!shortest) {
      throw new Error("Empty sequences list");
    }
    if (shortest.type !== "set") {
      return shortest;
    }
    current = shortest;
  }
}

export { findSimilarRegexItemSequences, mergeSimilarItemsInSet, mergeSimilarRegexItemSequences };