// File listing metadata: 169 lines, 5.6 KiB, JavaScript
import { cloneEmojiRegexItem, createOptionalEmojiRegexItem, createSequenceEmojiRegexItem, createSetEmojiRegexItem } from "./base.js";
import { optimiseNumbersSet } from "./numbers.js";

/**
 * Exhaustiveness helper left over from the TypeScript source.
 *
 * Intentionally does nothing at runtime: it is called from the `default`
 * branch of a `switch` over item types, so an unknown type is ignored
 * rather than reported.
 *
 * @param {*} v - Value that reached the `default` branch (unused).
 * @returns {undefined}
 */
function assertNever(v) {}

/**
 * Find similar item sequences
 *
 * Looks for regex chunks shared by two or more entries of `items`, either
 * as a common start or as a common end. Items of type "optional" and
 * "utf16" only take part as a whole; items of type "sequence" take part
 * as a whole and through every proper prefix / suffix of their `items`.
 *
 * Returns sequence(s) with highest score. Only one of results should be
 * applied to items. If there are multiple sequences, clone items list,
 * attempt to apply each sequence, run further optimisations on each fork
 * and see which one returns better result.
 *
 * Each slice in the result is `{ index, slice }`, where `index` is the
 * position in `items` and `slice` is either "full" (whole item matches)
 * or a number: for "start" matches the count of leading sequence items
 * that are shared, for "end" matches the position at which the shared
 * tail begins.
 *
 * @param {Array<object>} items - Regex items; each has `type` and `regex`,
 *   and "sequence" items also have an `items` array.
 * @returns {{score: number, sequences: Array<{type: string, slices: Array<{index: number, slice: (number|string)}>}>}|undefined}
 *   Best matches, or undefined if no common sequences found.
 * @throws {Error} If an item of type "set" is found in `items`.
 */
function findSimilarRegexItemSequences(items) {
	// Dictionaries keyed by regex source. Null prototype, so regex strings
	// can never collide with inherited Object properties.
	const startRegex = Object.create(null);
	const endRegex = Object.create(null);

	// Record that `regex` occurs in items[index], covering `slice` of it
	const addMapItem = (target, index, regex, slice) => {
		const entry = target[regex];
		if (!entry) {
			// First occurrence scores nothing: a chunk is only worth
			// merging once it has been seen at least twice
			target[regex] = {
				score: 0,
				slices: [{ index, slice }],
			};
			return;
		}

		// Each repeated occurrence is worth the length of the regex it saves
		entry.score += regex.length;
		entry.slices.push({ index, slice });
	};

	for (let index = 0; index < items.length; index++) {
		const baseItem = items[index];
		switch (baseItem.type) {
			case "optional":
			case "utf16": {
				// Cannot be split: only the whole item can match
				addMapItem(startRegex, index, baseItem.regex, "full");
				addMapItem(endRegex, index, baseItem.regex, "full");
				break;
			}

			case "sequence": {
				// Whole sequence
				addMapItem(startRegex, index, baseItem.regex, "full");
				addMapItem(endRegex, index, baseItem.regex, "full");

				// Every split point: items before `i` form a possible common
				// start, items from `i` onwards form a possible common end
				const sequence = baseItem.items;
				for (let i = 1; i < sequence.length; i++) {
					const startSequence = createSequenceEmojiRegexItem(sequence.slice(0, i));
					addMapItem(startRegex, index, startSequence.regex, i);

					const endSequence = createSequenceEmojiRegexItem(sequence.slice(i));
					addMapItem(endRegex, index, endSequence.regex, i);
				}
				break;
			}

			case "set":
				throw new Error("Unexpected set within a set");

			default:
				assertNever(baseItem);
		}
	}

	// Collect the entries with the highest score; ties are all kept
	let result;
	const checkResults = (target, type) => {
		for (const regex in target) {
			const entry = target[regex];
			if (!entry.score) {
				// Seen only once: nothing to merge
				continue;
			}

			if (!result || result.score < entry.score) {
				// New best score: discard previous candidates
				result = {
					score: entry.score,
					sequences: [{ type, slices: entry.slices }],
				};
				continue;
			}

			if (result.score === entry.score) {
				result.sequences.push({ type, slices: entry.slices });
			}
		}
	};
	checkResults(startRegex, "start");
	checkResults(endRegex, "end");

	return result;
}

/**
 * Merge similar sequences
 *
 * Applies one match found by findSimilarRegexItemSequences(): the items
 * referenced by `merge.slices` are replaced with a single sequence made
 * of the common chunk plus a set of the parts that differ. If any of the
 * referenced items matched in full, the differing part is wrapped in an
 * optional item.
 *
 * Accepts callback to run optimisation on created subset
 *
 * @param {Array<object>} items - Regex items the match was found in.
 * @param {{type: string, slices: Array<{index: number, slice: (number|string)}>}} merge
 *   Match to apply; `type` is "start" or "end".
 * @param {function(object): object} [optimise] - Optional callback that
 *   receives the set of differing parts and returns a replacement item.
 *   Not called when the set contains exactly one item.
 * @returns {Array<object>} New list: merged sequence first, followed by
 *   the items that were not part of the match.
 * @throws {Error} If a partial slice points at a non-sequence item, or if
 *   no common chunk can be determined.
 */
function mergeSimilarRegexItemSequences(items, merge, optimise) {
	const { type, slices } = merge;
	const isStart = type === "start";

	// Indexes of items consumed by this merge
	const indexes = /* @__PURE__ */ new Set();
	let hasFullSequence = false;
	let longestMatch = 0;
	let longestMatchIndex = -1;

	// Parts of partially matched sequences that are not common
	const differentSequences = [];

	for (let i = 0; i < slices.length; i++) {
		const { index, slice } = slices[i];
		const item = items[index];

		let length;
		if (slice === "full") {
			// Whole item is the common chunk, nothing left over
			hasFullSequence = true;
			length = item.type === "sequence" ? item.items.length : 1;
		} else {
			if (item.type !== "sequence") throw new Error(`Unexpected partial match for type "${item.type}"`);

			// For "start" the slice is the number of common leading items,
			// for "end" it is the position where the common tail begins
			length = isStart ? slice : item.items.length - slice;
			differentSequences.push(isStart ? item.items.slice(slice) : item.items.slice(0, slice));
		}

		if (length > longestMatch) {
			longestMatchIndex = index;
			longestMatch = length;
		}
		indexes.add(index);
	}

	if (longestMatch < 1 || longestMatchIndex < 0) throw new Error("Cannot find common sequence");

	// Take the common chunk from the item that has most items in it
	const commonItem = items[longestMatchIndex];
	let sequence;
	if (commonItem.type !== "sequence") {
		if (longestMatch !== 1) throw new Error("Something went wrong. Cannot have long match in non-sequence");
		sequence = [commonItem];
	} else if (isStart) {
		sequence = commonItem.items.slice(0, longestMatch);
	} else {
		sequence = commonItem.items.slice(commonItem.items.length - longestMatch);
	}

	// Turn leftover parts into set entries
	const setItems = [];
	for (let i = 0; i < differentSequences.length; i++) {
		const list = differentSequences[i];
		setItems.push(list.length === 1 ? list[0] : createSequenceEmojiRegexItem(list));
	}

	const set = createSetEmojiRegexItem(setItems);
	let mergedChunk;
	if (set.sets.length === 1) {
		// Single entry: use it directly, no need to run the callback
		mergedChunk = set.sets[0];
	} else if (optimise) {
		mergedChunk = optimise(set);
	} else {
		mergedChunk = set;
	}

	// A full match means the differing part may be absent
	if (hasFullSequence) mergedChunk = createOptionalEmojiRegexItem(mergedChunk);

	// Differing part goes after a common start or before a common end
	if (isStart) sequence.push(mergedChunk);
	else sequence.unshift(mergedChunk);

	// Merged item first, followed by everything the merge did not touch
	const results = [createSequenceEmojiRegexItem(sequence), ...items.filter((_, index) => !indexes.has(index))];
	return results;
}

/**
 * Merge similar items
 *
 * Runs optimiseNumbersSet() on the set, then repeatedly looks for common
 * starts / ends among its items and merges them until nothing in common
 * is left. When several merges share the best score, each one is tried
 * on a cloned list and the one producing the shortest regex is kept
 * (the first one wins a tie).
 *
 * Merged subsets are optimised recursively with this same function.
 *
 * @param {object} set - Set item to optimise; its `sets` array holds the
 *   alternatives.
 * @returns {object} Optimised item. It is not necessarily a set: if
 *   everything collapses into one item, that item is returned.
 * @throws {Error} If the search reports a match with no sequences.
 */
function mergeSimilarItemsInSet(set) {
	const updatedSet = optimiseNumbersSet(set);
	if (updatedSet.type !== "set") return updatedSet;

	// Merging works on deep clones, so candidate merges cannot affect
	// each other or the current set
	const cloneItems = (source) => source.sets.map((item) => cloneEmojiRegexItem(item, true));

	let current = updatedSet;
	for (;;) {
		const merges = findSimilarRegexItemSequences(current.sets);
		if (!merges) {
			// Nothing left to merge
			return current;
		}

		const { sequences } = merges;
		if (sequences.length === 1) {
			// Only one candidate: apply it
			const merged = mergeSimilarRegexItemSequences(cloneItems(current), sequences[0], mergeSimilarItemsInSet);
			if (merged.length === 1) return merged[0];
			current = createSetEmojiRegexItem(merged);
			continue;
		}

		// Several candidates with the same score: try each, keep shortest regex
		let newItem;
		for (let i = 0; i < sequences.length; i++) {
			const merged = mergeSimilarRegexItemSequences(cloneItems(current), sequences[i], mergeSimilarItemsInSet);
			const mergedItem = merged.length === 1 ? merged[0] : createSetEmojiRegexItem(merged);
			if (!newItem || mergedItem.regex.length < newItem.regex.length) newItem = mergedItem;
		}

		if (!newItem) throw new Error("Empty sequences list");
		if (newItem.type !== "set") return newItem;
		current = newItem;
	}
}

export { findSimilarRegexItemSequences, mergeSimilarItemsInSet, mergeSimilarRegexItemSequences }; |