package wspp; import base.ExtendedRandom; import base.Named; import base.Pair; import base.TestCounter; import wordStat.WordStatChecker; import wordStat.WordStatTester; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.IntFunction; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; /** * @author Georgiy Korneev (kgeorgiy@kgeorgiy.info) */ public final class WsppTester { private WsppTester() { } public static Consumer variant( final Named>> comparator, final Named> selector, final Named> extractor, final Named extra ) { // Stream "magic" code. You do not expect to understand it return counter -> WordStatChecker.test( counter, "Wspp" + comparator.name() + selector.name() + extractor.name() + extra.name(), text -> { final Map totals = Arrays.stream(text) .flatMap(Arrays::stream) .map(word -> word.toLowerCase(Locale.ROOT)) .collect(Collectors.toMap(Function.identity(), k -> 1, Integer::sum, LinkedHashMap::new)); final int[] lengths = Arrays.stream(text).mapToInt(a -> a.length).toArray(); final int[] sizes = new int[lengths.length + 1]; int start = 0; for (int i = 0; i < lengths.length; i++) { sizes[i] = start; start += lengths[i]; } sizes[lengths.length] = start; final Map selected = IntStream.range(0, text.length).boxed() .flatMap(r -> { final String[] line = text[r]; return IntStream.range(0, line.length).boxed() .collect(Collectors.groupingBy( w -> line[w].toLowerCase(Locale.ROOT), Collectors.collectingAndThen( Collectors.mapping( w -> extractor.value().select( r + 1, w + 1, line.length, sizes[r] + w + 1, sizes[lengths.length] ), Collectors.toUnmodifiableList() ), list -> selector.value() .apply(list.size()) .mapToObj(list::get) .toList() ) )) .entrySet().stream(); } ) .collect(Collectors.groupingBy( Map.Entry::getKey, Collectors.flatMapping( e -> e.getValue().stream(), Collectors.mapping( String::valueOf, Collectors.mapping(" "::concat, Collectors.joining()) ) ) )); return totals.entrySet().stream() .sorted(comparator.value()) .map(e -> Pair.of(e.getKey(), e.getValue() + selected.get(e.getKey()))) .collect(Collectors.toList()); }, checker -> { final Pattern pattern = Pattern.compile(new String(Base64.getDecoder().decode("W15ccHtJc0xldHRlcn0nXHB7UGR9" + extra.value()), StandardCharsets.US_ASCII) + "]+"); final String good = String.join("", pattern.split(WordStatTester.POST_LOWER)); checker.test(pattern, "To be, or not to be, that is the question:"); checker.test( pattern, "Monday's child is fair of face.", "Tuesday's child is full of grace." ); checker.test( pattern, "Шалтай-Болтай", "Сидел на стене.", "Шалтай-Болтай", "Свалился во сне." ); checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.SIMPLE_DELIMITERS); checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.SIMPLE_DELIMITERS); checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.SIMPLE_DELIMITERS); checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.SIMPLE_DELIMITERS); checker.randomTest(3, 10, 10, 3, ExtendedRandom.ENGLISH, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(10, 3, 5, 5, ExtendedRandom.RUSSIAN, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(3, 10, 10, 3, ExtendedRandom.GREEK, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(3, 10, 10, 3, WordStatChecker.DASH, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(10, 20, 10, 3, good, WordStatChecker.SIMPLE_DELIMITERS); checker.randomTest(10, 20, 10, 3, good, WordStatChecker.ADVANCED_DELIMITERS); final int d = TestCounter.DENOMINATOR; final int d2 = TestCounter.DENOMINATOR2; checker.randomTest(100, 1000 / d, 1000 / d2, 1000 / d2, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(10, 1000 / d, 1000 / d2, 1000 / d2, good, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(10000 / d, 20, 10, 5, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS); checker.randomTest(1000000 / d, 2, 2, 1, WordStatChecker.ALL, WordStatChecker.ADVANCED_DELIMITERS); checker.test(pattern, WordStatTester.PRE_LOWER); checker.test(pattern, WordStatTester.POST_LOWER); } ); } @FunctionalInterface public interface Extractor { T select(int l, int li, int lt, int gi, int gt); } }