import java.io.*;
import java.util.*;

/**
 * Command-line tool that reads a text file, splits it into words, and for every
 * word longer than one character counts occurrences of its first-half prefix
 * ({@code word.substring(0, word.length() / 2)}, lower-cased).
 *
 * <p>A word is a maximal run of letters, dash punctuation and apostrophes.
 * Results are written one per line as {@code "<word> <count>"}, ordered by the
 * length of {@code WordInfo.word} and, for equal lengths, by
 * {@code WordInfo.firstIndex}.
 *
 * <p>NOTE(review): {@code WordInfo} is declared outside this file; this class
 * assumes its constructor {@code (String, int, int)} populates the
 * {@code word}, {@code count} and {@code firstIndex} fields in that order -
 * confirm against its definition.
 */
public class WordStatLengthPrefix {

    /**
     * Entry point.
     *
     * @param args exactly two elements: the input file name and the output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println("usage: java WordStat inputFile outputFile");
            // Stop here: continuing would index past the end of args.
            return;
        }
        String inputFileName = args[0];
        String outputFileName = args[1];

        try {
            // The input is fully read and closed before the output is opened,
            // preserving the original open/close order.
            Map<String, WordInfo> wordMap;
            try (Reader reader = new BufferedReader(
                    // Decode explicitly as UTF-8 to match the encoding used for the output file,
                    // instead of relying on the platform default charset.
                    new InputStreamReader(new FileInputStream(inputFileName), "UTF-8"))) {
                wordMap = countPrefixes(reader);
            }

            List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
            sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
                    .thenComparingInt(w -> w.firstIndex));

            try (PrintWriter writer = new PrintWriter(outputFileName, "UTF-8")) {
                for (WordInfo info : sortedWords) {
                    writer.println(info.word + " " + info.count);
                }
                // PrintWriter never throws on write failures; surface them explicitly.
                if (writer.checkError()) {
                    throw new IOException("failed to write to " + outputFileName);
                }
            }
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /**
     * Reads {@code reader} to the end and tallies the first-half prefix of every word.
     *
     * @param reader character source; not closed by this method
     * @return map from prefix to its {@code WordInfo}
     * @throws IOException if reading fails
     */
    private static Map<String, WordInfo> countPrefixes(Reader reader) throws IOException {
        Map<String, WordInfo> wordMap = new HashMap<>();
        StringBuilder current = new StringBuilder();
        int data;
        while ((data = reader.read()) != -1) {
            char c = (char) data;
            if (isWordChar(c)) {
                current.append(c);
            } else {
                recordWord(current, wordMap);
            }
        }
        // The input may end in the middle of a word.
        recordWord(current, wordMap);
        return wordMap;
    }

    /** Returns whether {@code c} is part of a word: a letter, dash punctuation or an apostrophe. */
    private static boolean isWordChar(char c) {
        return Character.getType(c) == Character.DASH_PUNCTUATION
                || Character.isLetter(c)
                || c == '\'';
    }

    /**
     * Consumes the word accumulated in {@code current} (if any), clears the buffer and,
     * unless the word is a single character, counts its first-half prefix in {@code wordMap}.
     */
    private static void recordWord(StringBuilder current, Map<String, WordInfo> wordMap) {
        if (current.length() == 0) {
            return;
        }
        // Locale.ROOT keeps lower-casing independent of the machine's default locale.
        String word = current.toString().toLowerCase(Locale.ROOT);
        current.setLength(0);
        if (word.length() == 1) {
            // One-character words are skipped (their half-length prefix would be empty).
            return;
        }
        String prefix = word.substring(0, word.length() / 2);
        WordInfo info = wordMap.get(prefix);
        if (info != null) {
            info.count++;
        } else {
            // Entries are never removed, so the current size is the ordinal of this
            // first occurrence (0 for the first distinct prefix, 1 for the next, ...).
            wordMap.put(prefix, new WordInfo(prefix, 1, wordMap.size()));
        }
    }
}