package wspp;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.NoSuchElementException;

/**
 * Splits a UTF-8 text file into lower-cased words, one line at a time.
 *
 * <p>A word is a maximal run, within a single line, of Unicode letters,
 * apostrophes ({@code '}) and characters whose general category is
 * {@link Character#DASH_PUNCTUATION}. Everything else separates words.
 *
 * <p>Intended usage:
 * <pre>{@code
 * try (WordScanner scanner = new WordScanner(fileName)) {
 *     while (scanner.hasNextWord()) {
 *         String word = scanner.nextWord();
 *     }
 * }
 * }</pre>
 */
public class WordScanner implements Closeable {
    private final BufferedReader br;
    /** Line currently being scanned; {@code null} before the first read and after end of input. */
    private String line = null;
    /** Index into {@link #line} of the next UTF-16 unit to examine. */
    private int pos = 0;

    /**
     * Opens the given file for reading, decoding it as UTF-8.
     *
     * @param fileName path of the file to scan
     * @throws IOException if the file cannot be opened
     */
    public WordScanner(String fileName) throws IOException {
        br = new BufferedReader(new InputStreamReader(
                new FileInputStream(fileName), StandardCharsets.UTF_8));
    }

    /**
     * Makes sure {@link #line} has unread characters, reading the next line when
     * the current one is exhausted.
     *
     * @return {@code false} once the underlying reader reports end of input
     */
    private boolean hasNextLine() throws IOException {
        if (line != null && pos < line.length()) {
            return true;
        }
        line = br.readLine();
        pos = 0;
        return line != null;
    }

    /** Tells whether the code point may be part of a word. */
    private static boolean isWordChar(int codePoint) {
        return Character.isLetter(codePoint)
                || codePoint == '\''
                || Character.getType(codePoint) == Character.DASH_PUNCTUATION;
    }

    /** Moves {@link #pos} forward to the next word character, or to the end of the line. */
    private void skipSeparators() {
        while (pos < line.length()) {
            final int codePoint = line.codePointAt(pos);
            if (isWordChar(codePoint)) {
                return;
            }
            // Step by whole code points so a surrogate pair is never split.
            pos += Character.charCount(codePoint);
        }
    }

    /**
     * Advances to the start of the next word, reading further lines as needed.
     * Repeated calls without an intervening {@link #nextWord()} do not consume
     * any word.
     *
     * @return {@code true} if a word is available for {@link #nextWord()}
     * @throws IOException if reading from the file fails
     */
    public boolean hasNextWord() throws IOException {
        while (hasNextLine()) {
            skipSeparators();
            if (pos < line.length()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the next word on the current line and moves past it.
     *
     * <p>This method performs no I/O and never crosses a line boundary, so it
     * must be preceded by a call to {@link #hasNextWord()} that returned
     * {@code true}.
     *
     * @return the word, lower-cased with locale-independent rules
     * @throws NoSuchElementException if no word is available at the current position
     */
    public String nextWord() {
        if (line == null) {
            throw new NoSuchElementException(
                    "No word available: hasNextWord() was not called or input is exhausted");
        }
        skipSeparators();
        final int start = pos;
        while (pos < line.length()) {
            final int codePoint = line.codePointAt(pos);
            if (!isWordChar(codePoint)) {
                break;
            }
            pos += Character.charCount(codePoint);
        }
        if (start == pos) {
            throw new NoSuchElementException("No word left on the current line");
        }
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. a Turkish default would otherwise map 'I' to dotless 'ı').
        return line.substring(start, pos).toLowerCase(Locale.ROOT);
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }
}