Compare commits
25 Commits
de97058fe9
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 238f8048e0 | |||
| 231a37be9e | |||
| 3678af68c0 | |||
| d8154da7a9 | |||
| ddba0fc8b5 | |||
| dec9762ba2 | |||
| 380611c4df | |||
| 1e5c8fab61 | |||
| ab6616b619 | |||
| 6f5907b244 | |||
| 304f238a8a | |||
| b3dd5ae4c7 | |||
| 29c361725e | |||
| 868a206eb1 | |||
| 21d9d2c105 | |||
| 5ef52d793b | |||
| 5945b021f2 | |||
| e197eb2690 | |||
| 7b3f22216f | |||
| 6c42ebab03 | |||
| 8173cf4dab | |||
| 971119da82 | |||
| ab51c1a3dc | |||
| 826ab5a4d9 | |||
| 03dabc9dee |
@@ -19,4 +19,4 @@ jobs:
|
||||
|
||||
- name: Run Fast Reverse tests
|
||||
run: |
|
||||
java -ea -cp out reverse.FastReverseTest Base
|
||||
java -ea -cp out reverse.FastReverseTest Base 3233
|
||||
|
||||
@@ -19,4 +19,4 @@ jobs:
|
||||
|
||||
- name: Run Word Stat tests
|
||||
run: |
|
||||
java -ea -cp out wordStat.WordStatTest Base
|
||||
java -ea -cp out wordStat.WordStatTest Base 3233 3435 3637 3839 4142 4749
|
||||
|
||||
22
.gitea/workflows/wspp.yml
Normal file
22
.gitea/workflows/wspp.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Word Stat++ Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Compile Java
|
||||
run: |
|
||||
mkdir -p out
|
||||
javac -d out $(find java -name "*.java")
|
||||
|
||||
- name: Run Word Stat++ tests
|
||||
run: |
|
||||
java -ea -cp out wspp.WsppTest Base 3233
|
||||
20
README.md
20
README.md
@@ -279,7 +279,7 @@ include_toc: true
|
||||
|
||||
|
||||
Модификации
|
||||
* *Base*
|
||||
* *Base* ✅
|
||||
* Исходный код тестов: [FastReverseTest.java](java/reverse/FastReverseTest.java)
|
||||
* Откомпилированные тесты: [FastReverseTest.jar](artifacts/FastReverseTest.jar)
|
||||
* Аргументы командной строки: модификации
|
||||
@@ -311,7 +311,7 @@ include_toc: true
|
||||
и [закрывающими](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/lang/Character.html#END_PUNCTUATION)
|
||||
скобками
|
||||
* Класс должен иметь имя `ReverseRotate`
|
||||
* *3233*
|
||||
* *3233* ✅
|
||||
* Выведите (в реверсивном порядке) только числа,
|
||||
у которых сумма номеров строки и столбца четная
|
||||
* Числа дополнительно могут разделяться
|
||||
@@ -341,7 +341,7 @@ include_toc: true
|
||||
|
||||
|
||||
Модификации
|
||||
* *Base*
|
||||
* *Base* ✅
|
||||
* Класс должен иметь имя `WordStat`
|
||||
* Исходный код тестов:
|
||||
[WordStatTest.java](java/wordStat/WordStatTest.java),
|
||||
@@ -349,10 +349,10 @@ include_toc: true
|
||||
[WordStatChecker.java](java/wordStat/WordStatChecker.java)
|
||||
* Откомпилированные тесты: [WordStatTest.jar](artifacts/WordStatTest.jar)
|
||||
* Аргументы командной строки: модификации
|
||||
* *FastSort*
|
||||
* *FastSort* ✅
|
||||
* Пусть _n_ – число слов во входном файле,
|
||||
тогда программа должна работать за O(_n_ log _n_).
|
||||
* *3637*
|
||||
* *3637* ✅
|
||||
* Назовём _серединой слова_ подстроку, полученную удалением
|
||||
первых и последних 3 символов слова.
|
||||
Слова длины меньшей 7 игнорируются.
|
||||
@@ -360,7 +360,7 @@ include_toc: true
|
||||
середины слов, встречающихся во входном файле,
|
||||
упорядоченные по возрастанию длины (при равенстве – по первому вхождению).
|
||||
* Класс должен иметь имя `WordStatLengthMiddle`
|
||||
* *3839*
|
||||
* *3839* ✅
|
||||
* Назовём _аффиксами слова_
|
||||
его префикс и суффикс длины `n / 2`, где `n` — длина слова.
|
||||
Слова длины один игнорируются.
|
||||
@@ -368,7 +368,7 @@ include_toc: true
|
||||
аффиксы слов, встречающихся во входном файле,
|
||||
упорядоченные по возрастанию длины (при равенстве – по первому вхождению).
|
||||
* Класс должен иметь имя `WordStatLengthAffix`
|
||||
* *3435*
|
||||
* *3435* ✅
|
||||
* Назовём _суффиксом слова_ подстроку,
|
||||
состоящую из `n / 2` последних символов слова, где `n` — длина слова.
|
||||
Слова длины один игнорируются.
|
||||
@@ -376,12 +376,12 @@ include_toc: true
|
||||
суффиксы слов, встречающихся во входном файле,
|
||||
упорядоченные по возрастанию длины (при равенстве – по первому вхождению).
|
||||
* Класс должен иметь имя `WordStatLengthSuffix`
|
||||
* *3233*
|
||||
* *3233* ✅
|
||||
* Выходной файл должен содержать все различные
|
||||
слова, встречающиеся во входном файле,
|
||||
упорядоченные по возрастанию длины (при равенстве – по первому вхождению).
|
||||
* Класс должен иметь имя `WordStatLength`
|
||||
* *4142*
|
||||
* *4142* ✅
|
||||
* Назовём _серединой слова_ подстроку, полученную удалением
|
||||
первых и последних 3 символов слова.
|
||||
Слова длины меньшей 7 игнорируются.
|
||||
@@ -389,7 +389,7 @@ include_toc: true
|
||||
середины слов, встречающихся во входном файле,
|
||||
упорядоченные по возрастанию длины (при равенстве – по первому вхождению).
|
||||
* Класс должен иметь имя `WordStatLengthMiddle`
|
||||
* *4749*
|
||||
* *4749* ✅
|
||||
* Назовём _префиксом слова_ подстроку,
|
||||
состоящую из `n / 2` первых символов слова, где `n` — длина слова.
|
||||
Слова длины один игнорируются.
|
||||
|
||||
@@ -18,17 +18,28 @@ public class FastScanner {
|
||||
|
||||
boolean hasNextInt() {
|
||||
if (line == null) return false;
|
||||
while (pos < line.length() && Character.isWhitespace(line.charAt(pos))) pos++;
|
||||
return pos < line.length();
|
||||
while (pos < line.length() && (Character.isWhitespace(line.charAt(pos)) ||
|
||||
Character.getType(line.charAt(pos)) == Character.START_PUNCTUATION ||
|
||||
Character.getType(line.charAt(pos)) == Character.END_PUNCTUATION)) {
|
||||
pos++;
|
||||
}
|
||||
return pos < line.length() && (Character.isDigit(line.charAt(pos)) || line.charAt(pos) == '-');
|
||||
}
|
||||
|
||||
int nextInt() {
|
||||
while (pos < line.length() && Character.isWhitespace(line.charAt(pos))) pos++;
|
||||
while (pos < line.length() && (Character.isWhitespace(line.charAt(pos)) ||
|
||||
Character.getType(line.charAt(pos)) == Character.START_PUNCTUATION ||
|
||||
Character.getType(line.charAt(pos)) == Character.END_PUNCTUATION)) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
int start = pos;
|
||||
boolean negative = line.charAt(pos) == '-';
|
||||
if (negative) pos++;
|
||||
|
||||
while (pos < line.length() && Character.isDigit(line.charAt(pos))) pos++;
|
||||
while (pos < line.length() && Character.isDigit(line.charAt(pos))) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
for (int i = negative ? start + 1 : start; i < pos; i++) {
|
||||
|
||||
11
java/wordStat/WordInfo.java
Normal file
11
java/wordStat/WordInfo.java
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
 * Mutable per-key statistics entry used by the word-statistics programs.
 *
 * <p>All members are package-private and are read and updated directly by the
 * classes that build the statistics.
 */
public class WordInfo {
    /** Text of the key this entry describes. */
    String word;

    /** Occurrence counter; callers increment it in place. */
    int count;

    /** Index supplied by the creator of the entry; used as a sort tie-breaker. */
    int firstIndex;

    /**
     * Creates an entry with all three values supplied by the caller.
     *
     * @param word       text of the key
     * @param count      initial occurrence count
     * @param firstIndex ordering index assigned by the caller
     */
    WordInfo(String word, int count, int firstIndex) {
        this.firstIndex = firstIndex;
        this.count = count;
        this.word = word;
    }
}
|
||||
@@ -1,7 +1,54 @@
|
||||
package wordStat;
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Counts how often each word occurs in a text file and writes the result as
 * {@code word count} lines, in order of first appearance.
 *
 * <p>A word is a maximal run of letters, apostrophes and dash-punctuation
 * characters; words are compared case-insensitively (lower-cased).
 */
public class WordStat {
    /**
     * Program entry point.
     *
     * @param args exactly two elements: input file name and output file name
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("incorrect input!");
            System.err.println("usage: java WordStat inputFile outputFile");
            // Without this return the code below would index into a too-short array.
            return;
        }

        try {
            // Read everything first so the output file is only touched when the input was readable.
            Map<String, Integer> wordCount = countWords(args[0]);
            writeCounts(wordCount, args[1]);
        } catch (IOException ex) {
            System.err.println("An error occured: " + ex.getMessage());
        }
    }

    /** Tells whether {@code c} may be part of a word. */
    private static boolean isWordPart(char c) {
        return Character.isLetter(c)
                || c == '\''
                || Character.getType(c) == Character.DASH_PUNCTUATION;
    }

    /** Reads the UTF-8 input file and returns the word counts in first-seen order. */
    private static Map<String, Integer> countWords(String inputFileName) throws IOException {
        Map<String, Integer> wordCount = new LinkedHashMap<>();
        StringBuilder sb = new StringBuilder();

        try (BufferedReader r = new BufferedReader(new InputStreamReader(
                new FileInputStream(inputFileName), StandardCharsets.UTF_8))) {
            int data;
            while ((data = r.read()) != -1) {
                char c = (char) data;
                if (isWordPart(c)) {
                    sb.append(c);
                } else {
                    flushWord(sb, wordCount);
                }
            }
        }
        // The file may end in the middle of a word; count that word too.
        flushWord(sb, wordCount);
        return wordCount;
    }

    /** Registers the word accumulated in {@code sb}, if any, and empties the buffer. */
    private static void flushWord(StringBuilder sb, Map<String, Integer> wordCount) {
        if (sb.length() == 0) {
            return;
        }
        wordCount.merge(sb.toString().toLowerCase(), 1, Integer::sum);
        sb.setLength(0);
    }

    /** Writes one {@code word count} line per entry to the UTF-8 output file. */
    private static void writeCounts(Map<String, Integer> wordCount, String outputFileName)
            throws IOException {
        try (PrintWriter writer = new PrintWriter(outputFileName, StandardCharsets.UTF_8)) {
            for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
                writer.println(entry.getKey() + " " + entry.getValue());
            }
            // PrintWriter never throws on write failures; surface them explicitly.
            if (writer.checkError()) {
                throw new IOException("failed to write " + outputFileName);
            }
        }
    }
}
|
||||
|
||||
71
java/wordStat/WordStatLength.java
Normal file
71
java/wordStat/WordStatLength.java
Normal file
@@ -0,0 +1,71 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class WordStatLength {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat inputFile outputFile");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(new FileReader(inputFileName));
|
||||
|
||||
Map<String, WordInfo> wordMap = new HashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordIndex = 0;
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) || c == '\'') {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
|
||||
sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
|
||||
.thenComparingInt(w -> w.firstIndex));
|
||||
|
||||
|
||||
PrintWriter writer = new PrintWriter(outputFileName, "UTF-8");
|
||||
|
||||
for (WordInfo info : sortedWords) {
|
||||
writer.println(info.word + " " + info.count);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
94
java/wordStat/WordStatLengthAffix.java
Normal file
94
java/wordStat/WordStatLengthAffix.java
Normal file
@@ -0,0 +1,94 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class WordStatLengthAffix {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat inputFile outputFile");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(new FileReader(inputFileName));
|
||||
|
||||
Map<String, WordInfo> wordMap = new HashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordIndex = 0;
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) || c == '\'') {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
String prefix = word.substring(0, word.length() / 2);
|
||||
String suffix = word.substring(word.length() - word.length() / 2);
|
||||
if (wordMap.containsKey(prefix)) {
|
||||
wordMap.get(prefix).count++;
|
||||
} else {
|
||||
wordMap.put(prefix, new WordInfo(prefix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
if (wordMap.containsKey(suffix)) {
|
||||
wordMap.get(suffix).count++;
|
||||
} else {
|
||||
wordMap.put(suffix, new WordInfo(suffix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
|
||||
}
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
String prefix = word.substring(0, word.length() / 2);
|
||||
String suffix = word.substring(word.length() - word.length() / 2);
|
||||
if (wordMap.containsKey(prefix)) {
|
||||
wordMap.get(prefix).count++;
|
||||
} else {
|
||||
wordMap.put(prefix, new WordInfo(prefix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
if (wordMap.containsKey(suffix)) {
|
||||
wordMap.get(suffix).count++;
|
||||
} else {
|
||||
wordMap.put(suffix, new WordInfo(suffix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
|
||||
sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
|
||||
.thenComparingInt(w -> w.firstIndex));
|
||||
|
||||
|
||||
PrintWriter writer = new PrintWriter(outputFileName, "UTF-8");
|
||||
|
||||
for (WordInfo info : sortedWords) {
|
||||
writer.println(info.word + " " + info.count);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
79
java/wordStat/WordStatLengthMiddle.java
Normal file
79
java/wordStat/WordStatLengthMiddle.java
Normal file
@@ -0,0 +1,79 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class WordStatLengthMiddle {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat inputFile outputFile");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(new FileReader(inputFileName));
|
||||
|
||||
Map<String, WordInfo> wordMap = new HashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordIndex = 0;
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) || c == '\'') {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() >= 7) {
|
||||
word = word.substring(3, word.length() - 3);
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() >= 7) {
|
||||
word = word.substring(3, word.length() - 3);
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
|
||||
sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
|
||||
.thenComparingInt(w -> w.firstIndex));
|
||||
|
||||
|
||||
PrintWriter writer = new PrintWriter(outputFileName, "UTF-8");
|
||||
|
||||
for (WordInfo info : sortedWords) {
|
||||
writer.println(info.word + " " + info.count);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
79
java/wordStat/WordStatLengthPrefix.java
Normal file
79
java/wordStat/WordStatLengthPrefix.java
Normal file
@@ -0,0 +1,79 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class WordStatLengthPrefix {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat inputFile outputFile");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(new FileReader(inputFileName));
|
||||
|
||||
Map<String, WordInfo> wordMap = new HashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordIndex = 0;
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) || c == '\'') {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
String prefix = word.substring(0, word.length() / 2);
|
||||
if (wordMap.containsKey(prefix)) {
|
||||
wordMap.get(prefix).count++;
|
||||
} else {
|
||||
wordMap.put(prefix, new WordInfo(prefix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
String prefix = word.substring(0, word.length() / 2);
|
||||
if (wordMap.containsKey(prefix)) {
|
||||
wordMap.get(prefix).count++;
|
||||
} else {
|
||||
wordMap.put(prefix, new WordInfo(prefix, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
|
||||
sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
|
||||
.thenComparingInt(w -> w.firstIndex));
|
||||
|
||||
|
||||
PrintWriter writer = new PrintWriter(outputFileName, "UTF-8");
|
||||
|
||||
for (WordInfo info : sortedWords) {
|
||||
writer.println(info.word + " " + info.count);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
79
java/wordStat/WordStatLengthSuffix.java
Normal file
79
java/wordStat/WordStatLengthSuffix.java
Normal file
@@ -0,0 +1,79 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class WordStatLengthSuffix {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("incorrect input!");
|
||||
System.err.println("usage: java WordStat inputFile outputFile");
|
||||
}
|
||||
|
||||
String inputFileName = args[0];
|
||||
String outputFileName = args[1];
|
||||
try {
|
||||
BufferedReader r = new BufferedReader(new FileReader(inputFileName));
|
||||
|
||||
Map<String, WordInfo> wordMap = new HashMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordIndex = 0;
|
||||
|
||||
int data = r.read();
|
||||
while (data != -1) {
|
||||
char c = (char) data;
|
||||
|
||||
if (Character.getType(c) == Character.DASH_PUNCTUATION ||
|
||||
Character.isLetter(c) || c == '\'') {
|
||||
sb.append(c);
|
||||
} else {
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
word = word.substring(word.length() - word.length() / 2);
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
sb.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
data = r.read();
|
||||
}
|
||||
|
||||
|
||||
if (sb.length() > 0) {
|
||||
String word = sb.toString().toLowerCase();
|
||||
if (word.length() != 1) {
|
||||
word = word.substring(word.length() - word.length() / 2);
|
||||
if (wordMap.containsKey(word)) {
|
||||
wordMap.get(word).count++;
|
||||
} else {
|
||||
wordMap.put(word, new WordInfo(word, 1, wordIndex));
|
||||
wordIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
|
||||
List<WordInfo> sortedWords = new ArrayList<>(wordMap.values());
|
||||
sortedWords.sort(Comparator.comparingInt((WordInfo w) -> w.word.length())
|
||||
.thenComparingInt(w -> w.firstIndex));
|
||||
|
||||
|
||||
PrintWriter writer = new PrintWriter(outputFileName, "UTF-8");
|
||||
|
||||
for (WordInfo info : sortedWords) {
|
||||
writer.println(info.word + " " + info.count);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (Exception ex) {
|
||||
System.err.println("An error occured: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
28
java/wspp/IntList.java
Normal file
28
java/wspp/IntList.java
Normal file
@@ -0,0 +1,28 @@
|
||||
package wspp;
|
||||
|
||||
/**
 * Minimal growable list of primitive {@code int} values.
 *
 * <p>Elements can only be appended; the backing array doubles whenever it is full.
 */
public class IntList {
    private int[] array;
    private int size;

    /** Creates an empty list with room for ten elements. */
    public IntList() {
        array = new int[10];
        size = 0;
    }

    /**
     * Appends {@code value} to the end of the list, growing the storage if needed.
     *
     * @param value element to append
     */
    public void add(int value) {
        if (size >= array.length) {
            int[] newArray = new int[array.length * 2];
            System.arraycopy(array, 0, newArray, 0, size);
            array = newArray;
        }
        array[size++] = value;
    }

    /**
     * Returns the element at {@code index}.
     *
     * @param index zero-based position, {@code 0 <= index < size()}
     * @return the stored value
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside the filled part
     */
    public int get(int index) {
        // The backing array is usually longer than the list; without this check
        // reading an unused slot would silently yield 0 instead of failing.
        if (index < 0 || index >= size) {
            throw new ArrayIndexOutOfBoundsException(
                    "Index " + index + " out of bounds for length " + size);
        }
        return array[index];
    }

    /**
     * Returns the number of elements added so far.
     *
     * @return current element count
     */
    public int size() {
        return size;
    }
}
|
||||
6
java/wspp/WordInfo.java
Normal file
6
java/wspp/WordInfo.java
Normal file
@@ -0,0 +1,6 @@
|
||||
package wspp;
|
||||
|
||||
public class WordInfo {
|
||||
int count;
|
||||
IntList positions;
|
||||
}
|
||||
74
java/wspp/WordScanner.java
Normal file
74
java/wspp/WordScanner.java
Normal file
@@ -0,0 +1,74 @@
|
||||
package wspp;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
 * Reads a UTF-8 text file line by line and splits it into lower-cased words.
 *
 * <p>A word is a maximal run of letters, digits, apostrophes, {@code $},
 * {@code _} and dash-punctuation characters. The scanner also tracks the
 * one-based number of the line it is currently positioned on.
 *
 * <p>Typical use: call {@link #hasNextWord()} and, while it returns
 * {@code true}, {@link #nextWord()}. The scanner is {@link AutoCloseable}, so it
 * can be used in a try-with-resources statement.
 */
public class WordScanner implements AutoCloseable {
    private BufferedReader br;
    private String line = null;
    private int pos = 0;
    private int lineNumber = 0;

    /**
     * Opens {@code fileName} for scanning.
     *
     * @param fileName path of the file to read
     * @throws IOException if the file cannot be opened
     */
    public WordScanner(String fileName) throws IOException {
        br = open(fileName);
    }

    /** Creates the buffered UTF-8 reader shared by the constructor and {@link #reset}. */
    private static BufferedReader open(String fileName) throws IOException {
        return new BufferedReader(new InputStreamReader(
                new FileInputStream(fileName), StandardCharsets.UTF_8));
    }

    /**
     * Makes sure there is a current line: keeps the present one while unread
     * characters remain, otherwise loads the next line from the file.
     */
    private boolean hasNextLine() throws IOException {
        if (line != null && pos < line.length()) {
            return true;
        }
        line = br.readLine();
        if (line != null) {
            lineNumber++;
        }
        pos = 0;
        return line != null;
    }

    /** Tells whether {@code c} may be part of a word. */
    private static boolean isWordChar(char c) {
        return Character.isLetter(c) || Character.isDigit(c)
                || c == '\'' || c == '$' || c == '_'
                || Character.getType(c) == Character.DASH_PUNCTUATION;
    }

    /** Advances {@code pos} past any non-word characters on the current line. */
    private void skipSeparators() {
        while (pos < line.length() && !isWordChar(line.charAt(pos))) {
            pos++;
        }
    }

    /**
     * Positions the scanner at the start of the next word, reading further
     * lines as necessary.
     *
     * @return {@code true} if another word is available
     * @throws IOException if reading the file fails
     */
    public boolean hasNextWord() throws IOException {
        while (hasNextLine()) {
            skipSeparators();
            if (pos < line.length()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the next word on the current line, lower-cased, and moves past it.
     * If the rest of the current line holds no word, an empty string is returned.
     *
     * @return the next word in lower case
     * @throws IllegalStateException if no line has been read yet or the input is exhausted
     */
    public String nextWord() {
        if (line == null) {
            // Previously this surfaced as a bare NullPointerException.
            throw new IllegalStateException("no current line; call hasNextWord() first");
        }
        skipSeparators();

        int start = pos;
        while (pos < line.length() && isWordChar(line.charAt(pos))) {
            pos++;
        }
        return line.substring(start, pos).toLowerCase();
    }

    /**
     * Returns the one-based number of the most recently read line.
     *
     * @return current line number, or 0 before the first line is read
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if closing fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }

    /**
     * Closes the current file and restarts scanning on {@code fileName}.
     *
     * @param fileName path of the file to read next
     * @throws IOException if closing the old file or opening the new one fails
     */
    public void reset(String fileName) throws IOException {
        br.close();
        br = open(fileName);
        line = null;
        pos = 0;
        lineNumber = 0;
    }
}
|
||||
64
java/wspp/Wspp.java
Normal file
64
java/wspp/Wspp.java
Normal file
@@ -0,0 +1,64 @@
|
||||
package wspp;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
public class Wspp {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("Usage: java Wspp inputFile outputFile");
|
||||
return;
|
||||
}
|
||||
|
||||
String inputFile = args[0];
|
||||
String outputFile = args[1];
|
||||
|
||||
try {
|
||||
WordScanner scanner = new WordScanner(inputFile);
|
||||
|
||||
Map<String, WordInfo> wordMap = new LinkedHashMap<>();
|
||||
int wordPosition = 1;
|
||||
|
||||
while (scanner.hasNextWord()) {
|
||||
String word = scanner.nextWord();
|
||||
|
||||
if (wordMap.containsKey(word)) {
|
||||
WordInfo info = wordMap.get(word);
|
||||
info.count++;
|
||||
info.positions.add(wordPosition);
|
||||
} else {
|
||||
WordInfo info = new WordInfo();
|
||||
info.count = 1;
|
||||
info.positions = new IntList();
|
||||
info.positions.add(wordPosition);
|
||||
wordMap.put(word, info);
|
||||
}
|
||||
|
||||
wordPosition++;
|
||||
}
|
||||
|
||||
scanner.close();
|
||||
|
||||
PrintWriter writer = new PrintWriter(
|
||||
new OutputStreamWriter(
|
||||
new FileOutputStream(outputFile), StandardCharsets.UTF_8));
|
||||
|
||||
for (Map.Entry<String, WordInfo> entry : wordMap.entrySet()) {
|
||||
String word = entry.getKey();
|
||||
WordInfo info = entry.getValue();
|
||||
|
||||
writer.print(word + " " + info.count);
|
||||
for (int i = 0; i < info.positions.size(); i++) {
|
||||
writer.print(" " + info.positions.get(i));
|
||||
}
|
||||
writer.println();
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (IOException e) {
|
||||
System.err.println("Error: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
92
java/wspp/WsppPos.java
Normal file
92
java/wspp/WsppPos.java
Normal file
@@ -0,0 +1,92 @@
|
||||
package wspp;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
public class WsppPos {
|
||||
public static void main(String[] args) {
|
||||
if (args.length != 2) {
|
||||
System.err.println("Usage: java WsppPos inputFile outputFile");
|
||||
return;
|
||||
}
|
||||
|
||||
String inputFile = args[0];
|
||||
String outputFile = args[1];
|
||||
|
||||
try {
|
||||
// Первый проход: считаем количество слов в каждой строке
|
||||
WordScanner scanner = new WordScanner(inputFile);
|
||||
Map<Integer, Integer> wordsPerLine = new HashMap<>();
|
||||
|
||||
while (scanner.hasNextWord()) {
|
||||
scanner.nextWord();
|
||||
int line = scanner.getLineNumber();
|
||||
wordsPerLine.put(line, wordsPerLine.getOrDefault(line, 0) + 1);
|
||||
}
|
||||
scanner.close();
|
||||
|
||||
// Второй проход: собираем статистику
|
||||
scanner = new WordScanner(inputFile);
|
||||
Map<String, WordInfo> wordMap = new LinkedHashMap<>();
|
||||
Map<Integer, Integer> currentPosInLine = new HashMap<>();
|
||||
|
||||
while (scanner.hasNextWord()) {
|
||||
String word = scanner.nextWord();
|
||||
int lineNum = scanner.getLineNumber();
|
||||
|
||||
// Позиция с начала строки
|
||||
int posInLine = currentPosInLine.getOrDefault(lineNum, 0) + 1;
|
||||
currentPosInLine.put(lineNum, posInLine);
|
||||
|
||||
// Пересчитываем в позицию с конца строки
|
||||
int totalWordsInLine = wordsPerLine.get(lineNum);
|
||||
int posFromEnd = totalWordsInLine - posInLine + 1;
|
||||
|
||||
if (wordMap.containsKey(word)) {
|
||||
WordInfo info = wordMap.get(word);
|
||||
info.count++;
|
||||
info.lineNumbers.add(lineNum);
|
||||
info.positions.add(posFromEnd);
|
||||
} else {
|
||||
WordInfo info = new WordInfo();
|
||||
info.count = 1;
|
||||
info.lineNumbers = new IntList();
|
||||
info.positions = new IntList();
|
||||
info.lineNumbers.add(lineNum);
|
||||
info.positions.add(posFromEnd);
|
||||
wordMap.put(word, info);
|
||||
}
|
||||
}
|
||||
|
||||
scanner.close();
|
||||
|
||||
// Запись результата
|
||||
PrintWriter writer = new PrintWriter(
|
||||
new OutputStreamWriter(
|
||||
new FileOutputStream(outputFile), StandardCharsets.UTF_8));
|
||||
|
||||
for (Map.Entry<String, WordInfo> entry : wordMap.entrySet()) {
|
||||
String word = entry.getKey();
|
||||
WordInfo info = entry.getValue();
|
||||
|
||||
writer.print(word + " " + info.count);
|
||||
for (int i = 0; i < info.lineNumbers.size(); i++) {
|
||||
writer.print(" " + info.lineNumbers.get(i) + ":" + info.positions.get(i));
|
||||
}
|
||||
writer.println();
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
} catch (IOException e) {
|
||||
System.err.println("Error: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
static class WordInfo {
|
||||
int count;
|
||||
IntList lineNumbers;
|
||||
IntList positions;
|
||||
}
|
||||
}
|
||||
1763
lectures/README.md
Normal file
1763
lectures/README.md
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user