diff --git a/pom.xml b/pom.xml
index 52adb1c..0fb13ab 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,6 +14,26 @@
pdfbox
2.0.29
+
+ org.apache.poi
+ poi-ooxml
+ 5.2.3
+
+
+ org.apache.poi
+ poi
+ 5.2.3
+
+
+ org.apache.xmlbeans
+ xmlbeans
+ 5.1.1
+
+
+ org.apache.logging.log4j
+ log4j-core
+ 2.18.0
+
diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java
index d98f24c..9b0b100 100644
--- a/src/main/java/domain/FileLoader.java
+++ b/src/main/java/domain/FileLoader.java
@@ -14,14 +14,14 @@ public class FileLoader {
public FileLoader() {
this.inputFile = null;
}
-
+ // AI-generated method
public File loadFileGUI() {
try {
JFileChooser fileChooser = new JFileChooser();
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
+ fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); // added manually
+ fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); // added manually
+ fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); // added manually
int result = fileChooser.showOpenDialog(null);
if (result == JFileChooser.APPROVE_OPTION) {
diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java
index ea4b6d5..c6b2e20 100644
--- a/src/main/java/domain/TextProcessing.java
+++ b/src/main/java/domain/TextProcessing.java
@@ -2,6 +2,12 @@ package domain;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFTextShape;
import java.io.*;
import java.util.HashMap;
@@ -9,31 +15,44 @@ import java.util.HashMap;
public class TextProcessing {
public String formatToText(File file, String format) {
- PDDocument document;
try {
+ StringBuilder text = new StringBuilder();
if (file != null) {
switch (format) {
case "txt":
-
- break;
-
+ FileReader fileReader = new FileReader(file);
+ BufferedReader reader = new BufferedReader(fileReader);
+ String line;
+ while((line = reader.readLine()) != null) {
+ text.append(line).append("\n");
+ }
+ return text.toString();
case "pdf":
- document = PDDocument.load(file);
+ PDDocument document = PDDocument.load(file);
PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document);
case "docx":
-
- break;
-
+ XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
+ for(XWPFParagraph paragraph : officeDocument.getParagraphs()) {
+ text.append(paragraph.getText()).append("\n");
+ }
+ return text.toString();
case "pptx":
-
- break;
+ XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(file));
+ for (XSLFSlide slide : ppt.getSlides()) {
+ for (XSLFShape shape : slide.getShapes()) {
+ if (shape instanceof XSLFTextShape) {
+ text.append(((XSLFTextShape) shape).getText()).append("\n");
+ }
+ }
+ }
+ return text.toString();
}
}
}
- catch (IOException ex) {
- throw new RuntimeException(ex);
+ catch (IOException e) {
+ throw new RuntimeException(e);
}
return "Nothing found!";
}
@@ -53,18 +72,20 @@ public class TextProcessing {
public HashMap tokenizingText(String text){
HashMap filteredWords = new HashMap<>();
try {
+ if(!text.isEmpty()) {
//Tokenizing der Wörter
- String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
+ String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
String[] textWords = text.split(splitter);
- for(String word : textWords){
+ for (String word : textWords) {
if (filteredWords.containsKey(word)) {
filteredWords.compute(word, (k, counter) -> counter + 1);
- }
- else {
+ } else {
filteredWords.put(word, 1);
}
}
- } catch (Exception ex) {
+ }
+ }
+ catch (Exception ex) {
throw new RuntimeException(ex);
}
return filteredWords;