<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
- <component name="NewModuleRootManager" inherit-compiler-output="true">
+ <component name="NewModuleRootManager">
+ <output url="file://$MODULE_DIR$/resources/tools/fwstk/bin" />
+ <output-test url="file://$MODULE_DIR$/out/test/fluidbook_tools" />
<exclude-output />
<content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/resources/tools/fwstk/project_resources" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/resources/tools/fwstk/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" packagePrefix="Fluidbook\Tools\" />
<excludeFolder url="file://$MODULE_DIR$/vendor/composer" />
<excludeFolder url="file://$MODULE_DIR$/vendor/brick/math" />
<excludeFolder url="file://$MODULE_DIR$/vendor/symfony/debug" />
<excludeFolder url="file://$MODULE_DIR$/vendor/maximebf/debugbar" />
<excludeFolder url="file://$MODULE_DIR$/vendor/barryvdh/laravel-debugbar" />
+ <excludeFolder url="file://$MODULE_DIR$/resources/tools/fwstk/bin" />
+ <excludeFolder url="file://$MODULE_DIR$/resources/tools/fwstk/out" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="module-library" exported="">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/avalon-framework-4.1.4.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/bcmail-jdk16-146.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/bcprov-jdk16-146.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/commons-io-2.6.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/commons-lang3-3.10.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/commons-logging-1.2.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/commons-text-1.8.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/fontbox-1.8.16.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/jempbox-1.8.16.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/resources/tools/fwstk/lib/pdfbox-1.8.16.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
</component>
</module>
\ No newline at end of file
import java.util.Calendar;
import java.util.List;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringEscapeUtils;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
public class Main {
- private static String version = "0.93";
- private static String date = "20110904";
+ private static String version = "0.94";
+ private static String date = "20210915";
static int threads = 1;
static String input = "";
static Float linkOffsetX = 0.0f;
static Float linkOffsetY = 0.0f;
public static void main(String[] args) throws IOException,
- COSVisitorException, ClassNotFoundException, NullPointerException,
- InterruptedException {
+ COSVisitorException, ClassNotFoundException, NullPointerException,
+ InterruptedException {
// An array length is never negative, so the former "< 0" test could not match;
// "< 1" makes the usage/version output below appear when no arguments are given.
if (args.length < 1) {
printUsage();
printVersion();
return;
} else if (args[i].trim().compareTo("-h") == 0
- || args[i].trim().compareTo("--help") == 0) {
+ || args[i].trim().compareTo("--help") == 0) {
printUsage();
return;
} else if (args[i].trim().compareTo("--trim") == 0) {
stripper.process(nextPage, i);
Page layout = stripper.getLayout();
BufferedWriter out
- = new BufferedWriter(
- new OutputStreamWriter(
- new FileOutputStream(layoutOutput.replace("%d", ""
- + i)), "UTF-8"));
+ = new BufferedWriter(
+ new OutputStreamWriter(
+ new FileOutputStream(layoutOutput.replace("%d", ""
+ + i)), "UTF-8"));
out.write(layout.asJSON());
out.close();
}
private static Boolean getInfos(PDDocument doc) throws IOException, COSVisitorException {
ArrayList<String> res = new ArrayList<>();
+ res.add("Parsed by FWSTK: \t\t\t" + Main.version + " (" + Main.date + ")");
// General information
String[] fields = {"Author", "Title", "CreationDate", "Creator",
- "Keywords", "ModificationDate", "Producer", "Subject",
- "Trapped", "Version"};
+ "Keywords", "ModificationDate", "Producer", "Subject",
+ "Trapped", "Version"};
PDDocumentInformation infos = doc.getDocumentInformation();
for (String k : fields) {
String v = infos.getCustomMetadataValue(k);
List<PDPage> list = doc.getDocumentCatalog().getAllPages();
int pages = list.size();
+
res.add("Pages:\t\t\t" + pages);
Boolean changes = false;
for (int i = 0; i < pages; i++) {
// Size
if (page.getRotation() != null
- && (page.getRotation() == 90 || page.getRotation() == 270)) {
+ && (page.getRotation() == 90 || page.getRotation() == 270)) {
res.add("Page " + numero + " size:\t\t"
- + Math.abs(cropBox.getHeight()) + " pts x "
- + Math.abs(cropBox.getWidth()) + " pts");
+ + Math.abs(cropBox.getHeight()) + " pts x "
+ + Math.abs(cropBox.getWidth()) + " pts");
// Boxes
res.add("Page " + numero + " CropBox:\t"
- + cropBox.getLowerLeftY() + "\t"
- + cropBox.getUpperRightX() + "\t"
- + cropBox.getUpperRightY() + "\t"
- + cropBox.getLowerLeftX() + "\t");
+ + cropBox.getLowerLeftY() + "\t"
+ + cropBox.getUpperRightX() + "\t"
+ + cropBox.getUpperRightY() + "\t"
+ + cropBox.getLowerLeftX() + "\t");
res.add("Page " + numero + " MediaBox:\t"
- + mediaBox.getLowerLeftY() + "\t"
- + mediaBox.getUpperRightX() + "\t"
- + mediaBox.getUpperRightY() + "\t"
- + mediaBox.getLowerLeftX() + "\t");
+ + mediaBox.getLowerLeftY() + "\t"
+ + mediaBox.getUpperRightX() + "\t"
+ + mediaBox.getUpperRightY() + "\t"
+ + mediaBox.getLowerLeftX() + "\t");
res.add("Page " + numero + " TrimBox:\t"
- + trimBox.getLowerLeftY() + "\t"
- + trimBox.getUpperRightX() + "\t"
- + trimBox.getUpperRightY() + "\t"
- + trimBox.getLowerLeftX() + "\t");
+ + trimBox.getLowerLeftY() + "\t"
+ + trimBox.getUpperRightX() + "\t"
+ + trimBox.getUpperRightY() + "\t"
+ + trimBox.getLowerLeftX() + "\t");
} else {
res.add("Page " + numero + " size:\t\t"
- + Math.abs(cropBox.getWidth()) + " pts x "
- + Math.abs(cropBox.getHeight()) + " pts");
+ + Math.abs(cropBox.getWidth()) + " pts x "
+ + Math.abs(cropBox.getHeight()) + " pts");
// Boxes
res.add("Page " + numero + " CropBox:\t"
- + cropBox.getLowerLeftX() + "\t"
- + cropBox.getUpperRightY() + "\t"
- + cropBox.getUpperRightX() + "\t"
- + cropBox.getLowerLeftY() + "\t");
+ + cropBox.getLowerLeftX() + "\t"
+ + cropBox.getUpperRightY() + "\t"
+ + cropBox.getUpperRightX() + "\t"
+ + cropBox.getLowerLeftY() + "\t");
res.add("Page " + numero + " MediaBox:\t"
- + mediaBox.getLowerLeftX() + "\t"
- + mediaBox.getUpperRightY() + "\t"
- + mediaBox.getUpperRightX() + "\t"
- + mediaBox.getLowerLeftY() + "\t");
+ + mediaBox.getLowerLeftX() + "\t"
+ + mediaBox.getUpperRightY() + "\t"
+ + mediaBox.getUpperRightX() + "\t"
+ + mediaBox.getLowerLeftY() + "\t");
res.add("Page " + numero + " TrimBox:\t"
- + trimBox.getLowerLeftX() + "\t"
- + trimBox.getUpperRightY() + "\t"
- + trimBox.getUpperRightX() + "\t"
- + trimBox.getLowerLeftY() + "\t");
+ + trimBox.getLowerLeftX() + "\t"
+ + trimBox.getUpperRightY() + "\t"
+ + trimBox.getUpperRightX() + "\t"
+ + trimBox.getLowerLeftY() + "\t");
}
}
}
for (String s : res) {
- System.out.println(s);
+ System.out.println(StringUtils.trim(s));
}
return changes;
}
}
res.add("NumberSectionsDelimiters:\t\t"
- + delimiters.substring(0, delimiters.length() - 1));
+ + delimiters.substring(0, delimiters.length() - 1));
}
private static void addBookmark(PDDocument doc, ArrayList<String> res,
PDOutlineNode bookmark, int level) throws IOException {
PDOutlineItem current = bookmark.getFirstChild();
while (current != null) {
- res.add("BookmarkTitle:\t\t" + current.getTitle().trim());
+ res.add("BookmarkTitle:\t\t" + StringEscapeUtils.escapeHtml4(current.getTitle()));
res.add("BookmarkLevel:\t\t" + level);
res.add("BookmarkPage:\t\t"
- + getPageFromAction(doc, current.getAction()));
+ + getPageFromAction(doc, current.getAction()));
addBookmark(doc, res, current, level + 1);
current = current.getNextSibling();
}
private static void cutDocument(PDDocument doc, String input,
String output, String cutmode) throws COSVisitorException,
- IOException {
+ IOException {
System.out.println("Cut document of " + doc.getNumberOfPages()
- + " with mode " + cutmode);
+ + " with mode " + cutmode);
ArrayList<PDDocument> copies = duplicatePages(doc, input, cutmode);
cutPages(doc, cutmode);
newbox.move(decalage, 0f);
System.out.println("Set cropbox of page " + page + " from "
- + pdfPage.getCropBox() + " to " + newbox + " (offset : "
- + decalage + ")");
+ + pdfPage.getCropBox() + " to " + newbox + " (offset : "
+ + decalage + ")");
pdfPage.setCropBox(newbox);
pdfPage.setMediaBox(newbox);
continue;
}
System.out.println("Duplicate page " + page + " :: cursor is at "
- + cursor);
+ + cursor);
// Duplicate page
for (int j = 0; j < duplicateTime; j++) {
List<PDPage> l = copies.get(j).getDocumentCatalog().getAllPages();
private void extractTexts(PDDocument doc, String textsOutput, String method,
Integer[] pages, String ignoredSeparators, String input) throws IOException, ClassNotFoundException,
- NullPointerException, InterruptedException {
+ NullPointerException, InterruptedException {
long s = Calendar.getInstance().getTimeInMillis();
PDDocument d;
int totalThreads = Math.max(1,
- Math.min(Math.round(pages.length / 50.0f), Main.threads));
+ Math.min(Math.round(pages.length / 50.0f), Main.threads));
System.out.println("Total threads " + totalThreads);
}
System.out.println("Extraction des textes with " + method + " : "
- + ((Calendar.getInstance().getTimeInMillis() - s) / 1000)
- + "s");
+ + ((Calendar.getInstance().getTimeInMillis() - s) / 1000)
+ + "s");
}
public static void updateCropBox(PDDocument doc, String output,
String refbox, Integer[] pages, String defined) throws IOException,
- COSVisitorException {
+ COSVisitorException {
System.out.println("updateCropBox");
if (!"".equals(defined)) {
updateCropBoxDefined(doc, defined);
}
private static void updateCropBoxDefined(PDDocument doc, String defined)
- throws IOException, COSVisitorException {
+ throws IOException, COSVisitorException {
// String.split interprets its argument as a regular expression; a bare "*" is a
// dangling quantifier and throws PatternSyntaxException, so the asterisk is escaped
// to split on the literal character.
String[] e = defined.split("\\*");
for (int i = 0; i < e.length; i++) {
String[] e1 = e[i].split(",");
}
public static void saveLinks(String file, ArrayList<Link> listLinks)
- throws IOException {
+ throws IOException {
FileIO out = new FileIO(file);
out.open("w");
out.output.writeBytes(Link.header());
}
public static ArrayList<Link> extractLinksOfPage(PDDocument doc, int pageNumber, PDPage p)
- throws IOException {
+ throws IOException {
System.out.println(pageNumber);
ArrayList<Link> listLinks = new ArrayList<>();
Link myLink;
System.out.println(link.getRectangle().getHeight());
myLink.rect = link.getRectangle();
if (myLink.rect.getWidth() == 0.0
- || myLink.rect.getHeight() == 0.0) {
+ || myLink.rect.getHeight() == 0.0) {
System.out.println("Skip link :: surface == 0");
continue;
}