<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
- <list default="true" id="f146bc67-2578-4de3-9db2-94d2d43e9e83" name="Default" comment="wip #5410" />
+ <list default="true" id="f146bc67-2578-4de3-9db2-94d2d43e9e83" name="Default" comment="wip #5410">
+ <change beforePath="$PROJECT_DIR$/../../../src/PDFTools.php" beforeDir="false" afterPath="$PROJECT_DIR$/../../../src/PDFTools.php" afterDir="false" />
+ </list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<recent name="H:\Works\cubeExtranet\fluidbook\tools\fwstk" />
</key>
</component>
- <component name="RunManager" selected="Application.extract texts">
+ <component name="RunManager" selected="Application.extract layout">
<configuration default="true" type="Applet">
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
<method v="2">
<configuration name="extract layout" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME" value="com.fluidbook.fwstk.Main" />
<module name="fwstk" />
- <option name="PROGRAM_PARAMETERS" value="--input C:\Users\Vincent\Desktop\original.pdf --layout C:\Users\Vincent\Desktop\loutres\p%d.fby --threads 1" />
- <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
+ <option name="PROGRAM_PARAMETERS" value="--input C:\Users\vince\Desktop\20929.pdf --mode robust --layout C:\Users\vince\Desktop\20929\p%d.fby --threads 1" />
<method v="2">
<option name="Make" enabled="true" />
</method>
<workItem from="1694157597994" duration="9552000" />
<workItem from="1694187452028" duration="14000" />
<workItem from="1694189811041" duration="768000" />
- <workItem from="1694545035743" duration="798000" />
+ <workItem from="1694545035743" duration="1398000" />
+ <workItem from="1694674242867" duration="907000" />
</task>
<task id="LOCAL-00001" summary="wip #1111 @0.5">
<created>1487172253077</created>
while (pagesIter.hasNext()) {
stripper = new LayoutStripper();
- stripper.setRobust(robust);
+ stripper.setSplitAllChars(robust);
stripper.setIgnoredSeparators(ignoredSeparators);
PDPage nextPage = (PDPage) pagesIter.next();
i++;
try {
LayoutStripper layoutStripper;
layoutStripper = new LayoutStripper();
- layoutStripper.setRobust(this.robust);
layoutStripper.setIgnoredSeparators(ignoredSeparators);
layoutStripper.process((PDPage) doc.getDocumentCatalog().getAllPages().get(i - 1), i);
String fhfile = file.replaceFirst("%s", "fh");
fbtext = html2text(fbtext);
- System.out.println("this.robust :: "+this.robust);
if (this.robust) {
fbtext=StringUtil.removeSpaces(fbtext);
}
*/
public class Group extends LayoutElement {
- LinkedList<Word> words;
- float size;
- float rotation;
- protected float spaceWidth;
- protected Word currentWord;
- protected LinkedList<Word> condensedWords;
- protected LinkedList<Letter> letters;
- protected Boolean wordsMade;
- String ignoredSeparators;
-
- public Group(float size, float spaceWidth, String ignoredSeparators, float rotation) {
- this.size = size;
- this.rotation = rotation;
-
- this.wordsMade = false;
-
- this.ignoredSeparators = ignoredSeparators;
-
- this.spaceWidth = spaceWidth;
- this.words = new LinkedList<>();
- this.letters = new LinkedList<>();
- }
-
- public String asJSON(PDRectangle cropbox, float y, float scaleX, float scaleY, float rotation) {
- makeWords();
-
- ArrayList<String> res = new ArrayList<String>();
- for (Word word : words) {
- if (word.isSeparator()) {
- continue;
- }
- String json = word.asJSON(cropbox, y, scaleX, scaleY, rotation);
- if ("".equals(json)) {
- continue;
- }
- res.add(json);
- }
- return String.join(",", res);
- }
-
- public String asText(PDRectangle cropbox) {
- makeWords();
- ArrayList<String> res = new ArrayList<String>();
- for (Word word : words) {
- String text = word.asText(cropbox);
- if ("".equals(text)) {
- continue;
- }
- res.add(text);
- }
- return String.join("", res);
- }
-
- public boolean equals(float size) {
- return size == this.size;
- }
-
- public void addText(float x, float y, float width, float height, String text) {
- Letter added;
-
- if (" ".equals(text)) {
- added = new Space(x, y, width, height);
- } else if (text.matches(StringUtil.separatorsRegexp(ignoredSeparators))) {
- added = new Separator(text, x, y, width, height);
- } else {
- added = new Letter(text, x, y, width, height);
- }
- letters.add(added);
- }
-
- protected void makeWords() {
- if (this.wordsMade) {
- return;
- }
- this.wordsMade = true;
- Word currentWord = new Word();
-
- for (int i = 0; i < letters.size(); i++) {
- Letter l = letters.get(i);
- Boolean good = currentWord.goodCandidate(l, spaceWidth, rotation);
- if (good) {
- // Si la lettre est un bon candidat
- currentWord.addLetter(l);
- } else {
- // Sinon, on clôt le mot,
- words.add(currentWord);
- // On en recrée un nouveau
- currentWord = new Word();
- // Et on ajoute la lettre
- currentWord.addLetter(l);
- if (l.isSeparator()) {
- // Si le mot ajouté est un espace, on ferme
- // le mot directement
- words.add(currentWord);
- currentWord = new Word();
- }
- }
- }
-
- words.add(currentWord);
- addMissingSpaces();
- }
-
- protected void addMissingSpaces() {
- LinkedList<Word> wordsWithSpaces = new LinkedList<>();
-
- for (int i = 0; i < words.size(); i++) {
- Word w = words.get(i);
- Word future = null;
- if (i + 1 < words.size()) {
- future = words.get(i + 1);
- }
-
- wordsWithSpaces.add(w);
-
- if (!w.isSeparator() && future != null && !future.isSeparator()) {
- // Si le mot en cours n'est pas un espace,
- // quel mot précédent n'en était pas un
- // et que le mot suivant existe
-
- // On ajoute un espace à la liste des mots
- spaceWidth = future.startX() - w.nextPosition();
- if (spaceWidth == 0.0f) {
- continue;
- }
-
- Space sp = new Space(w.nextPosition(), 0.0f, spaceWidth, 1.0f);
- Word spw = new Word();
- spw.addLetter(sp);
- wordsWithSpaces.add(spw);
- }
- }
- words = wordsWithSpaces;
- }
+ LinkedList<Word> words;
+ float size;
+ float rotation;
+ protected float spaceWidth;
+ protected Word currentWord;
+ protected LinkedList<Word> condensedWords;
+ protected LinkedList<Letter> letters;
+ protected Boolean wordsMade;
+ String ignoredSeparators;
+ protected boolean splitAllChars = false;
+
+ public Group(float size, float spaceWidth, boolean splitAllChars, String ignoredSeparators, float rotation) {
+ this.size = size;
+ this.rotation = rotation;
+
+ this.wordsMade = false;
+
+ this.splitAllChars = splitAllChars;
+ this.ignoredSeparators = ignoredSeparators;
+
+ this.spaceWidth = spaceWidth;
+ this.words = new LinkedList<>();
+ this.letters = new LinkedList<>();
+ }
+
+ public String asJSON(PDRectangle cropbox, float y, float scaleX, float scaleY, float rotation) {
+ makeWords();
+
+ ArrayList<String> res = new ArrayList<String>();
+ for (Word word : words) {
+ if (word.isSeparator()) {
+ continue;
+ }
+ String json = word.asJSON(cropbox, y, scaleX, scaleY, rotation);
+ if ("".equals(json)) {
+ continue;
+ }
+ res.add(json);
+ }
+ return String.join(",", res);
+ }
+
+ public String asText(PDRectangle cropbox) {
+ makeWords();
+ ArrayList<String> res = new ArrayList<String>();
+ for (Word word : words) {
+ String text = word.asText(cropbox);
+ if ("".equals(text)) {
+ continue;
+ }
+ res.add(text);
+ }
+ return String.join("", res);
+ }
+
+ public boolean equals(float size) {
+ return size == this.size;
+ }
+
+ public void addText(float x, float y, float width, float height, String text) {
+ Letter added;
+
+ if (" ".equals(text)) {
+ added = new Space(x, y, width, height);
+ } else if (text.matches(StringUtil.separatorsRegexp(ignoredSeparators))) {
+ added = new Separator(text, x, y, width, height);
+ } else {
+ added = new Letter(text, x, y, width, height);
+ }
+ letters.add(added);
+ }
+
+ protected void makeWords() {
+ if (this.wordsMade) {
+ return;
+ }
+ this.wordsMade = true;
+ Word currentWord = new Word();
+
+ for (int i = 0; i < letters.size(); i++) {
+ Letter l = letters.get(i);
+ if (!splitAllChars && currentWord.goodCandidate(l, spaceWidth, rotation)) {
+ // Si la lettre est un bon candidat
+ currentWord.addLetter(l);
+ } else {
+ // Sinon, on clôt le mot,
+ words.add(currentWord);
+ // On en recrée un nouveau
+ currentWord = new Word();
+ // Et on ajoute la lettre
+ currentWord.addLetter(l);
+ if (l.isSeparator()) {
+ // Si le mot ajouté est un espace, on ferme
+ // le mot directement
+ words.add(currentWord);
+ currentWord = new Word();
+ }
+ }
+ }
+
+ words.add(currentWord);
+ addMissingSpaces();
+ }
+
+ protected void addMissingSpaces() {
+ LinkedList<Word> wordsWithSpaces = new LinkedList<>();
+
+ for (int i = 0; i < words.size(); i++) {
+ Word w = words.get(i);
+ Word future = null;
+ if (i + 1 < words.size()) {
+ future = words.get(i + 1);
+ }
+
+ wordsWithSpaces.add(w);
+
+ if (!w.isSeparator() && future != null && !future.isSeparator()) {
+ // Si le mot en cours n'est pas un espace,
+ // quel mot précédent n'en était pas un
+ // et que le mot suivant existe
+
+ // On ajoute un espace à la liste des mots
+ spaceWidth = future.startX() - w.nextPosition();
+ if (spaceWidth == 0.0f) {
+ continue;
+ }
+
+ Space sp = new Space(w.nextPosition(), 0.0f, spaceWidth, 1.0f);
+ Word spw = new Word();
+ spw.addLetter(sp);
+ wordsWithSpaces.add(spw);
+ }
+ }
+ words = wordsWithSpaces;
+ }
}
protected PDPage currentPage;
protected String ignoredSeparators;
- protected boolean robust = false;
+ protected boolean splitAllChars = false;
public Page layout;
public LayoutStripper() throws IOException {
this.resetEngine();
this.currentPage = page;
- layout = new Page(currentPage, i, this.ignoredSeparators);
+ layout = new Page(currentPage, i,this.splitAllChars, this.ignoredSeparators);
PDResources resources = currentPage.findResources();
PDStream contents = null;
return c;
}
- public void setRobust(boolean robust) {
- this.robust = robust;
+ public void setSplitAllChars(boolean splitAllChars) {
+ this.splitAllChars = splitAllChars;
}
public Page getLayout() {
*/
public class Line extends LayoutElement {
- LinkedList<Group> groups;
- //.
- float y;
- float rotation;
- float scaleX;
- float scaleY;
- String ignoredSeparators;
+ LinkedList<Group> groups;
+ //.
+ float y;
+ float rotation;
+ float scaleX;
+ float scaleY;
+ String ignoredSeparators;
+ protected boolean splitAllChars = false;
- public Line(float y, float rotation, float scaleX, float scaleY, String ignoredSeparators) {
- this.groups = new LinkedList<>();
+ public Line(float y, float rotation, float scaleX, float scaleY, boolean splitAllChars, String ignoredSeparators) {
+ this.groups = new LinkedList<>();
- this.y = y;
- this.rotation = rotation;
- this.scaleX = scaleX;
- this.scaleY = scaleY;
- this.ignoredSeparators = ignoredSeparators;
- }
+ this.y = y;
+ this.rotation = rotation;
+ this.scaleX = scaleX;
+ this.scaleY = scaleY;
+ this.splitAllChars = splitAllChars;
+ this.ignoredSeparators = ignoredSeparators;
+ }
- public boolean equals(Line other) {
- return (y == other.y && rotation == other.rotation && scaleX == other.scaleX && scaleY == other.scaleY);
- }
+ public boolean equals(Line other) {
+ return (y == other.y && rotation == other.rotation && scaleX == other.scaleX && scaleY == other.scaleY);
+ }
- public boolean equals(float y, float rotation, float scaleX, float scaleY) {
- if (rotation == 0.0f) {
- return this.y == y && this.rotation == rotation && this.scaleX == scaleX && this.scaleY == scaleY;
- } else {
- return this.rotation == rotation && this.scaleX == scaleX && this.scaleY == scaleY;
- }
- }
+ public boolean equals(float y, float rotation, float scaleX, float scaleY) {
+ if (rotation == 0.0f) {
+ return this.y == y && this.rotation == rotation && this.scaleX == scaleX && this.scaleY == scaleY;
+ } else {
+ return this.rotation == rotation && this.scaleX == scaleX && this.scaleY == scaleY;
+ }
+ }
- public void addText(float size, float x, float y, float width, float height, String text, float spaceWidth) {
- Group group = getGroup(size, spaceWidth, rotation);
- group.addText(x, y, width, height, text);
- }
+ public void addText(float size, float x, float y, float width, float height, String text, float spaceWidth) {
+ Group group = getGroup(size, spaceWidth, rotation);
+ group.addText(x, y, width, height, text);
+ }
- public String asJSON(PDRectangle cropbox) {
- if (groups.size() == 0) {
- return "";
- }
- ArrayList<String> res = new ArrayList<>();
- for (Group group : groups) {
- String g = group.asJSON(cropbox, y, scaleX, scaleY, rotation);
- if (!"".equals(g)) {
- res.add(g);
- }
- }
- return String.join(",", res);
- }
+ public String asJSON(PDRectangle cropbox) {
+ if (groups.size() == 0) {
+ return "";
+ }
+ ArrayList<String> res = new ArrayList<>();
+ for (Group group : groups) {
+ String g = group.asJSON(cropbox, y, scaleX, scaleY, rotation);
+ if (!"".equals(g)) {
+ res.add(g);
+ }
+ }
+ return String.join(",", res);
+ }
- public String asText(PDRectangle cropbox){
- if (groups.size() == 0) {
- return "";
- }
- ArrayList<String> res = new ArrayList<>();
- for (Group group : groups) {
- String g = group.asText(cropbox);
- if (!"".equals(g)) {
- res.add(g);
- }
- }
- return StringEscapeUtils.escapeXml11(StringUtil.trim(StringUtil.condenseWhite(String.join("", res))));
- }
+ public String asText(PDRectangle cropbox) {
+ if (groups.size() == 0) {
+ return "";
+ }
+ ArrayList<String> res = new ArrayList<>();
+ for (Group group : groups) {
+ String g = group.asText(cropbox);
+ if (!"".equals(g)) {
+ res.add(g);
+ }
+ }
+ return StringEscapeUtils.escapeXml11(StringUtil.trim(StringUtil.condenseWhite(String.join("", res))));
+ }
- private Group getGroup( float size, float spaceWidth, float rotation) {
- if (groups.size() == 0 || !groups.getLast().equals(size)) {
- Group newGroup = new Group(size,spaceWidth, ignoredSeparators, rotation);
- groups.add(newGroup);
- return newGroup;
- }
- return groups.getLast();
- }
+ private Group getGroup(float size, float spaceWidth, float rotation) {
+ if (groups.size() == 0 || !groups.getLast().equals(size)) {
+ Group newGroup = new Group(size, spaceWidth,splitAllChars, ignoredSeparators, rotation);
+ groups.add(newGroup);
+ return newGroup;
+ }
+ return groups.getLast();
+ }
}
*/
public class Page extends LayoutElement {
- public int pageNumber;
- public LinkedList<Line> lines;
- protected HashMap<String, ColorSpace> _cs = new HashMap<>();
- protected PDPage page;
- protected PDRectangle cropbox;
- protected String ignoredSeparators;
-
- public Page(PDPage page, int pageNumber, String ignoredSeparators) {
- this.page = page;
- this.cropbox = page.findCropBox();
- this.ignoredSeparators = ignoredSeparators;
-
-
- this.pageNumber = pageNumber;
- this.lines = new LinkedList<>();
- }
-
- public void addText(PDGraphicsState gs, Matrix textLineMatrix, Matrix textMatrix, TextPosition textPosition, String text) throws IOException {
- PDTextState ts = gs.getTextState();
- float rotation = new CubeMatrix(textLineMatrix).getRotation();
- float size = textPosition.getFontSize() * textMatrix.getXScale();
- float y = round(cropbox.getUpperRightY() - textPosition.getTextPos().getYPosition() - cropbox.getLowerLeftY());
- float x = textPosition.getTextPos().getXPosition();
- float width = textPosition.getWidth();
- float height = textPosition.getHeight();
- if (width == 0.0f) {
- width = textPosition.getWidthDirAdj();
- }
-
- if (size == 0.0f) {
- System.out.println(text);
- }
-
- // Determine l'espace normal dans cette font
- float spaceWidth = textPosition.getWidthOfSpace();
-
- float lineScaleX = textLineMatrix.getXScale();
- float lineScaleY = textLineMatrix.getYScale();
-
- // On normalise les échelles
- float minScale = Math.abs(lineScaleX);
- lineScaleX /= minScale;
- lineScaleY /= minScale;
-
- Line line = getLine(y, rotation, lineScaleX, lineScaleY);
- line.addText(size, x, y, width, height, text, spaceWidth);
- }
-
- public String asJSON() {
- String res = "";
- res += "[";
- ArrayList<String> jsonLines = new ArrayList<String>();
- for (Line line : lines) {
- String lineJson = line.asJSON(cropbox);
- if ("".equals(lineJson)) {
- continue;
- }
- jsonLines.add(lineJson);
- }
- res += String.join(",", jsonLines);
- res += "]";
- return res;
- }
-
- public String asText() {
- ArrayList<String> textLines = new ArrayList<String>();
- for (Line line : lines) {
- String lineText = line.asText(this.cropbox);
- if ("".equals(lineText)) {
- continue;
- }
- textLines.add(lineText);
- }
- return String.join(" ", textLines);
- }
-
- public String asHTML() {
- ArrayList<String> textLines = new ArrayList<String>();
- for (Line line : lines) {
- String lineText = line.asText(this.cropbox);
- if ("".equals(lineText)) {
- continue;
- }
- textLines.add(lineText);
- }
- return "<div>\n\t<p>" +String.join("</p>\n\t<p>", textLines) + "</p>\n</div>";
- }
-
- protected Line getLine(float y, float rotation, float scaleX, float scaleY) {
- if (lines.size() == 0 || !lines.getLast().equals(y, rotation, scaleX, scaleY)) {
- Line newLine = new Line(y, rotation, scaleX, scaleY, ignoredSeparators);
- lines.add(newLine);
- return newLine;
- }
- return lines.getLast();
- }
-
- protected String parseColor(PDTextState ts, PDGraphicsState gs)
- throws IOException {
- PDColorState pcs;
-
- if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_FILL_TEXT) {
- pcs = gs.getNonStrokingColor();
- } else if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_STROKE_TEXT) {
- pcs = gs.getStrokingColor();
- } else if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_NEITHER_FILL_NOR_STROKE_TEXT) {
- pcs = gs.getStrokingColor();
- } else {
- pcs = gs.getStrokingColor();
- }
-
- ColorSpace cs = getColorSpace(pcs.getColorSpace());
-
- float[] components = pcs.getJavaColor().getColorComponents(null);
- float[] componentsRGB = cs.toRGB(components);
-
- Color c = new Color(0, 0, 0);
-
- if (componentsRGB.length == 3) {
- c = new Color(componentsRGB[0], componentsRGB[1], componentsRGB[2]);
- } else if (components.length == 4) {
- c = new Color(componentsRGB[0], componentsRGB[1], componentsRGB[2],
- componentsRGB[3]);
- }
-
- String color = "#" + Integer.toHexString(c.getRGB());
- return color;
- }
-
- protected ColorSpace _loadColorSpace(String path) throws IOException {
- if (!_cs.containsKey(path)) {
- _cs.put(path,
- new ICC_ColorSpace(ICC_Profile.getInstance(ResourceLoader.loadResource(path))));
- }
-
- return _cs.get(path);
-
- }
-
- protected ColorSpace getColorSpace(PDColorSpace pdfCS) throws IOException {
- ColorSpace cs = pdfCS.getJavaColorSpace();
- if (pdfCS.getName().equals("DeviceCMYK")) {
- cs = _loadColorSpace("com/adobe/icc/cmyk/USWebCoatedSWOP.icc");
- }
-
- return cs;
-
- }
+ public int pageNumber;
+ public LinkedList<Line> lines;
+ protected HashMap<String, ColorSpace> _cs = new HashMap<>();
+ protected PDPage page;
+ protected PDRectangle cropbox;
+ protected String ignoredSeparators;
+ protected boolean splitAllChars = false;
+
+ public Page(PDPage page, int pageNumber, boolean splitAllChars, String ignoredSeparators) {
+ this.page = page;
+ this.cropbox = page.findCropBox();
+ this.ignoredSeparators = ignoredSeparators;
+ this.splitAllChars = splitAllChars;
+
+
+ this.pageNumber = pageNumber;
+ this.lines = new LinkedList<>();
+ }
+
+ public void addText(PDGraphicsState gs, Matrix textLineMatrix, Matrix textMatrix, TextPosition textPosition, String text) throws IOException {
+ PDTextState ts = gs.getTextState();
+ float rotation = new CubeMatrix(textLineMatrix).getRotation();
+ float size = textPosition.getFontSize() * textMatrix.getXScale();
+ float y = round(cropbox.getUpperRightY() - textPosition.getTextPos().getYPosition() - cropbox.getLowerLeftY());
+ float x = textPosition.getTextPos().getXPosition();
+ float width = textPosition.getWidth();
+ float height = textPosition.getHeight();
+ if (width == 0.0f) {
+ width = textPosition.getWidthDirAdj();
+ }
+
+ if (size == 0.0f) {
+ System.out.println(text);
+ }
+
+ // Determine l'espace normal dans cette font
+ float spaceWidth = textPosition.getWidthOfSpace();
+
+ float lineScaleX = textLineMatrix.getXScale();
+ float lineScaleY = textLineMatrix.getYScale();
+
+ // On normalise les échelles
+ float minScale = Math.abs(lineScaleX);
+ lineScaleX /= minScale;
+ lineScaleY /= minScale;
+
+ Line line = getLine(y, rotation, lineScaleX, lineScaleY);
+ line.addText(size, x, y, width, height, text, spaceWidth);
+ }
+
+ public String asJSON() {
+ String res = "";
+ res += "[";
+ ArrayList<String> jsonLines = new ArrayList<String>();
+ for (Line line : lines) {
+ String lineJson = line.asJSON(cropbox);
+ if ("".equals(lineJson)) {
+ continue;
+ }
+ jsonLines.add(lineJson);
+ }
+ res += String.join(",", jsonLines);
+ res += "]";
+ return res;
+ }
+
+ public String asText() {
+ ArrayList<String> textLines = new ArrayList<String>();
+ for (Line line : lines) {
+ String lineText = line.asText(this.cropbox);
+ if ("".equals(lineText)) {
+ continue;
+ }
+ textLines.add(lineText);
+ }
+ return String.join(" ", textLines);
+ }
+
+ public String asHTML() {
+ ArrayList<String> textLines = new ArrayList<String>();
+ for (Line line : lines) {
+ String lineText = line.asText(this.cropbox);
+ if ("".equals(lineText)) {
+ continue;
+ }
+ textLines.add(lineText);
+ }
+ return "<div>\n\t<p>" + String.join("</p>\n\t<p>", textLines) + "</p>\n</div>";
+ }
+
+ protected Line getLine(float y, float rotation, float scaleX, float scaleY) {
+ if (lines.size() == 0 || !lines.getLast().equals(y, rotation, scaleX, scaleY)) {
+ Line newLine = new Line(y, rotation, scaleX, scaleY,splitAllChars, ignoredSeparators);
+ lines.add(newLine);
+ return newLine;
+ }
+ return lines.getLast();
+ }
+
+ protected String parseColor(PDTextState ts, PDGraphicsState gs)
+ throws IOException {
+ PDColorState pcs;
+
+ if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_FILL_TEXT) {
+ pcs = gs.getNonStrokingColor();
+ } else if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_STROKE_TEXT) {
+ pcs = gs.getStrokingColor();
+ } else if (ts.getRenderingMode() == PDTextState.RENDERING_MODE_NEITHER_FILL_NOR_STROKE_TEXT) {
+ pcs = gs.getStrokingColor();
+ } else {
+ pcs = gs.getStrokingColor();
+ }
+
+ ColorSpace cs = getColorSpace(pcs.getColorSpace());
+
+ float[] components = pcs.getJavaColor().getColorComponents(null);
+ float[] componentsRGB = cs.toRGB(components);
+
+ Color c = new Color(0, 0, 0);
+
+ if (componentsRGB.length == 3) {
+ c = new Color(componentsRGB[0], componentsRGB[1], componentsRGB[2]);
+ } else if (components.length == 4) {
+ c = new Color(componentsRGB[0], componentsRGB[1], componentsRGB[2],
+ componentsRGB[3]);
+ }
+
+ String color = "#" + Integer.toHexString(c.getRGB());
+ return color;
+ }
+
+ protected ColorSpace _loadColorSpace(String path) throws IOException {
+ if (!_cs.containsKey(path)) {
+ _cs.put(path,
+ new ICC_ColorSpace(ICC_Profile.getInstance(ResourceLoader.loadResource(path))));
+ }
+
+ return _cs.get(path);
+
+ }
+
+ protected ColorSpace getColorSpace(PDColorSpace pdfCS) throws IOException {
+ ColorSpace cs = pdfCS.getJavaColorSpace();
+ if (pdfCS.getName().equals("DeviceCMYK")) {
+ cs = _loadColorSpace("com/adobe/icc/cmyk/USWebCoatedSWOP.icc");
+ }
+
+ return cs;
+
+ }
}
$fwstk->setArg('--ignoreSeparators "' . $ignoreSeparators . '"');
}
$fwstk->execute();
-
}
-
public static function extractHighlightsData($pdf, $out, $mode = 'standard')
{
$out .= 'texts';