From: vincent@cubedesigners.com Date: Sat, 30 Apr 2011 17:27:35 +0000 (+0000) Subject: (no commit message) X-Git-Url: http://git.cubedesigners.com/?a=commitdiff_plain;h=69291ec0477dfc4c9fd48d1b7b03adea968e45c1;p=cubeextranet.git --- diff --git a/fluidbook/tools/fwstk/project_resources/org/apache/pdfbox/resources/LayoutStripper.properties b/fluidbook/tools/fwstk/project_resources/org/apache/pdfbox/resources/LayoutStripper.properties new file mode 100644 index 000000000..360519a2f --- /dev/null +++ b/fluidbook/tools/fwstk/project_resources/org/apache/pdfbox/resources/LayoutStripper.properties @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This table maps PDF stream operators to concrete OperatorProcessor +# subclasses that are used by the PDFStreamEngine class to interpret the +# PDF document. The classes configured here allow the PDFTextStripper +# subclass of PDFStreamEngine to extract text content of the document. 
+ +BT = org.apache.pdfbox.util.operator.BeginText +cm = org.apache.pdfbox.util.operator.Concatenate +CS=org.apache.pdfbox.util.operator.SetStrokingColorSpace +cs=org.apache.pdfbox.util.operator.SetNonStrokingColorSpace +Do = org.apache.pdfbox.util.operator.Invoke +ET = org.apache.pdfbox.util.operator.EndText +gs = org.apache.pdfbox.util.operator.SetGraphicsStateParameters +G=org.apache.pdfbox.util.operator.SetStrokingGrayColor +g=org.apache.pdfbox.util.operator.SetNonStrokingGrayColor +q = org.apache.pdfbox.util.operator.GSave +Q = org.apache.pdfbox.util.operator.GRestore +K=org.apache.pdfbox.util.operator.SetStrokingCMYKColor +k=org.apache.pdfbox.util.operator.SetNonStrokingCMYKColor +RG=org.apache.pdfbox.util.operator.SetStrokingRGBColor +rg=org.apache.pdfbox.util.operator.SetNonStrokingRGBColor +SC=org.apache.pdfbox.util.operator.SetStrokingColor +sc=org.apache.pdfbox.util.operator.SetNonStrokingColor +SCN=org.apache.pdfbox.util.operator.SetStrokingColor +scn=org.apache.pdfbox.util.operator.SetNonStrokingColor +T* = org.apache.pdfbox.util.operator.NextLine +Tc = org.apache.pdfbox.util.operator.SetCharSpacing +Td = org.apache.pdfbox.util.operator.MoveText +TD = org.apache.pdfbox.util.operator.MoveTextSetLeading +Tf = org.apache.pdfbox.util.operator.SetTextFont +Tj = org.apache.pdfbox.util.operator.ShowText +TJ = org.apache.pdfbox.util.operator.ShowTextGlyph +TL = org.apache.pdfbox.util.operator.SetTextLeading +Tm = org.apache.pdfbox.util.operator.SetMatrix +Tr = org.apache.pdfbox.util.operator.SetTextRenderingMode +Ts = org.apache.pdfbox.util.operator.SetTextRise +Tw = org.apache.pdfbox.util.operator.SetWordSpacing +Tz = org.apache.pdfbox.util.operator.SetHorizontalTextScaling +w = org.apache.pdfbox.util.operator.SetLineWidth +\' = org.apache.pdfbox.util.operator.MoveAndShow +\" = org.apache.pdfbox.util.operator.SetMoveAndShow + +# The following operators are not relevant to text extraction, +# so we can silently ignore them. 
+ +b +B +b* +B* +BDC +BI +BMC +BX +c +d +d0 +d1 +DP +El +EMC +EX +f +F +f* +h +i +ID +j +J +l +m +M +MP +n +re +ri +s +S +sh +v +W +W* +y