1 # Licensed to the Apache Software Foundation (ASF) under one or more
\r
2 # contributor license agreements. See the NOTICE file distributed with
\r
3 # this work for additional information regarding copyright ownership.
\r
4 # The ASF licenses this file to You under the Apache License, Version 2.0
\r
5 # (the "License"); you may not use this file except in compliance with
\r
6 # the License. You may obtain a copy of the License at
\r
8 # http://www.apache.org/licenses/LICENSE-2.0
\r
10 # Unless required by applicable law or agreed to in writing, software
\r
11 # distributed under the License is distributed on an "AS IS" BASIS,
\r
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 # See the License for the specific language governing permissions and
\r
14 # limitations under the License.
\r
16 # This table maps PDF stream operators to concrete OperatorProcessor
\r
17 # subclasses that are used by the PDFStreamEngine class to interpret the
\r
18 # PDF document. The classes configured here allow the PDFTextStripper
\r
19 # subclass of PDFStreamEngine to extract text content of the document.
\r
21 BT = org.apache.pdfbox.util.operator.BeginText
\r
22 cm = org.apache.pdfbox.util.operator.Concatenate
\r
23 Do = org.apache.pdfbox.util.operator.Invoke
\r
24 ET = org.apache.pdfbox.util.operator.EndText
\r
25 gs = org.apache.pdfbox.util.operator.SetGraphicsStateParameters
\r
26 q = org.apache.pdfbox.util.operator.GSave
\r
27 Q = org.apache.pdfbox.util.operator.GRestore
\r
28 T* = org.apache.pdfbox.util.operator.NextLine
\r
29 Tc = org.apache.pdfbox.util.operator.SetCharSpacing
\r
30 Td = org.apache.pdfbox.util.operator.MoveText
\r
31 TD = org.apache.pdfbox.util.operator.MoveTextSetLeading
\r
32 Tf = org.apache.pdfbox.util.operator.SetTextFont
\r
33 Tj = org.apache.pdfbox.util.operator.ShowText
\r
34 TJ = org.apache.pdfbox.util.operator.ShowTextGlyph
\r
35 TL = org.apache.pdfbox.util.operator.SetTextLeading
\r
36 Tm = org.apache.pdfbox.util.operator.SetMatrix
\r
37 Tr = org.apache.pdfbox.util.operator.SetTextRenderingMode
\r
38 Ts = org.apache.pdfbox.util.operator.SetTextRise
\r
39 Tw = org.apache.pdfbox.util.operator.SetWordSpacing
\r
40 Tz = org.apache.pdfbox.util.operator.SetHorizontalTextScaling
\r
41 w = org.apache.pdfbox.util.operator.SetLineWidth
\r
42 \' = org.apache.pdfbox.util.operator.MoveAndShow
\r
43 \" = org.apache.pdfbox.util.operator.SetMoveAndShow
\r
45 # The following operators are not relevant to text extraction,
\r
46 # so we can silently ignore them.
\r