1 # Licensed to the Apache Software Foundation (ASF) under one or more
2 # contributor license agreements. See the NOTICE file distributed with
3 # this work for additional information regarding copyright ownership.
4 # The ASF licenses this file to You under the Apache License, Version 2.0
5 # (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
16 # This table maps PDF stream operators to concrete OperatorProcessor
17 # subclasses that are used by the PDFStreamEngine class to interpret the
18 # PDF document. The classes configured here allow the PDFTextStripper
19 # subclass of PDFStreamEngine to extract text content of the document.
# Entry format: <PDF operator> = <fully qualified operator processor class>.
# Operator names are case-sensitive: q/Q, Td/TD and Tj/TJ are separate
# entries that map to different classes.
21 BT = org.apache.pdfbox.util.operator.BeginText
22 cm = org.apache.pdfbox.util.operator.Concatenate
23 Do = org.apache.pdfbox.util.operator.Invoke
24 ET = org.apache.pdfbox.util.operator.EndText
25 gs = org.apache.pdfbox.util.operator.SetGraphicsStateParameters
26 q = org.apache.pdfbox.util.operator.GSave
27 Q = org.apache.pdfbox.util.operator.GRestore
28 T* = org.apache.pdfbox.util.operator.NextLine
29 Tc = org.apache.pdfbox.util.operator.SetCharSpacing
30 Td = org.apache.pdfbox.util.operator.MoveText
31 TD = org.apache.pdfbox.util.operator.MoveTextSetLeading
32 Tf = org.apache.pdfbox.util.operator.SetTextFont
33 Tj = org.apache.pdfbox.util.operator.ShowText
34 TJ = org.apache.pdfbox.util.operator.ShowTextGlyph
35 TL = org.apache.pdfbox.util.operator.SetTextLeading
36 Tm = org.apache.pdfbox.util.operator.SetMatrix
37 Tr = org.apache.pdfbox.util.operator.SetTextRenderingMode
38 Ts = org.apache.pdfbox.util.operator.SetTextRise
39 Tw = org.apache.pdfbox.util.operator.SetWordSpacing
40 Tz = org.apache.pdfbox.util.operator.SetHorizontalTextScaling
41 w = org.apache.pdfbox.util.operator.SetLineWidth
# The two quote operators are written with a leading backslash.
# NOTE(review): presumably this file is read with java.util.Properties.load,
# which drops a backslash before a non-escape character, so the effective
# keys would be ' and " -- confirm against the loader.
42 \' = org.apache.pdfbox.util.operator.MoveAndShow
43 \" = org.apache.pdfbox.util.operator.SetMoveAndShow
# Marked-content operators.
45 BDC = org.apache.pdfbox.util.operator.BeginMarkedContentSequenceWithProperties
46 BMC = org.apache.pdfbox.util.operator.BeginMarkedContentSequence
47 EMC = org.apache.pdfbox.util.operator.EndMarkedContentSequence
49 # The following operators are not relevant to text extraction,
50 # so we can silently ignore them.