-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdfrects.h
298 lines (223 loc) · 10 KB
/
pdfrects.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
/*
* pdfrects.h
*
* functions on poppler rectangles:
*
* - functions on individual rectangles (containment, join, etc.)
* - functions for rectangle lists representing the union of their areas
* - functions for blocks of text in a page
* - functions for short, recurring blocks of text
*/
/*
 * include guard: when _PDFRECTS_H is already defined, everything down to the
 * closing #endif of this header is skipped
 *
 * NOTE(review): no #include appears in this header although it uses FILE,
 * Poppler, GLib and cairo types; includers presumably have to pull in the
 * corresponding headers before this one - confirm
 */
#ifdef _PDFRECTS_H
#else
#define _PDFRECTS_H
/*
* functions on individual rectangles
*/
/* print a rectangle on a stream */
void rectangle_print(FILE *fd, PopplerRectangle *r);
/*
 * print a rectangle on a stream, in yaml form according to the name
 *
 * NOTE(review): first and indent are presumably the strings that open the
 * first line and the following ones - confirm in the implementation
 */
void rectangle_printyaml(FILE *fd, char *first, char *indent,
		PopplerRectangle *r);
/* parse a rectangle out of a string; the accepted format is not stated here */
PopplerRectangle *rectangle_parse(char *s);
/* normalize a rectangle, so that x1 <= x2 and y1 <= y2 */
void rectangle_normalize(PopplerRectangle *r);
/* width, height and area of a rectangle */
double rectangle_width(PopplerRectangle *r);
double rectangle_height(PopplerRectangle *r);
double rectangle_area(PopplerRectangle *r);
/*
 * check if two rectangles are the same
 *
 * NOTE(review): the h and v variants presumably compare only the horizontal
 * or the vertical extent - confirm in the implementation
 */
gboolean rectangle_hequal(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_vequal(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_equal(PopplerRectangle *a, PopplerRectangle *b);
/*
 * check whether rectangle a contains rectangle b
 *
 * NOTE(review): here and below, the h and v variants presumably restrict the
 * check to the horizontal or the vertical direction - confirm
 */
gboolean rectangle_hcontain(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_vcontain(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_contain(PopplerRectangle *a, PopplerRectangle *b);
/* check if two rectangles overlap */
gboolean rectangle_hoverlap(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_voverlap(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_overlap(PopplerRectangle *a, PopplerRectangle *b);
/* check if two rectangles touch, that is, they either meet or overlap */
gboolean rectangle_htouch(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_vtouch(PopplerRectangle *a, PopplerRectangle *b);
gboolean rectangle_touch(PopplerRectangle *a, PopplerRectangle *b);
/* horizontal and vertical distance between two rectangles */
gdouble rectangle_hdistance(PopplerRectangle *a, PopplerRectangle *b);
gdouble rectangle_vdistance(PopplerRectangle *a, PopplerRectangle *b);
/* copy a rectangle onto another, or exchange two rectangles */
void rectangle_copy(PopplerRectangle *dest, PopplerRectangle *orig);
void rectangle_swap(PopplerRectangle *a, PopplerRectangle *b);
/*
 * shift a rectangle by x,y or expand it by dx,dy
 *
 * NOTE(review): whether dx,dy are applied to each side or in total is not
 * stated here - confirm in the implementation
 */
void rectangle_shift(PopplerRectangle *r, gdouble x, gdouble y);
void rectangle_expand(PopplerRectangle *r, gdouble dx, gdouble dy);
/* make r the intersection of a and b */
void rectangle_intersect(PopplerRectangle *r,
		PopplerRectangle *a, PopplerRectangle *b);
/* join rectangles: a becomes the smallest rectangle containing both a and b */
void rectangle_join(PopplerRectangle *a, PopplerRectangle *b);
/*
 * compare the position of two rectangles
 *
 * NOTE(review): the sign convention of the result is not stated here;
 * presumably that of a qsort() comparison function - confirm
 */
int rectangle_hcompare(PopplerRectangle *a, PopplerRectangle *b);
int rectangle_vcompare(PopplerRectangle *a, PopplerRectangle *b);
int rectangle_compare(PopplerRectangle *a, PopplerRectangle *b);
/* compare the area of two rectangles */
int rectangle_areacompare(PopplerRectangle *a, PopplerRectangle *b);
/* make rect a rectangle as large as the page */
void rectangle_page(PopplerPage *page, PopplerRectangle *rect);
/*
* functions on lists of rectangles
*/
/*
 * NOTE(review): MAXRECT is not used anywhere in this header; presumably a
 * default capacity for rectangle lists - confirm in the implementation
 */
#define MAXRECT 4096
/*
 * a list of rectangles; according to the comment at the top of this header,
 * a list represents the union of the areas of its rectangles
 */
typedef struct {
/* public */
PopplerRectangle *rect;	/* the rectangles; presumably an array - TODO confirm */
gint num;	/* presumably the number of rectangles currently in the list */
/* private */
gint max;	/* presumably the number of rectangles the list can hold */
} RectangleList;
/*
 * minimal size for both dimensions of a rectangle and for each
 *
 * passed to rectanglelist_subtract() and rectanglelist_subtract1()
 *
 * NOTE(review): how the two thresholds are applied is not visible from this
 * header - confirm in the implementation
 */
typedef struct {
gdouble both;	/* the minimal size "for both dimensions" */
gdouble each;	/* the minimal size "for each" dimension */
} RectangleBound;
/* allocate a list for a maximum of max rectangles, currently containing none */
RectangleList *rectanglelist_new(int max);
/* make a copy of a rectangle list */
RectangleList *rectanglelist_copy(RectangleList *src);
/* tighten a rectangle list by deallocating the unused entries */
void rectanglelist_tighten(RectangleList *rl);
/* free a rectangle list */
void rectanglelist_free(RectangleList *rl);
/*
 * print a rectangle list on a stream
 *
 * NOTE(review): in the yaml variant, first and indent are presumably the
 * strings that open the first line and the following ones - confirm
 */
void rectanglelist_print(FILE *fd, RectangleList *rl);
void rectanglelist_printyaml(FILE *fd, char *first, char *indent,
		RectangleList *rl);
/*
 * remove a rectangle from a list
 *
 * NOTE(review): n is presumably the index of the rectangle in the list -
 * confirm in the implementation
 */
void rectanglelist_delete(RectangleList *rl, gint n);
/* append a rectangle to a list */
void rectanglelist_append(RectangleList *rl, PopplerRectangle *rect);
/*
 * add a rectangle to a list, if not redundant
 *
 * NOTE(review): the meaning of the return value is not stated here;
 * presumably whether the rectangle was actually added - confirm
 */
gboolean rectanglelist_add(RectangleList *rl, PopplerRectangle *rect);
/* smallest rectangle enclosing all rectangles in a list */
PopplerRectangle *rectanglelist_joinall(RectangleList *rl);
/* horizontal or vertical extents of a rectangle list */
RectangleList *rectanglelist_hextents(RectangleList *rl);
RectangleList *rectanglelist_vextents(RectangleList *rl);
/* total width and height of a rectangle list */
double rectanglelist_sumwidth(RectangleList *rl);
double rectanglelist_sumheight(RectangleList *rl);
/* average width and height of a rectangle list */
double rectanglelist_averagewidth(RectangleList *rl);
double rectanglelist_averageheight(RectangleList *rl);
/*
 * index of the first rectangle of a list that is in the given relation with
 * another rectangle
 *
 * NOTE(review): the direction of the containment and the value returned when
 * no rectangle qualifies are not stated here - confirm in the implementation
 */
gint rectanglelist_contain(RectangleList *rl, PopplerRectangle *r);
gint rectanglelist_touch(RectangleList *rl, PopplerRectangle *r);
gint rectanglelist_overlap(RectangleList *rl, PopplerRectangle *r);
/*
 * sort a rectangle list by position
 *
 * NOTE(review): how the three variants differ and what the page is used for
 * is not visible from this header - confirm in the implementation
 */
void rectanglelist_quicksort(RectangleList *rl, PopplerPage *page);
void rectanglelist_twosort(RectangleList *rl, PopplerPage *page);
void rectanglelist_charsort(RectangleList *rl, PopplerPage *page);
/* find the largest rectangle in a list, or sort the list by area */
PopplerRectangle *rectanglelist_largest(RectangleList *rl);
void rectanglelist_areasort(RectangleList *rl);
/*
 * position a rectangle in a page partially filled by others
 *
 * NOTE(review): presumably r is the rectangle to place, rl the rectangles
 * already in the page and moved the resulting position - confirm
 */
gboolean rectanglelist_place(PopplerRectangle *page,
RectangleList *rl, PopplerRectangle *r,
PopplerRectangle *moved);
/*
 * subtract a rectangle list from another: orig -= sub
 *
 * orig is passed by address, so the function can replace the list itself
 *
 * NOTE(review): the roles of cont and b and the meaning of the return value
 * are not stated here - confirm in the implementation
 */
gboolean rectanglelist_subtract(RectangleList **orig, RectangleList *sub,
PopplerRectangle *cont, RectangleBound *b);
/* subtract a rectangle list from a single rectangle: res = r - rl */
RectangleList *rectanglelist_subtract1(PopplerRectangle *r, RectangleList *rl,
PopplerRectangle *cont, RectangleBound *b);
/*
* functions on text-enclosing rectangles
* rectangles of white spaces ' ' are made 0-width
*/
/* debug flag for the functions on text rectangles; defined elsewhere */
extern int debugtextrectangles;
/* the rectangles of the single characters in the page */
RectangleList *rectanglelist_characters(PopplerPage *page);
/* area of text in a page */
RectangleList *rectanglelist_textarea(PopplerPage *page);
/* area of text in a page, with minimal distance considered a white space */
RectangleList *rectanglelist_textarea_distance(PopplerPage *page,
		gdouble distance);
/* bounding box of the page or document (NULL if no text) */
PopplerRectangle *rectanglelist_boundingbox(PopplerPage *page);
PopplerRectangle *rectanglelist_boundingbox_document(PopplerDocument *doc);
/* largest box in a page or document (NULL if no text) */
PopplerRectangle *rectanglelist_pagelargest(PopplerPage *page);
PopplerRectangle *rectanglelist_largest_document(PopplerDocument *doc);
/*
 * list of squares of a grid that are painted in a page
 *
 * NOTE(review): distance is presumably the side of the squares of the grid -
 * confirm in the implementation
 */
RectangleList *rectanglelist_painted(PopplerPage *page, int distance);
/* area of painted squares in a page, with minimal distance of white space */
RectangleList *rectanglelist_paintedarea_distance(PopplerPage *page,
		gdouble distance);
/* bounding box of a page, based on painted squares */
PopplerRectangle *rectanglelist_boundingbox_painted(PopplerPage *page, int d);
/* list of rows in a page */
RectangleList *rectanglelist_rows(PopplerPage *page, gdouble distance);
/*
* functions for recurring blocks of text (page numbers, headers and footers)
*/
/*
 * a vector of rectangles, each with an integer rank
 *
 * the elements are a flexible array member, so they are stored right after
 * the two counters in the same object
 *
 * NOTE(review): presumably size is the number of elements allocated and num
 * the number currently in use - confirm in rectanglevector_create()
 */
typedef struct {
int num;
int size;
struct {
int rank;	/* the rank passed to rectanglevector_insert(), presumably */
PopplerRectangle rect;	/* stored by value, not as a pointer */
} rect[];
} RectangleVector;
/* debug flag for areas often taken by short blocks of text; defined elsewhere */
extern int debugfrequent;
/* create an empty rectangle vector of a given size */
RectangleVector *rectanglevector_create(int size);
/*
 * print a rectangle vector on a stream
 *
 * NOTE(review): in the yaml variant, first and indent are presumably the
 * strings that open the first line and the following ones - confirm
 */
void rectanglevector_print(FILE *fd, RectangleVector *v);
void rectanglevector_printyaml(FILE *fd, char *first, char *indent,
RectangleVector *v);
/* make a rectangle list out of a rectangle vector */
RectangleList *rectanglevector_list(RectangleVector *c);
/* insert a rectangle in a vector, with the given rank */
void rectanglevector_insert(RectangleVector *v, int rank, PopplerRectangle *r);
/* add a rectangle to a frequency vector, allowing horizontal containment */
void rectanglevector_add(RectangleVector *v, PopplerRectangle *r);
/*
 * rectangles often taken by short blocks of text
 *
 * NOTE(review): height and distance are presumably the maximal height of a
 * short block and the white space distance used to find blocks - confirm
 */
RectangleList *rectanglevector_frequent(PopplerDocument *doc,
gdouble height, gdouble distance);
/*
 * a rectangle as large as the page minus headers and footers
 *
 * NOTE(review): recur is presumably a list of recurring blocks such as the
 * one returned by rectanglevector_frequent() - confirm
 */
PopplerRectangle *rectanglevector_main(PopplerDocument *doc,
RectangleList *recur, gdouble height, gdouble distance);
/*
* drawing-related functions
*/
/* use rectangle in cairo */
void rectangle_cairo(cairo_t *cr, PopplerRectangle *rect, gdouble enlarge);
/* draw a rectangle, possibly in a random color, filled or enclosing */
void rectangle_draw(cairo_t *cr, PopplerRectangle *rect,
		gboolean randomcolor, gboolean fill, gboolean enclosing);
/*
 * draw a rectangle list, possibly numbering each rectangle
 *
 * NOTE(review): inside presumably tells whether the numbers are drawn within
 * the rectangles - confirm in the implementation
 */
void rectanglelist_draw(cairo_t *cr, RectangleList *rl,
		gboolean fill, gboolean enclosing, gboolean num, gboolean inside);
/* apply the current transformation to a rectangle */
void rectangle_transform(cairo_t *cr, PopplerRectangle *r);
/*
 * map a poppler rectangle into a cairo surface
 *
 * NOTE(review): the flags presumably select the directions to scale, whether
 * to keep the aspect ratio and how to align the result - confirm
 */
void rectangle_map_to_cairo(cairo_t *cr,
		PopplerRectangle *dst, PopplerRectangle *src,
		gboolean horizontal, gboolean vertical,
		gboolean ratio, gboolean topalign, gboolean leftalign);
/* clip out all textarea rectangles containing any in the remove list rm */
void rectanglelist_clip_containing(cairo_t *cr, PopplerPage *page,
		RectangleList *textarea, RectangleList *rm);
/*
* helper functions
*/
/*
 * from name to paper size
 *
 * the result points to a statically allocated structure, so it is not to be
 * freed by the caller
 *
 * NOTE(review): the result for an unknown name is not stated here,
 * presumably NULL - confirm in the implementation
 */
PopplerRectangle *get_papersize(char *name);
/*
 * default paper size, from /etc/papersize
 *
 * takes no arguments; the explicit void makes this declaration a prototype,
 * so that a call passing arguments is rejected by the compiler instead of
 * being silently accepted
 */
char *defaultpapersize(void);
/* from file name to uri */
char *filenametouri(char *name);
/* add a suffix to the name of a pdf file: file.pdf becomes file-suffix.pdf */
char *pdfaddsuffix(char *infile, char *suffix);
#endif