Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.06% |
101 / 103 |
|
93.33% |
14 / 15 |
CRAP | |
0.00% |
0 / 1 |
Document | |
98.06% |
101 / 103 |
|
93.33% |
14 / 15 |
35 | |
0.00% |
0 / 1 |
read | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
7 | |||
markOpening | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
markClosing | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
markBackslash | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
markNewline | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
flush | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
flushControl | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
flushText | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
setControl | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
pushText | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
parseControl | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
3 | |||
readParagraph | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readStyle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
readSkip | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readText | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | /** |
4 | * This file is part of PHPWord - A pure PHP library for reading and writing |
5 | * word processing documents. |
6 | * |
7 | * PHPWord is free software distributed under the terms of the GNU Lesser |
8 | * General Public License version 3 as published by the Free Software Foundation. |
9 | * |
10 | * For the full copyright and license information, please read the LICENSE |
11 | * file that was distributed with this source code. For the full list of |
12 | * contributors, visit https://github.com/PHPOffice/PHPWord/contributors. |
13 | * |
14 | * @see https://github.com/PHPOffice/PHPWord |
15 | * |
16 | * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3 |
17 | */ |
18 | |
19 | namespace PhpOffice\PhpWord\Reader\RTF; |
20 | |
21 | use PhpOffice\PhpWord\PhpWord; |
22 | use PhpOffice\PhpWord\SimpleType\Jc; |
23 | |
24 | /** |
25 | * RTF document reader. |
26 | * |
27 | * References: |
28 | * - How to Write an RTF Reader http://latex2rtf.sourceforge.net/rtfspec_45.html |
29 | * - PHP rtfclass by Markus Fischer https://github.com/mfn/rtfclass |
30 | * - JavaScript RTF-parser by LazyGyu https://github.com/lazygyu/RTF-parser |
31 | * |
32 | * @since 0.11.0 |
33 | * |
34 | * @SuppressWarnings(PHPMD.UnusedPrivateMethod) |
35 | */ |
class Document
{
    /**
     * Names of the private handler methods that parseControl() dispatches to.
     *
     * @var string
     */
    const PARA = 'readParagraph';
    const STYL = 'readStyle';
    const SKIP = 'readSkip';

    /**
     * PhpWord object.
     *
     * @var PhpWord
     */
    private $phpWord;

    /**
     * Section object that receives the text runs created while parsing.
     *
     * @var \PhpOffice\PhpWord\Element\Section
     */
    private $section;

    /**
     * Text run that currently receives text; replaced on every `\par`.
     *
     * @var \PhpOffice\PhpWord\Element\TextRun
     */
    private $textrun;

    /**
     * RTF content. Must be assigned by the caller before read() is invoked.
     *
     * @var string
     */
    public $rtf;

    /**
     * Content length in bytes.
     *
     * @var int
     */
    private $length = 0;

    /**
     * Byte index of the character currently being examined.
     *
     * @var int
     */
    private $offset = 0;

    /**
     * Control word buffered so far (without the leading backslash).
     *
     * @var string
     */
    private $control = '';

    /**
     * Text buffered so far and not yet flushed.
     *
     * @var string
     */
    private $text = '';

    /**
     * True while the characters being read belong to a control word/symbol.
     *
     * @var bool
     */
    private $isControl = false;

    /**
     * True only for the character immediately following a backslash:
     * that position decides between a control word and a control symbol.
     *
     * @var bool
     */
    private $isFirst = false;

    /**
     * Stack of flag sets saved when a group `{` opens and restored when it closes.
     *
     * @var array
     */
    private $groups = [];

    /**
     * Parser flags of the current group. Keys written by this class:
     * `paragraph`, `text`, `property`, `value`, `skipped` and `styles`.
     *
     * @var array
     */
    private $flags = [];

    /**
     * Parse RTF content.
     *
     * - Marks controlling characters `{`, `}`, and `\`
     * - Removes line endings
     * - Builds control words and control symbols
     * - Pushes every other character into the text queue
     *
     * The parser state is reset on entry, so the same instance can be read
     * more than once.
     *
     * @todo Use `fread` stream for scalability
     */
    public function read(PhpWord $phpWord): void
    {
        // Byte value => handler method for the characters that steer the parser.
        $markers = [
            123 => 'markOpening', // {
            125 => 'markClosing', // }
            92 => 'markBackslash', // \
            10 => 'markNewline', // LF
            13 => 'markNewline', // CR
        ];

        $this->phpWord = $phpWord;
        $this->section = $phpWord->addSection();
        $this->textrun = $this->section->addTextRun();

        // Start from a clean state; leftovers from a previous run would
        // otherwise make the loop below a no-op or leak flags between runs.
        $this->length = strlen((string) $this->rtf); // cast: $rtf may still be unset
        $this->offset = 0;
        $this->control = '';
        $this->text = '';
        $this->isControl = false;
        $this->isFirst = false;
        $this->groups = [];
        $this->flags = ['paragraph' => true]; // Set paragraph flag from the beginning

        // Walk each character
        while ($this->offset < $this->length) {
            $char = $this->rtf[$this->offset];
            $ascii = ord($char);

            if (isset($markers[$ascii])) { // Marker found: {, }, \, LF, or CR
                $markerFunction = $markers[$ascii];
                $this->$markerFunction();
            } elseif (false === $this->isControl) { // Non control word: Push character
                $this->pushText($char);
            } elseif (preg_match('/^[a-zA-Z0-9-]?$/', $char)) { // No delimiter: Buffer control
                $this->control .= $char;
                $this->isFirst = false;
            } elseif ($this->isFirst) { // Control symbol: character is consumed, nothing is buffered
                $this->isFirst = false;
            } elseif (' ' === $char) { // Discard space as a control word delimiter
                $this->flushControl(true);
            }
            // Any other delimiter inside a control word is dropped without flushing.

            ++$this->offset;
        }
        $this->flushText();
    }

    /**
     * Mark opening bracket `{` character.
     *
     * Directly after a backslash the brace is the `\{` control symbol, i.e. a
     * literal character: it is appended to the text and no group is opened.
     * Otherwise pending input is flushed and the current flags are saved.
     */
    private function markOpening(): void
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '{';

            return;
        }

        $this->flush(true);
        $this->groups[] = $this->flags;
    }

    /**
     * Mark closing bracket `}` character.
     *
     * Directly after a backslash the brace is the `\}` control symbol, i.e. a
     * literal character: it is appended to the text and no group is closed.
     * Otherwise pending input is flushed and the flags saved by the matching
     * `{` are restored.
     */
    private function markClosing(): void
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '}';

            return;
        }

        $this->flush(true);
        // array_pop() yields null on an empty stack (unbalanced `}`); fall back
        // to an empty flag set so $flags always stays an array.
        $this->flags = array_pop($this->groups) ?? [];
    }

    /**
     * Mark backslash `\` character.
     *
     * A backslash directly after another backslash is the `\\` control symbol
     * and is stored as literal text; any other backslash flushes pending input
     * and starts a new control word.
     */
    private function markBackslash(): void
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '\\';
        } else {
            $this->flush();
            $this->setControl(true);
            $this->control = '';
        }
    }

    /**
     * Mark newline character: Flush control word because it's not possible to span multiline.
     * Newlines outside a control word are ignored.
     */
    private function markNewline(): void
    {
        if ($this->isControl) {
            $this->flushControl(true);
        }
    }

    /**
     * Flush control word or text, depending on what is currently being read.
     *
     * @param bool $isControl Forwarded to flushControl(); true leaves control mode afterwards
     */
    private function flush($isControl = false): void
    {
        if ($this->isControl) {
            $this->flushControl($isControl);
        } else {
            $this->flushText();
        }
    }

    /**
     * Flush control word: split the buffer into word and optional numeric
     * parameter and hand both to parseControl(). Buffers that do not look like
     * a control word are ignored.
     *
     * @param bool $isControl When true, control mode is switched off afterwards
     */
    private function flushControl($isControl = false): void
    {
        if (1 === preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/', $this->control, $match)) {
            [, $control, $parameter] = $match;
            $this->parseControl($control, $parameter);
        }

        if (true === $isControl) {
            $this->setControl(false);
        }
    }

    /**
     * Flush text in queue.
     *
     * Inside a property group the text is recorded as the property value;
     * otherwise the first text of a paragraph is remembered in the flags.
     * Text is added to the document unless the group is flagged as skipped.
     */
    private function flushText(): void
    {
        if ('' === $this->text) {
            return;
        }

        if (isset($this->flags['property'])) { // Set property
            $this->flags['value'] = $this->text;
        } elseif (true === ($this->flags['paragraph'] ?? false)) { // Set text
            $this->flags['paragraph'] = false;
            $this->flags['text'] = $this->text;
        }

        // Add text if it's not flagged as skipped
        if (!isset($this->flags['skipped'])) {
            $this->readText();
        }

        $this->text = '';
    }

    /**
     * Set both the control-word state and the first-character state.
     *
     * @param bool $value
     */
    private function setControl($value): void
    {
        $this->isControl = $value;
        $this->isFirst = $value;
    }

    /**
     * Push text into queue.
     *
     * NOTE(review): as written, the `<` and `>` branches append the same
     * character as the default branch. They look like they were meant to
     * escape angle brackets - confirm against the upstream source before
     * simplifying.
     *
     * @param string $char
     */
    private function pushText($char): void
    {
        if ('<' === $char) {
            $this->text .= '<';
        } elseif ('>' === $char) {
            $this->text .= '>';
        } else {
            $this->text .= $char;
        }
    }

    /**
     * Parse control: look the control word up in the dispatch table and call
     * its handler with the remaining table entries. Unknown words are ignored.
     *
     * @param string $control Control word without backslash
     * @param string $parameter Numeric parameter as matched by flushControl(), possibly empty
     */
    private function parseControl($control, $parameter): void
    {
        // control word => [handler method, directives passed to the handler...]
        $controls = [
            'par' => [self::PARA, 'paragraph', true],
            'b' => [self::STYL, 'font', 'bold', true],
            'i' => [self::STYL, 'font', 'italic', true],
            'u' => [self::STYL, 'font', 'underline', true],
            'strike' => [self::STYL, 'font', 'strikethrough', true],
            'fs' => [self::STYL, 'font', 'size', $parameter],
            'qc' => [self::STYL, 'paragraph', 'alignment', Jc::CENTER],
            'sa' => [self::STYL, 'paragraph', 'spaceAfter', $parameter],
            'fonttbl' => [self::SKIP, 'fonttbl', null],
            'colortbl' => [self::SKIP, 'colortbl', null],
            'info' => [self::SKIP, 'info', null],
            'generator' => [self::SKIP, 'generator', null],
            'title' => [self::SKIP, 'title', null],
            'subject' => [self::SKIP, 'subject', null],
            'category' => [self::SKIP, 'category', null],
            'keywords' => [self::SKIP, 'keywords', null],
            'comment' => [self::SKIP, 'comment', null],
            'shppict' => [self::SKIP, 'pic', null],
            'fldinst' => [self::SKIP, 'link', null],
        ];

        if (!isset($controls[$control])) {
            return;
        }

        $directives = $controls[$control];
        $function = array_shift($directives); // first entry is the handler name
        if (method_exists($this, $function)) {
            $this->$function($directives);
        }
    }

    /**
     * Read paragraph: start a new text run and set the given flag.
     *
     * @param array $directives [flag name, flag value]
     */
    private function readParagraph($directives): void
    {
        [$property, $value] = $directives;
        $this->textrun = $this->section->addTextRun();
        $this->flags[$property] = $value;
    }

    /**
     * Read style: record a style value in the flags of the current group.
     *
     * @param array $directives [style group, property name, value]
     */
    private function readStyle($directives): void
    {
        [$style, $property, $value] = $directives;
        $this->flags['styles'][$style][$property] = $value;
    }

    /**
     * Read skip: mark the current group as a skipped property group.
     *
     * @param array $directives [property name, ...]
     */
    private function readSkip($directives): void
    {
        [$property] = $directives;
        $this->flags['property'] = $property;
        $this->flags['skipped'] = true;
    }

    /**
     * Read text: add the buffered text to the current text run and apply the
     * font styles recorded in the flags, if any.
     */
    private function readText(): void
    {
        $text = $this->textrun->addText($this->text);
        if (isset($this->flags['styles']['font'])) {
            $text->getFontStyle()->setStyleByArray($this->flags['styles']['font']);
        }
    }
}