Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.06% |
101 / 103 |
|
93.33% |
14 / 15 |
CRAP | |
0.00% |
0 / 1 |
Document | |
98.06% |
101 / 103 |
|
93.33% |
14 / 15 |
35 | |
0.00% |
0 / 1 |
read | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
7 | |||
markOpening | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
markClosing | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
markBackslash | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
markNewline | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
flush | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
flushControl | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
flushText | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
setControl | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
pushText | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
parseControl | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
3 | |||
readParagraph | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readStyle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
readSkip | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readText | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This file is part of PHPWord - A pure PHP library for reading and writing |
4 | * word processing documents. |
5 | * |
6 | * PHPWord is free software distributed under the terms of the GNU Lesser |
7 | * General Public License version 3 as published by the Free Software Foundation. |
8 | * |
9 | * For the full copyright and license information, please read the LICENSE |
10 | * file that was distributed with this source code. For the full list of |
11 | * contributors, visit https://github.com/PHPOffice/PHPWord/contributors. |
12 | * |
13 | * @see https://github.com/PHPOffice/PHPWord |
14 | * |
15 | * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3 |
16 | */ |
17 | |
18 | namespace PhpOffice\PhpWord\Reader\RTF; |
19 | |
20 | use PhpOffice\PhpWord\PhpWord; |
21 | use PhpOffice\PhpWord\SimpleType\Jc; |
22 | |
23 | /** |
24 | * RTF document reader. |
25 | * |
26 | * References: |
27 | * - How to Write an RTF Reader http://latex2rtf.sourceforge.net/rtfspec_45.html |
28 | * - PHP rtfclass by Markus Fischer https://github.com/mfn/rtfclass |
29 | * - JavaScript RTF-parser by LazyGyu https://github.com/lazygyu/RTF-parser |
30 | * |
31 | * @since 0.11.0 |
32 | * |
33 | * @SuppressWarnings(PHPMD.UnusedPrivateMethod) |
34 | */ |
class Document
{
    /**
     * Names of the private handler methods that parseControl() dispatches to.
     *
     * @const string
     */
    const PARA = 'readParagraph';
    const STYL = 'readStyle';
    const SKIP = 'readSkip';

    /**
     * PhpWord object.
     *
     * @var \PhpOffice\PhpWord\PhpWord
     */
    private $phpWord;

    /**
     * Section object.
     *
     * @var \PhpOffice\PhpWord\Element\Section
     */
    private $section;

    /**
     * Textrun object that parsed text is currently appended to.
     *
     * @var \PhpOffice\PhpWord\Element\TextRun
     */
    private $textrun;

    /**
     * RTF content.
     *
     * @var string
     */
    public $rtf;

    /**
     * Content length in bytes.
     *
     * @var int
     */
    private $length = 0;

    /**
     * Index of the byte currently being parsed.
     *
     * @var int
     */
    private $offset = 0;

    /**
     * Control word being buffered.
     *
     * @var string
     */
    private $control = '';

    /**
     * Text queue: characters collected since the last flush.
     *
     * @var string
     */
    private $text = '';

    /**
     * Parsing a control word flag.
     *
     * @var bool
     */
    private $isControl = false;

    /**
     * First character flag: true right after a backslash, so that control
     * symbols (a backslash followed by a non-letter) can be told apart from
     * control words.
     *
     * @var bool
     */
    private $isFirst = false;

    /**
     * Stack of saved flag sets, one entry per currently open `{` group.
     *
     * @var array
     */
    private $groups = [];

    /**
     * Parser flags of the current group. Keys written by this class:
     * `paragraph`, `property`, `value`, `text`, `skipped` and `styles`.
     *
     * @var array
     */
    private $flags = [];

    /**
     * Parse RTF content.
     *
     * - Marks controlling characters `{`, `}`, and `\`
     * - Removes line endings
     * - Builds control words and control symbols
     * - Pushes every other character into the text queue
     *
     * The parser state is reset on every call, so the method may be invoked
     * again (for example after assigning new content to `$rtf`).
     *
     * @todo Use `fread` stream for scalability
     */
    public function read(PhpWord $phpWord): void
    {
        // Byte value => handler method for the characters that steer the parser.
        $markers = [
            123 => 'markOpening', // {
            125 => 'markClosing', // }
            92 => 'markBackslash', // \
            10 => 'markNewline', // LF
            13 => 'markNewline', // CR
        ];

        $this->phpWord = $phpWord;
        $this->section = $phpWord->addSection();
        $this->textrun = $this->section->addTextRun();

        // Start from a clean state; without this a second call would resume at
        // the previous end offset and silently parse nothing.
        // The cast keeps an unassigned (null) $rtf from reaching strlen().
        $this->length = strlen((string) $this->rtf);
        $this->offset = 0;
        $this->control = '';
        $this->text = '';
        $this->groups = [];
        $this->setControl(false);
        $this->flags = ['paragraph' => true]; // Set paragraph flag from the beginning

        // Walk through each character
        while ($this->offset < $this->length) {
            $char = $this->rtf[$this->offset];
            $ascii = ord($char);

            if (isset($markers[$ascii])) { // Marker found: {, }, \, LF, or CR
                $markerFunction = $markers[$ascii];
                $this->$markerFunction();
            } elseif (false === $this->isControl) { // Non control word: Push character
                $this->pushText($char);
            } elseif (preg_match('/^[a-zA-Z0-9-]?$/', $char)) { // No delimiter: Buffer control
                $this->control .= $char;
                $this->isFirst = false;
            } elseif ($this->isFirst) { // Delimiter right after the backslash: only clear the flag
                $this->isFirst = false;
            } elseif (' ' === $char) { // Discard space as a control word delimiter
                $this->flushControl(true);
            }
            // Any other delimiter while buffering a control word is dropped.

            ++$this->offset;
        }

        // Emit whatever text is still queued; a control word pending at the
        // end of the input is not parsed.
        $this->flushText();
    }

    /**
     * Mark opening bracket `{` character: flush pending input and save the
     * current flags on the group stack.
     */
    private function markOpening(): void
    {
        $this->flush(true);
        $this->groups[] = $this->flags;
    }

    /**
     * Mark closing bracket `}` character: flush pending input and restore the
     * flags saved by the matching `{`.
     *
     * An unbalanced `}` (empty group stack) leaves the current flags in place
     * instead of replacing them with the null that array_pop() returns.
     */
    private function markClosing(): void
    {
        $this->flush(true);
        if ([] !== $this->groups) {
            $this->flags = array_pop($this->groups);
        }
    }

    /**
     * Mark backslash `\` character.
     *
     * A backslash directly after another backslash is an escaped literal
     * backslash; otherwise it starts a new control word.
     */
    private function markBackslash(): void
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '\\';
        } else {
            $this->flush();
            $this->setControl(true);
            $this->control = '';
        }
    }

    /**
     * Mark newline character: Flush control word because it's not possible to span multiline.
     */
    private function markNewline(): void
    {
        if ($this->isControl) {
            $this->flushControl(true);
        }
    }

    /**
     * Flush control word or text, depending on the current parser mode.
     *
     * @param bool $isControl Passed on to flushControl() when a control word is pending
     */
    private function flush($isControl = false): void
    {
        if ($this->isControl) {
            $this->flushControl($isControl);
        } else {
            $this->flushText();
        }
    }

    /**
     * Flush control word: parse the buffered control if it is a letter
     * sequence with an optional numeric parameter.
     *
     * @param bool $isControl When true, control word mode is left afterwards
     */
    private function flushControl($isControl = false): void
    {
        if (1 === preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/', $this->control, $match)) {
            [, $control, $parameter] = $match;
            $this->parseControl($control, $parameter);
        }

        if (true === $isControl) {
            $this->setControl(false);
        }
    }

    /**
     * Flush text in queue.
     *
     * Records the text in the flags (as `value` when a property is being read,
     * otherwise as `text` for the first text of a paragraph) and adds it to
     * the document unless the current group is flagged as skipped.
     */
    private function flushText(): void
    {
        if ('' === $this->text) {
            return;
        }

        if (isset($this->flags['property'])) { // Set property
            $this->flags['value'] = $this->text;
        } elseif (true === ($this->flags['paragraph'] ?? false)) { // Set text
            $this->flags['paragraph'] = false;
            $this->flags['text'] = $this->text;
        }

        // Add text if it's not flagged as skipped
        if (!isset($this->flags['skipped'])) {
            $this->readText();
        }

        $this->text = '';
    }

    /**
     * Reset control word and first char state.
     *
     * @param bool $value
     */
    private function setControl($value): void
    {
        $this->isControl = $value;
        $this->isFirst = $value;
    }

    /**
     * Push text into queue.
     *
     * NOTE(review): as written, the `<` and `>` branches append the character
     * unchanged, exactly like the default branch. Presumably they were meant
     * to append escaped forms - confirm against the upstream source before
     * simplifying.
     *
     * @param string $char
     */
    private function pushText($char): void
    {
        if ('<' === $char) {
            $this->text .= '<';
        } elseif ('>' === $char) {
            $this->text .= '>';
        } else {
            $this->text .= $char;
        }
    }

    /**
     * Parse control: look up the control word and call its handler method
     * with the remaining directives.
     *
     * Control words that are not in the table are ignored.
     *
     * @param string $control
     * @param string $parameter
     */
    private function parseControl($control, $parameter): void
    {
        // Control word => [handler method, directives passed to the handler...]
        $controls = [
            'par' => [self::PARA, 'paragraph', true],
            'b' => [self::STYL, 'font', 'bold', true],
            'i' => [self::STYL, 'font', 'italic', true],
            'u' => [self::STYL, 'font', 'underline', true],
            'strike' => [self::STYL, 'font', 'strikethrough', true],
            'fs' => [self::STYL, 'font', 'size', $parameter],
            'qc' => [self::STYL, 'paragraph', 'alignment', Jc::CENTER],
            'sa' => [self::STYL, 'paragraph', 'spaceAfter', $parameter],
            'fonttbl' => [self::SKIP, 'fonttbl', null],
            'colortbl' => [self::SKIP, 'colortbl', null],
            'info' => [self::SKIP, 'info', null],
            'generator' => [self::SKIP, 'generator', null],
            'title' => [self::SKIP, 'title', null],
            'subject' => [self::SKIP, 'subject', null],
            'category' => [self::SKIP, 'category', null],
            'keywords' => [self::SKIP, 'keywords', null],
            'comment' => [self::SKIP, 'comment', null],
            'shppict' => [self::SKIP, 'pic', null],
            'fldinst' => [self::SKIP, 'link', null],
        ];

        if (!isset($controls[$control])) {
            return;
        }

        $directives = $controls[$control];
        $function = array_shift($directives); // the handler name is not part of the directives
        if (method_exists($this, $function)) {
            $this->$function($directives);
        }
    }

    /**
     * Read paragraph: start a new text run and set the given flag.
     *
     * @param array $directives [flag name, flag value]
     */
    private function readParagraph($directives): void
    {
        [$property, $value] = $directives;
        $this->textrun = $this->section->addTextRun();
        $this->flags[$property] = $value;
    }

    /**
     * Read style: store a style value in the flags of the current group.
     *
     * @param array $directives [style group, property name, value]
     */
    private function readStyle($directives): void
    {
        [$style, $property, $value] = $directives;
        $this->flags['styles'][$style][$property] = $value;
    }

    /**
     * Read skip: flag the current group so that its text is not added.
     *
     * @param array $directives [property name, ...]
     */
    private function readSkip($directives): void
    {
        [$property] = $directives;
        $this->flags['property'] = $property;
        $this->flags['skipped'] = true;
    }

    /**
     * Read text: add the queued text to the current text run and apply the
     * font styles collected for the current group, if any.
     */
    private function readText(): void
    {
        $text = $this->textrun->addText($this->text);
        if (isset($this->flags['styles']['font'])) {
            $text->getFontStyle()->setStyleByArray($this->flags['styles']['font']);
        }
    }
}