Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.06% covered (success)
98.06%
101 / 103
93.33% covered (success)
93.33%
14 / 15
CRAP
0.00% covered (danger)
0.00%
0 / 1
Document
98.06% covered (success)
98.06%
101 / 103
93.33% covered (success)
93.33%
14 / 15
35
0.00% covered (danger)
0.00%
0 / 1
 read
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
7
 markOpening
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 markClosing
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 markBackslash
66.67% covered (warning)
66.67%
4 / 6
0.00% covered (danger)
0.00%
0 / 1
2.15
 markNewline
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 flush
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 flushControl
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 flushText
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
5
 setControl
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 pushText
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 parseControl
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
3
 readParagraph
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 readStyle
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 readSkip
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 readText
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2/**
3 * This file is part of PHPWord - A pure PHP library for reading and writing
4 * word processing documents.
5 *
6 * PHPWord is free software distributed under the terms of the GNU Lesser
7 * General Public License version 3 as published by the Free Software Foundation.
8 *
9 * For the full copyright and license information, please read the LICENSE
10 * file that was distributed with this source code. For the full list of
11 * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12 *
13 * @see         https://github.com/PHPOffice/PHPWord
14 *
15 * @license     http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16 */
17
18namespace PhpOffice\PhpWord\Reader\RTF;
19
20use PhpOffice\PhpWord\PhpWord;
21use PhpOffice\PhpWord\SimpleType\Jc;
22
/**
 * RTF document reader.
 *
 * A small single-pass, character-by-character RTF parser. It recognises a
 * limited set of control words (see parseControl()) and appends every piece of
 * non-skipped text to a text run of the section created in read().
 *
 * References:
 * - How to Write an RTF Reader http://latex2rtf.sourceforge.net/rtfspec_45.html
 * - PHP rtfclass by Markus Fischer https://github.com/mfn/rtfclass
 * - JavaScript RTF-parser by LazyGyu https://github.com/lazygyu/RTF-parser
 *
 * @since 0.11.0
 *
 * @SuppressWarnings(PHPMD.UnusedPrivateMethod)
 */
class Document
{
    /**
     * Names of the private handler methods parseControl() dispatches to.
     *
     * @var string
     */
    public const PARA = 'readParagraph';
    public const STYL = 'readStyle';
    public const SKIP = 'readSkip';

    /**
     * PhpWord object.
     *
     * @var \PhpOffice\PhpWord\PhpWord
     */
    private $phpWord;

    /**
     * Section object.
     *
     * @var \PhpOffice\PhpWord\Element\Section
     */
    private $section;

    /**
     * Textrun object that currently receives parsed text.
     *
     * @var \PhpOffice\PhpWord\Element\TextRun
     */
    private $textrun;

    /**
     * RTF content; must be assigned by the caller before read() is invoked.
     *
     * @var string
     */
    public $rtf;

    /**
     * Content length in bytes.
     *
     * @var int
     */
    private $length = 0;

    /**
     * Index of the byte currently being examined.
     *
     * @var int
     */
    private $offset = 0;

    /**
     * Control word currently being buffered (without the leading backslash).
     *
     * @var string
     */
    private $control = '';

    /**
     * Text buffered since the last flush.
     *
     * @var string
     */
    private $text = '';

    /**
     * Whether the parser is currently inside a control word.
     *
     * @var bool
     */
    private $isControl = false;

    /**
     * Whether the next character is the first one after a backslash
     * (watch out for control symbols and escaped backslashes).
     *
     * @var bool
     */
    private $isFirst = false;

    /**
     * Stack of saved flag sets; one entry per currently open `{` group.
     *
     * @var array
     */
    private $groups = [];

    /**
     * Parser flags of the current group ('paragraph', 'property', 'skipped',
     * 'styles', 'text', 'value').
     *
     * @var array
     */
    private $flags = [];

    /**
     * Parse RTF content.
     *
     * - Marks controlling characters `{`, `}`, and `\`
     * - Removes line endings
     * - Builds control words and control symbols
     * - Pushes every other character into the text queue
     *
     * The parser state is reset on every call, so the same instance can parse
     * several documents one after another.
     *
     * @todo Use `fread` stream for scalability
     */
    public function read(PhpWord $phpWord): void
    {
        $markers = [
            123 => 'markOpening',   // {
            125 => 'markClosing',   // }
            92 => 'markBackslash',  // \
            10 => 'markNewline',    // LF
            13 => 'markNewline',    // CR
        ];

        $this->phpWord = $phpWord;
        $this->section = $phpWord->addSection();
        $this->textrun = $this->section->addTextRun();

        // Reset all parser state: without this a second read() would start at
        // the previous end offset and inherit stale flags/groups.
        // The cast keeps an unset (null) $rtf from reaching strlen().
        $this->length = strlen((string) $this->rtf);
        $this->offset = 0;
        $this->control = '';
        $this->text = '';
        $this->groups = [];
        $this->flags = ['paragraph' => true]; // Set paragraph flag from the beginning
        $this->setControl(false);

        // Walk each character
        while ($this->offset < $this->length) {
            $char = $this->rtf[$this->offset];
            $ascii = ord($char);

            if (isset($markers[$ascii])) {
                // Marker found: {, }, \, LF, or CR
                $markerFunction = $markers[$ascii];
                $this->$markerFunction();
            } elseif (false === $this->isControl) {
                // Not inside a control word: plain text
                $this->pushText($char);
            } elseif (1 === preg_match('/^[a-zA-Z0-9-]?$/', $char)) {
                // No delimiter: keep buffering the control word
                $this->control .= $char;
                $this->isFirst = false;
            } elseif ($this->isFirst) {
                // Non-alphanumeric character right after the backslash
                // (control symbol): it is dropped and not interpreted.
                $this->isFirst = false;
            } elseif (' ' === $char) {
                // Space delimiter: parse the buffered control word and discard the space
                $this->flushControl(true);
            }
            // Any other delimiter character inside a control word is ignored.

            ++$this->offset;
        }
        $this->flushText();
    }

    /**
     * Mark opening bracket `{` character: flush pending input and save the
     * current flags so they can be restored when the group closes.
     */
    private function markOpening(): void
    {
        $this->flush(true);
        $this->groups[] = $this->flags;
    }

    /**
     * Mark closing bracket `}` character: flush pending input and restore the
     * flags that were active before the group was opened.
     *
     * An unbalanced `}` (no open group) is ignored; popping an empty stack
     * would otherwise replace the flags array with null.
     */
    private function markClosing(): void
    {
        $this->flush(true);
        if ([] !== $this->groups) {
            $this->flags = array_pop($this->groups);
        }
    }

    /**
     * Mark backslash `\` character.
     *
     * A backslash directly following another backslash is an escaped literal
     * backslash; otherwise it starts a new control word.
     */
    private function markBackslash(): void
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '\\';

            return;
        }

        $this->flush();
        $this->setControl(true);
        $this->control = '';
    }

    /**
     * Mark newline character: Flush control word because it's not possible to span multiline.
     */
    private function markNewline(): void
    {
        if ($this->isControl) {
            $this->flushControl(true);
        }
    }

    /**
     * Flush control word or text, depending on the current parser mode.
     *
     * @param bool $isControl When true and a control word is flushed, control mode is left afterwards
     */
    private function flush($isControl = false): void
    {
        if ($this->isControl) {
            $this->flushControl($isControl);
        } else {
            $this->flushText();
        }
    }

    /**
     * Flush control word: split the buffer into word and numeric parameter and
     * hand them to parseControl(). Buffers that are not a valid control word
     * are silently ignored.
     *
     * @param bool $isControl When true, control mode is left after flushing
     */
    private function flushControl($isControl = false): void
    {
        if (1 === preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/', $this->control, $match)) {
            [, $control, $parameter] = $match;
            $this->parseControl($control, $parameter);
        }

        if (true === $isControl) {
            $this->setControl(false);
        }
    }

    /**
     * Flush text in queue.
     *
     * Inside a group flagged with a 'property' the text is recorded as that
     * property's value; otherwise the first text of a paragraph is recorded in
     * the 'text' flag. The text is added to the document unless the current
     * group is flagged as skipped.
     */
    private function flushText(): void
    {
        if ('' === $this->text) {
            return;
        }

        if (isset($this->flags['property'])) { // Set property
            $this->flags['value'] = $this->text;
        } elseif (true === ($this->flags['paragraph'] ?? null)) { // Set text
            $this->flags['paragraph'] = false;
            $this->flags['text'] = $this->text;
        }

        // Add text if it's not flagged as skipped
        if (!isset($this->flags['skipped'])) {
            $this->readText();
        }

        $this->text = '';
    }

    /**
     * Reset control word and first char state.
     *
     * @param bool $value
     */
    private function setControl($value): void
    {
        $this->isControl = $value;
        $this->isFirst = $value;
    }

    /**
     * Push text into queue; `<` and `>` are stored as `&lt;` and `&gt;`.
     *
     * @param string $char
     */
    private function pushText($char): void
    {
        if ('<' === $char) {
            $this->text .= '&lt;';
        } elseif ('>' === $char) {
            $this->text .= '&gt;';
        } else {
            $this->text .= $char;
        }
    }

    /**
     * Parse control: look the control word up in the table of supported words
     * and dispatch to the matching handler. Unknown control words are ignored.
     *
     * @param string $control   Control word without backslash and parameter
     * @param string $parameter Numeric parameter as written in the source, or '' when absent
     */
    private function parseControl($control, $parameter): void
    {
        // RTF toggles: `\b` switches a property on, `\b0` switches it off.
        $enabled = !is_numeric($parameter) || 0 !== (int) $parameter;

        // RTF `\fsN` is given in half-points, PhpWord font sizes are in points.
        // A missing/non-numeric parameter is passed through unchanged.
        $fontSize = is_numeric($parameter) ? (int) $parameter / 2 : $parameter;

        // NOTE(review): in RTF underline is `\ul` while `\uN` denotes a Unicode
        // character; the 'u' mapping is kept as-is for backward compatibility.
        $controls = [
            'par' => [self::PARA,    'paragraph',    true],
            'b' => [self::STYL,    'font',         'bold',          $enabled],
            'i' => [self::STYL,    'font',         'italic',        $enabled],
            'u' => [self::STYL,    'font',         'underline',     true],
            'strike' => [self::STYL,    'font',         'strikethrough', $enabled],
            'fs' => [self::STYL,    'font',         'size',          $fontSize],
            'qc' => [self::STYL,    'paragraph',    'alignment',     Jc::CENTER],
            'sa' => [self::STYL,    'paragraph',    'spaceAfter',    $parameter],
            'fonttbl' => [self::SKIP,    'fonttbl',      null],
            'colortbl' => [self::SKIP,    'colortbl',     null],
            'info' => [self::SKIP,    'info',         null],
            'generator' => [self::SKIP,    'generator',    null],
            'title' => [self::SKIP,    'title',        null],
            'subject' => [self::SKIP,    'subject',      null],
            'category' => [self::SKIP,    'category',     null],
            'keywords' => [self::SKIP,    'keywords',     null],
            'comment' => [self::SKIP,    'comment',      null],
            'shppict' => [self::SKIP,    'pic',          null],
            'fldinst' => [self::SKIP,    'link',         null],
        ];

        if (!isset($controls[$control])) {
            return;
        }

        // First entry is the handler name, the rest are its directives.
        $directives = $controls[$control];
        $function = array_shift($directives);
        $this->$function($directives);
    }

    /**
     * Read paragraph: start a new text run and set the given flag.
     *
     * @param array $directives [flag name, flag value]
     */
    private function readParagraph($directives): void
    {
        [$property, $value] = $directives;
        $this->textrun = $this->section->addTextRun();
        $this->flags[$property] = $value;
    }

    /**
     * Read style: remember a style property for the current group.
     *
     * @param array $directives [style type ('font'|'paragraph'), property name, value]
     */
    private function readStyle($directives): void
    {
        [$style, $property, $value] = $directives;
        $this->flags['styles'][$style][$property] = $value;
    }

    /**
     * Read skip: flag the current group as a skipped property group so that
     * its text is not added to the document.
     *
     * @param array $directives [property name, ...]
     */
    private function readSkip($directives): void
    {
        [$property] = $directives;
        $this->flags['property'] = $property;
        $this->flags['skipped'] = true;
    }

    /**
     * Read text: add the buffered text to the current text run and apply the
     * font styles collected for the current group, if any.
     */
    private function readText(): void
    {
        $text = $this->textrun->addText($this->text);
        if (isset($this->flags['styles']['font'])) {
            $text->getFontStyle()->setStyleByArray($this->flags['styles']['font']);
        }
    }
}