Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.06% covered (success)
98.06%
101 / 103
93.33% covered (success)
93.33%
14 / 15
CRAP
0.00% covered (danger)
0.00%
0 / 1
Document
98.06% covered (success)
98.06%
101 / 103
93.33% covered (success)
93.33%
14 / 15
35
0.00% covered (danger)
0.00%
0 / 1
 read
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
7
 markOpening
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 markClosing
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 markBackslash
66.67% covered (warning)
66.67%
4 / 6
0.00% covered (danger)
0.00%
0 / 1
2.15
 markNewline
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
 flush
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 flushControl
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 flushText
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
5
 setControl
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 pushText
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 parseControl
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
3
 readParagraph
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 readStyle
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 readSkip
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 readText
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3/**
4 * This file is part of PHPWord - A pure PHP library for reading and writing
5 * word processing documents.
6 *
7 * PHPWord is free software distributed under the terms of the GNU Lesser
8 * General Public License version 3 as published by the Free Software Foundation.
9 *
10 * For the full copyright and license information, please read the LICENSE
11 * file that was distributed with this source code. For the full list of
12 * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
13 *
14 * @see         https://github.com/PHPOffice/PHPWord
15 *
16 * @license     http://www.gnu.org/licenses/lgpl.txt LGPL version 3
17 */
18
19namespace PhpOffice\PhpWord\Reader\RTF;
20
21use PhpOffice\PhpWord\PhpWord;
22use PhpOffice\PhpWord\SimpleType\Jc;
23
24/**
25 * RTF document reader.
26 *
27 * References:
28 * - How to Write an RTF Reader http://latex2rtf.sourceforge.net/rtfspec_45.html
29 * - PHP rtfclass by Markus Fischer https://github.com/mfn/rtfclass
30 * - JavaScript RTF-parser by LazyGyu https://github.com/lazygyu/RTF-parser
31 *
32 * @since 0.11.0
33 *
34 * @SuppressWarnings(PHPMD.UnusedPrivateMethod)
35 */
36class Document
37{
38    /** @const int */
39    const PARA = 'readParagraph';
40    const STYL = 'readStyle';
41    const SKIP = 'readSkip';
42
43    /**
44     * PhpWord object.
45     *
46     * @var PhpWord
47     */
48    private $phpWord;
49
50    /**
51     * Section object.
52     *
53     * @var \PhpOffice\PhpWord\Element\Section
54     */
55    private $section;
56
57    /**
58     * Textrun object.
59     *
60     * @var \PhpOffice\PhpWord\Element\TextRun
61     */
62    private $textrun;
63
64    /**
65     * RTF content.
66     *
67     * @var string
68     */
69    public $rtf;
70
71    /**
72     * Content length.
73     *
74     * @var int
75     */
76    private $length = 0;
77
78    /**
79     * Character index.
80     *
81     * @var int
82     */
83    private $offset = 0;
84
85    /**
86     * Current control word.
87     *
88     * @var string
89     */
90    private $control = '';
91
92    /**
93     * Text content.
94     *
95     * @var string
96     */
97    private $text = '';
98
99    /**
100     * Parsing a control word flag.
101     *
102     * @var bool
103     */
104    private $isControl = false;
105
106    /**
107     * First character flag: watch out for control symbols.
108     *
109     * @var bool
110     */
111    private $isFirst = false;
112
113    /**
114     * Group groups.
115     *
116     * @var array
117     */
118    private $groups = [];
119
120    /**
121     * Parser flags; not used.
122     *
123     * @var array
124     */
125    private $flags = [];
126
127    /**
128     * Parse RTF content.
129     *
130     * - Marks controlling characters `{`, `}`, and `\`
131     * - Removes line endings
132     * - Builds control words and control symbols
133     * - Pushes every other character into the text queue
134     *
135     * @todo Use `fread` stream for scalability
136     */
137    public function read(PhpWord $phpWord): void
138    {
139        $markers = [
140            123 => 'markOpening',   // {
141            125 => 'markClosing',   // }
142            92 => 'markBackslash', // \
143            10 => 'markNewline',   // LF
144            13 => 'markNewline',   // CR
145        ];
146
147        $this->phpWord = $phpWord;
148        $this->section = $phpWord->addSection();
149        $this->textrun = $this->section->addTextRun();
150        $this->length = strlen($this->rtf);
151
152        $this->flags['paragraph'] = true; // Set paragraph flag from the beginning
153
154        // Walk each characters
155        while ($this->offset < $this->length) {
156            $char = $this->rtf[$this->offset];
157            $ascii = ord($char);
158
159            if (isset($markers[$ascii])) { // Marker found: {, }, \, LF, or CR
160                $markerFunction = $markers[$ascii];
161                $this->$markerFunction();
162            } else {
163                if (false === $this->isControl) { // Non control word: Push character
164                    $this->pushText($char);
165                } else {
166                    if (preg_match('/^[a-zA-Z0-9-]?$/', $char)) { // No delimiter: Buffer control
167                        $this->control .= $char;
168                        $this->isFirst = false;
169                    } else { // Delimiter found: Parse buffered control
170                        if ($this->isFirst) {
171                            $this->isFirst = false;
172                        } else {
173                            if (' ' == $char) { // Discard space as a control word delimiter
174                                $this->flushControl(true);
175                            }
176                        }
177                    }
178                }
179            }
180            ++$this->offset;
181        }
182        $this->flushText();
183    }
184
185    /**
186     * Mark opening braket `{` character.
187     */
188    private function markOpening(): void
189    {
190        $this->flush(true);
191        array_push($this->groups, $this->flags);
192    }
193
194    /**
195     * Mark closing braket `}` character.
196     */
197    private function markClosing(): void
198    {
199        $this->flush(true);
200        $this->flags = array_pop($this->groups);
201    }
202
203    /**
204     * Mark backslash `\` character.
205     */
206    private function markBackslash(): void
207    {
208        if ($this->isFirst) {
209            $this->setControl(false);
210            $this->text .= '\\';
211        } else {
212            $this->flush();
213            $this->setControl(true);
214            $this->control = '';
215        }
216    }
217
218    /**
219     * Mark newline character: Flush control word because it's not possible to span multiline.
220     */
221    private function markNewline(): void
222    {
223        if ($this->isControl) {
224            $this->flushControl(true);
225        }
226    }
227
228    /**
229     * Flush control word or text.
230     *
231     * @param bool $isControl
232     */
233    private function flush($isControl = false): void
234    {
235        if ($this->isControl) {
236            $this->flushControl($isControl);
237        } else {
238            $this->flushText();
239        }
240    }
241
242    /**
243     * Flush control word.
244     *
245     * @param bool $isControl
246     */
247    private function flushControl($isControl = false): void
248    {
249        if (1 === preg_match('/^([A-Za-z]+)(-?[0-9]*) ?$/', $this->control, $match)) {
250            [, $control, $parameter] = $match;
251            $this->parseControl($control, $parameter);
252        }
253
254        if (true === $isControl) {
255            $this->setControl(false);
256        }
257    }
258
259    /**
260     * Flush text in queue.
261     */
262    private function flushText(): void
263    {
264        if ($this->text != '') {
265            if (isset($this->flags['property'])) { // Set property
266                $this->flags['value'] = $this->text;
267            } else { // Set text
268                if (true === $this->flags['paragraph']) {
269                    $this->flags['paragraph'] = false;
270                    $this->flags['text'] = $this->text;
271                }
272            }
273
274            // Add text if it's not flagged as skipped
275            if (!isset($this->flags['skipped'])) {
276                $this->readText();
277            }
278
279            $this->text = '';
280        }
281    }
282
283    /**
284     * Reset control word and first char state.
285     *
286     * @param bool $value
287     */
288    private function setControl($value): void
289    {
290        $this->isControl = $value;
291        $this->isFirst = $value;
292    }
293
294    /**
295     * Push text into queue.
296     *
297     * @param string $char
298     */
299    private function pushText($char): void
300    {
301        if ('<' == $char) {
302            $this->text .= '&lt;';
303        } elseif ('>' == $char) {
304            $this->text .= '&gt;';
305        } else {
306            $this->text .= $char;
307        }
308    }
309
310    /**
311     * Parse control.
312     *
313     * @param string $control
314     * @param string $parameter
315     */
316    private function parseControl($control, $parameter): void
317    {
318        $controls = [
319            'par' => [self::PARA,    'paragraph',    true],
320            'b' => [self::STYL,    'font',         'bold',          true],
321            'i' => [self::STYL,    'font',         'italic',        true],
322            'u' => [self::STYL,    'font',         'underline',     true],
323            'strike' => [self::STYL,    'font',         'strikethrough', true],
324            'fs' => [self::STYL,    'font',         'size',          $parameter],
325            'qc' => [self::STYL,    'paragraph',    'alignment',     Jc::CENTER],
326            'sa' => [self::STYL,    'paragraph',    'spaceAfter',    $parameter],
327            'fonttbl' => [self::SKIP,    'fonttbl',      null],
328            'colortbl' => [self::SKIP,    'colortbl',     null],
329            'info' => [self::SKIP,    'info',         null],
330            'generator' => [self::SKIP,    'generator',    null],
331            'title' => [self::SKIP,    'title',        null],
332            'subject' => [self::SKIP,    'subject',      null],
333            'category' => [self::SKIP,    'category',     null],
334            'keywords' => [self::SKIP,    'keywords',     null],
335            'comment' => [self::SKIP,    'comment',      null],
336            'shppict' => [self::SKIP,    'pic',          null],
337            'fldinst' => [self::SKIP,    'link',         null],
338        ];
339
340        if (isset($controls[$control])) {
341            [$function] = $controls[$control];
342            if (method_exists($this, $function)) {
343                $directives = $controls[$control];
344                array_shift($directives); // remove the function variable; we won't need it
345                $this->$function($directives);
346            }
347        }
348    }
349
350    /**
351     * Read paragraph.
352     *
353     * @param array $directives
354     */
355    private function readParagraph($directives): void
356    {
357        [$property, $value] = $directives;
358        $this->textrun = $this->section->addTextRun();
359        $this->flags[$property] = $value;
360    }
361
362    /**
363     * Read style.
364     *
365     * @param array $directives
366     */
367    private function readStyle($directives): void
368    {
369        [$style, $property, $value] = $directives;
370        $this->flags['styles'][$style][$property] = $value;
371    }
372
373    /**
374     * Read skip.
375     *
376     * @param array $directives
377     */
378    private function readSkip($directives): void
379    {
380        [$property] = $directives;
381        $this->flags['property'] = $property;
382        $this->flags['skipped'] = true;
383    }
384
385    /**
386     * Read text.
387     */
388    private function readText(): void
389    {
390        $text = $this->textrun->addText($this->text);
391        if (isset($this->flags['styles']['font'])) {
392            $text->getFontStyle()->setStyleByArray($this->flags['styles']['font']);
393        }
394    }
395}