Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.33% covered (success)
97.33%
474 / 487
75.00% covered (warning)
75.00%
21 / 28
CRAP
0.00% covered (danger)
0.00%
0 / 1
AbstractPart
97.33% covered (success)
97.33%
474 / 487
75.00% covered (warning)
75.00%
21 / 28
168
0.00% covered (danger)
0.00%
0 / 1
 read
n/a
0 / 0
n/a
0 / 0
0
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setRels
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setImageLoading
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 hasImageLoading
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getCommentReferences
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setCommentReferences
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setCommentReference
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
3.01
 getCommentReference
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 readParagraph
98.82% covered (success)
98.82%
84 / 85
0.00% covered (danger)
0.00%
0 / 1
32
 readFormField
95.24% covered (success)
95.24%
60 / 63
0.00% covered (danger)
0.00%
0 / 1
27
 getHeadingDepth
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
5
 readRun
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
7
 readRunChild
94.67% covered (success)
94.67%
71 / 75
0.00% covered (danger)
0.00%
0 / 1
28.12
 readRubyProperties
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
1
 readTable
100.00% covered (success)
100.00%
32 / 32
100.00% covered (success)
100.00%
1 / 1
12
 readParagraphStyle
100.00% covered (success)
100.00%
35 / 35
100.00% covered (success)
100.00%
1 / 1
2
 readFontStyle
93.10% covered (success)
93.10%
27 / 29
0.00% covered (danger)
0.00%
0 / 1
4.01
 readTableStyle
96.30% covered (success)
96.30%
26 / 27
0.00% covered (danger)
0.00%
0 / 1
7
 readTablePosition
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
1
 readTableIndent
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 readCellStyle
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
3
 findPossibleElement
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 findPossibleAttribute
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 readStyleDefs
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
6
 readStyleDef
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
5
 isOn
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
4
 getMediaTarget
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getTargetMode
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3/**
4 * This file is part of PHPWord - A pure PHP library for reading and writing
5 * word processing documents.
6 *
7 * PHPWord is free software distributed under the terms of the GNU Lesser
8 * General Public License version 3 as published by the Free Software Foundation.
9 *
10 * For the full copyright and license information, please read the LICENSE
11 * file that was distributed with this source code. For the full list of
12 * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
13 *
14 * @see         https://github.com/PHPOffice/PHPWord
15 *
16 * @license     http://www.gnu.org/licenses/lgpl.txt LGPL version 3
17 */
18
19namespace PhpOffice\PhpWord\Reader\Word2007;
20
21use DateTime;
22use DOMElement;
23use InvalidArgumentException;
24use PhpOffice\Math\Reader\OfficeMathML;
25use PhpOffice\PhpWord\ComplexType\RubyProperties;
26use PhpOffice\PhpWord\ComplexType\TblWidth as TblWidthComplexType;
27use PhpOffice\PhpWord\Element\AbstractContainer;
28use PhpOffice\PhpWord\Element\AbstractElement;
29use PhpOffice\PhpWord\Element\FormField;
30use PhpOffice\PhpWord\Element\Ruby;
31use PhpOffice\PhpWord\Element\Text;
32use PhpOffice\PhpWord\Element\TextRun;
33use PhpOffice\PhpWord\Element\TrackChange;
34use PhpOffice\PhpWord\PhpWord;
35use PhpOffice\PhpWord\Shared\XMLReader;
36
37/**
38 * Abstract part reader.
39 *
40 * This class is inherited by ODText reader
41 *
42 * @since 0.10.0
43 */
44abstract class AbstractPart
45{
46    /**
47     * Conversion method.
48     *
49     * @const string
50     */
51    const READ_VALUE = 'attributeValue';            // Read attribute value
52    const READ_EQUAL = 'attributeEquals';           // Read `true` when attribute value equals specified value
53    const READ_TRUE = 'attributeTrue';              // Read `true` when element exists
54    const READ_FALSE = 'attributeFalse';            // Read `false` when element exists
55    const READ_SIZE = 'attributeMultiplyByTwo';     // Read special attribute value for Font::$size
56
57    /**
58     * Document file.
59     *
60     * @var string
61     */
62    protected $docFile;
63
64    /**
65     * XML file.
66     *
67     * @var string
68     */
69    protected $xmlFile;
70
71    /**
72     * Part relationships.
73     *
74     * @var array
75     */
76    protected $rels = [];
77
78    /**
79     * Comment references.
80     *
81     * @var array<string, array<string, null|AbstractElement>>
82     */
83    protected $commentRefs = [];
84
85    /**
86     * Image Loading.
87     *
88     * @var bool
89     */
90    protected $imageLoading = true;
91
/**
 * Read part.
 *
 * Every concrete part reader must supply its own implementation.
 *
 * @param PhpWord $phpWord document object handed to the reader (presumably the model being populated; confirm against implementations)
 */
abstract public function read(PhpWord $phpWord);
96
/**
 * Build a part reader bound to one document and one XML part.
 *
 * @param string $docFile document file; later used to build zip:// image sources
 * @param string $xmlFile XML file this reader is responsible for
 */
public function __construct($docFile, $xmlFile)
{
    [$this->docFile, $this->xmlFile] = [$docFile, $xmlFile];
}
108
/**
 * Replace the relationships known to this part.
 *
 * @param array $value part relationships
 */
public function setRels($value): void
{
    // Stored as-is; no validation or merging with previous relationships.
    $this->rels = $value;
}
118
/**
 * Turn image loading on or off.
 *
 * @param bool $value true to load images, false to skip them
 *
 * @return self this reader, to allow chaining
 */
public function setImageLoading(bool $value): self
{
    $this->imageLoading = $value;

    return $this;
}
125
/**
 * Tell whether image loading is currently enabled.
 *
 * @return bool the image loading flag
 */
public function hasImageLoading(): bool
{
    return $this->imageLoading;
}
130
/**
 * Return every comment reference collected so far.
 *
 * The outer key is the comment id; each entry holds a 'start' and an 'end' slot.
 *
 * @return array<string, array<string, null|AbstractElement>>
 */
public function getCommentReferences(): array
{
    return $this->commentRefs;
}
140
/**
 * Replace the whole set of comment references.
 *
 * @param array<string, array<string, null|AbstractElement>> $commentRefs references keyed by comment id
 *
 * @return self this reader, to allow chaining
 */
public function setCommentReferences(array $commentRefs): self
{
    $this->commentRefs = $commentRefs;

    return $this;
}
152
/**
 * Record the element at which a comment starts or ends.
 *
 * @param string $type either "start" or "end"
 * @param string $id comment identifier
 * @param AbstractElement $element element to associate with that end of the comment
 *
 * @return self this reader, to allow chaining
 */
private function setCommentReference(string $type, string $id, AbstractElement $element): self
{
    if ($type !== 'start' && $type !== 'end') {
        throw new InvalidArgumentException('Type must be "start" or "end"');
    }

    // First time this comment id is seen: create both slots empty.
    $isKnown = array_key_exists($id, $this->commentRefs);
    if (!$isKnown) {
        $this->commentRefs[$id] = ['start' => null, 'end' => null];
    }

    $this->commentRefs[$id][$type] = $element;

    return $this;
}
172
/**
 * Look up the start/end elements recorded for one comment.
 *
 * @param string $id comment identifier
 *
 * @return array<string, null|AbstractElement>
 */
protected function getCommentReference(string $id): array
{
    if (array_key_exists($id, $this->commentRefs)) {
        return $this->commentRefs[$id];
    }

    // Unknown id: the comment was never referenced while reading.
    $message = sprintf('Comment with id %s isn\'t referenced in document', $id);

    throw new InvalidArgumentException($message);
}
186
/**
 * Read w:p.
 *
 * The paragraph is mapped to exactly one kind of content on $parent. The
 * first matching case wins, in this order: form field, preserve text (field
 * instruction), formula, list item, heading/title, and finally a plain text
 * run (or a text break when the paragraph has no run-like children).
 *
 * @param AbstractContainer $parent
 * @param string $docPart
 *
 * @todo Get font style for preserve text
 */
protected function readParagraph(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
{
    // Paragraph style
    $paragraphStyle = $xmlReader->elementExists('w:pPr', $domNode) ? $this->readParagraphStyle($xmlReader, $domNode) : null;

    // FormField
    if ($xmlReader->elementExists('w:r/w:fldChar/w:ffData', $domNode)) {
        $partOfFormField = false;
        $formNodes = [];
        $formType = null;
        $textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag', $domNode);
        if ($textRunContainers > 0) {
            $nodes = $xmlReader->getElements('*', $domNode);
            $paragraph = $parent->addTextRun($paragraphStyle);
            foreach ($nodes as $node) {
                if ($xmlReader->elementExists('w:fldChar/w:ffData', $node)) {
                    // Run that opens a form field: remember its type
                    $partOfFormField = true;
                    $formNodes[] = $node;
                    if ($xmlReader->elementExists('w:fldChar/w:ffData/w:ddList', $node)) {
                        $formType = 'dropdown';
                    } elseif ($xmlReader->elementExists('w:fldChar/w:ffData/w:textInput', $node)) {
                        $formType = 'textinput';
                    } elseif ($xmlReader->elementExists('w:fldChar/w:ffData/w:checkBox', $node)) {
                        $formType = 'checkbox';
                    }
                } elseif ($partOfFormField &&
                    $xmlReader->elementExists('w:fldChar', $node) &&
                    'end' == $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar')
                ) {
                    // Run that closes the form field: process everything collected
                    $formNodes[] = $node;
                    $partOfFormField = false;
                    $this->readFormField($xmlReader, $formNodes, $paragraph, $paragraphStyle, $formType);
                } elseif ($partOfFormField) {
                    $formNodes[] = $node;
                } else {
                    // normal runs
                    $this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle);
                }
            }
        }

        // The paragraph is fully handled. Without this return the same runs
        // would be read again below and added to $parent a second time.
        return;
    }

    // PreserveText
    if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
        $ignoreText = false;
        $textContent = '';
        $fontStyle = $this->readFontStyle($xmlReader, $domNode);
        $nodes = $xmlReader->getElements('w:r', $domNode);
        foreach ($nodes as $node) {
            if ($xmlReader->elementExists('w:lastRenderedPageBreak', $node)) {
                $parent->addPageBreak();
            }
            $instrText = $xmlReader->getValue('w:instrText', $node);
            if (null !== $instrText) {
                $textContent .= '{' . $instrText . '}';
            } else {
                // Text between a field's "begin" and "end" markers is skipped
                if ($xmlReader->elementExists('w:fldChar', $node)) {
                    $fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
                    if ('begin' == $fldCharType) {
                        $ignoreText = true;
                    } elseif ('end' == $fldCharType) {
                        $ignoreText = false;
                    }
                }
                if (false === $ignoreText) {
                    $textContent .= $xmlReader->getValue('w:t', $node);
                }
            }
        }
        $parent->addPreserveText(htmlspecialchars($textContent, ENT_QUOTES, 'UTF-8'), $fontStyle, $paragraphStyle);

        return;
    }

    // Formula
    $xmlReader->registerNamespace('m', 'http://schemas.openxmlformats.org/officeDocument/2006/math');
    if ($xmlReader->elementExists('m:oMath', $domNode)) {
        $mathElement = $xmlReader->getElement('m:oMath', $domNode);
        $mathXML = $mathElement->ownerDocument->saveXML($mathElement);
        if (is_string($mathXML)) {
            $reader = new OfficeMathML();
            $math = $reader->read($mathXML);

            $parent->addFormula($math);
        }

        return;
    }

    // List item
    if ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
        $numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
        $levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
        $nodes = $xmlReader->getElements('*', $domNode);

        $listItemRun = $parent->addListItemRun($levelId, "PHPWordList{$numId}", $paragraphStyle);

        foreach ($nodes as $node) {
            $this->readRun($xmlReader, $node, $listItemRun, $docPart, $paragraphStyle);
        }

        return;
    }

    // Heading or Title
    $headingDepth = $xmlReader->elementExists('w:pPr', $domNode) ? $this->getHeadingDepth($paragraphStyle) : null;
    if ($headingDepth !== null) {
        $nodes = $xmlReader->getElements('w:r|w:hyperlink', $domNode);
        $hasRubyElement = $xmlReader->elementExists('w:r/w:ruby', $domNode);
        if ($nodes->length === 1 && !$hasRubyElement) {
            // A single run without w:t yields null; cast so htmlspecialchars()
            // always receives a string (null is deprecated since PHP 8.1).
            $textContent = htmlspecialchars((string) $xmlReader->getValue('w:t', $nodes->item(0)), ENT_QUOTES, 'UTF-8');
        } else {
            $textContent = new TextRun($paragraphStyle);
            foreach ($nodes as $node) {
                $this->readRun($xmlReader, $node, $textContent, $docPart, $paragraphStyle);
            }
        }
        $parent->addTitle($textContent, $headingDepth);

        return;
    }

    // Text and TextRun
    $textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag|w:commentReference|w:commentRangeStart|w:commentRangeEnd', $domNode);
    if (0 === $textRunContainers) {
        $parent->addTextBreak(1, $paragraphStyle);

        return;
    }

    $nodes = $xmlReader->getElements('*', $domNode);
    $paragraph = $parent->addTextRun($paragraphStyle);
    foreach ($nodes as $node) {
        $this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle);
    }
}
329
/**
 * Build a form field element from the runs that make up one field.
 *
 * The first node must carry the w:fldChar/w:ffData definition; its children
 * supply the field name, defaults and, for dropdowns, the list entries. For
 * text inputs the displayed text is gathered from the runs between the
 * "separate" and "end" field characters.
 *
 * @param DOMElement[] $domNodes runs belonging to the field, in document order
 * @param AbstractContainer $parent
 * @param mixed $paragraphStyle
 * @param string $formType one of 'textinput', 'checkbox', 'dropdown'; anything else is ignored
 */
private function readFormField(XMLReader $xmlReader, array $domNodes, $parent, $paragraphStyle, $formType): void
{
    if (!in_array($formType, ['textinput', 'checkbox', 'dropdown'], true)) {
        return;
    }

    $formField = $parent->addFormField($formType, null, $paragraphStyle);
    $ffData = $xmlReader->getElement('w:fldChar/w:ffData', $domNodes[0]);

    foreach ($xmlReader->getElements('*', $ffData) as $node) {
        /** @var DOMElement $node */
        switch ($node->localName) {
            case 'name':
                $formField->setName($node->getAttribute('w:val'));

                break;
            case 'ddList':
                $listEntries = [];
                foreach ($xmlReader->getElements('*', $node) as $ddListNode) {
                    switch ($ddListNode->localName) {
                        case 'result':
                            $formField->setValue($xmlReader->getAttribute('w:val', $ddListNode));

                            break;
                        case 'default':
                            $formField->setDefault($xmlReader->getAttribute('w:val', $ddListNode));

                            break;
                        case 'listEntry':
                            $listEntries[] = $xmlReader->getAttribute('w:val', $ddListNode);

                            break;
                    }
                }
                $formField->setEntries($listEntries);
                // The selected value is an index into the entries. Only resolve
                // it to a text when that index really exists, so a stale or
                // malformed w:result does not raise an undefined-key warning.
                $selected = $formField->getValue();
                if (null !== $selected && array_key_exists($selected, $listEntries)) {
                    $formField->setText($listEntries[$selected]);
                }

                break;
            case 'textInput':
                foreach ($xmlReader->getElements('*', $node) as $textInputNode) {
                    switch ($textInputNode->localName) {
                        case 'default':
                            $formField->setDefault($xmlReader->getAttribute('w:val', $textInputNode));

                            break;
                        case 'format':
                        case 'maxLength':
                            // recognised but not imported
                            break;
                    }
                }

                break;
            case 'checkBox':
                foreach ($xmlReader->getElements('*', $node) as $checkBoxNode) {
                    switch ($checkBoxNode->localName) {
                        case 'default':
                            $formField->setDefault($xmlReader->getAttribute('w:val', $checkBoxNode));

                            break;
                        case 'checked':
                            $formField->setValue($xmlReader->getAttribute('w:val', $checkBoxNode));

                            break;
                        case 'size':
                        case 'sizeAuto':
                            // recognised but not imported
                            break;
                    }
                }

                break;
        }
    }

    if ('textinput' == $formType) {
        // Only the text between the "separate" and "end" markers is the
        // field's current content.
        $ignoreText = true;
        $textContent = '';
        foreach ($domNodes as $node) {
            if ($xmlReader->elementExists('w:fldChar', $node)) {
                $fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
                if ('separate' == $fldCharType) {
                    $ignoreText = false;
                } elseif ('end' == $fldCharType) {
                    $ignoreText = true;
                }
            }

            if (false === $ignoreText) {
                $textContent .= $xmlReader->getValue('w:t', $node);
            }
        }
        $escapedText = htmlspecialchars($textContent, ENT_QUOTES, 'UTF-8');
        $formField->setValue($escapedText);
        $formField->setText($escapedText);
    }
}
432
/**
 * Returns the depth of the Heading, returns 0 for a Title.
 *
 * Only the 'styleName' entry of the style is consulted: the exact name
 * "Title" gives 0, and a name containing "Heading" followed by a digit gives
 * that digit. The depth is always returned as an integer.
 *
 * @param null|array $paragraphStyle paragraph style definition, if any
 *
 * @return null|int depth, or null when the style does not describe a heading
 */
private function getHeadingDepth(?array $paragraphStyle = null)
{
    if (!isset($paragraphStyle['styleName'])) {
        return null;
    }

    $styleName = $paragraphStyle['styleName'];
    if ('Title' === $styleName) {
        return 0;
    }

    $headingMatches = [];
    preg_match('/Heading(\d)/', $styleName, $headingMatches);
    if (!empty($headingMatches)) {
        // The captured digit is a string; cast so both branches return int.
        return (int) $headingMatches[1];
    }

    return null;
}
454
/**
 * Read w:r.
 *
 * Wrapper elements (insertions, deletions, smart tags, hyperlinks, comment
 * references) are unwrapped recursively; an actual run has each of its
 * children handed to readRunChild(). Afterwards, if the node contains a
 * comment reference with an id, the last element of $parent is recorded as
 * both the start and the end of that comment.
 *
 * @param AbstractContainer $parent
 * @param string $docPart
 * @param mixed $paragraphStyle
 *
 * @todo Footnote paragraph style
 */
protected function readRun(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart, $paragraphStyle = null): void
{
    switch ($domNode->nodeName) {
        case 'w:ins':
        case 'w:del':
        case 'w:smartTag':
        case 'w:hyperlink':
        case 'w:commentReference':
            foreach ($xmlReader->getElements('*', $domNode) as $wrapped) {
                $this->readRun($xmlReader, $wrapped, $parent, $docPart, $paragraphStyle);
            }

            break;
        case 'w:r':
            $fontStyle = $this->readFontStyle($xmlReader, $domNode);
            foreach ($xmlReader->getElements('*', $domNode) as $child) {
                $this->readRunChild($xmlReader, $child, $parent, $docPart, $paragraphStyle, $fontStyle);
            }

            break;
    }

    // Matches a commentReference descendant whatever its namespace prefix
    $commentPath = './/*["commentReference"=local-name()]';
    if (!$xmlReader->elementExists($commentPath, $domNode)) {
        return;
    }

    $commentNode = $xmlReader->getElements($commentPath, $domNode)->item(0);
    $idAttribute = $commentNode->attributes->getNamedItem('id');
    if (!$idAttribute) {
        return;
    }

    $commentId = $idAttribute->nodeValue;
    $lastElement = $parent->getElement($parent->countElements() - 1);
    $this->setCommentReference('start', $commentId, $lastElement);
    $this->setCommentReference('end', $commentId, $lastElement);
}
490
/**
 * Parses nodes under w:r.
 *
 * Dispatches on the child's node name and adds the matching element
 * (footnote, endnote, image, object placeholder, break, tab, text, link,
 * ruby, ...) to $parent. Unknown children are ignored.
 *
 * @param string $docPart
 * @param mixed $paragraphStyle
 * @param mixed $fontStyle
 */
protected function readRunChild(XMLReader $xmlReader, DOMElement $node, AbstractContainer $parent, $docPart, $paragraphStyle = null, $fontStyle = null): void
{
    // Element that contains the run this child belongs to
    $runParent = $node->parentNode->parentNode;

    switch ($node->nodeName) {
        case 'w:footnoteReference':
            // Footnote
            $relationId = $xmlReader->getAttribute('w:id', $node);
            $parent->addFootnote()->setRelationId($relationId);

            break;
        case 'w:endnoteReference':
            // Endnote
            $relationId = $xmlReader->getAttribute('w:id', $node);
            $parent->addEndnote()->setRelationId($relationId);

            break;
        case 'w:pict':
            // Image
            $relationId = $xmlReader->getAttribute('r:id', $node, 'v:shape/v:imagedata');
            $target = $this->getMediaTarget($docPart, $relationId);
            if (!$this->hasImageLoading() || null === $target) {
                break;
            }
            // External targets are used directly, others are read from the archive
            $isExternal = 'External' == $this->getTargetMode($docPart, $relationId);
            $parent->addImage($isExternal ? $target : "zip://{$this->docFile}#{$target}");

            break;
        case 'w:drawing':
            // Office 2011 Image
            $xmlReader->registerNamespace('wp', 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing');
            $xmlReader->registerNamespace('r', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships');
            $xmlReader->registerNamespace('pic', 'http://schemas.openxmlformats.org/drawingml/2006/picture');
            $xmlReader->registerNamespace('a', 'http://schemas.openxmlformats.org/drawingml/2006/main');

            $name = $xmlReader->getAttribute('name', $node, 'wp:inline/a:graphic/a:graphicData/pic:pic/pic:nvPicPr/pic:cNvPr');
            $embedId = $xmlReader->getAttribute('r:embed', $node, 'wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip');
            $foundInline = $name !== null || $embedId !== null;
            if (!$foundInline) {
                // some Converters puts images on a different path
                $name = $xmlReader->getAttribute('name', $node, 'wp:anchor/a:graphic/a:graphicData/pic:pic/pic:nvPicPr/pic:cNvPr');
                $embedId = $xmlReader->getAttribute('r:embed', $node, 'wp:anchor/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip');
            }
            $target = $this->getMediaTarget($docPart, $embedId);
            if ($this->hasImageLoading() && null !== $target) {
                $parent->addImage("zip://{$this->docFile}#{$target}", null, false, $name);
            }

            break;
        case 'w:object':
            // Object: represented by a textual placeholder naming its target
            $relationId = $xmlReader->getAttribute('r:id', $node, 'o:OLEObject');
            $target = $this->getMediaTarget($docPart, $relationId);
            if (null !== $target) {
                $parent->addText('&lt;Object: ' . $target . '>', $fontStyle, $paragraphStyle);
            }

            break;
        case 'w:br':
            $parent->addTextBreak();

            break;
        case 'w:tab':
            $parent->addText("\t");

            break;
        case 'mc:AlternateContent':
            if (!$node->hasChildNodes()) {
                break;
            }
            // Get fallback instead of mc:Choice to make sure it is compatible
            $fallbackElements = $node->getElementsByTagName('Fallback');
            if ($fallbackElements->length) {
                $fallbackText = htmlspecialchars($fallbackElements->item(0)->nodeValue, ENT_QUOTES, 'UTF-8');
                $parent->addText($fallbackText, $fontStyle, $paragraphStyle);
            }

            break;
        case 'w:t':
        case 'w:delText':
            // TextRun
            $textContent = htmlspecialchars($xmlReader->getValue('.', $node), ENT_QUOTES, 'UTF-8');

            if ($runParent->nodeName == 'w:hyperlink') {
                // Becomes a link only when the relationship resolves to a target
                $relationId = $xmlReader->getAttribute('r:id', $runParent);
                $target = $this->getMediaTarget($docPart, $relationId);
                if (null === $target) {
                    $parent->addText($textContent, $fontStyle, $paragraphStyle);
                } else {
                    $parent->addLink($target, $textContent, $fontStyle, $paragraphStyle);
                }

                break;
            }

            /** @var AbstractElement $element */
            $element = $parent->addText($textContent, $fontStyle, $paragraphStyle);
            $isTrackedChange = in_array($runParent->nodeName, ['w:ins', 'w:del']);
            if ($isTrackedChange) {
                $changeType = TrackChange::INSERTED;
                if ($runParent->nodeName == 'w:del') {
                    $changeType = TrackChange::DELETED;
                }
                $author = $runParent->getAttribute('w:author');
                $changeDate = DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $runParent->getAttribute('w:date'));
                if (!($changeDate instanceof DateTime)) {
                    // Unparseable date: keep the change but without a date
                    $changeDate = null;
                }
                $element->setChangeInfo($changeType, $author, $changeDate);
            }

            break;
        case 'w:softHyphen':
            $parent->addText("\u{200c}", $fontStyle, $paragraphStyle);

            break;
        case 'w:ruby':
            $rubyPropertiesNode = $xmlReader->getElement('w:rubyPr', $node);
            $properties = $this->readRubyProperties($xmlReader, $rubyPropertiesNode);
            // read base text node
            $baseText = new TextRun($paragraphStyle);
            $baseTextNode = $xmlReader->getElement('w:rubyBase/w:r', $node);
            $this->readRun($xmlReader, $baseTextNode, $baseText, $docPart, $paragraphStyle);
            // read the actual ruby text (e.g. furigana in Japanese)
            $rubyText = new TextRun($paragraphStyle);
            $rubyTextNode = $xmlReader->getElement('w:rt/w:r', $node);
            $this->readRun($xmlReader, $rubyTextNode, $rubyText, $docPart, $paragraphStyle);
            // add element to parent
            $parent->addRuby($baseText, $rubyText, $properties);

            break;
    }
}
607
/**
 * Read w:rubyPr element.
 *
 * Each property is taken from the w:val attribute of the corresponding child
 * element; the three size values are converted to float.
 *
 * @param XMLReader $xmlReader reader for XML
 * @param DOMElement $domNode w:RubyPr element
 *
 * @return RubyProperties ruby properties from element
 */
protected function readRubyProperties(XMLReader $xmlReader, DOMElement $domNode): RubyProperties
{
    // Reads the w:val attribute of one child of the properties element
    $readValue = function (string $childPath) use ($xmlReader, $domNode) {
        return $xmlReader->getElement($childPath, $domNode)->getAttribute('w:val');
    };

    $alignment = $readValue('w:rubyAlign');
    $fontFaceSize = $readValue('w:hps');
    $pointsAboveBase = $readValue('w:hpsRaise');
    $baseTextSize = $readValue('w:hpsBaseText');
    $languageId = $readValue('w:lid');

    $properties = new RubyProperties();
    $properties->setAlignment($alignment);
    $properties->setFontFaceSize((float) $fontFaceSize);
    $properties->setFontPointsAboveBaseText((float) $pointsAboveBase);
    $properties->setFontSizeForBaseText((float) $baseTextSize);
    $properties->setLanguageId($languageId);

    return $properties;
}
632
/**
 * Read w:tbl.
 *
 * Adds a table to $parent, then one row per w:tr and one cell per w:tc.
 * Paragraphs and nested tables inside a cell are read recursively; grid and
 * row-property nodes are skipped.
 *
 * @param mixed $parent
 * @param string $docPart
 */
protected function readTable(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
{
    // Table style
    $tblStyle = $xmlReader->elementExists('w:tblPr', $domNode)
        ? $this->readTableStyle($xmlReader, $domNode)
        : null;

    /** @var \PhpOffice\PhpWord\Element\Table $table Type hint */
    $table = $parent->addTable($tblStyle);

    foreach ($xmlReader->getElements('*', $domNode) as $tblNode) {
        // Only rows are imported; w:tblGrid (columns) is not handled yet
        if ('w:tr' != $tblNode->nodeName) {
            continue;
        }

        $rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
        $heightRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
        $rowStyle = [
            'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
            'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
            'exactHeight' => $heightRule == 'exact',
        ];
        $row = $table->addRow($rowHeight, $rowStyle);

        foreach ($xmlReader->getElements('*', $tblNode) as $rowNode) {
            // Only cells are imported; w:trPr (row style) is not handled yet
            if ('w:tc' != $rowNode->nodeName) {
                continue;
            }

            $cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
            $cellStyle = null;
            if ($xmlReader->elementExists('w:tcPr', $rowNode)) {
                $cellStyle = $this->readCellStyle($xmlReader, $rowNode);
            }
            $cell = $row->addCell($cellWidth, $cellStyle);

            foreach ($xmlReader->getElements('*', $rowNode) as $cellNode) {
                switch ($cellNode->nodeName) {
                    case 'w:p': // Paragraph
                        $this->readParagraph($xmlReader, $cellNode, $cell, $docPart);

                        break;
                    case 'w:tbl': // Table
                        $this->readTable($xmlReader, $cellNode, $cell, $docPart);

                        break;
                }
            }
        }
    }
}
689
690    /**
691     * Read w:pPr.
692     *
693     * @return null|array
694     */
695    protected function readParagraphStyle(XMLReader $xmlReader, DOMElement $domNode)
696    {
697        if (!$xmlReader->elementExists('w:pPr', $domNode)) {
698            return null;
699        }
700
701        $styleNode = $xmlReader->getElement('w:pPr', $domNode);
702        $styleDefs = [
703            'styleName' => [self::READ_VALUE, ['w:pStyle', 'w:name']],
704            'alignment' => [self::READ_VALUE, 'w:jc'],
705            'basedOn' => [self::READ_VALUE, 'w:basedOn'],
706            'next' => [self::READ_VALUE, 'w:next'],
707            'indentLeft' => [self::READ_VALUE, 'w:ind', 'w:left'],
708            'indentRight' => [self::READ_VALUE, 'w:ind', 'w:right'],
709            'indentHanging' => [self::READ_VALUE, 'w:ind', 'w:hanging'],
710            'indentFirstLine' => [self::READ_VALUE, 'w:ind', 'w:firstLine'],
711            'spaceAfter' => [self::READ_VALUE, 'w:spacing', 'w:after'],
712            'spaceBefore' => [self::READ_VALUE, 'w:spacing', 'w:before'],
713            'widowControl' => [self::READ_FALSE, 'w:widowControl'],
714            'keepNext' => [self::READ_TRUE,  'w:keepNext'],
715            'keepLines' => [self::READ_TRUE,  'w:keepLines'],
716            'pageBreakBefore' => [self::READ_TRUE,  'w:pageBreakBefore'],
717            'contextualSpacing' => [self::READ_TRUE,  'w:contextualSpacing'],
718            'bidi' => [self::READ_TRUE,  'w:bidi'],
719            'suppressAutoHyphens' => [self::READ_TRUE,  'w:suppressAutoHyphens'],
720            'borderTopStyle' => [self::READ_VALUE, 'w:pBdr/w:top'],
721            'borderTopColor' => [self::READ_VALUE, 'w:pBdr/w:top', 'w:color'],
722            'borderTopSize' => [self::READ_VALUE, 'w:pBdr/w:top', 'w:sz'],
723            'borderRightStyle' => [self::READ_VALUE, 'w:pBdr/w:right'],
724            'borderRightColor' => [self::READ_VALUE, 'w:pBdr/w:right', 'w:color'],
725            'borderRightSize' => [self::READ_VALUE, 'w:pBdr/w:right', 'w:sz'],
726            'borderBottomStyle' => [self::READ_VALUE, 'w:pBdr/w:bottom'],
727            'borderBottomColor' => [self::READ_VALUE, 'w:pBdr/w:bottom', 'w:color'],
728            'borderBottomSize' => [self::READ_VALUE, 'w:pBdr/w:bottom', 'w:sz'],
729            'borderLeftStyle' => [self::READ_VALUE, 'w:pBdr/w:left'],
730            'borderLeftColor' => [self::READ_VALUE, 'w:pBdr/w:left', 'w:color'],
731            'borderLeftSize' => [self::READ_VALUE, 'w:pBdr/w:left', 'w:sz'],
732        ];
733
734        return $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
735    }
736
737    /**
738     * Read w:rPr.
739     *
740     * @return null|array
741     */
742    protected function readFontStyle(XMLReader $xmlReader, DOMElement $domNode)
743    {
744        if (null === $domNode) {
745            return null;
746        }
747        // Hyperlink has an extra w:r child
748        if ('w:hyperlink' == $domNode->nodeName) {
749            $domNode = $xmlReader->getElement('w:r', $domNode);
750        }
751        if (!$xmlReader->elementExists('w:rPr', $domNode)) {
752            return null;
753        }
754
755        $styleNode = $xmlReader->getElement('w:rPr', $domNode);
756        $styleDefs = [
757            'styleName' => [self::READ_VALUE, 'w:rStyle'],
758            'name' => [self::READ_VALUE, 'w:rFonts', ['w:ascii', 'w:hAnsi', 'w:eastAsia', 'w:cs']],
759            'hint' => [self::READ_VALUE, 'w:rFonts', 'w:hint'],
760            'size' => [self::READ_SIZE,  ['w:sz', 'w:szCs']],
761            'color' => [self::READ_VALUE, 'w:color'],
762            'underline' => [self::READ_VALUE, 'w:u'],
763            'bold' => [self::READ_TRUE,  'w:b'],
764            'italic' => [self::READ_TRUE,  'w:i'],
765            'strikethrough' => [self::READ_TRUE,  'w:strike'],
766            'doubleStrikethrough' => [self::READ_TRUE,  'w:dstrike'],
767            'smallCaps' => [self::READ_TRUE,  'w:smallCaps'],
768            'allCaps' => [self::READ_TRUE,  'w:caps'],
769            'superScript' => [self::READ_EQUAL, 'w:vertAlign', 'w:val', 'superscript'],
770            'subScript' => [self::READ_EQUAL, 'w:vertAlign', 'w:val', 'subscript'],
771            'fgColor' => [self::READ_VALUE, 'w:highlight'],
772            'rtl' => [self::READ_TRUE,  'w:rtl'],
773            'lang' => [self::READ_VALUE, 'w:lang'],
774            'position' => [self::READ_VALUE, 'w:position'],
775            'hidden' => [self::READ_TRUE,  'w:vanish'],
776        ];
777
778        return $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
779    }
780
781    /**
782     * Read w:tblPr.
783     *
784     * @return null|array|string
785     *
786     * @todo Capture w:tblStylePr w:type="firstRow"
787     */
788    protected function readTableStyle(XMLReader $xmlReader, DOMElement $domNode)
789    {
790        $style = null;
791        $margins = ['top', 'left', 'bottom', 'right'];
792        $borders = array_merge($margins, ['insideH', 'insideV']);
793
794        if ($xmlReader->elementExists('w:tblPr', $domNode)) {
795            if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) {
796                $style = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle');
797            } else {
798                $styleNode = $xmlReader->getElement('w:tblPr', $domNode);
799                $styleDefs = [];
800                foreach ($margins as $side) {
801                    $ucfSide = ucfirst($side);
802                    $styleDefs["cellMargin$ucfSide"] = [self::READ_VALUE, "w:tblCellMar/w:$side", 'w:w'];
803                }
804                foreach ($borders as $side) {
805                    $ucfSide = ucfirst($side);
806                    $styleDefs["border{$ucfSide}Size"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:sz'];
807                    $styleDefs["border{$ucfSide}Color"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:color'];
808                    $styleDefs["border{$ucfSide}Style"] = [self::READ_VALUE, "w:tblBorders/w:$side", 'w:val'];
809                }
810                $styleDefs['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type'];
811                $styleDefs['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual'];
812                $styleDefs['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w'];
813                $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
814
815                $tablePositionNode = $xmlReader->getElement('w:tblpPr', $styleNode);
816                if ($tablePositionNode !== null) {
817                    $style['position'] = $this->readTablePosition($xmlReader, $tablePositionNode);
818                }
819
820                $indentNode = $xmlReader->getElement('w:tblInd', $styleNode);
821                if ($indentNode !== null) {
822                    $style['indent'] = $this->readTableIndent($xmlReader, $indentNode);
823                }
824            }
825        }
826
827        return $style;
828    }
829
830    /**
831     * Read w:tblpPr.
832     *
833     * @return array
834     */
835    private function readTablePosition(XMLReader $xmlReader, DOMElement $domNode)
836    {
837        $styleDefs = [
838            'leftFromText' => [self::READ_VALUE, '.', 'w:leftFromText'],
839            'rightFromText' => [self::READ_VALUE, '.', 'w:rightFromText'],
840            'topFromText' => [self::READ_VALUE, '.', 'w:topFromText'],
841            'bottomFromText' => [self::READ_VALUE, '.', 'w:bottomFromText'],
842            'vertAnchor' => [self::READ_VALUE, '.', 'w:vertAnchor'],
843            'horzAnchor' => [self::READ_VALUE, '.', 'w:horzAnchor'],
844            'tblpXSpec' => [self::READ_VALUE, '.', 'w:tblpXSpec'],
845            'tblpX' => [self::READ_VALUE, '.', 'w:tblpX'],
846            'tblpYSpec' => [self::READ_VALUE, '.', 'w:tblpYSpec'],
847            'tblpY' => [self::READ_VALUE, '.', 'w:tblpY'],
848        ];
849
850        return $this->readStyleDefs($xmlReader, $domNode, $styleDefs);
851    }
852
853    /**
854     * Read w:tblInd.
855     *
856     * @return TblWidthComplexType
857     */
858    private function readTableIndent(XMLReader $xmlReader, DOMElement $domNode)
859    {
860        $styleDefs = [
861            'value' => [self::READ_VALUE, '.', 'w:w'],
862            'type' => [self::READ_VALUE, '.', 'w:type'],
863        ];
864        $styleDefs = $this->readStyleDefs($xmlReader, $domNode, $styleDefs);
865
866        return new TblWidthComplexType((int) $styleDefs['value'], $styleDefs['type']);
867    }
868
869    /**
870     * Read w:tcPr.
871     *
872     * @return null|array
873     */
874    private function readCellStyle(XMLReader $xmlReader, DOMElement $domNode)
875    {
876        $styleDefs = [
877            'valign' => [self::READ_VALUE, 'w:vAlign'],
878            'textDirection' => [self::READ_VALUE, 'w:textDirection'],
879            'gridSpan' => [self::READ_VALUE, 'w:gridSpan'],
880            'vMerge' => [self::READ_VALUE, 'w:vMerge', null, null, 'continue'],
881            'bgColor' => [self::READ_VALUE, 'w:shd', 'w:fill'],
882            'noWrap' => [self::READ_VALUE, 'w:noWrap', null, null, true],
883        ];
884        $style = null;
885
886        if ($xmlReader->elementExists('w:tcPr', $domNode)) {
887            $styleNode = $xmlReader->getElement('w:tcPr', $domNode);
888
889            $borders = ['top', 'left', 'bottom', 'right'];
890            foreach ($borders as $side) {
891                $ucfSide = ucfirst($side);
892
893                $styleDefs['border' . $ucfSide . 'Size'] = [self::READ_VALUE, 'w:tcBorders/w:' . $side, 'w:sz'];
894                $styleDefs['border' . $ucfSide . 'Color'] = [self::READ_VALUE, 'w:tcBorders/w:' . $side, 'w:color'];
895                $styleDefs['border' . $ucfSide . 'Style'] = [self::READ_VALUE, 'w:tcBorders/w:' . $side, 'w:val'];
896            }
897
898            $style = $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
899        }
900
901        return $style;
902    }
903
904    /**
905     * Returns the first child element found.
906     *
907     * @param null|array|string $elements
908     *
909     * @return null|string
910     */
911    private function findPossibleElement(XMLReader $xmlReader, ?DOMElement $parentNode = null, $elements = null)
912    {
913        if (is_array($elements)) {
914            //if element is an array, we take the first element that exists in the XML
915            foreach ($elements as $possibleElement) {
916                if ($xmlReader->elementExists($possibleElement, $parentNode)) {
917                    return $possibleElement;
918                }
919            }
920        } else {
921            return $elements;
922        }
923
924        return null;
925    }
926
927    /**
928     * Returns the first attribute found.
929     *
930     * @param array|string $attributes
931     *
932     * @return null|string
933     */
934    private function findPossibleAttribute(XMLReader $xmlReader, DOMElement $node, $attributes)
935    {
936        //if attribute is an array, we take the first attribute that exists in the XML
937        if (is_array($attributes)) {
938            foreach ($attributes as $possibleAttribute) {
939                if ($xmlReader->getAttribute($possibleAttribute, $node)) {
940                    return $possibleAttribute;
941                }
942            }
943
944            return null;
945        }
946
947        return $attributes;
948    }
949
950    /**
951     * Read style definition.
952     *
953     * @param array $styleDefs
954     *
955     * @ignoreScrutinizerPatch
956     *
957     * @return array
958     */
959    protected function readStyleDefs(XMLReader $xmlReader, ?DOMElement $parentNode = null, $styleDefs = [])
960    {
961        $styles = [];
962
963        foreach ($styleDefs as $styleProp => $styleVal) {
964            [$method, $element, $attribute, $expected, $default] = array_pad($styleVal, 5, null);
965
966            $element = $this->findPossibleElement($xmlReader, $parentNode, $element);
967            if ($element === null) {
968                continue;
969            }
970
971            if ($xmlReader->elementExists($element, $parentNode)) {
972                $node = $xmlReader->getElement($element, $parentNode);
973
974                $attribute = $this->findPossibleAttribute($xmlReader, $node, $attribute);
975
976                // Use w:val as default if no attribute assigned
977                $attribute = ($attribute === null) ? 'w:val' : $attribute;
978                $attributeValue = $xmlReader->getAttribute($attribute, $node) ?? $default;
979
980                $styleValue = $this->readStyleDef($method, $attributeValue, $expected);
981                if ($styleValue !== null) {
982                    $styles[$styleProp] = $styleValue;
983                }
984            }
985        }
986
987        return $styles;
988    }
989
990    /**
991     * Return style definition based on conversion method.
992     *
993     * @param string $method
994     *
995     * @ignoreScrutinizerPatch
996     *
997     * @param null|string $attributeValue
998     * @param mixed $expected
999     *
1000     * @return mixed
1001     */
1002    private function readStyleDef($method, $attributeValue, $expected)
1003    {
1004        $style = $attributeValue;
1005
1006        if (self::READ_SIZE == $method) {
1007            $style = $attributeValue / 2;
1008        } elseif (self::READ_TRUE == $method) {
1009            $style = $this->isOn($attributeValue);
1010        } elseif (self::READ_FALSE == $method) {
1011            $style = !$this->isOn($attributeValue);
1012        } elseif (self::READ_EQUAL == $method) {
1013            $style = $attributeValue == $expected;
1014        }
1015
1016        return $style;
1017    }
1018
1019    /**
1020     * Parses the value of the on/off value, null is considered true as it means the w:val attribute was not present.
1021     *
1022     * @see http://www.datypic.com/sc/ooxml/t-w_ST_OnOff.html
1023     *
1024     * @param string $value
1025     *
1026     * @return bool
1027     */
1028    private function isOn($value = null)
1029    {
1030        return $value === null || $value === '1' || $value === 'true' || $value === 'on';
1031    }
1032
1033    /**
1034     * Returns the target of image, object, or link as stored in ::readMainRels.
1035     *
1036     * @param string $docPart
1037     * @param string $rId
1038     *
1039     * @return null|string
1040     */
1041    private function getMediaTarget($docPart, $rId)
1042    {
1043        $target = null;
1044
1045        if (isset($this->rels[$docPart], $this->rels[$docPart][$rId])) {
1046            $target = $this->rels[$docPart][$rId]['target'];
1047        }
1048
1049        return $target;
1050    }
1051
1052    /**
1053     * Returns the target mode.
1054     *
1055     * @param string $docPart
1056     * @param string $rId
1057     *
1058     * @return null|string
1059     */
1060    private function getTargetMode($docPart, $rId)
1061    {
1062        $mode = null;
1063
1064        if (isset($this->rels[$docPart], $this->rels[$docPart][$rId])) {
1065            $mode = $this->rels[$docPart][$rId]['targetMode'];
1066        }
1067
1068        return $mode;
1069    }
1070}