Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.19% covered (success)
97.19%
449 / 462
74.07% covered (warning)
74.07%
20 / 27
CRAP
0.00% covered (danger)
0.00%
0 / 1
AbstractPart
97.19% covered (success)
97.19%
449 / 462
74.07% covered (warning)
74.07%
20 / 27
165
0.00% covered (danger)
0.00%
0 / 1
 read
n/a
0 / 0
n/a
0 / 0
0
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setRels
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setImageLoading
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 hasImageLoading
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getCommentReferences
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setCommentReferences
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 setCommentReference
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
3.01
 getCommentReference
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 readParagraph
98.81% covered (success)
98.81%
83 / 84
0.00% covered (danger)
0.00%
0 / 1
31
 readFormField
95.24% covered (success)
95.24%
60 / 63
0.00% covered (danger)
0.00%
0 / 1
27
 getHeadingDepth
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
5
 readRun
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
7
 readRunChild
93.85% covered (success)
93.85%
61 / 65
0.00% covered (danger)
0.00%
0 / 1
27.17
 readTable
100.00% covered (success)
100.00%
32 / 32
100.00% covered (success)
100.00%
1 / 1
12
 readParagraphStyle
100.00% covered (success)
100.00%
33 / 33
100.00% covered (success)
100.00%
1 / 1
2
 readFontStyle
93.10% covered (success)
93.10%
27 / 29
0.00% covered (danger)
0.00%
0 / 1
4.01
 readTableStyle
96.30% covered (success)
96.30%
26 / 27
0.00% covered (danger)
0.00%
0 / 1
7
 readTablePosition
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
1
 readTableIndent
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 readCellStyle
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
3
 findPossibleElement
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 findPossibleAttribute
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 readStyleDefs
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
6
 readStyleDef
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
5
 isOn
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
4
 getMediaTarget
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getTargetMode
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2/**
3 * This file is part of PHPWord - A pure PHP library for reading and writing
4 * word processing documents.
5 *
6 * PHPWord is free software distributed under the terms of the GNU Lesser
7 * General Public License version 3 as published by the Free Software Foundation.
8 *
9 * For the full copyright and license information, please read the LICENSE
10 * file that was distributed with this source code. For the full list of
11 * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
12 *
13 * @see         https://github.com/PHPOffice/PHPWord
14 *
15 * @license     http://www.gnu.org/licenses/lgpl.txt LGPL version 3
16 */
17
18namespace PhpOffice\PhpWord\Reader\Word2007;
19
20use DateTime;
21use DOMElement;
22use InvalidArgumentException;
23use PhpOffice\Math\Reader\OfficeMathML;
24use PhpOffice\PhpWord\ComplexType\TblWidth as TblWidthComplexType;
25use PhpOffice\PhpWord\Element\AbstractContainer;
26use PhpOffice\PhpWord\Element\AbstractElement;
27use PhpOffice\PhpWord\Element\FormField;
28use PhpOffice\PhpWord\Element\TextRun;
29use PhpOffice\PhpWord\Element\TrackChange;
30use PhpOffice\PhpWord\PhpWord;
31use PhpOffice\PhpWord\Shared\XMLReader;
32
33/**
34 * Abstract part reader.
35 *
36 * This class is inherited by ODText reader
37 *
38 * @since 0.10.0
39 */
40abstract class AbstractPart
41{
42    /**
43     * Conversion method.
44     *
45     * @const string
46     */
47    const READ_VALUE = 'attributeValue';            // Read attribute value
48    const READ_EQUAL = 'attributeEquals';           // Read `true` when attribute value equals specified value
49    const READ_TRUE = 'attributeTrue';              // Read `true` when element exists
50    const READ_FALSE = 'attributeFalse';            // Read `false` when element exists
51    const READ_SIZE = 'attributeMultiplyByTwo';     // Read special attribute value for Font::$size
52
53    /**
54     * Document file.
55     *
56     * @var string
57     */
58    protected $docFile;
59
60    /**
61     * XML file.
62     *
63     * @var string
64     */
65    protected $xmlFile;
66
67    /**
68     * Part relationships.
69     *
70     * @var array
71     */
72    protected $rels = [];
73
74    /**
75     * Comment references.
76     *
77     * @var array<string, array<string, null|AbstractElement>>
78     */
79    protected $commentRefs = [];
80
81    /**
82     * Image Loading.
83     *
84     * @var bool
85     */
86    protected $imageLoading = true;
87
88    /**
89     * Read part.
90     */
91    abstract public function read(PhpWord $phpWord);
92
93    /**
94     * Create new instance.
95     *
96     * @param string $docFile
97     * @param string $xmlFile
98     */
99    public function __construct($docFile, $xmlFile)
100    {
101        $this->docFile = $docFile;
102        $this->xmlFile = $xmlFile;
103    }
104
105    /**
106     * Set relationships.
107     *
108     * @param array $value
109     */
110    public function setRels($value): void
111    {
112        $this->rels = $value;
113    }
114
115    public function setImageLoading(bool $value): self
116    {
117        $this->imageLoading = $value;
118
119        return $this;
120    }
121
122    public function hasImageLoading(): bool
123    {
124        return $this->imageLoading;
125    }
126
127    /**
128     * Get comment references.
129     *
130     * @return array<string, array<string, null|AbstractElement>>
131     */
132    public function getCommentReferences(): array
133    {
134        return $this->commentRefs;
135    }
136
137    /**
138     * Set comment references.
139     *
140     * @param array<string, array<string, null|AbstractElement>> $commentRefs
141     */
142    public function setCommentReferences(array $commentRefs): self
143    {
144        $this->commentRefs = $commentRefs;
145
146        return $this;
147    }
148
149    /**
150     * Set comment reference.
151     */
152    private function setCommentReference(string $type, string $id, AbstractElement $element): self
153    {
154        if (!in_array($type, ['start', 'end'])) {
155            throw new InvalidArgumentException('Type must be "start" or "end"');
156        }
157
158        if (!array_key_exists($id, $this->commentRefs)) {
159            $this->commentRefs[$id] = [
160                'start' => null,
161                'end' => null,
162            ];
163        }
164        $this->commentRefs[$id][$type] = $element;
165
166        return $this;
167    }
168
169    /**
170     * Get comment reference.
171     *
172     * @return array<string, null|AbstractElement>
173     */
174    protected function getCommentReference(string $id): array
175    {
176        if (!array_key_exists($id, $this->commentRefs)) {
177            throw new InvalidArgumentException(sprintf('Comment with id %s isn\'t referenced in document', $id));
178        }
179
180        return $this->commentRefs[$id];
181    }
182
183    /**
184     * Read w:p.
185     *
186     * @param \PhpOffice\PhpWord\Element\AbstractContainer $parent
187     * @param string $docPart
188     *
189     * @todo Get font style for preserve text
190     */
191    protected function readParagraph(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
192    {
193        // Paragraph style
194        $paragraphStyle = $xmlReader->elementExists('w:pPr', $domNode) ? $this->readParagraphStyle($xmlReader, $domNode) : null;
195
196        if ($xmlReader->elementExists('w:r/w:fldChar/w:ffData', $domNode)) {
197            // FormField
198            $partOfFormField = false;
199            $formNodes = [];
200            $formType = null;
201            $textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag', $domNode);
202            if ($textRunContainers > 0) {
203                $nodes = $xmlReader->getElements('*', $domNode);
204                $paragraph = $parent->addTextRun($paragraphStyle);
205                foreach ($nodes as $node) {
206                    if ($xmlReader->elementExists('w:fldChar/w:ffData', $node)) {
207                        $partOfFormField = true;
208                        $formNodes[] = $node;
209                        if ($xmlReader->elementExists('w:fldChar/w:ffData/w:ddList', $node)) {
210                            $formType = 'dropdown';
211                        } elseif ($xmlReader->elementExists('w:fldChar/w:ffData/w:textInput', $node)) {
212                            $formType = 'textinput';
213                        } elseif ($xmlReader->elementExists('w:fldChar/w:ffData/w:checkBox', $node)) {
214                            $formType = 'checkbox';
215                        }
216                    } elseif ($partOfFormField &&
217                        $xmlReader->elementExists('w:fldChar', $node) &&
218                        'end' == $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar')
219                    ) {
220                        $formNodes[] = $node;
221                        $partOfFormField = false;
222                        // Process the form fields
223                        $this->readFormField($xmlReader, $formNodes, $paragraph, $paragraphStyle, $formType);
224                    } elseif ($partOfFormField) {
225                        $formNodes[] = $node;
226                    } else {
227                        // normal runs
228                        $this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle);
229                    }
230                }
231            }
232        } elseif ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
233            // PreserveText
234            $ignoreText = false;
235            $textContent = '';
236            $fontStyle = $this->readFontStyle($xmlReader, $domNode);
237            $nodes = $xmlReader->getElements('w:r', $domNode);
238            foreach ($nodes as $node) {
239                if ($xmlReader->elementExists('w:lastRenderedPageBreak', $node)) {
240                    $parent->addPageBreak();
241                }
242                $instrText = $xmlReader->getValue('w:instrText', $node);
243                if (null !== $instrText) {
244                    $textContent .= '{' . $instrText . '}';
245                } else {
246                    if ($xmlReader->elementExists('w:fldChar', $node)) {
247                        $fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
248                        if ('begin' == $fldCharType) {
249                            $ignoreText = true;
250                        } elseif ('end' == $fldCharType) {
251                            $ignoreText = false;
252                        }
253                    }
254                    if (false === $ignoreText) {
255                        $textContent .= $xmlReader->getValue('w:t', $node);
256                    }
257                }
258            }
259            $parent->addPreserveText(htmlspecialchars($textContent, ENT_QUOTES, 'UTF-8'), $fontStyle, $paragraphStyle);
260
261            return;
262        }
263
264        // Formula
265        $xmlReader->registerNamespace('m', 'http://schemas.openxmlformats.org/officeDocument/2006/math');
266        if ($xmlReader->elementExists('m:oMath', $domNode)) {
267            $mathElement = $xmlReader->getElement('m:oMath', $domNode);
268            $mathXML = $mathElement->ownerDocument->saveXML($mathElement);
269            if (is_string($mathXML)) {
270                $reader = new OfficeMathML();
271                $math = $reader->read($mathXML);
272
273                $parent->addFormula($math);
274            }
275
276            return;
277        }
278
279        // List item
280        if ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
281            $numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
282            $levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
283            $nodes = $xmlReader->getElements('*', $domNode);
284
285            $listItemRun = $parent->addListItemRun($levelId, "PHPWordList{$numId}", $paragraphStyle);
286
287            foreach ($nodes as $node) {
288                $this->readRun($xmlReader, $node, $listItemRun, $docPart, $paragraphStyle);
289            }
290
291            return;
292        }
293
294        // Heading or Title
295        $headingDepth = $xmlReader->elementExists('w:pPr', $domNode) ? $this->getHeadingDepth($paragraphStyle) : null;
296        if ($headingDepth !== null) {
297            $textContent = null;
298            $nodes = $xmlReader->getElements('w:r|w:hyperlink', $domNode);
299            if ($nodes->length === 1) {
300                $textContent = htmlspecialchars($xmlReader->getValue('w:t', $nodes->item(0)), ENT_QUOTES, 'UTF-8');
301            } else {
302                $textContent = new TextRun($paragraphStyle);
303                foreach ($nodes as $node) {
304                    $this->readRun($xmlReader, $node, $textContent, $docPart, $paragraphStyle);
305                }
306            }
307            $parent->addTitle($textContent, $headingDepth);
308
309            return;
310        }
311
312        // Text and TextRun
313        $textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag|w:commentReference|w:commentRangeStart|w:commentRangeEnd', $domNode);
314        if (0 === $textRunContainers) {
315            $parent->addTextBreak(1, $paragraphStyle);
316        } else {
317            $nodes = $xmlReader->getElements('*', $domNode);
318            $paragraph = $parent->addTextRun($paragraphStyle);
319            foreach ($nodes as $node) {
320                $this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle);
321            }
322        }
323    }
324
325    /**
326     * @param DOMElement[] $domNodes
327     * @param AbstractContainer $parent
328     * @param mixed $paragraphStyle
329     * @param string $formType
330     */
331    private function readFormField(XMLReader $xmlReader, array $domNodes, $parent, $paragraphStyle, $formType): void
332    {
333        if (!in_array($formType, ['textinput', 'checkbox', 'dropdown'])) {
334            return;
335        }
336
337        $formField = $parent->addFormField($formType, null, $paragraphStyle);
338        $ffData = $xmlReader->getElement('w:fldChar/w:ffData', $domNodes[0]);
339
340        foreach ($xmlReader->getElements('*', $ffData) as $node) {
341            /** @var DOMElement $node */
342            switch ($node->localName) {
343                case 'name':
344                    $formField->setName($node->getAttribute('w:val'));
345
346                    break;
347                case 'ddList':
348                    $listEntries = [];
349                    foreach ($xmlReader->getElements('*', $node) as $ddListNode) {
350                        switch ($ddListNode->localName) {
351                            case 'result':
352                                $formField->setValue($xmlReader->getAttribute('w:val', $ddListNode));
353
354                                break;
355                            case 'default':
356                                $formField->setDefault($xmlReader->getAttribute('w:val', $ddListNode));
357
358                                break;
359                            case 'listEntry':
360                                $listEntries[] = $xmlReader->getAttribute('w:val', $ddListNode);
361
362                                break;
363                        }
364                    }
365                    $formField->setEntries($listEntries);
366                    if (null !== $formField->getValue()) {
367                        $formField->setText($listEntries[$formField->getValue()]);
368                    }
369
370                    break;
371                case 'textInput':
372                    foreach ($xmlReader->getElements('*', $node) as $ddListNode) {
373                        switch ($ddListNode->localName) {
374                            case 'default':
375                                $formField->setDefault($xmlReader->getAttribute('w:val', $ddListNode));
376
377                                break;
378                            case 'format':
379                            case 'maxLength':
380                                break;
381                        }
382                    }
383
384                    break;
385                case 'checkBox':
386                    foreach ($xmlReader->getElements('*', $node) as $ddListNode) {
387                        switch ($ddListNode->localName) {
388                            case 'default':
389                                $formField->setDefault($xmlReader->getAttribute('w:val', $ddListNode));
390
391                                break;
392                            case 'checked':
393                                $formField->setValue($xmlReader->getAttribute('w:val', $ddListNode));
394
395                                break;
396                            case 'size':
397                            case 'sizeAuto':
398                                break;
399                        }
400                    }
401
402                    break;
403            }
404        }
405
406        if ('textinput' == $formType) {
407            $ignoreText = true;
408            $textContent = '';
409            foreach ($domNodes as $node) {
410                if ($xmlReader->elementExists('w:fldChar', $node)) {
411                    $fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
412                    if ('separate' == $fldCharType) {
413                        $ignoreText = false;
414                    } elseif ('end' == $fldCharType) {
415                        $ignoreText = true;
416                    }
417                }
418
419                if (false === $ignoreText) {
420                    $textContent .= $xmlReader->getValue('w:t', $node);
421                }
422            }
423            $formField->setValue(htmlspecialchars($textContent, ENT_QUOTES, 'UTF-8'));
424            $formField->setText(htmlspecialchars($textContent, ENT_QUOTES, 'UTF-8'));
425        }
426    }
427
428    /**
429     * Returns the depth of the Heading, returns 0 for a Title.
430     *
431     * @return null|number
432     */
433    private function getHeadingDepth(?array $paragraphStyle = null)
434    {
435        if (is_array($paragraphStyle) && isset($paragraphStyle['styleName'])) {
436            if ('Title' === $paragraphStyle['styleName']) {
437                return 0;
438            }
439
440            $headingMatches = [];
441            preg_match('/Heading(\d)/', $paragraphStyle['styleName'], $headingMatches);
442            if (!empty($headingMatches)) {
443                return $headingMatches[1];
444            }
445        }
446
447        return null;
448    }
449
450    /**
451     * Read w:r.
452     *
453     * @param \PhpOffice\PhpWord\Element\AbstractContainer $parent
454     * @param string $docPart
455     * @param mixed $paragraphStyle
456     *
457     * @todo Footnote paragraph style
458     */
459    protected function readRun(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart, $paragraphStyle = null): void
460    {
461        if (in_array($domNode->nodeName, ['w:ins', 'w:del', 'w:smartTag', 'w:hyperlink', 'w:commentReference'])) {
462            $nodes = $xmlReader->getElements('*', $domNode);
463            foreach ($nodes as $node) {
464                $this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle);
465            }
466        } elseif ($domNode->nodeName == 'w:r') {
467            $fontStyle = $this->readFontStyle($xmlReader, $domNode);
468            $nodes = $xmlReader->getElements('*', $domNode);
469            foreach ($nodes as $node) {
470                $this->readRunChild($xmlReader, $node, $parent, $docPart, $paragraphStyle, $fontStyle);
471            }
472        }
473
474        if ($xmlReader->elementExists('.//*["commentReference"=local-name()]', $domNode)) {
475            $node = iterator_to_array($xmlReader->getElements('.//*["commentReference"=local-name()]', $domNode))[0];
476            $attributeIdentifier = $node->attributes->getNamedItem('id');
477            if ($attributeIdentifier) {
478                $id = $attributeIdentifier->nodeValue;
479
480                $this->setCommentReference('start', $id, $parent->getElement($parent->countElements() - 1));
481                $this->setCommentReference('end', $id, $parent->getElement($parent->countElements() - 1));
482            }
483        }
484    }
485
486    /**
487     * Parses nodes under w:r.
488     *
489     * @param string $docPart
490     * @param mixed $paragraphStyle
491     * @param mixed $fontStyle
492     */
493    protected function readRunChild(XMLReader $xmlReader, DOMElement $node, AbstractContainer $parent, $docPart, $paragraphStyle = null, $fontStyle = null): void
494    {
495        $runParent = $node->parentNode->parentNode;
496        if ($node->nodeName == 'w:footnoteReference') {
497            // Footnote
498            $wId = $xmlReader->getAttribute('w:id', $node);
499            $footnote = $parent->addFootnote();
500            $footnote->setRelationId($wId);
501        } elseif ($node->nodeName == 'w:endnoteReference') {
502            // Endnote
503            $wId = $xmlReader->getAttribute('w:id', $node);
504            $endnote = $parent->addEndnote();
505            $endnote->setRelationId($wId);
506        } elseif ($node->nodeName == 'w:pict') {
507            // Image
508            $rId = $xmlReader->getAttribute('r:id', $node, 'v:shape/v:imagedata');
509            $target = $this->getMediaTarget($docPart, $rId);
510            if ($this->hasImageLoading() && null !== $target) {
511                if ('External' == $this->getTargetMode($docPart, $rId)) {
512                    $imageSource = $target;
513                } else {
514                    $imageSource = "zip://{$this->docFile}#{$target}";
515                }
516                $parent->addImage($imageSource);
517            }
518        } elseif ($node->nodeName == 'w:drawing') {
519            // Office 2011 Image
520            $xmlReader->registerNamespace('wp', 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing');
521            $xmlReader->registerNamespace('r', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships');
522            $xmlReader->registerNamespace('pic', 'http://schemas.openxmlformats.org/drawingml/2006/picture');
523            $xmlReader->registerNamespace('a', 'http://schemas.openxmlformats.org/drawingml/2006/main');
524
525            $name = $xmlReader->getAttribute('name', $node, 'wp:inline/a:graphic/a:graphicData/pic:pic/pic:nvPicPr/pic:cNvPr');
526            $embedId = $xmlReader->getAttribute('r:embed', $node, 'wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip');
527            if ($name === null && $embedId === null) { // some Converters puts images on a different path
528                $name = $xmlReader->getAttribute('name', $node, 'wp:anchor/a:graphic/a:graphicData/pic:pic/pic:nvPicPr/pic:cNvPr');
529                $embedId = $xmlReader->getAttribute('r:embed', $node, 'wp:anchor/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip');
530            }
531            $target = $this->getMediaTarget($docPart, $embedId);
532            if ($this->hasImageLoading() && null !== $target) {
533                $imageSource = "zip://{$this->docFile}#{$target}";
534                $parent->addImage($imageSource, null, false, $name);
535            }
536        } elseif ($node->nodeName == 'w:object') {
537            // Object
538            $rId = $xmlReader->getAttribute('r:id', $node, 'o:OLEObject');
539            // $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata');
540            $target = $this->getMediaTarget($docPart, $rId);
541            if (null !== $target) {
542                $textContent = "&lt;Object: {$target}>";
543                $parent->addText($textContent, $fontStyle, $paragraphStyle);
544            }
545        } elseif ($node->nodeName == 'w:br') {
546            $parent->addTextBreak();
547        } elseif ($node->nodeName == 'w:tab') {
548            $parent->addText("\t");
549        } elseif ($node->nodeName == 'mc:AlternateContent') {
550            if ($node->hasChildNodes()) {
551                // Get fallback instead of mc:Choice to make sure it is compatible
552                $fallbackElements = $node->getElementsByTagName('Fallback');
553
554                if ($fallbackElements->length) {
555                    $fallback = $fallbackElements->item(0);
556                    // TextRun
557                    $textContent = htmlspecialchars($fallback->nodeValue, ENT_QUOTES, 'UTF-8');
558
559                    $parent->addText($textContent, $fontStyle, $paragraphStyle);
560                }
561            }
562        } elseif ($node->nodeName == 'w:t' || $node->nodeName == 'w:delText') {
563            // TextRun
564            $textContent = htmlspecialchars($xmlReader->getValue('.', $node), ENT_QUOTES, 'UTF-8');
565
566            if ($runParent->nodeName == 'w:hyperlink') {
567                $rId = $xmlReader->getAttribute('r:id', $runParent);
568                $target = $this->getMediaTarget($docPart, $rId);
569                if (null !== $target) {
570                    $parent->addLink($target, $textContent, $fontStyle, $paragraphStyle);
571                } else {
572                    $parent->addText($textContent, $fontStyle, $paragraphStyle);
573                }
574            } else {
575                /** @var AbstractElement $element */
576                $element = $parent->addText($textContent, $fontStyle, $paragraphStyle);
577                if (in_array($runParent->nodeName, ['w:ins', 'w:del'])) {
578                    $type = ($runParent->nodeName == 'w:del') ? TrackChange::DELETED : TrackChange::INSERTED;
579                    $author = $runParent->getAttribute('w:author');
580                    $date = DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $runParent->getAttribute('w:date'));
581                    $date = $date instanceof DateTime ? $date : null;
582                    $element->setChangeInfo($type, $author, $date);
583                }
584            }
585        } elseif ($node->nodeName == 'w:softHyphen') {
586            $element = $parent->addText("\u{200c}", $fontStyle, $paragraphStyle);
587        }
588    }
589
590    /**
591     * Read w:tbl.
592     *
593     * @param mixed $parent
594     * @param string $docPart
595     */
596    protected function readTable(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
597    {
598        // Table style
599        $tblStyle = null;
600        if ($xmlReader->elementExists('w:tblPr', $domNode)) {
601            $tblStyle = $this->readTableStyle($xmlReader, $domNode);
602        }
603
604        /** @var \PhpOffice\PhpWord\Element\Table $table Type hint */
605        $table = $parent->addTable($tblStyle);
606        $tblNodes = $xmlReader->getElements('*', $domNode);
607        foreach ($tblNodes as $tblNode) {
608            if ('w:tblGrid' == $tblNode->nodeName) { // Column
609                // @todo Do something with table columns
610            } elseif ('w:tr' == $tblNode->nodeName) { // Row
611                $rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
612                $rowHRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
613                $rowHRule = $rowHRule == 'exact';
614                $rowStyle = [
615                    'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
616                    'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
617                    'exactHeight' => $rowHRule,
618                ];
619
620                $row = $table->addRow($rowHeight, $rowStyle);
621                $rowNodes = $xmlReader->getElements('*', $tblNode);
622                foreach ($rowNodes as $rowNode) {
623                    if ('w:trPr' == $rowNode->nodeName) { // Row style
624                        // @todo Do something with row style
625                    } elseif ('w:tc' == $rowNode->nodeName) { // Cell
626                        $cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
627                        $cellStyle = null;
628                        if ($xmlReader->elementExists('w:tcPr', $rowNode)) {
629                            $cellStyle = $this->readCellStyle($xmlReader, $rowNode);
630                        }
631
632                        $cell = $row->addCell($cellWidth, $cellStyle);
633                        $cellNodes = $xmlReader->getElements('*', $rowNode);
634                        foreach ($cellNodes as $cellNode) {
635                            if ('w:p' == $cellNode->nodeName) { // Paragraph
636                                $this->readParagraph($xmlReader, $cellNode, $cell, $docPart);
637                            } elseif ($cellNode->nodeName == 'w:tbl') { // Table
638                                $this->readTable($xmlReader, $cellNode, $cell, $docPart);
639                            }
640                        }
641                    }
642                }
643            }
644        }
645    }
646
/**
 * Collect paragraph formatting from the w:pPr child of a node.
 *
 * @return null|array Null when the node has no w:pPr child, otherwise the
 *                    values gathered by readStyleDefs()
 */
protected function readParagraphStyle(XMLReader $xmlReader, DOMElement $domNode)
{
    $hasProperties = $xmlReader->elementExists('w:pPr', $domNode);
    if (!$hasProperties) {
        return null;
    }

    $propertiesNode = $xmlReader->getElement('w:pPr', $domNode);

    // Each definition is [method, element(s), attribute]; without an attribute
    // readStyleDefs() falls back to w:val.
    $definitions = [
        'styleName' => [self::READ_VALUE, ['w:pStyle', 'w:name']],
        'alignment' => [self::READ_VALUE, 'w:jc'],
        'basedOn' => [self::READ_VALUE, 'w:basedOn'],
        'next' => [self::READ_VALUE, 'w:next'],
        'indent' => [self::READ_VALUE, 'w:ind', 'w:left'],
        'hanging' => [self::READ_VALUE, 'w:ind', 'w:hanging'],
        'spaceAfter' => [self::READ_VALUE, 'w:spacing', 'w:after'],
        'spaceBefore' => [self::READ_VALUE, 'w:spacing', 'w:before'],
        'widowControl' => [self::READ_FALSE, 'w:widowControl'],
        'keepNext' => [self::READ_TRUE, 'w:keepNext'],
        'keepLines' => [self::READ_TRUE, 'w:keepLines'],
        'pageBreakBefore' => [self::READ_TRUE, 'w:pageBreakBefore'],
        'contextualSpacing' => [self::READ_TRUE, 'w:contextualSpacing'],
        'bidi' => [self::READ_TRUE, 'w:bidi'],
        'suppressAutoHyphens' => [self::READ_TRUE, 'w:suppressAutoHyphens'],
    ];

    // Paragraph borders: every side contributes style, color and size, in that order.
    foreach (['top', 'right', 'bottom', 'left'] as $side) {
        $keyPrefix = 'border' . ucfirst($side);
        $borderPath = 'w:pBdr/w:' . $side;

        $definitions[$keyPrefix . 'Style'] = [self::READ_VALUE, $borderPath];
        $definitions[$keyPrefix . 'Color'] = [self::READ_VALUE, $borderPath, 'w:color'];
        $definitions[$keyPrefix . 'Size'] = [self::READ_VALUE, $borderPath, 'w:sz'];
    }

    return $this->readStyleDefs($xmlReader, $propertiesNode, $definitions);
}
691
/**
 * Read w:rPr.
 *
 * For a w:hyperlink node the run properties are read from its w:r child
 * rather than from the hyperlink element itself.
 *
 * @return null|array Null when there is no run to inspect or the run has no
 *                    w:rPr child, otherwise the values gathered by readStyleDefs()
 */
protected function readFontStyle(XMLReader $xmlReader, DOMElement $domNode)
{
    // Hyperlink has an extra w:r child
    if ('w:hyperlink' === $domNode->nodeName) {
        $domNode = $xmlReader->getElement('w:r', $domNode);
    }
    // The type hint already rules out a null argument, so the only way to get
    // null here is a hyperlink without a w:r child: nothing to read in that case.
    if (null === $domNode) {
        return null;
    }
    if (!$xmlReader->elementExists('w:rPr', $domNode)) {
        return null;
    }

    $styleNode = $xmlReader->getElement('w:rPr', $domNode);
    $styleDefs = [
        'styleName' => [self::READ_VALUE, 'w:rStyle'],
        // First font attribute that carries a value wins
        'name' => [self::READ_VALUE, 'w:rFonts', ['w:ascii', 'w:hAnsi', 'w:eastAsia', 'w:cs']],
        'hint' => [self::READ_VALUE, 'w:rFonts', 'w:hint'],
        'size' => [self::READ_SIZE, ['w:sz', 'w:szCs']],
        'color' => [self::READ_VALUE, 'w:color'],
        'underline' => [self::READ_VALUE, 'w:u'],
        'bold' => [self::READ_TRUE, 'w:b'],
        'italic' => [self::READ_TRUE, 'w:i'],
        'strikethrough' => [self::READ_TRUE, 'w:strike'],
        'doubleStrikethrough' => [self::READ_TRUE, 'w:dstrike'],
        'smallCaps' => [self::READ_TRUE, 'w:smallCaps'],
        'allCaps' => [self::READ_TRUE, 'w:caps'],
        'superScript' => [self::READ_EQUAL, 'w:vertAlign', 'w:val', 'superscript'],
        'subScript' => [self::READ_EQUAL, 'w:vertAlign', 'w:val', 'subscript'],
        'fgColor' => [self::READ_VALUE, 'w:highlight'],
        'rtl' => [self::READ_TRUE, 'w:rtl'],
        'lang' => [self::READ_VALUE, 'w:lang'],
        'position' => [self::READ_VALUE, 'w:position'],
        'hidden' => [self::READ_TRUE, 'w:vanish'],
    ];

    return $this->readStyleDefs($xmlReader, $styleNode, $styleDefs);
}
735
/**
 * Read the table properties (w:tblPr) of a node.
 *
 * @return null|array|string Null when there is no w:tblPr; the w:val of
 *                           w:tblStyle when the table references a named style;
 *                           otherwise an array of inline style values
 *
 * @todo Capture w:tblStylePr w:type="firstRow"
 */
protected function readTableStyle(XMLReader $xmlReader, DOMElement $domNode)
{
    if (!$xmlReader->elementExists('w:tblPr', $domNode)) {
        return null;
    }

    // A referenced style short-circuits the inline properties entirely.
    if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) {
        return $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle');
    }

    $tblPrNode = $xmlReader->getElement('w:tblPr', $domNode);

    $marginSides = ['top', 'left', 'bottom', 'right'];
    $borderSides = array_merge($marginSides, ['insideH', 'insideV']);

    $definitions = [];
    foreach ($marginSides as $marginSide) {
        $definitions['cellMargin' . ucfirst($marginSide)] = [
            self::READ_VALUE,
            'w:tblCellMar/w:' . $marginSide,
            'w:w',
        ];
    }
    foreach ($borderSides as $borderSide) {
        $keyPrefix = 'border' . ucfirst($borderSide);
        $borderPath = 'w:tblBorders/w:' . $borderSide;

        $definitions[$keyPrefix . 'Size'] = [self::READ_VALUE, $borderPath, 'w:sz'];
        $definitions[$keyPrefix . 'Color'] = [self::READ_VALUE, $borderPath, 'w:color'];
        $definitions[$keyPrefix . 'Style'] = [self::READ_VALUE, $borderPath, 'w:val'];
    }
    $definitions['layout'] = [self::READ_VALUE, 'w:tblLayout', 'w:type'];
    $definitions['bidiVisual'] = [self::READ_TRUE, 'w:bidiVisual'];
    $definitions['cellSpacing'] = [self::READ_VALUE, 'w:tblCellSpacing', 'w:w'];

    $tableStyle = $this->readStyleDefs($xmlReader, $tblPrNode, $definitions);

    // Floating-table position and indentation are nested structures read separately.
    $positionNode = $xmlReader->getElement('w:tblpPr', $tblPrNode);
    if (null !== $positionNode) {
        $tableStyle['position'] = $this->readTablePosition($xmlReader, $positionNode);
    }

    $tblIndNode = $xmlReader->getElement('w:tblInd', $tblPrNode);
    if (null !== $tblIndNode) {
        $tableStyle['indent'] = $this->readTableIndent($xmlReader, $tblIndNode);
    }

    return $tableStyle;
}
784
/**
 * Read the attributes of a w:tblpPr element.
 *
 * Every style key has the same name as the attribute it is read from,
 * minus the "w:" prefix.
 *
 * @return array
 */
private function readTablePosition(XMLReader $xmlReader, DOMElement $domNode)
{
    $attributeNames = [
        'leftFromText',
        'rightFromText',
        'topFromText',
        'bottomFromText',
        'vertAnchor',
        'horzAnchor',
        'tblpXSpec',
        'tblpX',
        'tblpYSpec',
        'tblpY',
    ];

    $definitions = [];
    foreach ($attributeNames as $attributeName) {
        // '.' addresses the w:tblpPr node itself
        $definitions[$attributeName] = [self::READ_VALUE, '.', 'w:' . $attributeName];
    }

    return $this->readStyleDefs($xmlReader, $domNode, $definitions);
}
807
/**
 * Read w:tblInd.
 *
 * readStyleDefs() leaves out attributes that are not present on the element,
 * so both attributes are read with a fallback: a missing w:w becomes 0 and a
 * missing w:type becomes "dxa", the unit OOXML assumes when the type is omitted.
 *
 * @return TblWidthComplexType
 */
private function readTableIndent(XMLReader $xmlReader, DOMElement $domNode)
{
    $styleDefs = [
        // '.' addresses the w:tblInd node itself
        'value' => [self::READ_VALUE, '.', 'w:w'],
        'type' => [self::READ_VALUE, '.', 'w:type'],
    ];
    $indent = $this->readStyleDefs($xmlReader, $domNode, $styleDefs);

    $value = (int) ($indent['value'] ?? 0);
    $type = $indent['type'] ?? 'dxa';

    return new TblWidthComplexType($value, $type);
}
823
/**
 * Read the cell properties (w:tcPr) of a node.
 *
 * @return null|array Null when the node has no w:tcPr child, otherwise the
 *                    values gathered by readStyleDefs()
 */
private function readCellStyle(XMLReader $xmlReader, DOMElement $domNode)
{
    if (!$xmlReader->elementExists('w:tcPr', $domNode)) {
        return null;
    }

    $tcPrNode = $xmlReader->getElement('w:tcPr', $domNode);

    // The fifth entry, where given, is the value used when the element is
    // present but carries no w:val attribute.
    $definitions = [
        'valign' => [self::READ_VALUE, 'w:vAlign'],
        'textDirection' => [self::READ_VALUE, 'w:textDirection'],
        'gridSpan' => [self::READ_VALUE, 'w:gridSpan'],
        'vMerge' => [self::READ_VALUE, 'w:vMerge', null, null, 'continue'],
        'bgColor' => [self::READ_VALUE, 'w:shd', 'w:fill'],
        'noWrap' => [self::READ_VALUE, 'w:noWrap', null, null, true],
    ];

    foreach (['top', 'left', 'bottom', 'right'] as $borderSide) {
        $keyPrefix = 'border' . ucfirst($borderSide);
        $borderPath = "w:tcBorders/w:{$borderSide}";

        $definitions["{$keyPrefix}Size"] = [self::READ_VALUE, $borderPath, 'w:sz'];
        $definitions["{$keyPrefix}Color"] = [self::READ_VALUE, $borderPath, 'w:color'];
        $definitions["{$keyPrefix}Style"] = [self::READ_VALUE, $borderPath, 'w:val'];
    }

    return $this->readStyleDefs($xmlReader, $tcPrNode, $definitions);
}
858
/**
 * Resolve an element specification to a single element path.
 *
 * A non-array specification is handed back untouched. For an array, the
 * first candidate that exists below the parent node is returned.
 *
 * @param null|array|string $elements
 *
 * @return null|string Null when no candidate of an array exists (or when
 *                     null itself was passed in)
 */
private function findPossibleElement(XMLReader $xmlReader, ?DOMElement $parentNode = null, $elements = null)
{
    if (!is_array($elements)) {
        return $elements;
    }

    foreach ($elements as $candidate) {
        if ($xmlReader->elementExists($candidate, $parentNode)) {
            return $candidate;
        }
    }

    return null;
}
881
/**
 * Resolve an attribute specification to a single attribute name.
 *
 * A non-array specification is handed back untouched. For an array, the
 * first candidate whose value on the node is truthy is returned, so
 * attributes that are absent, empty or "0" are passed over.
 *
 * @param null|array|string $attributes
 *
 * @return null|string Null when no candidate of an array qualifies (or when
 *                     null itself was passed in)
 */
private function findPossibleAttribute(XMLReader $xmlReader, DOMElement $node, $attributes)
{
    if (!is_array($attributes)) {
        return $attributes;
    }

    foreach ($attributes as $candidate) {
        if ($xmlReader->getAttribute($candidate, $node)) {
            return $candidate;
        }
    }

    return null;
}
904
/**
 * Evaluate a set of style definitions against a parent node.
 *
 * Each definition is an array of up to five entries:
 * [method, element(s), attribute(s), expected, default]. Properties whose
 * element is missing, or whose converted value is null, are left out of
 * the result.
 *
 * @param array $styleDefs
 *
 * @ignoreScrutinizerPatch
 *
 * @return array Style values keyed by property name
 */
protected function readStyleDefs(XMLReader $xmlReader, ?DOMElement $parentNode = null, $styleDefs = [])
{
    $collected = [];

    foreach ($styleDefs as $property => $definition) {
        [$method, $elementSpec, $attributeSpec, $expected, $default] = array_pad($definition, 5, null);

        $elementPath = $this->findPossibleElement($xmlReader, $parentNode, $elementSpec);
        if ($elementPath === null || !$xmlReader->elementExists($elementPath, $parentNode)) {
            continue;
        }

        $node = $xmlReader->getElement($elementPath, $parentNode);

        // Use w:val as default if no attribute assigned
        $attributeName = $this->findPossibleAttribute($xmlReader, $node, $attributeSpec) ?? 'w:val';
        $rawValue = $xmlReader->getAttribute($attributeName, $node) ?? $default;

        $converted = $this->readStyleDef($method, $rawValue, $expected);
        if ($converted === null) {
            continue;
        }

        $collected[$property] = $converted;
    }

    return $collected;
}
944
/**
 * Convert a raw attribute value according to the given read method.
 *
 * READ_SIZE halves the value, READ_TRUE / READ_FALSE interpret it as an
 * on/off flag (READ_FALSE negated), READ_EQUAL compares it loosely with
 * $expected; any other method returns the value unchanged.
 *
 * @param string $method
 *
 * @ignoreScrutinizerPatch
 *
 * @param null|string $attributeValue
 * @param mixed $expected
 *
 * @return mixed
 */
private function readStyleDef($method, $attributeValue, $expected)
{
    // switch compares loosely, exactly like a chain of == checks would
    switch ($method) {
        case self::READ_SIZE:
            return $attributeValue / 2;
        case self::READ_TRUE:
            return $this->isOn($attributeValue);
        case self::READ_FALSE:
            return !$this->isOn($attributeValue);
        case self::READ_EQUAL:
            return $attributeValue == $expected;
        default:
            return $attributeValue;
    }
}
973
/**
 * Interpret an on/off attribute value.
 *
 * Null counts as "on": it means the element was present without a w:val
 * attribute. Besides null only the strings '1', 'true' and 'on' are "on".
 *
 * @see http://www.datypic.com/sc/ooxml/t-w_ST_OnOff.html
 *
 * @param null|string $value
 *
 * @return bool
 */
private function isOn($value = null)
{
    // Strict matching keeps e.g. integer 1 or boolean true from counting as "on"
    return in_array($value, [null, '1', 'true', 'on'], true);
}
987
/**
 * Look up the 'target' entry of a relationship in $this->rels.
 *
 * @param string $docPart
 * @param string $rId
 *
 * @return null|string Null when no relationship is stored for this document
 *                     part and relationship id
 */
private function getMediaTarget($docPart, $rId)
{
    // isset() on the nested offset also covers a missing $docPart entry
    if (!isset($this->rels[$docPart][$rId])) {
        return null;
    }

    return $this->rels[$docPart][$rId]['target'];
}
1006
/**
 * Look up the 'targetMode' entry of a relationship in $this->rels.
 *
 * @param string $docPart
 * @param string $rId
 *
 * @return null|string Null when no relationship is stored for this document
 *                     part and relationship id
 */
private function getTargetMode($docPart, $rId)
{
    // isset() on the nested offset also covers a missing $docPart entry
    if (!isset($this->rels[$docPart][$rId])) {
        return null;
    }

    return $this->rels[$docPart][$rId]['targetMode'];
}
1025}