Documentation

Html extends BaseReader

Table of Contents

TEST_SAMPLE_SIZE  = 2048
Sample size to read to determine if it's HTML or not.
BORDER_MAPPINGS  = ['dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOT, 'dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOTDOT, 'dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHED, 'dotted' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOTTED, 'double' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOUBLE, 'hair' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_HAIR, 'medium' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUM, 'medium-dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHED, 'medium-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOT, 'medium-dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOTDOT, 'none' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_NONE, 'slant-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_SLANTDASHDOT, 'solid' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'thick' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THICK]
H1_ETC  = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']
SPAN_ETC  = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']
$dataArray  : array<string|int, mixed>
$fileHandle  : resource
$formats  : array<string|int, mixed>
Formats.
$includeCharts  : bool
Read charts that are defined in the workbook? Identifies whether the Reader should read the definitions for any charts that exist in the workbook.
$inputEncoding  : string
Input encoding.
$loadSheetsOnly  : null|array<string|int, string>
Restrict which sheets should be loaded? This property holds an array of worksheet names to be loaded. If null, then all worksheets will be loaded.
$nestedColumn  : array<string|int, mixed>
$readDataOnly  : bool
Read data only? Identifies whether the Reader should only read data values for cells, and ignore any formatting information; or whether it should read both data and formatting.
$readEmptyCells  : bool
Read empty cells? Identifies whether the Reader should read data values for all cells, or should ignore cells containing a null value or an empty string.
$readFilter  : IReadFilter
IReadFilter instance.
$rowspan  : array<string|int, mixed>
$securityScanner  : XmlScanner|null
$sheetIndex  : int
Sheet index to read.
$tableLevel  : int
__construct()  : mixed
Create a new HTML Reader instance.
canRead()  : bool
Validate that the current file is an HTML file.
getBorderMappings()  : array<string|int, mixed>
getBorderStyle()  : null|string
Map html border style to PhpSpreadsheet border style.
getIncludeCharts()  : bool
Read charts in workbook? If this is true, then the Reader will include any charts that exist in the workbook.
getInputEncoding()  : string
Get input encoding.
getLoadSheetsOnly()  : mixed
Get which sheets to load. Returns either an array of worksheet names (the list of worksheets that should be loaded), or a null indicating that all worksheets in the workbook should be loaded.
getReadDataOnly()  : bool
Read data only? If this is true, then the Reader will only read data values for cells; it will not read any formatting or structural information (like merges).
getReadEmptyCells()  : bool
Read empty cells? If this is true (the default), then the Reader will read data values for all cells, irrespective of value.
getReadFilter()  : IReadFilter
Read filter.
getSecurityScanner()  : XmlScanner|null
getSecurityScannerOrThrow()  : XmlScanner
getSheetIndex()  : int
Get sheet index.
getStyleColor()  : null|string
Check if the value has a #, so we can get a clean hex value.
load()  : Spreadsheet
Loads Spreadsheet from file.
loadFromString()  : Spreadsheet
Loads Spreadsheet from string content.
loadIntoExisting()  : Spreadsheet
Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
loadSpreadsheetFromFile()  : Spreadsheet
Loads Spreadsheet from file.
setIncludeCharts()  : IReader
Set read charts in workbook. Set to true, to advise the Reader to include any charts that exist in the workbook.
setInputEncoding()  : $this
Set input encoding.
setLoadAllSheets()  : IReader
Set all sheets to load. Tells the Reader to load all worksheets from the workbook.
setLoadSheetsOnly()  : IReader
Set which sheets to load.
setReadDataOnly()  : IReader
Set read data only. Set to true, to advise the Reader only to read data values for cells, and to ignore any formatting or structural information (like merges).
setReadEmptyCells()  : IReader
Set read empty cells. Set to true (the default) to advise the Reader to read data values for all cells, irrespective of value.
setReadFilter()  : IReader
Set read filter.
setSheetIndex()  : $this
Set sheet index.
flushCell()  : void
Flush cell.
getTableStartColumn()  : string
openFile()  : void
Open file for reading.
processDomElement()  : void
processFlags()  : void
releaseTableStartColumn()  : string
setTableStartColumn()  : string
applyInlineStyle()  : void
Apply inline CSS style.
containsTags()  : bool
endsWithTag()  : bool
insertImage()  : void
loadDocument()  : Spreadsheet
Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
processDomElementA()  : void
processDomElementAlign()  : void
processDomElementBgcolor()  : void
processDomElementBody()  : void
processDomElementBr()  : void
processDomElementDataFormat()  : void
processDomElementH1Etc()  : void
processDomElementHeight()  : void
processDomElementHr()  : void
processDomElementImg()  : void
processDomElementLi()  : void
processDomElementSpanEtc()  : void
processDomElementTable()  : void
processDomElementThTd()  : void
processDomElementThTdOther()  : void
processDomElementTitle()  : void
processDomElementTr()  : void
processDomElementVAlign()  : void
processDomElementWidth()  : void
readBeginning()  : string
readEnding()  : string
replaceNonAscii()  : string
setBorderStyle()  : void
startsWithTag()  : bool

Constants

TEST_SAMPLE_SIZE

Sample size to read to determine if it's HTML or not.

public mixed TEST_SAMPLE_SIZE = 2048

BORDER_MAPPINGS

private mixed BORDER_MAPPINGS = ['dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOT, 'dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOTDOT, 'dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHED, 'dotted' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOTTED, 'double' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOUBLE, 'hair' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_HAIR, 'medium' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUM, 'medium-dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHED, 'medium-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOT, 'medium-dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOTDOT, 'none' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_NONE, 'slant-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_SLANTDASHDOT, 'solid' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'thick' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THICK]

H1_ETC

private mixed H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']

SPAN_ETC

private mixed SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']

Properties

$dataArray

protected array<string|int, mixed> $dataArray = []

$formats

Formats.

protected array<string|int, mixed> $formats = [ 'h1' => ['font' => ['bold' => true, 'size' => 24]], // Bold, 24pt 'h2' => ['font' => ['bold' => true, 'size' => 18]], // Bold, 18pt 'h3' => ['font' => ['bold' => true, 'size' => 13.5]], // Bold, 13.5pt 'h4' => ['font' => ['bold' => true, 'size' => 12]], // Bold, 12pt 'h5' => ['font' => ['bold' => true, 'size' => 10]], // Bold, 10pt 'h6' => ['font' => ['bold' => true, 'size' => 7.5]], // Bold, 7.5pt 'a' => ['font' => ['underline' => true, 'color' => ['argb' => \PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLUE]]], // Blue underlined 'hr' => ['borders' => ['bottom' => ['borderStyle' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'color' => [\PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLACK]]]], // Bottom border 'strong' => ['font' => ['bold' => true]], // Bold 'b' => ['font' => ['bold' => true]], // Bold 'i' => ['font' => ['italic' => true]], // Italic 'em' => ['font' => ['italic' => true]], ]

$includeCharts

Read charts that are defined in the workbook? Identifies whether the Reader should read the definitions for any charts that exist in the workbook.

protected bool $includeCharts = false

$inputEncoding

Input encoding.

protected string $inputEncoding = 'ANSI'

$loadSheetsOnly

Restrict which sheets should be loaded? This property holds an array of worksheet names to be loaded. If null, then all worksheets will be loaded.

protected null|array<string|int, string> $loadSheetsOnly

$nestedColumn

protected array<string|int, mixed> $nestedColumn = ['A']

$readDataOnly

Read data only? Identifies whether the Reader should only read data values for cells, and ignore any formatting information; or whether it should read both data and formatting.

protected bool $readDataOnly = false

$readEmptyCells

Read empty cells? Identifies whether the Reader should read data values for all cells, or should ignore cells containing a null value or an empty string.

protected bool $readEmptyCells = true

$rowspan

protected array<string|int, mixed> $rowspan = []

$sheetIndex

Sheet index to read.

protected int $sheetIndex = 0

$tableLevel

protected int $tableLevel = 0

Methods

__construct()

Create a new HTML Reader instance.

public __construct() : mixed
Return values
mixed

canRead()

Validate that the current file is an HTML file.

public canRead(string $filename) : bool
Parameters
$filename : string
Return values
bool

getBorderMappings()

public static getBorderMappings() : array<string|int, mixed>
Return values
array<string|int, mixed>

getBorderStyle()

Map html border style to PhpSpreadsheet border style.

public getBorderStyle(string $style) : null|string
Parameters
$style : string
Return values
null|string

getIncludeCharts()

Read charts in workbook? If this is true, then the Reader will include any charts that exist in the workbook.

public getIncludeCharts() : bool

Note that a ReadDataOnly value of true overrides this, and charts won't be read regardless of the IncludeCharts value. If false (the default) it will ignore any charts defined in the workbook file.

Return values
bool

getInputEncoding()

Get input encoding.

public getInputEncoding() : string
Tags
codeCoverageIgnore
deprecated

no use is made of this property

Return values
string

getLoadSheetsOnly()

Get which sheets to load. Returns either an array of worksheet names (the list of worksheets that should be loaded), or a null indicating that all worksheets in the workbook should be loaded.

public getLoadSheetsOnly() : mixed
Return values
mixed

getReadDataOnly()

Read data only? If this is true, then the Reader will only read data values for cells; it will not read any formatting or structural information (like merges).

public getReadDataOnly() : bool

If false (the default) it will read data and formatting.

Return values
bool

getReadEmptyCells()

Read empty cells? If this is true (the default), then the Reader will read data values for all cells, irrespective of value.

public getReadEmptyCells() : bool

If false it will not read data for cells containing a null value or an empty string.

Return values
bool

getSheetIndex()

Get sheet index.

public getSheetIndex() : int
Return values
int

getStyleColor()

Check if the value has a #, so we can get a clean hex value.

public getStyleColor(mixed $value) : null|string
Parameters
$value : mixed
Return values
null|string

load()

Loads Spreadsheet from file.

public load(string $filename, int $flags) : Spreadsheet
Parameters
$filename : string

The name of the file to load

$flags : int

The optional second parameter, flags, may be used to identify specific elements that should be loaded, but which won't be loaded by default, using these values: IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file.

Return values
Spreadsheet

loadIntoExisting()

Loads PhpSpreadsheet from file into PhpSpreadsheet instance.

public loadIntoExisting(string $filename, Spreadsheet $spreadsheet) : Spreadsheet
Parameters
$filename : string
$spreadsheet : Spreadsheet
Return values
Spreadsheet

loadSpreadsheetFromFile()

Loads Spreadsheet from file.

public loadSpreadsheetFromFile(string $filename) : Spreadsheet
Parameters
$filename : string
Return values
Spreadsheet

setIncludeCharts()

Set read charts in workbook. Set to true, to advise the Reader to include any charts that exist in the workbook.

public setIncludeCharts(mixed $includeCharts) : IReader

Note that a ReadDataOnly value of true overrides this, and charts won't be read regardless of the IncludeCharts value. Set to false (the default) to discard charts.

Parameters
$includeCharts : mixed
Return values
IReader

setInputEncoding()

Set input encoding.

public setInputEncoding(string $inputEncoding) : $this
Parameters
$inputEncoding : string

Input encoding, eg: 'ANSI'

Tags
codeCoverageIgnore
deprecated

no use is made of this property

Return values
$this

setLoadAllSheets()

Set all sheets to load. Tells the Reader to load all worksheets from the workbook.

public setLoadAllSheets() : IReader
Return values
IReader

setLoadSheetsOnly()

Set which sheets to load.

public setLoadSheetsOnly(mixed $sheetList) : IReader
Parameters
$sheetList : mixed
Return values
IReader

setReadDataOnly()

Set read data only. Set to true, to advise the Reader only to read data values for cells, and to ignore any formatting or structural information (like merges).

public setReadDataOnly(mixed $readCellValuesOnly) : IReader

Set to false (the default) to advise the Reader to read both data and formatting for cells.

Parameters
$readCellValuesOnly : mixed
Return values
IReader

setReadEmptyCells()

Set read empty cells. Set to true (the default) to advise the Reader to read data values for all cells, irrespective of value.

public setReadEmptyCells(mixed $readEmptyCells) : IReader

Set to false to advise the Reader to ignore cells containing a null value or an empty string.

Parameters
$readEmptyCells : mixed
Return values
IReader

setSheetIndex()

Set sheet index.

public setSheetIndex(int $sheetIndex) : $this
Parameters
$sheetIndex : int

Sheet index

Return values
$this

flushCell()

Flush cell.

protected flushCell(Worksheet $sheet, string $column, int|string $row, mixed &$cellContent) : void
Parameters
$sheet : Worksheet
$column : string
$row : int|string
$cellContent : mixed
Return values
void

getTableStartColumn()

protected getTableStartColumn() : string
Return values
string

openFile()

Open file for reading.

protected openFile(string $filename) : void
Parameters
$filename : string
Return values
void

processDomElement()

protected processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent) : void
Parameters
$element : DOMNode
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
Return values
void

processFlags()

protected processFlags(int $flags) : void
Parameters
$flags : int
Return values
void

releaseTableStartColumn()

protected releaseTableStartColumn() : string
Return values
string

setTableStartColumn()

protected setTableStartColumn(string $column) : string
Parameters
$column : string
Return values
string

applyInlineStyle()

Apply inline CSS style.

private applyInlineStyle(Worksheet &$sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void

NOTES: Currently only intended for td and th elements, and only handles the 'background-color' and 'color' properties with a HEX color value.

TODO :

  • Implement for other properties, such as border
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

containsTags()

private static containsTags(string $data) : bool
Parameters
$data : string
Return values
bool

endsWithTag()

private static endsWithTag(string $data) : bool
Parameters
$data : string
Return values
bool

insertImage()

private insertImage(Worksheet $sheet, string $column, int $row, array<string|int, mixed> $attributes) : void
Parameters
$sheet : Worksheet
$column : string
$row : int
$attributes : array<string|int, mixed>
Return values
void

loadDocument()

Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.

private loadDocument(DOMDocument $document, Spreadsheet $spreadsheet) : Spreadsheet
Parameters
$document : DOMDocument
$spreadsheet : Spreadsheet
Return values
Spreadsheet

processDomElementA()

private processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementAlign()

private processDomElementAlign(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementBgcolor()

private processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementBody()

private processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
Return values
void

processDomElementBr()

private processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementDataFormat()

private processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementH1Etc()

private processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementHeight()

private processDomElementHeight(Worksheet $sheet, int $row, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementHr()

private processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementImg()

private processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementLi()

private processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementSpanEtc()

private processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementTable()

private processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementThTd()

private processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementThTdOther()

private processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementTitle()

private processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementTr()

private processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementVAlign()

private processDomElementVAlign(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

processDomElementWidth()

private processDomElementWidth(Worksheet $sheet, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$column : string
$attributeArray : array<string|int, mixed>
Return values
void

readBeginning()

private readBeginning() : string
Return values
string

readEnding()

private readEnding() : string
Return values
string

replaceNonAscii()

private static replaceNonAscii(array<string|int, mixed> $matches) : string
Parameters
$matches : array<string|int, mixed>
Return values
string

setBorderStyle()

private setBorderStyle(Style $cellStyle, string $styleValue, string $type) : void
Parameters
$cellStyle : Style
$styleValue : string
$type : string
Return values
void

startsWithTag()

private static startsWithTag(string $data) : bool
Parameters
$data : string
Return values
bool

Search results