Documentation

Html extends BaseReader
in package

Table of Contents

Constants

TEST_SAMPLE_SIZE  = 2048
Sample size to read to determine if it's HTML or not.
BORDER_MAPPINGS  = ['dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOT, 'dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOTDOT, 'dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHED, 'dotted' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOTTED, 'double' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOUBLE, 'hair' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_HAIR, 'medium' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUM, 'medium-dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHED, 'medium-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOT, 'medium-dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOTDOT, 'none' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_NONE, 'slant-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_SLANTDASHDOT, 'solid' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'thick' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THICK]
H1_ETC  = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']
SPAN_ETC  = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']

Properties

$dataArray  : array<string|int, mixed>
$fileHandle  : resource
$formats  : array<string|int, mixed>
Formats.
$includeCharts  : bool
Read charts that are defined in the workbook? Identifies whether the Reader should read the definitions for any charts that exist in the workbook.
$inputEncoding  : string
Input encoding.
$loadSheetsOnly  : null|array<string|int, string>
Restrict which sheets should be loaded? This property holds an array of worksheet names to be loaded. If null, then all worksheets will be loaded.
$nestedColumn  : array<string|int, mixed>
$readDataOnly  : bool
Read data only? Identifies whether the Reader should only read data values for cells, and ignore any formatting information; or whether it should read both data and formatting.
$readEmptyCells  : bool
Read empty cells? Identifies whether the Reader should read data values for all cells, or should ignore cells containing a null value or an empty string.
$readFilter  : IReadFilter
IReadFilter instance.
$rowspan  : array<string|int, mixed>
$securityScanner  : XmlScanner|null
$sheetIndex  : int
Sheet index to read.
$tableLevel  : int
$currentColumn  : string

Methods

__construct()  : mixed
Create a new HTML Reader instance.
canRead()  : bool
Validate that the current file is an HTML file.
getBorderMappings()  : array<string|int, mixed>
getBorderStyle()  : string|null
Map html border style to PhpSpreadsheet border style.
getIncludeCharts()  : bool
Read charts in workbook? If this is true, then the Reader will include any charts that exist in the workbook.
getLoadSheetsOnly()  : array<string|int, mixed>|null
Get which sheets to load. Returns either an array of worksheet names (the list of worksheets that should be loaded), or a null indicating that all worksheets in the workbook should be loaded.
getReadDataOnly()  : bool
Read data only? If this is true, then the Reader will only read data values for cells; it will not read any formatting or structural information (like merges).
getReadEmptyCells()  : bool
Read empty cells? If this is true (the default), then the Reader will read data values for all cells, irrespective of value.
getReadFilter()  : IReadFilter
Read filter.
getSecurityScanner()  : XmlScanner|null
getSecurityScannerOrThrow()  : XmlScanner
getSheetIndex()  : int
Get sheet index.
getStyleColor()  : string
Check whether the value has a #, so we can get a clean hex value.
listWorksheetInfo()  : array<string|int, mixed>
Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
listWorksheetNames()  : array<string|int, mixed>
Returns names of the worksheets from a file, possibly without parsing the whole file to a Spreadsheet object.
load()  : Spreadsheet
Loads Spreadsheet from file.
loadFromString()  : Spreadsheet
Loads Spreadsheet from content.
loadIntoExisting()  : Spreadsheet
Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
loadSpreadsheetFromFile()  : Spreadsheet
Loads Spreadsheet from file.
setIncludeCharts()  : $this
Set read charts in workbook. Set to true, to advise the Reader to include any charts that exist in the workbook.
setLoadAllSheets()  : $this
Set all sheets to load. Tells the Reader to load all worksheets from the workbook.
setLoadSheetsOnly()  : $this
Set which sheets to load.
setReadDataOnly()  : $this
Set read data only. Set to true, to advise the Reader only to read data values for cells, and to ignore any formatting or structural information (like merges).
setReadEmptyCells()  : $this
Set read empty cells. Set to true (the default) to advise the Reader to read data values for all cells, irrespective of value.
setReadFilter()  : $this
Set read filter.
setSheetIndex()  : $this
Set sheet index.
flushCell()  : void
Flush cell.
getTableStartColumn()  : string
openFile()  : void
Open file for reading.
processDomElement()  : void
processFlags()  : void
releaseTableStartColumn()  : string
setTableStartColumn()  : string
applyInlineStyle()  : void
Apply inline CSS style.
containsTags()  : bool
endsWithTag()  : bool
insertImage()  : void
loadDocument()  : Spreadsheet
Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
loadProperties()  : void
processDomElementA()  : void
processDomElementAlign()  : void
processDomElementBgcolor()  : void
processDomElementBody()  : void
processDomElementBr()  : void
processDomElementDataFormat()  : void
processDomElementH1Etc()  : void
processDomElementHeight()  : void
processDomElementHr()  : void
processDomElementImg()  : void
processDomElementLi()  : void
processDomElementSpanEtc()  : void
processDomElementTable()  : void
processDomElementThTd()  : void
processDomElementThTdOther()  : void
processDomElementTitle()  : void
processDomElementTr()  : void
processDomElementVAlign()  : void
processDomElementWidth()  : void
readBeginning()  : string
readEnding()  : string
replaceNonAscii()  : string
setBorderStyle()  : void
startsWithTag()  : bool

Constants

TEST_SAMPLE_SIZE

Sample size to read to determine if it's HTML or not.

public mixed TEST_SAMPLE_SIZE = 2048

BORDER_MAPPINGS

private mixed BORDER_MAPPINGS = ['dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOT, 'dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHDOTDOT, 'dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DASHED, 'dotted' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOTTED, 'double' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_DOUBLE, 'hair' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_HAIR, 'medium' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUM, 'medium-dashed' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHED, 'medium-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOT, 'medium-dash-dot-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_MEDIUMDASHDOTDOT, 'none' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_NONE, 'slant-dash-dot' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_SLANTDASHDOT, 'solid' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'thick' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THICK]

H1_ETC

private mixed H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p']

SPAN_ETC

private mixed SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b']

Properties

$dataArray

protected array<string|int, mixed> $dataArray = []

$formats

Formats.

protected array<string|int, mixed> $formats = [ 'h1' => ['font' => ['bold' => true, 'size' => 24]], // Bold, 24pt 'h2' => ['font' => ['bold' => true, 'size' => 18]], // Bold, 18pt 'h3' => ['font' => ['bold' => true, 'size' => 13.5]], // Bold, 13.5pt 'h4' => ['font' => ['bold' => true, 'size' => 12]], // Bold, 12pt 'h5' => ['font' => ['bold' => true, 'size' => 10]], // Bold, 10pt 'h6' => ['font' => ['bold' => true, 'size' => 7.5]], // Bold, 7.5pt 'a' => ['font' => ['underline' => true, 'color' => ['argb' => \PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLUE]]], // Blue underlined 'hr' => ['borders' => ['bottom' => ['borderStyle' => \PhpOffice\PhpSpreadsheet\Style\Border::BORDER_THIN, 'color' => [\PhpOffice\PhpSpreadsheet\Style\Color::COLOR_BLACK]]]], // Bottom border 'strong' => ['font' => ['bold' => true]], // Bold 'b' => ['font' => ['bold' => true]], // Bold 'i' => ['font' => ['italic' => true]], // Italic 'em' => ['font' => ['italic' => true]], ]

$includeCharts

Read charts that are defined in the workbook? Identifies whether the Reader should read the definitions for any charts that exist in the workbook.

protected bool $includeCharts = false

$inputEncoding

Input encoding.

protected string $inputEncoding = 'ANSI'

$loadSheetsOnly

Restrict which sheets should be loaded? This property holds an array of worksheet names to be loaded. If null, then all worksheets will be loaded.

protected null|array<string|int, string> $loadSheetsOnly = null

This property is ignored for Csv, Html, and Slk.

$nestedColumn

protected array<string|int, mixed> $nestedColumn = ['A']

$readDataOnly

Read data only? Identifies whether the Reader should only read data values for cells, and ignore any formatting information; or whether it should read both data and formatting.

protected bool $readDataOnly = false

$readEmptyCells

Read empty cells? Identifies whether the Reader should read data values for all cells, or should ignore cells containing a null value or an empty string.

protected bool $readEmptyCells = true

$rowspan

protected array<string|int, mixed> $rowspan = []

$sheetIndex

Sheet index to read.

protected int $sheetIndex = 0

$tableLevel

protected int $tableLevel = 0

$currentColumn

private string $currentColumn = 'A'

Methods

__construct()

Create a new HTML Reader instance.

public __construct() : mixed

canRead()

Validate that the current file is an HTML file.

public canRead(string $filename) : bool
Parameters
$filename : string
Return values
bool

getBorderMappings()

public static getBorderMappings() : array<string|int, mixed>
Return values
array<string|int, mixed>

getBorderStyle()

Map html border style to PhpSpreadsheet border style.

public getBorderStyle(string $style) : string|null
Parameters
$style : string
Return values
string|null

getIncludeCharts()

Read charts in workbook? If this is true, then the Reader will include any charts that exist in the workbook.

public getIncludeCharts() : bool

Note that a ReadDataOnly value of true overrides this, and charts won't be read regardless of the IncludeCharts value. If IncludeCharts is false (the default), the Reader will ignore any charts defined in the workbook file.

Return values
bool

getLoadSheetsOnly()

Get which sheets to load. Returns either an array of worksheet names (the list of worksheets that should be loaded), or a null indicating that all worksheets in the workbook should be loaded.

public getLoadSheetsOnly() : array<string|int, mixed>|null
Return values
array<string|int, mixed>|null

getReadDataOnly()

Read data only? If this is true, then the Reader will only read data values for cells; it will not read any formatting or structural information (like merges).

public getReadDataOnly() : bool

If false (the default) it will read data and formatting.

Return values
bool

getReadEmptyCells()

Read empty cells? If this is true (the default), then the Reader will read data values for all cells, irrespective of value.

public getReadEmptyCells() : bool

If false it will not read data for cells containing a null value or an empty string.

Return values
bool

getSheetIndex()

Get sheet index.

public getSheetIndex() : int
Return values
int

getStyleColor()

Check whether the value has a #, so we can get a clean hex value.

public getStyleColor(string|null $value) : string
Parameters
$value : string|null
Return values
string

listWorksheetInfo()

Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).

public listWorksheetInfo(string $filename) : array<string|int, mixed>
Parameters
$filename : string
Return values
array<string|int, mixed>

listWorksheetNames()

Returns names of the worksheets from a file, possibly without parsing the whole file to a Spreadsheet object.

public listWorksheetNames(string $filename) : array<string|int, mixed>

Readers will often have a more efficient method with which they can override this method.

Parameters
$filename : string
Return values
array<string|int, mixed>

load()

Loads Spreadsheet from file.

public load(string $filename[, int $flags = 0 ]) : Spreadsheet
Parameters
$filename : string

The name of the file to load

$flags : int = 0

The optional second parameter, flags, may be used to identify specific elements that should be loaded, but which won't be loaded by default, using these values: IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file.

Return values
Spreadsheet

loadIntoExisting()

Loads PhpSpreadsheet from file into PhpSpreadsheet instance.

public loadIntoExisting(string $filename, Spreadsheet $spreadsheet) : Spreadsheet
Parameters
$filename : string
$spreadsheet : Spreadsheet
Return values
Spreadsheet

loadSpreadsheetFromFile()

Loads Spreadsheet from file.

public loadSpreadsheetFromFile(string $filename) : Spreadsheet
Parameters
$filename : string
Return values
Spreadsheet

setIncludeCharts()

Set read charts in workbook. Set to true, to advise the Reader to include any charts that exist in the workbook.

public setIncludeCharts(bool $includeCharts) : $this

Note that a ReadDataOnly value of true overrides this, and charts won't be read regardless of the IncludeCharts value. Set to false (the default) to discard charts.

Parameters
$includeCharts : bool
Return values
$this

setLoadAllSheets()

Set all sheets to load. Tells the Reader to load all worksheets from the workbook.

public setLoadAllSheets() : $this
Return values
$this

setLoadSheetsOnly()

Set which sheets to load.

public setLoadSheetsOnly(string|array<string|int, mixed>|null $sheetList) : $this
Parameters
$sheetList : string|array<string|int, mixed>|null
Return values
$this

setReadDataOnly()

Set read data only. Set to true, to advise the Reader only to read data values for cells, and to ignore any formatting or structural information (like merges).

public setReadDataOnly(bool $readCellValuesOnly) : $this

Set to false (the default) to advise the Reader to read both data and formatting for cells.

Parameters
$readCellValuesOnly : bool
Return values
$this

setReadEmptyCells()

Set read empty cells. Set to true (the default) to advise the Reader to read data values for all cells, irrespective of value.

public setReadEmptyCells(bool $readEmptyCells) : $this

Set to false to advise the Reader to ignore cells containing a null value or an empty string.

Parameters
$readEmptyCells : bool
Return values
$this

setSheetIndex()

Set sheet index.

public setSheetIndex(int $sheetIndex) : $this
Parameters
$sheetIndex : int

Sheet index

Return values
$this

flushCell()

Flush cell.

protected flushCell(Worksheet $sheet, string $column, int|string $row, mixed &$cellContent, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$column : string
$row : int|string
$cellContent : mixed
$attributeArray : array<string|int, mixed>

getTableStartColumn()

protected getTableStartColumn() : string
Return values
string

openFile()

Open file for reading.

protected openFile(string $filename) : void
Parameters
$filename : string

processDomElement()

protected processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent) : void
Parameters
$element : DOMNode
$sheet : Worksheet
$row : int
$column : string
$cellContent : string

processFlags()

protected processFlags(int $flags) : void
Parameters
$flags : int

releaseTableStartColumn()

protected releaseTableStartColumn() : string
Return values
string

setTableStartColumn()

protected setTableStartColumn(string $column) : string
Parameters
$column : string
Return values
string

applyInlineStyle()

Apply inline CSS style.

private applyInlineStyle(Worksheet &$sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void

NOTES: Currently only intended for td & th elements, and only handles the 'background-color' and 'color' properties with HEX color values.

TODO :

  • Implement for other properties, such as border
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>

containsTags()

private static containsTags(string $data) : bool
Parameters
$data : string
Return values
bool

endsWithTag()

private static endsWithTag(string $data) : bool
Parameters
$data : string
Return values
bool

insertImage()

private insertImage(Worksheet $sheet, string $column, int $row, array<string|int, mixed> $attributes) : void
Parameters
$sheet : Worksheet
$column : string
$row : int
$attributes : array<string|int, mixed>

loadDocument()

Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.

private loadDocument(DOMDocument $document, Spreadsheet $spreadsheet) : Spreadsheet
Parameters
$document : DOMDocument
$spreadsheet : Spreadsheet
Return values
Spreadsheet

loadProperties()

private static loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet) : void
Parameters
$dom : DOMDocument
$spreadsheet : Spreadsheet

processDomElementA()

private processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementAlign()

private processDomElementAlign(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>

processDomElementBgcolor()

private processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>

processDomElementBody()

private processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement

processDomElementBr()

private processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementDataFormat()

private processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>

processDomElementH1Etc()

private processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementHeight()

private processDomElementHeight(Worksheet $sheet, int $row, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$attributeArray : array<string|int, mixed>

processDomElementHr()

private processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementImg()

private processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementLi()

private processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementSpanEtc()

private processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementTable()

private processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementThTd()

private processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementThTdOther()

private processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementTitle()

private processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementTr()

private processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array<string|int, mixed> &$attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$cellContent : string
$child : DOMElement
$attributeArray : array<string|int, mixed>

processDomElementVAlign()

private processDomElementVAlign(Worksheet $sheet, int $row, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$row : int
$column : string
$attributeArray : array<string|int, mixed>

processDomElementWidth()

private processDomElementWidth(Worksheet $sheet, string $column, array<string|int, mixed> $attributeArray) : void
Parameters
$sheet : Worksheet
$column : string
$attributeArray : array<string|int, mixed>

readBeginning()

private readBeginning() : string
Return values
string

readEnding()

private readEnding() : string
Return values
string

replaceNonAscii()

private static replaceNonAscii(array<string|int, mixed> $matches) : string
Parameters
$matches : array<string|int, mixed>
Return values
string

setBorderStyle()

private setBorderStyle(Style $cellStyle, string $styleValue, string $type) : void
Parameters
$cellStyle : Style
$styleValue : string
$type : string

startsWithTag()

private static startsWithTag(string $data) : bool
Parameters
$data : string
Return values
bool

        
On this page

Search results