first create XlsxReader.php
class XlsxReader
{
protected $sheets = array();
protected $sharedstrings = array();
protected $sheetInfo;
protected $zip;
public $config = array(
'removeTrailingRows' => true
);
// XML schemas
const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';
const SCHEMA_OFFICEDOCUMENT_RELATIONSHIP = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships';
const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
public function __construct($filePath, $config = array())
{
$this->config = array_merge($this->config, $config);
$this->zip = new ZipArchive();
$status = $this->zip->open($filePath);
if ($status === true)
{
$this->parse();
} else
{
throw new Exception("Failed to open $filePath with zip error code: $status");
}
}
// get a file from the zip
protected function getEntryData($name)
{
$data = $this->zip->getFromName($name);
if ($data === false)
{
throw new Exception("File $name does not exist in the Excel file");
} else
{
return $data;
}
}
// extract the shared string and the list of sheets
protected function parse()
{
$sheets = array();
$relationshipsXML = simplexml_load_string($this->getEntryData("_rels/.rels"));
foreach ($relationshipsXML->Relationship as $rel)
{
if ($rel['Type'] == self::SCHEMA_OFFICEDOCUMENT)
{
$workbookDir = dirname($rel['Target']) . '/';
$workbookXML = simplexml_load_string($this->getEntryData($rel['Target']));
foreach ($workbookXML->sheets->sheet as $sheet)
{
$r = $sheet->attributes('r', true);
$sheets[(string) $r->id] = array(
'sheetId' => (int) $sheet['sheetId'],
'name' => (string) $sheet['name']
);
}
$workbookRelationsXML = simplexml_load_string($this->getEntryData($workbookDir . '_rels/' . basename($rel['Target']) . '.rels'));
foreach ($workbookRelationsXML->Relationship as $wrel)
{
switch ($wrel['Type'])
{
case self::SCHEMA_WORKSHEETRELATION:
$sheets[(string) $wrel['Id']]['path'] = $workbookDir . (string) $wrel['Target'];
break;
case self::SCHEMA_SHAREDSTRINGS:
$sharedStringsXML = simplexml_load_string($this->getEntryData($workbookDir . (string) $wrel['Target']));
foreach ($sharedStringsXML->si as $val)
{
if (isset($val->t))
{
$this->sharedStrings[] = (string) $val->t;
} elseif (isset($val->r))
{
$this->sharedStrings[] = XLSXWorksheet::parseRichText($val);
}
}
break;
}
}
}
}
$this->sheetInfo = array();
foreach ($sheets as $rid => $info)
{
$this->sheetInfo[$info['name']] = array(
'sheetId' => $info['sheetId'],
'rid' => $rid,
'path' => $info['path']
);
}
}
// returns an array of sheet names, indexed by sheetId
public function getSheetNames()
{
$res = array();
foreach ($this->sheetInfo as $sheetName => $info)
{
$res[$info['sheetId']] = $sheetName;
}
return $res;
}
public function getSheetCount()
{
return count($this->sheetInfo);
}
// instantiates a sheet object (if needed) and returns an array of its data
public function getSheetData($sheetNameOrId)
{
$sheet = $this->getSheet($sheetNameOrId);
return $sheet->getData();
}
// instantiates a sheet object (if needed) and returns the sheet object
public function getSheet($sheet)
{
if (is_numeric($sheet))
{
$sheet = $this->getSheetNameById($sheet);
} elseif (!is_string($sheet))
{
throw new Exception("Sheet must be a string or a sheet Id");
}
if (!array_key_exists($sheet, $this->sheets))
{
$this->sheets[$sheet] = new XLSXWorksheet($this->getSheetXML($sheet), $sheet, $this);
}
return $this->sheets[$sheet];
}
public function getSheetNameById($sheetId)
{
foreach ($this->sheetInfo as $sheetName => $sheetInfo)
{
if ($sheetInfo['sheetId'] === $sheetId)
{
return $sheetName;
}
}
throw new Exception("Sheet ID $sheetId does not exist in the Excel file");
}
protected function getSheetXML($name)
{
return simplexml_load_string($this->getEntryData($this->sheetInfo[$name]['path']));
}
// converts an Excel date field (a number) to a unix timestamp (granularity: seconds)
public static function toUnixTimeStamp($excelDateTime)
{
if (!is_numeric($excelDateTime))
{
return $excelDateTime;
}
$d = floor($excelDateTime); // seconds since 1900
$t = $excelDateTime - $d;
return ($d > 0) ? ( $d - 25569 ) * 86400 + $t * 86400 : $t * 86400;
}
}
class XLSXWorksheet
{
protected $workbook;
public $sheetName;
protected $data;
public $colCount;
public $rowCount;
protected $config;
public function __construct($xml, $sheetName, XLSXReader $workbook)
{
$this->config = $workbook->config;
$this->sheetName = $sheetName;
$this->workbook = $workbook;
$this->parse($xml);
}
// returns an array of the data from the sheet
public function getData()
{
return $this->data;
}
protected function parse($xml)
{
$this->parseDimensions($xml->dimension);
$this->parseData($xml->sheetData);
}
protected function parseDimensions($dimensions)
{
$range = (string) $dimensions['ref'];
$cells = explode(':', $range);
$maxValues = $this->getColumnIndex($cells[1]);
$this->colCount = $maxValues[0] + 1;
$this->rowCount = $maxValues[1] + 1;
}
protected function parseData($sheetData)
{
$rows = array();
$curR = 0;
$lastDataRow = -1;
foreach ($sheetData->row as $row)
{
$rowNum = (int) $row['r'];
if ($rowNum != ($curR + 1))
{
$missingRows = $rowNum - ($curR + 1);
for ($i = 0; $i < $missingRows; $i++)
{
$rows[$curR] = array_pad(array(), $this->colCount, null);
$curR++;
}
}
$curC = 0;
$rowData = array();
foreach ($row->c as $c)
{
list($cellIndex, ) = $this->getColumnIndex((string) $c['r']);
if ($cellIndex !== $curC)
{
$missingCols = $cellIndex - $curC;
for ($i = 0; $i < $missingCols; $i++)
{
$rowData[$curC] = null;
$curC++;
}
}
$val = $this->parseCellValue($c);
if (!is_null($val))
{
$lastDataRow = $curR;
}
$rowData[$curC] = $val;
$curC++;
}
$rows[$curR] = array_pad($rowData, $this->colCount, null);
$curR++;
}
if ($this->config['removeTrailingRows'])
{
$this->data = array_slice($rows, 0, $lastDataRow + 1);
$this->rowCount = count($this->data);
} else
{
$this->data = $rows;
}
}
protected function getColumnIndex($cell = 'A1')
{
if (preg_match("/([A-Z]+)(\d+)/", $cell, $matches))
{
$col = $matches[1];
$row = $matches[2];
$colLen = strlen($col);
$index = 0;
for ($i = $colLen - 1; $i >= 0; $i--)
{
$index += (ord($col{$i}) - 64) * pow(26, $colLen - $i - 1);
}
return array($index - 1, $row - 1);
}
throw new Exception("Invalid cell index");
}