How to read a spreadsheet and delimited files using PHPExcel

11 August 2015| Post by Graham

AuthorGraham

I was recently tasked with reading from a spreadsheet using PHPExcel. My own goal was to create a standard class that automatically the correct PHPExcel worksheet class (from a factory) depending on what file type was passed in. I also needed to be able to read the data from the spreadsheet.

A list of supported file types supported by the class I created are:

  • CSV
  • XLS
  • XLSX
  • ODS

The Class

				
 ‘CSV’,
‘xls’ => ‘Excel5’,
‘xlsx’ => ‘Excel2007’,
‘ods’ => ‘OOCalc’
);

/**
* Path of the file
*
* @var string
*/
protected $file;

/**
* Column names for the current spreadsheet
*
* @var array
*/
protected $columns = array();

/**
* Additional config for Spreadsheet
*
* @var array
*/
protected $extraConfig = array();

/**
* Number of header rows for the current spreadsheet
*
* @var int
*/
protected $headerRows = 1;

/**
* Instance of PHPExcel reader object
*
* @var object
*/
protected $reader;

/**
* PHPExcel instance from reader object
*
* @var object
*/
protected $phpExcel;

/**
* PHPExcel worksheet instance
*
* @var object
*/
protected $worksheet;

/**
* Constructor
*
* @param string $file Full path to the spreadsheet file
* @param array $columns Labels for column headers
* @param int $headerRows Number of header rows
* @param array $extraConfig additional config to consider when setting up PHPExcel
* @throws Exception If file cannot be found or recognised
*/
public function __construct($file, $columns = array(), $headerRows = 1, array $extraConfig = array()) {
if (!is_readable($file)) {
throw new Exception(‘Unable to read file’);
}

$this->setFile($file);

if (!empty($columns) && is_array($columns)) {
$this->setColumns($columns);
}

$this->setHeaderRows($headerRows);

$reader = $this->getReader(array_key_exists(‘type’, $extraConfig) ? $extraConfig[‘type’] : strtolower(pathinfo($file, PATHINFO_EXTENSION)));

if ((!$reader) || !$reader instanceof PHPExcel_Reader_Abstract) {
throw new Exception(‘PHPExcel Reader object cannot be generated from spreadsheet file’);
}

if (array_key_exists(‘delimiter’, $extraConfig)) {
$reader->setDelimiter($extraConfig[‘delimiter’]);
}

if ((!$loadedFile = $reader->load($file)) || !$worksheet = $loadedFile->getSheet()) {
throw new Exception(‘The file was not a valid spreadsheet’);
}

$this->extraConfig = $extraConfig;

$this->setPhpExcel($loadedFile);
$this->setWorksheet($worksheet);
}

/**
* Set file location
*
* @param string $file
* @return Spreadsheet
*/
public function setFile($file) {
$this->file = $file;
return $this;
}

/**
* Get file location
* @return string
*/
public function getFile() {
return $this->file;
}

/**
* Column map of spreadsheet
*
* @param array $columns
* @return Spreadsheet
*/
public function setColumns($columns) {
$this->columns = $columns;
return $this;
}

/**
* Get column map of spreadsheet
*
* @return array
*/
public function getColumns() {
return $this->columns;
}

/**
* Set header rows
*
* @param int $headerRows amount of header rows before raw data starts
* @return Spreadsheet
*/
public function setHeaderRows($headerRows) {
$this->headerRows = (int) $headerRows;
}

/**
* Get header rows
*
* @return int
*/
public function getHeaderRows() {
return $this->headerRows;
}

/**
* Set reader encoding
*
* @param string $encoding
* return Spreadsheet
*/
public function setEncoding($encoding) {
if (!$reader = $this->getReader()) {
$reader->setEncoding($encoding);
}
return $this;
}

/**
* Return spreadsheet reader from PHPExcel Factory
*
* @param string $extension File extension of spreadsheet
* @return object
*/
public function getReader($extension = null) {
if ($extension && !$this->reader) {
if ($reader = array_key_exists($extension, $this->fileTypes) ? PHPExcel_IOFactory::createReader($this->fileTypes[$extension]) : false) {
if ($reader instanceof PHPExcel_Reader_CSV) {
$reader->setInputEncoding(self::DEFAULT_CSV_ENCODING);
}
$this->reader = $reader;
}
}
return $this->reader;
}

/**
* Set PHPExcel object from Reader
*
* @param object $phpExcel
* @return Spreadsheet
*/
public function setPhpExcel($phpExcel) {
$this->phpExcel = $phpExcel;
return $this;
}

/**
* Get PHPExcel object
*
* @return object
*/
public function getPhpExcel() {
return $this->phpExcel;
}

/**
* Loaded worksheet from PHPExcel object
*
* @param object $worksheet
* @return Spreadsheet
*/
public function setWorksheet($worksheet) {
$this->worksheet = $worksheet;
return $this;
}

/**
* Get Worksheet from PHPExcel object
*
* @return object
*/
public function getWorksheet() {
return $this->worksheet;
}

/**
* Get data from spreadsheet in array format, excluding empty rows
*
* @return array
*/
public function getData() {
$data = array();
$worksheet = $this->getWorksheet();
$columns = $this->getColumns();
$rowCount = 0;
foreach ($worksheet->getRowIterator() as $rowIterator) {
if ($rowCount++ < $this->getHeaderRows()) {
continue;
}
$row = array();
$cellIterator = $rowIterator->getCellIterator();
$cellIterator->setIterateOnlyExistingCells(false);
$cellCount = 0;
foreach ($cellIterator as $cell) {
$row[array_key_exists($cellCount, $columns) ? $columns[$cellCount] : $cellCount] = preg_replace(array(‘/^s+/’, ‘/s+$/’), ”, (array_key_exists(‘rawValues’, $this->extraConfig) && $this->extraConfig[‘rawValues’]) ? $cell->getValue() : $cell->getFormattedValue());
$cellCount++;
}
if (strlen(trim(implode($row)))) {
$data[] = array_map(function($record) {
return trim($record);
}, $row);
}
}
return $data;
}

}

Usage

Spreadsheet file, with 2 header rows

				
$source = new Spreadsheet($file, array(
‘name’,
‘last_name’,
), 2);

$data = $source->getData();

Pipe delimited file, with 1 header row

				
$source = new Spreadsheet($file, array(
‘name’,
‘last_name’,
), 1, array(‘type’ => ‘csv’, ‘delimiter’ => ‘|’);

$data = $source->getData();

Parameters

The first parameter ($file) is the path to the file you would like to read.

The second parameter ($columns) is an array of columns from the spreadsheet, any columns that are in the spreadsheet that you do not specify here will instead have a numeric index.

The third parameter ($headerRows) is the number of rows present in the spreadsheet before any data is met, the default is 1 row.

The fourth parameter ($extraConfig) is an array of additional configuration. Valid keys for this include :

  • type – This is the file type, this will override automatic checking of the file type, for instance you may want a .txt file to be treated as a .csv
  • delimiter – This is to set the delimiter of CSV files
  • rawValues – This should be either true or false (false by default), if this is set to true then PHPExcel will return raw values and not calculated cells (formulas etc).
Scroll