How to read a spreadsheet and delimited files using PHPExcel

11 August 2015| Post by Graham2 minutes

AuthorGraham

I was recently tasked with reading from a spreadsheet using PHPExcel. My own goal was to create a standard class that automatically the correct PHPExcel worksheet class (from a factory) depending on what file type was passed in. I also needed to be able to read the data from the spreadsheet.

A list of supported file types supported by the class I created are:

  • CSV
  • XLS
  • XLSX
  • ODS

The Class

				
 ‘CSV’,
‘xls’ => ‘Excel5’,
‘xlsx’ => ‘Excel2007’,
‘ods’ => ‘OOCalc’
);

/**
* Path of the file
*
* @var string
*/
protected $file;

/**
* Column names for the current spreadsheet
*
* @var array
*/
protected $columns = array();

/**
* Additional config for Spreadsheet
*
* @var array
*/
protected $extraConfig = array();

/**
* Number of header rows for the current spreadsheet
*
* @var int
*/
protected $headerRows = 1;

/**
* Instance of PHPExcel reader object
*
* @var object
*/
protected $reader;

/**
* PHPExcel instance from reader object
*
* @var object
*/
protected $phpExcel;

/**
* PHPExcel worksheet instance
*
* @var object
*/
protected $worksheet;

/**
* Constructor
*
* @param string $file Full path to the spreadsheet file
* @param array $columns Labels for column headers
* @param int $headerRows Number of header rows
* @param array $extraConfig additional config to consider when setting up PHPExcel
* @throws Exception If file cannot be found or recognised
*/
public function __construct($file, $columns = array(), $headerRows = 1, array $extraConfig = array()) {
if (!is_readable($file)) {
throw new Exception(‘Unable to read file’);
}

$this->setFile($file);

if (!empty($columns) && is_array($columns)) {
$this->setColumns($columns);
}

$this->setHeaderRows($headerRows);

$reader = $this->getReader(array_key_exists(‘type’, $extraConfig) ? $extraConfig[‘type’] : strtolower(pathinfo($file, PATHINFO_EXTENSION)));

if ((!$reader) || !$reader instanceof PHPExcel_Reader_Abstract) {
throw new Exception(‘PHPExcel Reader object cannot be generated from spreadsheet file’);
}

if (array_key_exists(‘delimiter’, $extraConfig)) {
$reader->setDelimiter($extraConfig[‘delimiter’]);
}

if ((!$loadedFile = $reader->load($file)) || !$worksheet = $loadedFile->getSheet()) {
throw new Exception(‘The file was not a valid spreadsheet’);
}

$this->extraConfig = $extraConfig;

$this->setPhpExcel($loadedFile);
$this->setWorksheet($worksheet);
}

/**
* Set file location
*
* @param string $file
* @return Spreadsheet
*/
public function setFile($file) {
$this->file = $file;
return $this;
}

/**
* Get file location
* @return string
*/
public function getFile() {
return $this->file;
}

/**
* Column map of spreadsheet
*
* @param array $columns
* @return Spreadsheet
*/
public function setColumns($columns) {
$this->columns = $columns;
return $this;
}

/**
* Get column map of spreadsheet
*
* @return array
*/
public function getColumns() {
return $this->columns;
}

/**
* Set header rows
*
* @param int $headerRows amount of header rows before raw data starts
* @return Spreadsheet
*/
public function setHeaderRows($headerRows) {
$this->headerRows = (int) $headerRows;
}

/**
* Get header rows
*
* @return int
*/
public function getHeaderRows() {
return $this->headerRows;
}

/**
* Set reader encoding
*
* @param string $encoding
* return Spreadsheet
*/
public function setEncoding($encoding) {
if (!$reader = $this->getReader()) {
$reader->setEncoding($encoding);
}
return $this;
}

/**
* Return spreadsheet reader from PHPExcel Factory
*
* @param string $extension File extension of spreadsheet
* @return object
*/
public function getReader($extension = null) {
if ($extension && !$this->reader) {
if ($reader = array_key_exists($extension, $this->fileTypes) ? PHPExcel_IOFactory::createReader($this->fileTypes[$extension]) : false) {
if ($reader instanceof PHPExcel_Reader_CSV) {
$reader->setInputEncoding(self::DEFAULT_CSV_ENCODING);
}
$this->reader = $reader;
}
}
return $this->reader;
}

/**
* Set PHPExcel object from Reader
*
* @param object $phpExcel
* @return Spreadsheet
*/
public function setPhpExcel($phpExcel) {
$this->phpExcel = $phpExcel;
return $this;
}

/**
* Get PHPExcel object
*
* @return object
*/
public function getPhpExcel() {
return $this->phpExcel;
}

/**
* Loaded worksheet from PHPExcel object
*
* @param object $worksheet
* @return Spreadsheet
*/
public function setWorksheet($worksheet) {
$this->worksheet = $worksheet;
return $this;
}

/**
* Get Worksheet from PHPExcel object
*
* @return object
*/
public function getWorksheet() {
return $this->worksheet;
}

/**
* Get data from spreadsheet in array format, excluding empty rows
*
* @return array
*/
public function getData() {
$data = array();
$worksheet = $this->getWorksheet();
$columns = $this->getColumns();
$rowCount = 0;
foreach ($worksheet->getRowIterator() as $rowIterator) {
if ($rowCount++ < $this->getHeaderRows()) {
continue;
}
$row = array();
$cellIterator = $rowIterator->getCellIterator();
$cellIterator->setIterateOnlyExistingCells(false);
$cellCount = 0;
foreach ($cellIterator as $cell) {
$row[array_key_exists($cellCount, $columns) ? $columns[$cellCount] : $cellCount] = preg_replace(array(‘/^s+/’, ‘/s+$/’), ”, (array_key_exists(‘rawValues’, $this->extraConfig) && $this->extraConfig[‘rawValues’]) ? $cell->getValue() : $cell->getFormattedValue());
$cellCount++;
}
if (strlen(trim(implode($row)))) {
$data[] = array_map(function($record) {
return trim($record);
}, $row);
}
}
return $data;
}

}

Usage

Spreadsheet file, with 2 header rows

				
$source = new Spreadsheet($file, array(
‘name’,
‘last_name’,
), 2);

$data = $source->getData();

Pipe delimited file, with 1 header row

				
$source = new Spreadsheet($file, array(
‘name’,
‘last_name’,
), 1, array(‘type’ => ‘csv’, ‘delimiter’ => ‘|’);

$data = $source->getData();

Parameters

The first parameter ($file) is the path to the file you would like to read.

The second parameter ($columns) is an array of columns from the spreadsheet, any columns that are in the spreadsheet that you do not specify here will instead have a numeric index.

The third parameter ($headerRows) is the number of rows present in the spreadsheet before any data is met, the default is 1 row.

The fourth parameter ($extraConfig) is an array of additional configuration. Valid keys for this include :

  • type – This is the file type, this will override automatic checking of the file type, for instance you may want a .txt file to be treated as a .csv
  • delimiter – This is to set the delimiter of CSV files
  • rawValues – This should be either true or false (false by default), if this is set to true then PHPExcel will return raw values and not calculated cells (formulas etc).
Scroll

We’re hiring an Account Manager. Want in? Apply Here