<?php /** * Zend Framework * * LICENSE * * This source file is subject to the new BSD license that is bundled * with this package in the file LICENSE.txt. * It is also available through the world-wide-web at this URL: * http://framework.zend.com/license/new-bsd * If you did not receive a copy of the license and are unable to * obtain it through the world-wide-web, please send an email * to license@zend.com so we can send you a copy immediately. * * @category Zend * @package Zend_Pdf * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License * @version $Id$ */ /** Internally used classes */ require_once 'Zend/Pdf/Element.php'; require_once 'Zend/Pdf/Element/Numeric.php'; /** Zend_Pdf_StringParser */ require_once 'Zend/Pdf/StringParser.php'; /** * PDF file parser * * @package Zend_Pdf * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License */ class Zend_Pdf_Parser { /** * String parser * * @var Zend_Pdf_StringParser */ private $_stringParser; /** * Last PDF file trailer * * @var Zend_Pdf_Trailer_Keeper */ private $_trailer; /** * PDF version specified in the file header * * @var string */ private $_pdfVersion; /** * Get length of source PDF * * @return integer */ public function getPDFLength() { return strlen($this->_stringParser->data); } /** * Get PDF String * * @return string */ public function getPDFString() { return $this->_stringParser->data; } /** * PDF version specified in the file header * * @return string */ public function getPDFVersion() { return $this->_pdfVersion; } /** * Load XReference table and referenced objects * * @param integer $offset * @throws Zend_Pdf_Exception * @return Zend_Pdf_Trailer_Keeper */ private function _loadXRefTable($offset) { $this->_stringParser->offset = $offset; require_once 'Zend/Pdf/Element/Reference/Table.php'; $refTable = new Zend_Pdf_Element_Reference_Table(); require_once 'Zend/Pdf/Element/Reference/Context.php'; $context = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable); $this->_stringParser->setContext($context); $nextLexeme = $this->_stringParser->readLexeme(); if ($nextLexeme == 'xref') { /** * Common cross-reference table */ $this->_stringParser->skipWhiteSpace(); while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) { if (!ctype_digit($nextLexeme)) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme))); } $objNum = (int)$nextLexeme; $refCount = $this->_stringParser->readLexeme(); if (!ctype_digit($refCount)) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount))); } $this->_stringParser->skipWhiteSpace(); while ($refCount > 0) { $objectOffset = substr($this->_stringParser->data, $this->_stringParser->offset, 10); if (!ctype_digit($objectOffset)) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset)); } // Force $objectOffset to be treated as decimal instead of octal number for ($numStart = 0; $numStart < strlen($objectOffset)-1; $numStart++) { if ($objectOffset[$numStart] != '0') { break; } } $objectOffset = substr($objectOffset, $numStart); $this->_stringParser->offset += 10; if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset)); } $this->_stringParser->offset++; $genNumber = substr($this->_stringParser->data, $this->_stringParser->offset, 5); if (!ctype_digit($objectOffset)) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset)); } // Force $objectOffset to be treated as decimal instead of octal number for ($numStart = 0; $numStart < strlen($genNumber)-1; $numStart++) { if ($genNumber[$numStart] != '0') { break; } } $genNumber = substr($genNumber, $numStart); $this->_stringParser->offset += 5; if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset)); } $this->_stringParser->offset++; $inUseKey = $this->_stringParser->data[$this->_stringParser->offset]; $this->_stringParser->offset++; switch ($inUseKey) { case 'f': // free entry unset( $this->_refTable[$objNum . ' ' . $genNumber . ' R'] ); $refTable->addReference($objNum . ' ' . $genNumber . ' R', $objectOffset, false); break; case 'n': // in-use entry $refTable->addReference($objNum . ' ' . $genNumber . ' R', $objectOffset, true); } if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset)); } $this->_stringParser->offset++; if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset)); } $this->_stringParser->offset++; $refCount--; $objNum++; } } $trailerDictOffset = $this->_stringParser->offset; $trailerDict = $this->_stringParser->readElement(); if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset)); } } else { $xrefStream = $this->_stringParser->getObject($offset, $context); if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset)); } $trailerDict = $xrefStream->dictionary; if ($trailerDict->Type->value != 'XRef') { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset)); } if ($trailerDict->W === null || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset)); } $entryField1Size = $trailerDict->W->items[0]->value; $entryField2Size = $trailerDict->W->items[1]->value; $entryField3Size = $trailerDict->W->items[2]->value; if ($entryField2Size == 0 || $entryField3Size == 0) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset)); } $xrefStreamData = $xrefStream->value; if ($trailerDict->Index !== null) { if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset)); } $sections = count($trailerDict->Index->items)/2; } else { $sections = 1; } $streamOffset = 0; $size = $entryField1Size + $entryField2Size + $entryField3Size; $entries = strlen($xrefStreamData)/$size; for ($count = 0; $count < $sections; $count++) { if ($trailerDict->Index !== null) { $objNum = $trailerDict->Index->items[$count*2 ]->value; $entries = $trailerDict->Index->items[$count*2 + 1]->value; } else { $objNum = 0; $entries = $trailerDict->Size->value; } for ($count2 = 0; $count2 < $entries; $count2++) { if ($entryField1Size == 0) { $type = 1; } else if ($entryField1Size == 1) { // Optimyze one-byte field case $type = ord($xrefStreamData[$streamOffset++]); } else { $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size); $streamOffset += $entryField1Size; } if ($entryField2Size == 1) { // Optimyze one-byte field case $field2 = ord($xrefStreamData[$streamOffset++]); } else { $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size); $streamOffset += $entryField2Size; } if ($entryField3Size == 1) { // Optimyze one-byte field case $field3 = ord($xrefStreamData[$streamOffset++]); } else { $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size); $streamOffset += $entryField3Size; } switch ($type) { case 0: // Free object $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false); // Debug output: // echo "Free object - $objNum $field3 R, next free - $field2\n"; break; case 1: // In use object $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true); // Debug output: // echo "In-use object - $objNum $field3 R, offset - $field2\n"; break; case 2: // Object in an object stream // Debug output: // echo "Compressed object - $objNum 0 R, object stream - $field2 0 R, offset - $field3\n"; break; } $objNum++; } } // $streamOffset . ' ' . strlen($xrefStreamData) . "\n"; // "$entries\n"; require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.'); } require_once 'Zend/Pdf/Trailer/Keeper.php'; $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context); if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric || $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) { $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value)); $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable()); } /** * We set '/Prev' dictionary property to the current cross-reference section offset. * It doesn't correspond to the actual data, but is true when trailer will be used * as a trailer for next generated PDF section. */ $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset); return $trailerObj; } /** * Get Trailer object * * @return Zend_Pdf_Trailer_Keeper */ public function getTrailer() { return $this->_trailer; } /** * Object constructor * * Note: PHP duplicates string, which is sent by value, only of it's updated. * Thus we don't need to care about overhead * * @param mixed $source * @param Zend_Pdf_ElementFactory_Interface $factory * @param boolean $load * @throws Zend_Exception */ public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load) { if ($load) { if (($pdfFile = @fopen($source, 'rb')) === false ) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." ); } $data = ''; $byteCount = filesize($source); while ($byteCount > 0 && !feof($pdfFile)) { $nextBlock = fread($pdfFile, $byteCount); if ($nextBlock === false) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." ); } $data .= $nextBlock; $byteCount -= strlen($nextBlock); } if ($byteCount != 0) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." ); } fclose($pdfFile); $this->_stringParser = new Zend_Pdf_StringParser($data, $factory); } else { $this->_stringParser = new Zend_Pdf_StringParser($source, $factory); } $pdfVersionComment = $this->_stringParser->readComment(); if (substr($pdfVersionComment, 0, 5) != '%PDF-') { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception('File is not a PDF.'); } $pdfVersion = substr($pdfVersionComment, 5); if (version_compare($pdfVersion, '0.9', '<') || version_compare($pdfVersion, '1.61', '>=') ) { /** * @todo * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7) * Stream compression filter must be implemented (for compressed object streams). * Cross reference streams must be implemented */ require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion)); } $this->_pdfVersion = $pdfVersion; $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF'); if ($this->_stringParser->offset === false || strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.'); } $this->_stringParser->offset--; /** * Go to end of cross-reference table offset */ while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&& ($this->_stringParser->offset > 0)) { $this->_stringParser->offset--; } /** * Go to the start of cross-reference table offset */ while ( (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))&& ($this->_stringParser->offset > 0)) { $this->_stringParser->offset--; } /** * Go to the end of 'startxref' keyword */ while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&& ($this->_stringParser->offset > 0)) { $this->_stringParser->offset--; } /** * Go to the white space (eol marker) before 'startxref' keyword */ $this->_stringParser->offset -= 9; $nextLexeme = $this->_stringParser->readLexeme(); if ($nextLexeme != 'startxref') { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme))); } $startXref = $this->_stringParser->readLexeme(); if (!ctype_digit($startXref)) { require_once 'Zend/Pdf/Exception.php'; throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme))); } $this->_trailer = $this->_loadXRefTable($startXref); $factory->setObjectCount($this->_trailer->Size->value); } /** * Object destructor */ public function __destruct() { $this->_stringParser->cleanUp(); } }