fpdi_pdf_parser.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. <?php
  2. //
  3. // FPDI - Version 1.2
  4. //
  5. // Copyright 2004-2007 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. require_once("pdf_parser.php");
  /**
   * PDF parser used by FPDI to read pages, page resources, page boxes and
   * content streams from an imported document.
   *
   * Extends pdf_parser and routes all error reporting through the owning
   * FPDI instance. The class deliberately sticks to PHP 4 compatible syntax
   * ("var" properties, no visibility keywords), like the rest of the file.
   */
  class fpdi_pdf_parser extends pdf_parser {

      /**
       * Page objects of the source document, filled by read_pages().
       * Index begins at 0.
       *
       * @var array
       */
      var $pages;

      /**
       * Number of entries in $pages
       *
       * @var integer
       */
      var $page_count;

      /**
       * Zero-based index of the currently selected page (see setPageno())
       *
       * @var integer
       */
      var $pageno;

      /**
       * PDF version of the imported document
       *
       * @var string
       */
      var $pdfVersion;

      /**
       * Reference to the owning FPDI instance
       *
       * @var object
       */
      var $fpdi;

      /**
       * Box types that _getPageBoxes() looks up on a page
       *
       * @var array
       */
      var $availableBoxes = array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox");

      /**
       * Constructor (PHP 5+ entry point)
       *
       * PHP 8 no longer treats a method named after the class as a
       * constructor, so this forwards to the PHP 4 style constructor below.
       * It is declared first so that PHP 5 does not report a redefined
       * constructor.
       *
       * @param string $filename Source filename
       * @param object $fpdi     Object of type fpdi
       */
      function __construct($filename, &$fpdi) {
          $this->fpdi_pdf_parser($filename, $fpdi);
      }

      /**
       * Constructor (PHP 4 style)
       *
       * Stores the FPDI reference, lets the parent parser open the file,
       * then collects all pages of the document into $this->pages.
       *
       * @param string $filename Source filename
       * @param object $fpdi     Object of type fpdi
       */
      function fpdi_pdf_parser($filename, &$fpdi) {
          $this->fpdi =& $fpdi;
          $this->filename = $filename;

          parent::pdf_parser($filename);

          // Resolve the /Pages dictionary referenced from the document root
          $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);

          // Start from an empty list so that count() below always receives
          // an array, even when read_pages() does not add a single page.
          $this->pages = array();
          $this->read_pages($this->c, $pages, $this->pages);

          $this->page_count = count($this->pages);
      }

      /**
       * Overwrite parent::error()
       *
       * Delegates the message to the error handler of the FPDI instance.
       *
       * @param string $msg Error message
       */
      function error($msg) {
          $this->fpdi->error($msg);
      }

      /**
       * Get the page count of the source file
       *
       * @return int
       */
      function getPageCount() {
          return $this->page_count;
      }

      /**
       * Select the page that the getPage*()/getContent() methods work on
       *
       * @param int $pageno Page number to use (1-based)
       */
      function setPageno($pageno) {
          $pageno = ((int) $pageno) - 1;

          if ($pageno < 0 || $pageno >= $this->getPageCount()) {
              $this->fpdi->error("Pagenumber is wrong!");
              // NOTE(review): error() is presumably fatal. Should it ever
              // return, keep the previous selection instead of storing an
              // index that does not exist in $this->pages.
              return;
          }

          $this->pageno = $pageno;
      }

      /**
       * Get the page resources of the current page
       *
       * @return array|false see _getPageResources()
       */
      function getPageResources() {
          return $this->_getPageResources($this->pages[$this->pageno]);
      }

      /**
       * Get the page resources of a /Page
       *
       * Uses the /Resources entry of the object itself when there is one,
       * otherwise walks up the /Parent chain.
       *
       * @param array $obj Array of pdf-data (a /Page or a reference to it)
       * @return array|false The resolved resources (unwrapped when the
       *                     resolver returned a PDF_TYPE_OBJECT), or false
       *                     when no ancestor defines /Resources
       */
      function _getPageResources ($obj) { // $obj = /Page
          $obj = $this->pdf_resolve_object($this->c, $obj);

          if (isset($obj[1][1]['/Resources'])) {
              $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageResources($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // $res may be false (nothing found further up the tree), so only
          // inspect the type marker when there really is an array.
          if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
              return $res[1];
          }

          return $res;
      }

      /**
       * Get the content of the current page
       *
       * If /Contents is an array, the decoded streams are concatenated,
       * each followed by a single space.
       *
       * @return string
       */
      function getContent() {
          $buffer = "";

          if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
              $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
              foreach ($contents as $tmp_content) {
                  $buffer .= $this->_rebuildContentStream($tmp_content).' ';
              }
          }

          return $buffer;
      }

      /**
       * Resolve all content objects
       *
       * Follows object references and (nested) arrays of references and
       * returns the resolved content objects as a flat list.
       *
       * @param array $content_ref
       * @return array
       */
      function _getPageContent($content_ref) {
          $contents = array();

          if ($content_ref[0] == PDF_TYPE_OBJREF) {
              $content = $this->pdf_resolve_object($this->c, $content_ref);
              if ($content[1][0] == PDF_TYPE_ARRAY) {
                  // The reference points to an array of further references
                  $contents = $this->_getPageContent($content[1]);
              } else {
                  $contents[] = $content;
              }
          } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
              foreach ($content_ref[1] as $tmp_content_ref) {
                  $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
              }
          }

          return $contents;
      }

      /**
       * Rebuild a content stream
       *
       * Applies the filters listed in the /Filter entry of the object, in
       * order, to its stream data. /FlateDecode is handled with
       * gzuncompress(); any other filter is delegated to a class of the same
       * name loaded from the "decoders" directory.
       *
       * @param array $obj Resolved content object
       * @return string The decoded stream
       */
      function _rebuildContentStream($obj) {
          $filters = array();

          if (isset($obj[1][1]['/Filter'])) {
              $_filter = $obj[1][1]['/Filter'];

              if ($_filter[0] == PDF_TYPE_TOKEN) {
                  // A single filter name
                  $filters[] = $_filter;
              } else if ($_filter[0] == PDF_TYPE_ARRAY) {
                  // A list of filter names
                  $filters = $_filter[1];
              }
          }

          $stream = $obj[2][1];

          foreach ($filters as $_filter) {
              switch ($_filter[1]) {
                  case "/FlateDecode":
                      if (function_exists('gzuncompress')) {
                          $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
                      } else {
                          $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$_filter[1]));
                      }
                      if ($stream === false) {
                          $this->fpdi->error("Error while decompressing stream.");
                      }
                      break;
                  case null:
                      // Empty filter name: leave the stream untouched
                      break;
                  default:
                      // The pattern restricts the name to letters and the
                      // digits 8 and 5 before it is used to build the
                      // include path.
                      if (preg_match("/^\/[a-z85]*$/i", $_filter[1]) && @include_once('decoders'.$_filter[1].'.php')) {
                          $filterName = substr($_filter[1], 1);
                          if (class_exists($filterName)) {
                              // Plain assignment: "=& new" is a parse error
                              // as of PHP 7.
                              $decoder = new $filterName($this->fpdi);
                              $stream = $decoder->decode(trim($stream));
                          } else {
                              $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                          }
                      } else {
                          $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
                      }
              }
          }

          return $stream;
      }

      /**
       * Get a box from a page
       *
       * The array format is the same as used by fpdf_tpl: the keys "x", "y",
       * "w" and "h", all divided by the scale factor $this->fpdi->k. When the
       * page itself has no such box, the /Parent chain is searched.
       *
       * @param array  $page      a /Page
       * @param string $box_index Type of box, see $availableBoxes
       * @return array|false The box, or false when neither the page nor any
       *                     of its ancestors defines it
       */
      function getPageBox($page, $box_index) {
          $page = $this->pdf_resolve_object($this->c, $page);
          $box = null;

          if (isset($page[1][1][$box_index])) {
              $box = $page[1][1][$box_index];
          }

          if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
              // The box is stored as an indirect object
              $tmp_box = $this->pdf_resolve_object($this->c, $box);
              $box = $tmp_box[1];
          }

          if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
              $b = $box[1];
              $k = $this->fpdi->k;

              return array("x" => $b[0][1]/$k,
                           "y" => $b[1][1]/$k,
                           "w" => abs($b[0][1]-$b[2][1])/$k,
                           "h" => abs($b[1][1]-$b[3][1])/$k);
          } else if (!isset($page[1][1]['/Parent'])) {
              return false;
          } else {
              return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index);
          }
      }

      /**
       * Get all boxes of a page
       *
       * @param int $pageno Page number (1-based)
       * @return array see _getPageBoxes()
       */
      function getPageBoxes($pageno) {
          return $this->_getPageBoxes($this->pages[$pageno-1]);
      }

      /**
       * Get all boxes from a /Page
       *
       * @param array $page a /Page
       * @return array Boxes found via getPageBox(), keyed by box type;
       *               box types that cannot be found are left out
       */
      function _getPageBoxes($page) {
          $boxes = array();

          foreach ($this->availableBoxes as $box) {
              if ($_box = $this->getPageBox($page, $box)) {
                  $boxes[$box] = $_box;
              }
          }

          return $boxes;
      }

      /**
       * Get the /Rotate entry of a page
       *
       * @param int $pageno Page number (1-based)
       * @return mixed see _getPageRotation()
       */
      function getPageRotation($pageno) {
          return $this->_getPageRotation($this->pages[$pageno-1]);
      }

      /**
       * Get the /Rotate entry of a /Page
       *
       * Uses the /Rotate entry of the object itself when there is one,
       * otherwise walks up the /Parent chain.
       *
       * @param array $obj Array of pdf-data (a /Page or a reference to it)
       * @return mixed The resolved /Rotate value (unwrapped when the
       *               resolver returned a PDF_TYPE_OBJECT), or false when no
       *               ancestor defines /Rotate
       */
      function _getPageRotation ($obj) { // $obj = /Page
          $obj = $this->pdf_resolve_object($this->c, $obj);

          if (isset($obj[1][1]['/Rotate'])) {
              $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageRotation($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // $res may be false (nothing found further up the tree), so only
          // inspect the type marker when there really is an array.
          if (is_array($res) && isset($res[0]) && $res[0] == PDF_TYPE_OBJECT) {
              return $res[1];
          }

          return $res;
      }

      /**
       * Read all /Page(s)
       *
       * Walks the /Kids of a /Pages node, descends into nested /Pages nodes
       * and appends every other kid to $result.
       *
       * @param object $c      pdf_context
       * @param array  $pages  /Pages
       * @param array  $result the result array
       */
      function read_pages (&$c, &$pages, &$result) {
          // Get the kids array
          $kids = null;
          if (isset($pages[1][1]['/Kids'])) {
              $kids = $this->pdf_resolve_object($c, $pages[1][1]['/Kids']);
          }

          if (!is_array($kids)) {
              $this->fpdi->error("Cannot find /Kids in current /Page-Dictionary");
              // Nothing to iterate over if error() ever returns
              return;
          }

          foreach ($kids[1] as $v) {
              $pg = $this->pdf_resolve_object($c, $v);

              if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                  // If one of the kids is an embedded
                  // /Pages array, resolve it as well.
                  $this->read_pages($c, $pg, $result);
              } else {
                  $result[] = $pg;
              }
          }
      }

      /**
       * Get the PDF version
       *
       * Calls parent::getPDFVersion() and raises the import version kept by
       * FPDI when this document's version is greater.
       * NOTE(review): presumably the parent call fills $this->pdfVersion -
       * confirm against pdf_parser.
       */
      function getPDFVersion() {
          parent::getPDFVersion();

          if (isset($this->fpdi->importVersion) && $this->pdfVersion > $this->fpdi->importVersion) {
              $this->fpdi->importVersion = $this->pdfVersion;
          }
      }
  }
  324. ?>