TreeBuilder.php 191 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989
  1. <?php
  2. /*
  3. Copyright 2007 Jeroen van der Meer <http://jero.net/>
  4. Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
  5. Permission is hereby granted, free of charge, to any person obtaining a
  6. copy of this software and associated documentation files (the
  7. "Software"), to deal in the Software without restriction, including
  8. without limitation the rights to use, copy, modify, merge, publish,
  9. distribute, sublicense, and/or sell copies of the Software, and to
  10. permit persons to whom the Software is furnished to do so, subject to
  11. the following conditions:
  12. The above copyright notice and this permission notice shall be included
  13. in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15. OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18. CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. // Tags for FIX ME!!!: (in order of priority)
  23. // XXX - should be fixed NAO!
  24. // XERROR - with regards to parse errors
  25. // XSCRIPT - with regards to scripting mode
  26. // XENCODING - with regards to encoding (for reparsing tests)
  27. // XDOM - DOM specific code (tagName is explicitly not marked).
  28. // this is not (yet) in helper functions.
  29. class HTML5_TreeBuilder {
  30. public $stack = [];
  31. public $content_model;
  32. private $mode;
  33. private $original_mode;
  34. private $secondary_mode;
  35. private $dom;
  36. // Whether or not normal insertion of nodes should actually foster
  37. // parent (used in one case in spec)
  38. private $foster_parent = false;
  39. private $a_formatting = [];
  40. private $head_pointer = null;
  41. private $form_pointer = null;
  42. private $flag_frameset_ok = true;
  43. private $flag_force_quirks = false;
  44. private $ignored = false;
  45. private $quirks_mode = null;
  46. // this is set to 2 when we want to ignore the next LF character, and
  47. // is decremented at the beginning of each processed token (this way,
  48. // code can check for (bool)$ignore_lf_token, but it phases out
  49. // appropriately)
  50. private $ignore_lf_token = 0;
  51. private $fragment = false;
  52. private $root;
  53. private $scoping = ['applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject'];
  54. private $formatting = ['a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u'];
  55. // dl and ds are speculative
  56. private $special = ['address','area','article','aside','base','basefont','bgsound',
  57. 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
  58. 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
  59. 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
  60. 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
  61. 'p','param','plaintext','pre','script','select','spacer','style',
  62. 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'];
  63. private $pendingTableCharacters;
  64. private $pendingTableCharactersDirty;
  65. // Tree construction modes
  66. const INITIAL = 0;
  67. const BEFORE_HTML = 1;
  68. const BEFORE_HEAD = 2;
  69. const IN_HEAD = 3;
  70. const IN_HEAD_NOSCRIPT = 4;
  71. const AFTER_HEAD = 5;
  72. const IN_BODY = 6;
  73. const IN_CDATA_RCDATA = 7;
  74. const IN_TABLE = 8;
  75. const IN_TABLE_TEXT = 9;
  76. const IN_CAPTION = 10;
  77. const IN_COLUMN_GROUP = 11;
  78. const IN_TABLE_BODY = 12;
  79. const IN_ROW = 13;
  80. const IN_CELL = 14;
  81. const IN_SELECT = 15;
  82. const IN_SELECT_IN_TABLE= 16;
  83. const IN_FOREIGN_CONTENT= 17;
  84. const AFTER_BODY = 18;
  85. const IN_FRAMESET = 19;
  86. const AFTER_FRAMESET = 20;
  87. const AFTER_AFTER_BODY = 21;
  88. const AFTER_AFTER_FRAMESET = 22;
  /**
   * Converts a magic number to a readable constant name. Use for debugging.
   *
   * The lookup table is built lazily, once, from the integer-valued
   * constants of this class and cached in a static local.
   *
   * Several constants share the same integer value (for example INITIAL
   * and SCOPE are both 0). The first name reported for a value is kept,
   * so later constants cannot overwrite earlier ones.
   * NOTE(review): this relies on ReflectionClass::getConstants() listing
   * constants in declaration order, which puts the insertion modes first;
   * confirm if the constants are ever reordered.
   *
   * @param int $number Value of one of this class's integer constants.
   * @return string|null The constant's name, or null when no integer
   *                     constant of this class has that value.
   */
  private function strConst($number) {
      static $lookup = null;
      if ($lookup === null) {
          $lookup = [];
          $r = new ReflectionClass(__CLASS__);
          foreach ($r->getConstants() as $const => $num) {
              // Non-integer constants (the NS_* namespace values) are not
              // magic numbers and cannot be array keys for this lookup.
              if (!is_int($num)) {
                  continue;
              }
              // First name wins: do not let a later constant with the
              // same value replace the one already recorded.
              if (!isset($lookup[$num])) {
                  $lookup[$num] = $const;
              }
          }
      }
      // Unknown values yield null instead of an undefined-index notice.
      return isset($lookup[$number]) ? $lookup[$number] : null;
  }
  107. // The different types of elements.
  108. const SPECIAL = 100;
  109. const SCOPING = 101;
  110. const FORMATTING = 102;
  111. const PHRASING = 103;
  112. // Quirks modes in $quirks_mode
  113. const NO_QUIRKS = 200;
  114. const QUIRKS_MODE = 201;
  115. const LIMITED_QUIRKS_MODE = 202;
  116. // Marker to be placed in $a_formatting
  117. const MARKER = 300;
  118. // Namespaces for foreign content
  119. const NS_HTML = null; // to prevent DOM from requiring NS on everything
  120. const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
  121. const NS_SVG = 'http://www.w3.org/2000/svg';
  122. const NS_XLINK = 'http://www.w3.org/1999/xlink';
  123. const NS_XML = 'http://www.w3.org/XML/1998/namespace';
  124. const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
  125. // Different types of scopes to test for elements
  126. const SCOPE = 0;
  127. const SCOPE_LISTITEM = 1;
  128. const SCOPE_TABLE = 2;
  /**
   * HTML5_TreeBuilder constructor.
   *
   * Creates the DOMDocument that will receive the parsed nodes, configures
   * it, and puts the builder into the INITIAL insertion mode.
   */
  public function __construct() {
      // Configure the document on a local first, then publish it.
      $document = new DOMDocument();
      $document->encoding = 'UTF-8';
      $document->preserveWhiteSpace = true;
      $document->substituteEntities = true;
      $document->strictErrorChecking = false;

      $this->dom = $document;
      $this->mode = self::INITIAL;
  }
  /**
   * Returns the quirks mode currently recorded for the document.
   *
   * @return int|null The stored $quirks_mode value. In the code visible
   *                  here it is one of self::NO_QUIRKS, self::QUIRKS_MODE
   *                  or self::LIMITED_QUIRKS_MODE, and null until one of
   *                  them has been assigned.
   */
  public function getQuirksMode() {
      return $this->quirks_mode;
  }
  143. /**
  144. * Process tag tokens
  145. *
  146. * @param $token
  147. * @param null $mode
  148. */
  149. public function emitToken($token, $mode = null) {
  150. // XXX: ignore parse errors... why are we emitting them, again?
  151. if ($token['type'] === HTML5_Tokenizer::PARSEERROR) {
  152. return;
  153. }
  154. if ($mode === null) {
  155. $mode = $this->mode;
  156. }
  157. /*
  158. $backtrace = debug_backtrace();
  159. if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
  160. echo $this->strConst($mode);
  161. if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
  162. echo "\n ";
  163. token_dump($token);
  164. $this->printStack();
  165. $this->printActiveFormattingElements();
  166. if ($this->foster_parent) echo " -> this is a foster parent mode\n";
  167. if ($this->flag_frameset_ok) echo " -> frameset ok\n";
  168. */
  169. if ($this->ignore_lf_token) {
  170. $this->ignore_lf_token--;
  171. }
  172. $this->ignored = false;
  173. switch ($mode) {
  174. case self::INITIAL:
  175. /* A character token that is one of U+0009 CHARACTER TABULATION,
  176. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
  177. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  178. /* Ignore the token. */
  179. $this->ignored = true;
  180. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  181. if (
  182. $token['name'] !== 'html' || !empty($token['public']) ||
  183. !empty($token['system']) || $token !== 'about:legacy-compat'
  184. ) {
  185. /* If the DOCTYPE token's name is not a case-sensitive match
  186. * for the string "html", or if the token's public identifier
  187. * is not missing, or if the token's system identifier is
  188. * neither missing nor a case-sensitive match for the string
  189. * "about:legacy-compat", then there is a parse error (this
  190. * is the DOCTYPE parse error). */
  191. // DOCTYPE parse error
  192. }
  193. /* Append a DocumentType node to the Document node, with the name
  194. * attribute set to the name given in the DOCTYPE token, or the
  195. * empty string if the name was missing; the publicId attribute
  196. * set to the public identifier given in the DOCTYPE token, or
  197. * the empty string if the public identifier was missing; the
  198. * systemId attribute set to the system identifier given in the
  199. * DOCTYPE token, or the empty string if the system identifier
  200. * was missing; and the other attributes specific to
  201. * DocumentType objects set to null and empty lists as
  202. * appropriate. Associate the DocumentType node with the
  203. * Document object so that it is returned as the value of the
  204. * doctype attribute of the Document object. */
  205. if (!isset($token['public'])) {
  206. $token['public'] = null;
  207. }
  208. if (!isset($token['system'])) {
  209. $token['system'] = null;
  210. }
  211. // XDOM
  212. // Yes this is hacky. I'm kind of annoyed that I can't appendChild
  213. // a doctype to DOMDocument. Maybe I haven't chanted the right
  214. // syllables.
  215. $impl = new DOMImplementation();
  216. // This call can fail for particularly pathological cases (namely,
  217. // the qualifiedName parameter ($token['name']) could be missing.
  218. if ($token['name']) {
  219. $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
  220. $this->dom->appendChild($doctype);
  221. } else {
  222. // It looks like libxml's not actually *able* to express this case.
  223. // So... don't.
  224. $this->dom->emptyDoctype = true;
  225. }
  226. $public = is_null($token['public']) ? false : strtolower($token['public']);
  227. $system = is_null($token['system']) ? false : strtolower($token['system']);
  228. $publicStartsWithForQuirks = [
  229. "+//silmaril//dtd html pro v0r11 19970101//",
  230. "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
  231. "-//as//dtd html 3.0 aswedit + extensions//",
  232. "-//ietf//dtd html 2.0 level 1//",
  233. "-//ietf//dtd html 2.0 level 2//",
  234. "-//ietf//dtd html 2.0 strict level 1//",
  235. "-//ietf//dtd html 2.0 strict level 2//",
  236. "-//ietf//dtd html 2.0 strict//",
  237. "-//ietf//dtd html 2.0//",
  238. "-//ietf//dtd html 2.1e//",
  239. "-//ietf//dtd html 3.0//",
  240. "-//ietf//dtd html 3.2 final//",
  241. "-//ietf//dtd html 3.2//",
  242. "-//ietf//dtd html 3//",
  243. "-//ietf//dtd html level 0//",
  244. "-//ietf//dtd html level 1//",
  245. "-//ietf//dtd html level 2//",
  246. "-//ietf//dtd html level 3//",
  247. "-//ietf//dtd html strict level 0//",
  248. "-//ietf//dtd html strict level 1//",
  249. "-//ietf//dtd html strict level 2//",
  250. "-//ietf//dtd html strict level 3//",
  251. "-//ietf//dtd html strict//",
  252. "-//ietf//dtd html//",
  253. "-//metrius//dtd metrius presentational//",
  254. "-//microsoft//dtd internet explorer 2.0 html strict//",
  255. "-//microsoft//dtd internet explorer 2.0 html//",
  256. "-//microsoft//dtd internet explorer 2.0 tables//",
  257. "-//microsoft//dtd internet explorer 3.0 html strict//",
  258. "-//microsoft//dtd internet explorer 3.0 html//",
  259. "-//microsoft//dtd internet explorer 3.0 tables//",
  260. "-//netscape comm. corp.//dtd html//",
  261. "-//netscape comm. corp.//dtd strict html//",
  262. "-//o'reilly and associates//dtd html 2.0//",
  263. "-//o'reilly and associates//dtd html extended 1.0//",
  264. "-//o'reilly and associates//dtd html extended relaxed 1.0//",
  265. "-//spyglass//dtd html 2.0 extended//",
  266. "-//sq//dtd html 2.0 hotmetal + extensions//",
  267. "-//sun microsystems corp.//dtd hotjava html//",
  268. "-//sun microsystems corp.//dtd hotjava strict html//",
  269. "-//w3c//dtd html 3 1995-03-24//",
  270. "-//w3c//dtd html 3.2 draft//",
  271. "-//w3c//dtd html 3.2 final//",
  272. "-//w3c//dtd html 3.2//",
  273. "-//w3c//dtd html 3.2s draft//",
  274. "-//w3c//dtd html 4.0 frameset//",
  275. "-//w3c//dtd html 4.0 transitional//",
  276. "-//w3c//dtd html experimental 19960712//",
  277. "-//w3c//dtd html experimental 970421//",
  278. "-//w3c//dtd w3 html//",
  279. "-//w3o//dtd w3 html 3.0//",
  280. "-//webtechs//dtd mozilla html 2.0//",
  281. "-//webtechs//dtd mozilla html//",
  282. ];
  283. $publicSetToForQuirks = [
  284. "-//w3o//dtd w3 html strict 3.0//",
  285. "-/w3c/dtd html 4.0 transitional/en",
  286. "html",
  287. ];
  288. $publicStartsWithAndSystemForQuirks = [
  289. "-//w3c//dtd html 4.01 frameset//",
  290. "-//w3c//dtd html 4.01 transitional//",
  291. ];
  292. $publicStartsWithForLimitedQuirks = [
  293. "-//w3c//dtd xhtml 1.0 frameset//",
  294. "-//w3c//dtd xhtml 1.0 transitional//",
  295. ];
  296. $publicStartsWithAndSystemForLimitedQuirks = [
  297. "-//w3c//dtd html 4.01 frameset//",
  298. "-//w3c//dtd html 4.01 transitional//",
  299. ];
  300. // first, do easy checks
  301. if (
  302. !empty($token['force-quirks']) ||
  303. strtolower($token['name']) !== 'html'
  304. ) {
  305. $this->quirks_mode = self::QUIRKS_MODE;
  306. } else {
  307. do {
  308. if ($system) {
  309. foreach ($publicStartsWithAndSystemForQuirks as $x) {
  310. if (strncmp($public, $x, strlen($x)) === 0) {
  311. $this->quirks_mode = self::QUIRKS_MODE;
  312. break;
  313. }
  314. }
  315. if (!is_null($this->quirks_mode)) {
  316. break;
  317. }
  318. foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
  319. if (strncmp($public, $x, strlen($x)) === 0) {
  320. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  321. break;
  322. }
  323. }
  324. if (!is_null($this->quirks_mode)) {
  325. break;
  326. }
  327. }
  328. foreach ($publicSetToForQuirks as $x) {
  329. if ($public === $x) {
  330. $this->quirks_mode = self::QUIRKS_MODE;
  331. break;
  332. }
  333. }
  334. if (!is_null($this->quirks_mode)) {
  335. break;
  336. }
  337. foreach ($publicStartsWithForLimitedQuirks as $x) {
  338. if (strncmp($public, $x, strlen($x)) === 0) {
  339. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  340. }
  341. }
  342. if (!is_null($this->quirks_mode)) {
  343. break;
  344. }
  345. if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
  346. $this->quirks_mode = self::QUIRKS_MODE;
  347. break;
  348. }
  349. foreach ($publicStartsWithForQuirks as $x) {
  350. if (strncmp($public, $x, strlen($x)) === 0) {
  351. $this->quirks_mode = self::QUIRKS_MODE;
  352. break;
  353. }
  354. }
  355. if (is_null($this->quirks_mode)) {
  356. $this->quirks_mode = self::NO_QUIRKS;
  357. }
  358. } while (false);
  359. }
  360. $this->mode = self::BEFORE_HTML;
  361. } else {
  362. // parse error
  363. /* Switch the insertion mode to "before html", then reprocess the
  364. * current token. */
  365. $this->mode = self::BEFORE_HTML;
  366. $this->quirks_mode = self::QUIRKS_MODE;
  367. $this->emitToken($token);
  368. }
  369. break;
  370. case self::BEFORE_HTML:
  371. /* A DOCTYPE token */
  372. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  373. // Parse error. Ignore the token.
  374. $this->ignored = true;
  375. /* A comment token */
  376. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  377. /* Append a Comment node to the Document object with the data
  378. attribute set to the data given in the comment token. */
  379. // XDOM
  380. $comment = $this->dom->createComment($token['data']);
  381. $this->dom->appendChild($comment);
  382. /* A character token that is one of U+0009 CHARACTER TABULATION,
  383. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  384. or U+0020 SPACE */
  385. } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  386. /* Ignore the token. */
  387. $this->ignored = true;
  388. /* A start tag whose tag name is "html" */
  389. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
  390. /* Create an element for the token in the HTML namespace. Append it
  391. * to the Document object. Put this element in the stack of open
  392. * elements. */
  393. // XDOM
  394. $html = $this->insertElement($token, false);
  395. $this->dom->appendChild($html);
  396. $this->stack[] = $html;
  397. $this->mode = self::BEFORE_HEAD;
  398. } else {
  399. /* Create an html element. Append it to the Document object. Put
  400. * this element in the stack of open elements. */
  401. // XDOM
  402. $html = $this->dom->createElementNS(self::NS_HTML, 'html');
  403. $this->dom->appendChild($html);
  404. $this->stack[] = $html;
  405. /* Switch the insertion mode to "before head", then reprocess the
  406. * current token. */
  407. $this->mode = self::BEFORE_HEAD;
  408. $this->emitToken($token);
  409. }
  410. break;
  411. case self::BEFORE_HEAD:
  412. /* A character token that is one of U+0009 CHARACTER TABULATION,
  413. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  414. or U+0020 SPACE */
  415. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  416. /* Ignore the token. */
  417. $this->ignored = true;
  418. /* A comment token */
  419. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  420. /* Append a Comment node to the current node with the data attribute
  421. set to the data given in the comment token. */
  422. $this->insertComment($token['data']);
  423. /* A DOCTYPE token */
  424. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  425. /* Parse error. Ignore the token */
  426. $this->ignored = true;
  427. // parse error
  428. /* A start tag token with the tag name "html" */
  429. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  430. /* Process the token using the rules for the "in body"
  431. * insertion mode. */
  432. $this->processWithRulesFor($token, self::IN_BODY);
  433. /* A start tag token with the tag name "head" */
  434. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
  435. /* Insert an HTML element for the token. */
  436. $element = $this->insertElement($token);
  437. /* Set the head element pointer to this new element node. */
  438. $this->head_pointer = $element;
  439. /* Change the insertion mode to "in head". */
  440. $this->mode = self::IN_HEAD;
  441. /* An end tag whose tag name is one of: "head", "body", "html", "br" */
  442. } elseif (
  443. $token['type'] === HTML5_Tokenizer::ENDTAG && (
  444. $token['name'] === 'head' || $token['name'] === 'body' ||
  445. $token['name'] === 'html' || $token['name'] === 'br'
  446. )) {
  447. /* Act as if a start tag token with the tag name "head" and no
  448. * attributes had been seen, then reprocess the current token. */
  449. $this->emitToken([
  450. 'name' => 'head',
  451. 'type' => HTML5_Tokenizer::STARTTAG,
  452. 'attr' => []
  453. ]);
  454. $this->emitToken($token);
  455. /* Any other end tag */
  456. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
  457. /* Parse error. Ignore the token. */
  458. $this->ignored = true;
  459. } else {
  460. /* Act as if a start tag token with the tag name "head" and no
  461. * attributes had been seen, then reprocess the current token.
  462. * Note: This will result in an empty head element being
  463. * generated, with the current token being reprocessed in the
  464. * "after head" insertion mode. */
  465. $this->emitToken([
  466. 'name' => 'head',
  467. 'type' => HTML5_Tokenizer::STARTTAG,
  468. 'attr' => []
  469. ]);
  470. $this->emitToken($token);
  471. }
  472. break;
  473. case self::IN_HEAD:
  474. /* A character token that is one of U+0009 CHARACTER TABULATION,
  475. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  476. or U+0020 SPACE. */
  477. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  478. /* Insert the character into the current node. */
  479. $this->insertText($token['data']);
  480. /* A comment token */
  481. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  482. /* Append a Comment node to the current node with the data attribute
  483. set to the data given in the comment token. */
  484. $this->insertComment($token['data']);
  485. /* A DOCTYPE token */
  486. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  487. /* Parse error. Ignore the token. */
  488. $this->ignored = true;
  489. // parse error
  490. /* A start tag whose tag name is "html" */
  491. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  492. $token['name'] === 'html') {
  493. $this->processWithRulesFor($token, self::IN_BODY);
  494. /* A start tag whose tag name is one of: "base", "command", "link" */
  495. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  496. ($token['name'] === 'base' || $token['name'] === 'command' ||
  497. $token['name'] === 'link')) {
  498. /* Insert an HTML element for the token. Immediately pop the
  499. * current node off the stack of open elements. */
  500. $this->insertElement($token);
  501. array_pop($this->stack);
  502. // YYY: Acknowledge the token's self-closing flag, if it is set.
  503. /* A start tag whose tag name is "meta" */
  504. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
  505. /* Insert an HTML element for the token. Immediately pop the
  506. * current node off the stack of open elements. */
  507. $this->insertElement($token);
  508. array_pop($this->stack);
  509. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  510. // XENCODING: If the element has a charset attribute, and its value is a
  511. // supported encoding, and the confidence is currently tentative,
  512. // then change the encoding to the encoding given by the value of
  513. // the charset attribute.
  514. //
  515. // Otherwise, if the element has a content attribute, and applying
  516. // the algorithm for extracting an encoding from a Content-Type to
  517. // its value returns a supported encoding encoding, and the
  518. // confidence is currently tentative, then change the encoding to
  519. // the encoding encoding.
  520. /* A start tag with the tag name "title" */
  521. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
  522. $this->insertRCDATAElement($token);
  523. /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
  524. * A start tag whose tag name is one of: "noframes", "style" */
  525. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  526. ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
  527. // XSCRIPT: Scripting flag not respected
  528. $this->insertCDATAElement($token);
  529. // XSCRIPT: Scripting flag disable not implemented
  530. /* A start tag with the tag name "script" */
  531. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  532. /* 1. Create an element for the token in the HTML namespace. */
  533. $node = $this->insertElement($token, false);
  534. /* 2. Mark the element as being "parser-inserted" */
  535. // Uhhh... XSCRIPT
  536. /* 3. If the parser was originally created for the HTML
  537. * fragment parsing algorithm, then mark the script element as
  538. * "already executed". (fragment case) */
  539. // ditto... XSCRIPT
  540. /* 4. Append the new element to the current node and push it onto
  541. * the stack of open elements. */
  542. end($this->stack)->appendChild($node);
  543. $this->stack[] = $node;
  544. // I guess we could squash these together
  545. /* 6. Let the original insertion mode be the current insertion mode. */
  546. $this->original_mode = $this->mode;
  547. /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
  548. $this->mode = self::IN_CDATA_RCDATA;
  549. /* 5. Switch the tokeniser's content model flag to the CDATA state. */
  550. $this->content_model = HTML5_Tokenizer::CDATA;
  551. /* An end tag with the tag name "head" */
  552. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
  553. /* Pop the current node (which will be the head element) off the stack of open elements. */
  554. array_pop($this->stack);
  555. /* Change the insertion mode to "after head". */
  556. $this->mode = self::AFTER_HEAD;
  557. // Slight logic inversion here to minimize duplication
  558. /* A start tag with the tag name "head". */
  559. /* An end tag whose tag name is not one of: "body", "html", "br" */
  560. } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  561. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
  562. $token['name'] !== 'body' && $token['name'] !== 'br')) {
  563. // Parse error. Ignore the token.
  564. $this->ignored = true;
  565. /* Anything else */
  566. } else {
  567. /* Act as if an end tag token with the tag name "head" had been
  568. * seen, and reprocess the current token. */
  569. $this->emitToken([
  570. 'name' => 'head',
  571. 'type' => HTML5_Tokenizer::ENDTAG
  572. ]);
  573. /* Then, reprocess the current token. */
  574. $this->emitToken($token);
  575. }
  576. break;
  577. case self::IN_HEAD_NOSCRIPT:
  578. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  579. // parse error
  580. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  581. $this->processWithRulesFor($token, self::IN_BODY);
  582. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
  583. /* Pop the current node (which will be a noscript element) from the
  584. * stack of open elements; the new current node will be a head
  585. * element. */
  586. array_pop($this->stack);
  587. $this->mode = self::IN_HEAD;
  588. } elseif (
  589. ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
  590. ($token['type'] === HTML5_Tokenizer::COMMENT) ||
  591. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  592. $token['name'] === 'link' || $token['name'] === 'meta' ||
  593. $token['name'] === 'noframes' || $token['name'] === 'style'))) {
  594. $this->processWithRulesFor($token, self::IN_HEAD);
  595. // inverted logic
  596. } elseif (
  597. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  598. $token['name'] === 'head' || $token['name'] === 'noscript')) ||
  599. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  600. $token['name'] !== 'br')) {
  601. // parse error
  602. } else {
  603. // parse error
  604. $this->emitToken([
  605. 'type' => HTML5_Tokenizer::ENDTAG,
  606. 'name' => 'noscript',
  607. ]);
  608. $this->emitToken($token);
  609. }
  610. break;
  611. case self::AFTER_HEAD:
  612. /* Handle the token as follows: */
  613. /* A character token that is one of U+0009 CHARACTER TABULATION,
  614. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  615. or U+0020 SPACE */
  616. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  617. /* Append the character to the current node. */
  618. $this->insertText($token['data']);
  619. /* A comment token */
  620. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  621. /* Append a Comment node to the current node with the data attribute
  622. set to the data given in the comment token. */
  623. $this->insertComment($token['data']);
  624. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  625. // parse error
  626. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  627. $this->processWithRulesFor($token, self::IN_BODY);
  628. /* A start tag token with the tag name "body" */
  629. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
  630. $this->insertElement($token);
  631. /* Set the frameset-ok flag to "not ok". */
  632. $this->flag_frameset_ok = false;
  633. /* Change the insertion mode to "in body". */
  634. $this->mode = self::IN_BODY;
  635. /* A start tag token with the tag name "frameset" */
  636. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
  637. /* Insert a frameset element for the token. */
  638. $this->insertElement($token);
  639. /* Change the insertion mode to "in frameset". */
  640. $this->mode = self::IN_FRAMESET;
  641. /* A start tag token whose tag name is one of: "base", "link", "meta",
  642. "noframes", "script", "style", "title" */
  643. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  644. ['base', 'link', 'meta', 'noframes', 'script', 'style', 'title'])) {
  645. // parse error
  646. /* Push the node pointed to by the head element pointer onto the
  647. * stack of open elements. */
  648. $this->stack[] = $this->head_pointer;
  649. $this->processWithRulesFor($token, self::IN_HEAD);
  650. array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
  651. // inversion of specification
  652. } elseif (
  653. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  654. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  655. $token['name'] !== 'body' && $token['name'] !== 'html' &&
  656. $token['name'] !== 'br')) {
  657. // parse error
  658. /* Anything else */
  659. } else {
  660. $this->emitToken([
  661. 'name' => 'body',
  662. 'type' => HTML5_Tokenizer::STARTTAG,
  663. 'attr' => []
  664. ]);
  665. $this->flag_frameset_ok = true;
  666. $this->emitToken($token);
  667. }
  668. break;
  669. case self::IN_BODY:
  670. /* Handle the token as follows: */
  671. switch($token['type']) {
  672. /* A character token */
  673. case HTML5_Tokenizer::CHARACTER:
  674. case HTML5_Tokenizer::SPACECHARACTER:
  675. /* Reconstruct the active formatting elements, if any. */
  676. $this->reconstructActiveFormattingElements();
  677. /* Append the token's character to the current node. */
  678. $this->insertText($token['data']);
  679. /* If the token is not one of U+0009 CHARACTER TABULATION,
  680. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
  681. * SPACE, then set the frameset-ok flag to "not ok". */
  682. // i.e., if any of the characters is not whitespace
  683. if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
  684. $this->flag_frameset_ok = false;
  685. }
  686. break;
  687. /* A comment token */
  688. case HTML5_Tokenizer::COMMENT:
  689. /* Append a Comment node to the current node with the data
  690. attribute set to the data given in the comment token. */
  691. $this->insertComment($token['data']);
  692. break;
  693. case HTML5_Tokenizer::DOCTYPE:
  694. // parse error
  695. break;
  696. case HTML5_Tokenizer::EOF:
  697. // parse error
  698. break;
  699. case HTML5_Tokenizer::STARTTAG:
  700. switch($token['name']) {
  701. case 'html':
  702. // parse error
  703. /* For each attribute on the token, check to see if the
  704. * attribute is already present on the top element of the
  705. * stack of open elements. If it is not, add the attribute
  706. * and its corresponding value to that element. */
  707. foreach($token['attr'] as $attr) {
  708. if (!$this->stack[0]->hasAttribute($attr['name'])) {
  709. $this->stack[0]->setAttribute($attr['name'], $attr['value']);
  710. }
  711. }
  712. break;
  713. case 'base': case 'command': case 'link': case 'meta': case 'noframes':
  714. case 'script': case 'style': case 'title':
  715. /* Process the token as if the insertion mode had been "in
  716. head". */
  717. $this->processWithRulesFor($token, self::IN_HEAD);
  718. break;
  719. /* A start tag token with the tag name "body" */
  720. case 'body':
  721. /* Parse error. If the second element on the stack of open
  722. elements is not a body element, or, if the stack of open
  723. elements has only one node on it, then ignore the token.
  724. (fragment case) */
  725. if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  726. $this->ignored = true;
  727. // Ignore
  728. /* Otherwise, for each attribute on the token, check to see
  729. if the attribute is already present on the body element (the
  730. second element) on the stack of open elements. If it is not,
  731. add the attribute and its corresponding value to that
  732. element. */
  733. } else {
  734. foreach($token['attr'] as $attr) {
  735. if (!$this->stack[1]->hasAttribute($attr['name'])) {
  736. $this->stack[1]->setAttribute($attr['name'], $attr['value']);
  737. }
  738. }
  739. }
  740. break;
  741. case 'frameset':
  742. // parse error
  743. /* If the second element on the stack of open elements is
  744. * not a body element, or, if the stack of open elements
  745. * has only one node on it, then ignore the token.
  746. * (fragment case) */
  747. if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  748. $this->ignored = true;
  749. // Ignore
  750. } elseif (!$this->flag_frameset_ok) {
  751. $this->ignored = true;
  752. // Ignore
  753. } else {
  754. /* 1. Remove the second element on the stack of open
  755. * elements from its parent node, if it has one. */
  756. if ($this->stack[1]->parentNode) {
  757. $this->stack[1]->parentNode->removeChild($this->stack[1]);
  758. }
  759. /* 2. Pop all the nodes from the bottom of the stack of
  760. * open elements, from the current node up to the root
  761. * html element. */
  762. array_splice($this->stack, 1);
  763. $this->insertElement($token);
  764. $this->mode = self::IN_FRAMESET;
  765. }
  766. break;
  767. // in spec, there is a diversion here
  768. case 'address': case 'article': case 'aside': case 'blockquote':
  769. case 'center': case 'datagrid': case 'details': case 'dir':
  770. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  771. case 'header': case 'hgroup': case 'menu': case 'nav':
  772. case 'ol': case 'p': case 'section': case 'ul':
  773. /* If the stack of open elements has a p element in scope,
  774. then act as if an end tag with the tag name p had been
  775. seen. */
  776. if ($this->elementInScope('p')) {
  777. $this->emitToken([
  778. 'name' => 'p',
  779. 'type' => HTML5_Tokenizer::ENDTAG
  780. ]);
  781. }
  782. /* Insert an HTML element for the token. */
  783. $this->insertElement($token);
  784. break;
  785. /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
  786. "h5", "h6" */
  787. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  788. /* If the stack of open elements has a p element in scope,
  789. then act as if an end tag with the tag name p had been seen. */
  790. if ($this->elementInScope('p')) {
  791. $this->emitToken([
  792. 'name' => 'p',
  793. 'type' => HTML5_Tokenizer::ENDTAG
  794. ]);
  795. }
  796. /* If the current node is an element whose tag name is one
  797. * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
  798. * parse error; pop the current node off the stack of open
  799. * elements. */
  800. $peek = array_pop($this->stack);
  801. if (in_array($peek->tagName, ["h1", "h2", "h3", "h4", "h5", "h6"])) {
  802. // parse error
  803. } else {
  804. $this->stack[] = $peek;
  805. }
  806. /* Insert an HTML element for the token. */
  807. $this->insertElement($token);
  808. break;
  809. case 'pre': case 'listing':
  810. /* If the stack of open elements has a p element in scope,
  811. then act as if an end tag with the tag name p had been seen. */
  812. if ($this->elementInScope('p')) {
  813. $this->emitToken([
  814. 'name' => 'p',
  815. 'type' => HTML5_Tokenizer::ENDTAG
  816. ]);
  817. }
  818. $this->insertElement($token);
  819. /* If the next token is a U+000A LINE FEED (LF) character
  820. * token, then ignore that token and move on to the next
  821. * one. (Newlines at the start of pre blocks are ignored as
  822. * an authoring convenience.) */
  823. $this->ignore_lf_token = 2;
  824. $this->flag_frameset_ok = false;
  825. break;
  826. /* A start tag whose tag name is "form" */
  827. case 'form':
  828. /* If the form element pointer is not null, ignore the
  829. token with a parse error. */
  830. if ($this->form_pointer !== null) {
  831. $this->ignored = true;
  832. // Ignore.
  833. /* Otherwise: */
  834. } else {
  835. /* If the stack of open elements has a p element in
  836. scope, then act as if an end tag with the tag name p
  837. had been seen. */
  838. if ($this->elementInScope('p')) {
  839. $this->emitToken([
  840. 'name' => 'p',
  841. 'type' => HTML5_Tokenizer::ENDTAG
  842. ]);
  843. }
  844. /* Insert an HTML element for the token, and set the
  845. form element pointer to point to the element created. */
  846. $element = $this->insertElement($token);
  847. $this->form_pointer = $element;
  848. }
  849. break;
  850. // condensed specification
  851. case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
  852. /* 1. Set the frameset-ok flag to "not ok". */
  853. $this->flag_frameset_ok = false;
  854. $stack_length = count($this->stack) - 1;
  855. for($n = $stack_length; 0 <= $n; $n--) {
  856. /* 2. Initialise node to be the current node (the
  857. bottommost node of the stack). */
  858. $stop = false;
  859. $node = $this->stack[$n];
  860. $cat = $this->getElementCategory($node);
  861. // for case 'li':
  862. /* 3. If node is an li element, then act as if an end
  863. * tag with the tag name "li" had been seen, then jump
  864. * to the last step. */
  865. // for case 'dc': case 'dd': case 'ds': case 'dt':
  866. /* If node is a dc, dd, ds or dt element, then act as if an end
  867. * tag with the same tag name as node had been seen, then
  868. * jump to the last step. */
  869. if (($token['name'] === 'li' && $node->tagName === 'li') ||
  870. ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
  871. $this->emitToken([
  872. 'type' => HTML5_Tokenizer::ENDTAG,
  873. 'name' => $node->tagName,
  874. ]);
  875. break;
  876. }
  877. /* 4. If node is not in the formatting category, and is
  878. not in the phrasing category, and is not an address,
  879. div or p element, then stop this algorithm. */
  880. if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
  881. $node->tagName !== 'address' && $node->tagName !== 'div' &&
  882. $node->tagName !== 'p') {
  883. break;
  884. }
  885. /* 5. Otherwise, set node to the previous entry in the
  886. * stack of open elements and return to step 2. */
  887. }
  888. /* 6. This is the last step. */
  889. /* If the stack of open elements has a p element in scope,
  890. then act as if an end tag with the tag name p had been
  891. seen. */
  892. if ($this->elementInScope('p')) {
  893. $this->emitToken([
  894. 'name' => 'p',
  895. 'type' => HTML5_Tokenizer::ENDTAG
  896. ]);
  897. }
  898. /* Finally, insert an HTML element with the same tag
  899. name as the token's. */
  900. $this->insertElement($token);
  901. break;
  902. /* A start tag token whose tag name is "plaintext" */
  903. case 'plaintext':
  904. /* If the stack of open elements has a p element in scope,
  905. then act as if an end tag with the tag name p had been
  906. seen. */
  907. if ($this->elementInScope('p')) {
  908. $this->emitToken([
  909. 'name' => 'p',
  910. 'type' => HTML5_Tokenizer::ENDTAG
  911. ]);
  912. }
  913. /* Insert an HTML element for the token. */
  914. $this->insertElement($token);
  915. $this->content_model = HTML5_Tokenizer::PLAINTEXT;
  916. break;
  917. // more diversions
  918. /* A start tag whose tag name is "a" */
  919. case 'a':
  920. /* If the list of active formatting elements contains
  921. an element whose tag name is "a" between the end of the
  922. list and the last marker on the list (or the start of
  923. the list if there is no marker on the list), then this
  924. is a parse error; act as if an end tag with the tag name
  925. "a" had been seen, then remove that element from the list
  926. of active formatting elements and the stack of open
  927. elements if the end tag didn't already remove it (it
  928. might not have if the element is not in table scope). */
  929. $leng = count($this->a_formatting);
  930. for ($n = $leng - 1; $n >= 0; $n--) {
  931. if ($this->a_formatting[$n] === self::MARKER) {
  932. break;
  933. } elseif ($this->a_formatting[$n]->tagName === 'a') {
  934. $a = $this->a_formatting[$n];
  935. $this->emitToken([
  936. 'name' => 'a',
  937. 'type' => HTML5_Tokenizer::ENDTAG
  938. ]);
  939. if (in_array($a, $this->a_formatting)) {
  940. $a_i = array_search($a, $this->a_formatting, true);
  941. if ($a_i !== false) {
  942. array_splice($this->a_formatting, $a_i, 1);
  943. }
  944. }
  945. if (in_array($a, $this->stack)) {
  946. $a_i = array_search($a, $this->stack, true);
  947. if ($a_i !== false) {
  948. array_splice($this->stack, $a_i, 1);
  949. }
  950. }
  951. break;
  952. }
  953. }
  954. /* Reconstruct the active formatting elements, if any. */
  955. $this->reconstructActiveFormattingElements();
  956. /* Insert an HTML element for the token. */
  957. $el = $this->insertElement($token);
  958. /* Add that element to the list of active formatting
  959. elements. */
  960. $this->a_formatting[] = $el;
  961. break;
  962. case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
  963. case 's': case 'small': case 'strike':
  964. case 'strong': case 'tt': case 'u':
  965. /* Reconstruct the active formatting elements, if any. */
  966. $this->reconstructActiveFormattingElements();
  967. /* Insert an HTML element for the token. */
  968. $el = $this->insertElement($token);
  969. /* Add that element to the list of active formatting
  970. elements. */
  971. $this->a_formatting[] = $el;
  972. break;
  973. case 'nobr':
  974. /* Reconstruct the active formatting elements, if any. */
  975. $this->reconstructActiveFormattingElements();
  976. /* If the stack of open elements has a nobr element in
  977. * scope, then this is a parse error; act as if an end tag
  978. * with the tag name "nobr" had been seen, then once again
  979. * reconstruct the active formatting elements, if any. */
  980. if ($this->elementInScope('nobr')) {
  981. $this->emitToken([
  982. 'name' => 'nobr',
  983. 'type' => HTML5_Tokenizer::ENDTAG,
  984. ]);
  985. $this->reconstructActiveFormattingElements();
  986. }
  987. /* Insert an HTML element for the token. */
  988. $el = $this->insertElement($token);
  989. /* Add that element to the list of active formatting
  990. elements. */
  991. $this->a_formatting[] = $el;
  992. break;
  993. // another diversion
  994. /* A start tag token whose tag name is "button" */
  995. case 'button':
  996. /* If the stack of open elements has a button element in scope,
  997. then this is a parse error; act as if an end tag with the tag
  998. name "button" had been seen, then reprocess the token. (We don't
  999. do that. Unnecessary.) (I hope you're right! -- ezyang) */
  1000. if ($this->elementInScope('button')) {
  1001. $this->emitToken([
  1002. 'name' => 'button',
  1003. 'type' => HTML5_Tokenizer::ENDTAG
  1004. ]);
  1005. }
  1006. /* Reconstruct the active formatting elements, if any. */
  1007. $this->reconstructActiveFormattingElements();
  1008. /* Insert an HTML element for the token. */
  1009. $this->insertElement($token);
  1010. /* Insert a marker at the end of the list of active
  1011. formatting elements. */
  1012. $this->a_formatting[] = self::MARKER;
  1013. $this->flag_frameset_ok = false;
  1014. break;
  1015. case 'applet': case 'marquee': case 'object':
  1016. /* Reconstruct the active formatting elements, if any. */
  1017. $this->reconstructActiveFormattingElements();
  1018. /* Insert an HTML element for the token. */
  1019. $this->insertElement($token);
  1020. /* Insert a marker at the end of the list of active
  1021. formatting elements. */
  1022. $this->a_formatting[] = self::MARKER;
  1023. $this->flag_frameset_ok = false;
  1024. break;
  1025. // spec diversion
  1026. /* A start tag whose tag name is "table" */
  1027. case 'table':
  1028. /* If the Document is not set to quirks mode, and the
  1029. * stack of open elements has a p element in scope, then
  1030. * act as if an end tag with the tag name "p" had been
  1031. * seen. */
  1032. if ($this->quirks_mode !== self::QUIRKS_MODE &&
  1033. $this->elementInScope('p')) {
  1034. $this->emitToken([
  1035. 'name' => 'p',
  1036. 'type' => HTML5_Tokenizer::ENDTAG
  1037. ]);
  1038. }
  1039. /* Insert an HTML element for the token. */
  1040. $this->insertElement($token);
  1041. $this->flag_frameset_ok = false;
  1042. /* Change the insertion mode to "in table". */
  1043. $this->mode = self::IN_TABLE;
  1044. break;
  1045. /* A start tag whose tag name is one of: "area", "basefont",
  1046. "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
  1047. case 'area': case 'basefont': case 'bgsound': case 'br':
  1048. case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
  1049. case 'wbr':
  1050. /* Reconstruct the active formatting elements, if any. */
  1051. $this->reconstructActiveFormattingElements();
  1052. /* Insert an HTML element for the token. */
  1053. $this->insertElement($token);
  1054. /* Immediately pop the current node off the stack of open elements. */
  1055. array_pop($this->stack);
  1056. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1057. $this->flag_frameset_ok = false;
  1058. break;
  1059. case 'param': case 'source':
  1060. /* Insert an HTML element for the token. */
  1061. $this->insertElement($token);
  1062. /* Immediately pop the current node off the stack of open elements. */
  1063. array_pop($this->stack);
  1064. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1065. break;
  1066. /* A start tag whose tag name is "hr" */
  1067. case 'hr':
  1068. /* If the stack of open elements has a p element in scope,
  1069. then act as if an end tag with the tag name p had been seen. */
  1070. if ($this->elementInScope('p')) {
  1071. $this->emitToken([
  1072. 'name' => 'p',
  1073. 'type' => HTML5_Tokenizer::ENDTAG
  1074. ]);
  1075. }
  1076. /* Insert an HTML element for the token. */
  1077. $this->insertElement($token);
  1078. /* Immediately pop the current node off the stack of open elements. */
  1079. array_pop($this->stack);
  1080. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1081. $this->flag_frameset_ok = false;
  1082. break;
  1083. /* A start tag whose tag name is "image" */
  1084. case 'image':
  1085. /* Parse error. Change the token's tag name to "img" and
  1086. reprocess it. (Don't ask.) */
  1087. $token['name'] = 'img';
  1088. $this->emitToken($token);
  1089. break;
  1090. /* A start tag whose tag name is "isindex" */
  1091. case 'isindex':
  1092. /* Parse error. */
  1093. /* If the form element pointer is not null,
  1094. then ignore the token. */
  1095. if ($this->form_pointer === null) {
  1096. /* Act as if a start tag token with the tag name "form" had
  1097. been seen. */
  1098. /* If the token has an attribute called "action", set
  1099. * the action attribute on the resulting form
  1100. * element to the value of the "action" attribute of
  1101. * the token. */
  1102. $attr = [];
  1103. $action = $this->getAttr($token, 'action');
  1104. if ($action !== false) {
  1105. $attr[] = ['name' => 'action', 'value' => $action];
  1106. }
  1107. $this->emitToken([
  1108. 'name' => 'form',
  1109. 'type' => HTML5_Tokenizer::STARTTAG,
  1110. 'attr' => $attr
  1111. ]);
  1112. /* Act as if a start tag token with the tag name "hr" had
  1113. been seen. */
  1114. $this->emitToken([
  1115. 'name' => 'hr',
  1116. 'type' => HTML5_Tokenizer::STARTTAG,
  1117. 'attr' => []
  1118. ]);
  1119. /* Act as if a start tag token with the tag name "label"
  1120. had been seen. */
  1121. $this->emitToken([
  1122. 'name' => 'label',
  1123. 'type' => HTML5_Tokenizer::STARTTAG,
  1124. 'attr' => []
  1125. ]);
  1126. /* Act as if a stream of character tokens had been seen. */
  1127. $prompt = $this->getAttr($token, 'prompt');
  1128. if ($prompt === false) {
  1129. $prompt = 'This is a searchable index. '.
  1130. 'Insert your search keywords here: ';
  1131. }
  1132. $this->emitToken([
  1133. 'data' => $prompt,
  1134. 'type' => HTML5_Tokenizer::CHARACTER,
  1135. ]);
  1136. /* Act as if a start tag token with the tag name "input"
  1137. had been seen, with all the attributes from the "isindex"
  1138. token, except with the "name" attribute set to the value
  1139. "isindex" (ignoring any explicit "name" attribute). */
  1140. $attr = [];
  1141. foreach ($token['attr'] as $keypair) {
  1142. if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
  1143. $keypair['name'] === 'prompt') {
  1144. continue;
  1145. }
  1146. $attr[] = $keypair;
  1147. }
  1148. $attr[] = ['name' => 'name', 'value' => 'isindex'];
  1149. $this->emitToken([
  1150. 'name' => 'input',
  1151. 'type' => HTML5_Tokenizer::STARTTAG,
  1152. 'attr' => $attr
  1153. ]);
  1154. /* Act as if an end tag token with the tag name "label"
  1155. had been seen. */
  1156. $this->emitToken([
  1157. 'name' => 'label',
  1158. 'type' => HTML5_Tokenizer::ENDTAG
  1159. ]);
  1160. /* Act as if a start tag token with the tag name "hr" had
  1161. been seen. */
  1162. $this->emitToken([
  1163. 'name' => 'hr',
  1164. 'type' => HTML5_Tokenizer::STARTTAG
  1165. ]);
  1166. /* Act as if an end tag token with the tag name "form" had
  1167. been seen. */
  1168. $this->emitToken([
  1169. 'name' => 'form',
  1170. 'type' => HTML5_Tokenizer::ENDTAG
  1171. ]);
  1172. } else {
  1173. $this->ignored = true;
  1174. }
  1175. break;
  1176. /* A start tag whose tag name is "textarea" */
  1177. case 'textarea':
  1178. $this->insertElement($token);
  1179. /* If the next token is a U+000A LINE FEED (LF)
  1180. * character token, then ignore that token and move on to
  1181. * the next one. (Newlines at the start of textarea
  1182. * elements are ignored as an authoring convenience.)
  1183. * This is tracked with the ignore_lf_token flag set below; see also <pre>. */
  1184. $this->ignore_lf_token = 2;
  1185. $this->original_mode = $this->mode;
  1186. $this->flag_frameset_ok = false;
  1187. $this->mode = self::IN_CDATA_RCDATA;
  1188. /* Switch the tokeniser's content model flag to the
  1189. RCDATA state. */
  1190. $this->content_model = HTML5_Tokenizer::RCDATA;
  1191. break;
  1192. /* A start tag token whose tag name is "xmp" */
  1193. case 'xmp':
  1194. /* If the stack of open elements has a p element in
  1195. scope, then act as if an end tag with the tag name
  1196. "p" has been seen. */
  1197. if ($this->elementInScope('p')) {
  1198. $this->emitToken([
  1199. 'name' => 'p',
  1200. 'type' => HTML5_Tokenizer::ENDTAG
  1201. ]);
  1202. }
  1203. /* Reconstruct the active formatting elements, if any. */
  1204. $this->reconstructActiveFormattingElements();
  1205. $this->flag_frameset_ok = false;
  1206. $this->insertCDATAElement($token);
  1207. break;
  1208. case 'iframe':
  1209. $this->flag_frameset_ok = false;
  1210. $this->insertCDATAElement($token);
  1211. break;
  1212. case 'noembed': case 'noscript':
  1213. // XSCRIPT: should check scripting flag
  1214. $this->insertCDATAElement($token);
  1215. break;
  1216. /* A start tag whose tag name is "select" */
  1217. case 'select':
  1218. /* Reconstruct the active formatting elements, if any. */
  1219. $this->reconstructActiveFormattingElements();
  1220. /* Insert an HTML element for the token. */
  1221. $this->insertElement($token);
  1222. $this->flag_frameset_ok = false;
  1223. /* If the insertion mode is one of "in table", "in caption",
  1224. * "in column group", "in table body", "in row", or "in
  1225. * cell", then switch the insertion mode to "in select in
  1226. * table". Otherwise, switch the insertion mode to "in
  1227. * select". */
  1228. if (
  1229. $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
  1230. $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
  1231. $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
  1232. ) {
  1233. $this->mode = self::IN_SELECT_IN_TABLE;
  1234. } else {
  1235. $this->mode = self::IN_SELECT;
  1236. }
  1237. break;
  1238. case 'option': case 'optgroup':
  1239. if ($this->elementInScope('option')) {
  1240. $this->emitToken([
  1241. 'name' => 'option',
  1242. 'type' => HTML5_Tokenizer::ENDTAG,
  1243. ]);
  1244. }
  1245. $this->reconstructActiveFormattingElements();
  1246. $this->insertElement($token);
  1247. break;
  1248. case 'rp': case 'rt':
  1249. /* If the stack of open elements has a ruby element in scope, then generate
  1250. * implied end tags. If the current node is not then a ruby element, this is
  1251. * a parse error; pop all the nodes from the current node up to the node
  1252. * immediately before the bottommost ruby element on the stack of open elements.
  1253. */
  1254. if ($this->elementInScope('ruby')) {
  1255. $this->generateImpliedEndTags();
  1256. }
  1257. $peek = false;
  1258. do {
  1259. /*if ($peek) {
  1260. // parse error
  1261. }*/
  1262. $peek = array_pop($this->stack);
  1263. } while ($peek->tagName !== 'ruby');
  1264. $this->stack[] = $peek; // we popped one too many
  1265. $this->insertElement($token);
  1266. break;
  1267. // spec diversion
  1268. case 'math':
  1269. $this->reconstructActiveFormattingElements();
  1270. $token = $this->adjustMathMLAttributes($token);
  1271. $token = $this->adjustForeignAttributes($token);
  1272. $this->insertForeignElement($token, self::NS_MATHML);
  1273. if (isset($token['self-closing'])) {
  1274. // XERROR: acknowledge the token's self-closing flag
  1275. array_pop($this->stack);
  1276. }
  1277. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1278. $this->secondary_mode = $this->mode;
  1279. $this->mode = self::IN_FOREIGN_CONTENT;
  1280. }
  1281. break;
  1282. case 'svg':
  1283. $this->reconstructActiveFormattingElements();
  1284. $token = $this->adjustSVGAttributes($token);
  1285. $token = $this->adjustForeignAttributes($token);
  1286. $this->insertForeignElement($token, self::NS_SVG);
  1287. if (isset($token['self-closing'])) {
  1288. // XERROR: acknowledge the token's self-closing flag
  1289. array_pop($this->stack);
  1290. }
  1291. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1292. $this->secondary_mode = $this->mode;
  1293. $this->mode = self::IN_FOREIGN_CONTENT;
  1294. }
  1295. break;
  1296. case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
  1297. case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
  1298. // parse error
  1299. break;
  1300. /* A start tag token not covered by the previous entries */
  1301. default:
  1302. /* Reconstruct the active formatting elements, if any. */
  1303. $this->reconstructActiveFormattingElements();
  1304. $this->insertElement($token);
  1305. /* This element will be a phrasing element. */
  1306. break;
  1307. }
  1308. break;
  1309. case HTML5_Tokenizer::ENDTAG:
  1310. switch ($token['name']) {
  1311. /* An end tag with the tag name "body" */
  1312. case 'body':
  1313. /* If the stack of open elements does not have a body
  1314. * element in scope, this is a parse error; ignore the
  1315. * token. */
  1316. if (!$this->elementInScope('body')) {
  1317. $this->ignored = true;
  1318. /* Otherwise, if there is a node in the stack of open
  1319. * elements that is not either a dc element, a dd element,
  1320. * a ds element, a dt element, an li element, an optgroup
  1321. * element, an option element, a p element, an rp element,
  1322. * an rt element, a tbody element, a td element, a tfoot
  1323. * element, a th element, a thead element, a tr element,
  1324. * the body element, or the html element, then this is a
  1325. * parse error.
  1326. */
  1327. } else {
  1328. // XERROR: implement this check for parse error
  1329. }
  1330. /* Change the insertion mode to "after body". */
  1331. $this->mode = self::AFTER_BODY;
  1332. break;
  1333. /* An end tag with the tag name "html" */
  1334. case 'html':
  1335. /* Act as if an end tag with tag name "body" had been seen,
  1336. then, if that token wasn't ignored, reprocess the current
  1337. token. */
  1338. $this->emitToken([
  1339. 'name' => 'body',
  1340. 'type' => HTML5_Tokenizer::ENDTAG
  1341. ]);
  1342. if (!$this->ignored) {
  1343. $this->emitToken($token);
  1344. }
  1345. break;
  1346. case 'address': case 'article': case 'aside': case 'blockquote':
  1347. case 'center': case 'datagrid': case 'details': case 'dir':
  1348. case 'div': case 'dl': case 'fieldset': case 'footer':
  1349. case 'header': case 'hgroup': case 'listing': case 'menu':
  1350. case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
  1351. /* If the stack of open elements has an element in scope
  1352. with the same tag name as that of the token, then generate
  1353. implied end tags. */
  1354. if ($this->elementInScope($token['name'])) {
  1355. $this->generateImpliedEndTags();
  1356. /* Now, if the current node is not an element with
  1357. the same tag name as that of the token, then this
  1358. is a parse error. */
  1359. // XERROR: implement parse error logic
  1360. /* If the stack of open elements has an element in
  1361. scope with the same tag name as that of the token,
  1362. then pop elements from this stack until an element
  1363. with that tag name has been popped from the stack. */
  1364. do {
  1365. $node = array_pop($this->stack);
  1366. } while ($node->tagName !== $token['name']);
  1367. } else {
  1368. // parse error
  1369. }
  1370. break;
  1371. /* An end tag whose tag name is "form" */
  1372. case 'form':
  1373. /* Let node be the element that the form element pointer is set to. */
  1374. $node = $this->form_pointer;
  1375. /* Set the form element pointer to null. */
  1376. $this->form_pointer = null;
  1377. /* If node is null or the stack of open elements does not
  1378. * have node in scope, then this is a parse error; ignore the token. */
  1379. if ($node === null || !in_array($node, $this->stack)) {
  1380. // parse error
  1381. $this->ignored = true;
  1382. } else {
  1383. /* 1. Generate implied end tags. */
  1384. $this->generateImpliedEndTags();
  1385. /* 2. If the current node is not node, then this is a parse error. */
  1386. if (end($this->stack) !== $node) {
  1387. // parse error
  1388. }
  1389. /* 3. Remove node from the stack of open elements. */
  1390. array_splice($this->stack, array_search($node, $this->stack, true), 1);
  1391. }
  1392. break;
  1393. /* An end tag whose tag name is "p" */
  1394. case 'p':
  1395. /* If the stack of open elements has a p element in scope,
  1396. then generate implied end tags, except for p elements. */
  1397. if ($this->elementInScope('p')) {
  1398. /* Generate implied end tags, except for elements with
  1399. * the same tag name as the token. */
  1400. $this->generateImpliedEndTags(['p']);
  1401. /* If the current node is not a p element, then this is
  1402. a parse error. */
  1403. // XERROR: implement
  1404. /* Pop elements from the stack of open elements until
  1405. * an element with the same tag name as the token has
  1406. * been popped from the stack. */
  1407. do {
  1408. $node = array_pop($this->stack);
  1409. } while ($node->tagName !== 'p');
  1410. } else {
  1411. // parse error
  1412. $this->emitToken([
  1413. 'name' => 'p',
  1414. 'type' => HTML5_Tokenizer::STARTTAG,
  1415. ]);
  1416. $this->emitToken($token);
  1417. }
  1418. break;
  1419. /* An end tag whose tag name is "li" */
  1420. case 'li':
  1421. /* If the stack of open elements does not have an element
  1422. * in list item scope with the same tag name as that of the
  1423. * token, then this is a parse error; ignore the token. */
  1424. if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
  1425. /* Generate implied end tags, except for elements with the
  1426. * same tag name as the token. */
  1427. $this->generateImpliedEndTags([$token['name']]);
  1428. /* If the current node is not an element with the same tag
  1429. * name as that of the token, then this is a parse error. */
  1430. // XERROR: parse error
  1431. /* Pop elements from the stack of open elements until an
  1432. * element with the same tag name as the token has been
  1433. * popped from the stack. */
  1434. do {
  1435. $node = array_pop($this->stack);
  1436. } while ($node->tagName !== $token['name']);
  1437. }
  1438. /*else {
  1439. // XERROR: parse error
  1440. }*/
  1441. break;
  1442. /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
  1443. case 'dc': case 'dd': case 'ds': case 'dt':
  1444. if ($this->elementInScope($token['name'])) {
  1445. $this->generateImpliedEndTags([$token['name']]);
  1446. /* If the current node is not an element with the same
  1447. tag name as the token, then this is a parse error. */
  1448. // XERROR: implement parse error
  1449. /* Pop elements from the stack of open elements until
  1450. * an element with the same tag name as the token has
  1451. * been popped from the stack. */
  1452. do {
  1453. $node = array_pop($this->stack);
  1454. } while ($node->tagName !== $token['name']);
  1455. }
  1456. /*else {
  1457. // XERROR: parse error
  1458. }*/
  1459. break;
  1460. /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
  1461. "h5", "h6" */
  1462. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  1463. $elements = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
  1464. /* If the stack of open elements has in scope an element whose
  1465. tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
  1466. generate implied end tags. */
  1467. if ($this->elementInScope($elements)) {
  1468. $this->generateImpliedEndTags();
  1469. /* Now, if the current node is not an element with the same
  1470. tag name as that of the token, then this is a parse error. */
  1471. // XERROR: implement parse error
  1472. /* If the stack of open elements has in scope an element
  1473. whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
  1474. "h6", then pop elements from the stack until an element
  1475. with one of those tag names has been popped from the stack. */
  1476. do {
  1477. $node = array_pop($this->stack);
  1478. } while (!in_array($node->tagName, $elements));
  1479. }
  1480. /*else {
  1481. // parse error
  1482. }*/
  1483. break;
  1484. /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
  1485. "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
  1486. case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
  1487. case 'i': case 'nobr': case 's': case 'small': case 'strike':
  1488. case 'strong': case 'tt': case 'u':
  1489. // XERROR: generally speaking this needs parse error logic
  1490. /* 1. Let the formatting element be the last element in
  1491. the list of active formatting elements that:
  1492. * is between the end of the list and the last scope
  1493. marker in the list, if any, or the start of the list
  1494. otherwise, and
  1495. * has the same tag name as the token.
  1496. */
  1497. while (true) {
  1498. for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
  1499. if ($this->a_formatting[$a] === self::MARKER) {
  1500. break;
  1501. } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
  1502. $formatting_element = $this->a_formatting[$a];
  1503. $in_stack = in_array($formatting_element, $this->stack, true);
  1504. $fe_af_pos = $a;
  1505. break;
  1506. }
  1507. }
  1508. /* If there is no such node, or, if that node is
  1509. also in the stack of open elements but the element
  1510. is not in scope, then this is a parse error. Abort
  1511. these steps. The token is ignored. */
  1512. if (
  1513. !isset($formatting_element) || (
  1514. $in_stack &&
  1515. !$this->elementInScope($token['name'])
  1516. )
  1517. ) {
  1518. $this->ignored = true;
  1519. break;
  1520. /* Otherwise, if there is such a node, but that node
  1521. is not in the stack of open elements, then this is a
  1522. parse error; remove the element from the list, and
  1523. abort these steps. */
  1524. } elseif (isset($formatting_element) && !$in_stack) {
  1525. unset($this->a_formatting[$fe_af_pos]);
  1526. $this->a_formatting = array_merge($this->a_formatting);
  1527. break;
  1528. }
  1529. /* Otherwise, there is a formatting element and that
  1530. * element is in the stack and is in scope. If the
  1531. * element is not the current node, this is a parse
  1532. * error. In any case, proceed with the algorithm as
  1533. * written in the following steps. */
  1534. // XERROR: implement me
  1535. /* 2. Let the furthest block be the topmost node in the
  1536. stack of open elements that is lower in the stack
  1537. than the formatting element, and is not an element in
  1538. the phrasing or formatting categories. There might
  1539. not be one. */
  1540. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1541. $length = count($this->stack);
  1542. for ($s = $fe_s_pos + 1; $s < $length; $s++) {
  1543. $category = $this->getElementCategory($this->stack[$s]);
  1544. if ($category !== self::PHRASING && $category !== self::FORMATTING) {
  1545. $furthest_block = $this->stack[$s];
  1546. break;
  1547. }
  1548. }
  1549. /* 3. If there is no furthest block, then the UA must
  1550. skip the subsequent steps and instead just pop all
  1551. the nodes from the bottom of the stack of open
  1552. elements, from the current node up to the formatting
  1553. element, and remove the formatting element from the
  1554. list of active formatting elements. */
  1555. if (!isset($furthest_block)) {
  1556. for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
  1557. array_pop($this->stack);
  1558. }
  1559. unset($this->a_formatting[$fe_af_pos]);
  1560. $this->a_formatting = array_merge($this->a_formatting);
  1561. break;
  1562. }
  1563. /* 4. Let the common ancestor be the element
  1564. immediately above the formatting element in the stack
  1565. of open elements. */
  1566. $common_ancestor = $this->stack[$fe_s_pos - 1];
  1567. /* 5. Let a bookmark note the position of the
  1568. formatting element in the list of active formatting
  1569. elements relative to the elements on either side
  1570. of it in the list. */
  1571. $bookmark = $fe_af_pos;
  1572. /* 6. Let node and last node be the furthest block.
  1573. Follow these steps: */
  1574. $node = $furthest_block;
  1575. $last_node = $furthest_block;
  1576. while (true) {
  1577. for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
  1578. /* 6.1 Let node be the element immediately
  1579. prior to node in the stack of open elements. */
  1580. $node = $this->stack[$n];
  1581. /* 6.2 If node is not in the list of active
  1582. formatting elements, then remove node from
  1583. the stack of open elements and then go back
  1584. to step 1. */
  1585. if (!in_array($node, $this->a_formatting, true)) {
  1586. array_splice($this->stack, $n, 1);
  1587. } else {
  1588. break;
  1589. }
  1590. }
  1591. /* 6.3 Otherwise, if node is the formatting
  1592. element, then go to the next step in the overall
  1593. algorithm. */
  1594. if ($node === $formatting_element) {
  1595. break;
  1596. /* 6.4 Otherwise, if last node is the furthest
  1597. block, then move the aforementioned bookmark to
  1598. be immediately after the node in the list of
  1599. active formatting elements. */
  1600. } elseif ($last_node === $furthest_block) {
  1601. $bookmark = array_search($node, $this->a_formatting, true) + 1;
  1602. }
  1603. /* 6.5 Create an element for the token for which
  1604. * the element node was created, replace the entry
  1605. * for node in the list of active formatting
  1606. * elements with an entry for the new element,
  1607. * replace the entry for node in the stack of open
  1608. * elements with an entry for the new element, and
  1609. * let node be the new element. */
  1610. // we don't know what the token is anymore
  1611. // XDOM
  1612. $clone = $node->cloneNode();
  1613. $a_pos = array_search($node, $this->a_formatting, true);
  1614. $s_pos = array_search($node, $this->stack, true);
  1615. $this->a_formatting[$a_pos] = $clone;
  1616. $this->stack[$s_pos] = $clone;
  1617. $node = $clone;
  1618. /* 6.6 Insert last node into node, first removing
  1619. it from its previous parent node if any. */
  1620. // XDOM
  1621. if ($last_node->parentNode !== null) {
  1622. $last_node->parentNode->removeChild($last_node);
  1623. }
  1624. // XDOM
  1625. $node->appendChild($last_node);
  1626. /* 6.7 Let last node be node. */
  1627. $last_node = $node;
  1628. /* 6.8 Return to step 1 of this inner set of steps. */
  1629. }
  1630. /* 7. If the common ancestor node is a table, tbody,
  1631. * tfoot, thead, or tr element, then, foster parent
  1632. * whatever last node ended up being in the previous
  1633. * step, first removing it from its previous parent
  1634. * node if any. */
  1635. // XDOM
  1636. if ($last_node->parentNode) { // common step
  1637. $last_node->parentNode->removeChild($last_node);
  1638. }
  1639. if (in_array($common_ancestor->tagName, ['table', 'tbody', 'tfoot', 'thead', 'tr'])) {
  1640. $this->fosterParent($last_node);
  1641. /* Otherwise, append whatever last node ended up being
  1642. * in the previous step to the common ancestor node,
  1643. * first removing it from its previous parent node if
  1644. * any. */
  1645. } else {
  1646. // XDOM
  1647. $common_ancestor->appendChild($last_node);
  1648. }
  1649. /* 8. Create an element for the token for which the
  1650. * formatting element was created. */
  1651. // XDOM
  1652. $clone = $formatting_element->cloneNode();
  1653. /* 9. Take all of the child nodes of the furthest
  1654. block and append them to the element created in the
  1655. last step. */
  1656. // XDOM
  1657. while ($furthest_block->hasChildNodes()) {
  1658. $child = $furthest_block->firstChild;
  1659. $furthest_block->removeChild($child);
  1660. $clone->appendChild($child);
  1661. }
  1662. /* 10. Append that clone to the furthest block. */
  1663. // XDOM
  1664. $furthest_block->appendChild($clone);
  1665. /* 11. Remove the formatting element from the list
  1666. of active formatting elements, and insert the new element
  1667. into the list of active formatting elements at the
  1668. position of the aforementioned bookmark. */
  1669. $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
  1670. array_splice($this->a_formatting, $fe_af_pos, 1);
  1671. $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
  1672. $af_part2 = array_slice($this->a_formatting, $bookmark);
  1673. $this->a_formatting = array_merge($af_part1, [$clone], $af_part2);
  1674. /* 12. Remove the formatting element from the stack
  1675. of open elements, and insert the new element into the stack
  1676. of open elements immediately below the position of the
  1677. furthest block in that stack. */
  1678. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1679. array_splice($this->stack, $fe_s_pos, 1);
  1680. $fb_s_pos = array_search($furthest_block, $this->stack, true);
  1681. $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
  1682. $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
  1683. $this->stack = array_merge($s_part1, [$clone], $s_part2);
  1684. /* 13. Jump back to step 1 in this series of steps. */
  1685. unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
  1686. }
  1687. break;
  1688. case 'applet': case 'button': case 'marquee': case 'object':
  1689. /* If the stack of open elements has an element in scope whose
  1690. tag name matches the tag name of the token, then generate implied
  1691. end tags. */
  1692. if ($this->elementInScope($token['name'])) {
  1693. $this->generateImpliedEndTags();
  1694. /* Now, if the current node is not an element with the same
  1695. tag name as the token, then this is a parse error. */
  1696. // XERROR: implement logic
  1697. /* Pop elements from the stack of open elements until
  1698. * an element with the same tag name as the token has
  1699. * been popped from the stack. */
  1700. do {
  1701. $node = array_pop($this->stack);
  1702. } while ($node->tagName !== $token['name']);
  1703. /* Clear the list of active formatting elements up to the
  1704. * last marker. */
  1705. $keys = array_keys($this->a_formatting, self::MARKER, true);
  1706. $marker = end($keys);
  1707. for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
  1708. array_pop($this->a_formatting);
  1709. }
  1710. }
  1711. /*else {
  1712. // parse error
  1713. }*/
  1714. break;
  1715. case 'br':
  1716. // Parse error
  1717. $this->emitToken([
  1718. 'name' => 'br',
  1719. 'type' => HTML5_Tokenizer::STARTTAG,
  1720. ]);
  1721. break;
  1722. /* An end tag token not covered by the previous entries */
  1723. default:
  1724. for ($n = count($this->stack) - 1; $n >= 0; $n--) {
  1725. /* Initialise node to be the current node (the bottommost
  1726. node of the stack). */
  1727. $node = $this->stack[$n];
  1728. /* If node has the same tag name as the end tag token,
  1729. then: */
  1730. if ($token['name'] === $node->tagName) {
  1731. /* Generate implied end tags. */
  1732. $this->generateImpliedEndTags();
  1733. /* If the tag name of the end tag token does not
  1734. match the tag name of the current node, this is a
  1735. parse error. */
  1736. // XERROR: implement this
  1737. /* Pop all the nodes from the current node up to
  1738. node, including node, then stop these steps. */
  1739. // XSKETCHY
  1740. do {
  1741. $pop = array_pop($this->stack);
  1742. } while ($pop !== $node);
  1743. break;
  1744. } else {
  1745. $category = $this->getElementCategory($node);
  1746. if ($category !== self::FORMATTING && $category !== self::PHRASING) {
  1747. /* Otherwise, if node is in neither the formatting
  1748. category nor the phrasing category, then this is a
  1749. parse error. Stop this algorithm. The end tag token
  1750. is ignored. */
  1751. $this->ignored = true;
  1752. break;
  1753. // parse error
  1754. }
  1755. }
  1756. /* Set node to the previous entry in the stack of open elements. Loop. */
  1757. }
  1758. break;
  1759. }
  1760. break;
  1761. }
  1762. break;
  1763. case self::IN_CDATA_RCDATA:
  1764. if (
  1765. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  1766. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  1767. ) {
  1768. $this->insertText($token['data']);
  1769. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  1770. // parse error
  1771. /* If the current node is a script element, mark the script
  1772. * element as "already executed". */
  1773. // probably not necessary
  1774. array_pop($this->stack);
  1775. $this->mode = $this->original_mode;
  1776. $this->emitToken($token);
  1777. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
  1778. array_pop($this->stack);
  1779. $this->mode = $this->original_mode;
  1780. // we're ignoring all of the execution stuff
  1781. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
  1782. array_pop($this->stack);
  1783. $this->mode = $this->original_mode;
  1784. }
  1785. break;
  1786. case self::IN_TABLE:
  1787. $clear = ['html', 'table'];
  1788. /* A character token */
  1789. if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
  1790. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  1791. /* Let the pending table character tokens
  1792. * be an empty list of tokens. */
  1793. $this->pendingTableCharacters = "";
  1794. $this->pendingTableCharactersDirty = false;
  1795. /* Let the original insertion mode be the current
  1796. * insertion mode. */
  1797. $this->original_mode = $this->mode;
  1798. /* Switch the insertion mode to
  1799. * "in table text" and
  1800. * reprocess the token. */
  1801. $this->mode = self::IN_TABLE_TEXT;
  1802. $this->emitToken($token);
  1803. /* A comment token */
  1804. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  1805. /* Append a Comment node to the current node with the data
  1806. attribute set to the data given in the comment token. */
  1807. $this->insertComment($token['data']);
  1808. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  1809. // parse error
  1810. /* A start tag whose tag name is "caption" */
  1811. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1812. $token['name'] === 'caption') {
  1813. /* Clear the stack back to a table context. */
  1814. $this->clearStackToTableContext($clear);
  1815. /* Insert a marker at the end of the list of active
  1816. formatting elements. */
  1817. $this->a_formatting[] = self::MARKER;
  1818. /* Insert an HTML element for the token, then switch the
  1819. insertion mode to "in caption". */
  1820. $this->insertElement($token);
  1821. $this->mode = self::IN_CAPTION;
  1822. /* A start tag whose tag name is "colgroup" */
  1823. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1824. $token['name'] === 'colgroup') {
  1825. /* Clear the stack back to a table context. */
  1826. $this->clearStackToTableContext($clear);
  1827. /* Insert an HTML element for the token, then switch the
  1828. insertion mode to "in column group". */
  1829. $this->insertElement($token);
  1830. $this->mode = self::IN_COLUMN_GROUP;
  1831. /* A start tag whose tag name is "col" */
  1832. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1833. $token['name'] === 'col') {
  1834. $this->emitToken([
  1835. 'name' => 'colgroup',
  1836. 'type' => HTML5_Tokenizer::STARTTAG,
  1837. 'attr' => []
  1838. ]);
  1839. $this->emitToken($token);
  1840. /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
  1841. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  1842. ['tbody', 'tfoot', 'thead'])) {
  1843. /* Clear the stack back to a table context. */
  1844. $this->clearStackToTableContext($clear);
  1845. /* Insert an HTML element for the token, then switch the insertion
  1846. mode to "in table body". */
  1847. $this->insertElement($token);
  1848. $this->mode = self::IN_TABLE_BODY;
  1849. /* A start tag whose tag name is one of: "td", "th", "tr" */
  1850. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1851. in_array($token['name'], ['td', 'th', 'tr'])) {
  1852. /* Act as if a start tag token with the tag name "tbody" had been
  1853. seen, then reprocess the current token. */
  1854. $this->emitToken([
  1855. 'name' => 'tbody',
  1856. 'type' => HTML5_Tokenizer::STARTTAG,
  1857. 'attr' => []
  1858. ]);
  1859. $this->emitToken($token);
  1860. /* A start tag whose tag name is "table" */
  1861. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1862. $token['name'] === 'table') {
  1863. /* Parse error. Act as if an end tag token with the tag name "table"
  1864. had been seen, then, if that token wasn't ignored, reprocess the
  1865. current token. */
  1866. $this->emitToken([
  1867. 'name' => 'table',
  1868. 'type' => HTML5_Tokenizer::ENDTAG
  1869. ]);
  1870. if (!$this->ignored) {
  1871. $this->emitToken($token);
  1872. }
  1873. /* An end tag whose tag name is "table" */
  1874. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  1875. $token['name'] === 'table') {
  1876. /* If the stack of open elements does not have an element in table
  1877. scope with the same tag name as the token, this is a parse error.
  1878. Ignore the token. (fragment case) */
  1879. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  1880. $this->ignored = true;
  1881. } else {
  1882. do {
  1883. $node = array_pop($this->stack);
  1884. } while ($node->tagName !== 'table');
  1885. /* Reset the insertion mode appropriately. */
  1886. $this->resetInsertionMode();
  1887. }
  1888. /* An end tag whose tag name is one of: "body", "caption", "col",
  1889. "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
  1890. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  1891. ['body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
  1892. 'tfoot', 'th', 'thead', 'tr'])) {
  1893. // Parse error. Ignore the token.
  1894. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1895. ($token['name'] === 'style' || $token['name'] === 'script')) {
  1896. $this->processWithRulesFor($token, self::IN_HEAD);
  1897. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
  1898. // assignment is intentional
  1899. /* If the token does not have an attribute with the name "type", or
  1900. * if it does, but that attribute's value is not an ASCII
  1901. * case-insensitive match for the string "hidden", then: act as
  1902. * described in the "anything else" entry below. */
  1903. ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
  1904. // I.e., if it's an input with the type attribute == 'hidden'
  1905. /* Otherwise */
  1906. // parse error
  1907. $this->insertElement($token);
  1908. array_pop($this->stack);
  1909. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  1910. /* If the current node is not the root html element, then this is a parse error. */
  1911. if (end($this->stack)->tagName !== 'html') {
  1912. // Note: It can only be the current node in the fragment case.
  1913. // parse error
  1914. }
  1915. /* Stop parsing. */
  1916. /* Anything else */
  1917. } else {
  1918. /* Parse error. Process the token as if the insertion mode was "in
  1919. body", with the following exception: */
  1920. $old = $this->foster_parent;
  1921. $this->foster_parent = true;
  1922. $this->processWithRulesFor($token, self::IN_BODY);
  1923. $this->foster_parent = $old;
  1924. }
  1925. break;
  1926. case self::IN_TABLE_TEXT:
  1927. /* A character token */
  1928. if ($token['type'] === HTML5_Tokenizer::CHARACTER) {
  1929. /* Append the character token to the pending table
  1930. * character tokens list. */
  1931. $this->pendingTableCharacters .= $token['data'];
  1932. $this->pendingTableCharactersDirty = true;
  1933. } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  1934. $this->pendingTableCharacters .= $token['data'];
  1935. /* Anything else */
  1936. } else {
  1937. if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
  1938. /* If any of the tokens in the pending table character tokens list
  1939. * are character tokens that are not one of U+0009 CHARACTER
  1940. * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
  1941. * U+0020 SPACE, then reprocess those character tokens using the
  1942. * rules given in the "anything else" entry in the "in table"
  1943. * insertion mode.*/
  1944. if ($this->pendingTableCharactersDirty) {
  1945. /* Parse error. Process the token using the rules for the
  1946. * "in body" insertion mode, except that if the current
  1947. * node is a table, tbody, tfoot, thead, or tr element,
  1948. * then, whenever a node would be inserted into the current
  1949. * node, it must instead be foster parented. */
  1950. // XERROR
  1951. $old = $this->foster_parent;
  1952. $this->foster_parent = true;
  1953. $text_token = [
  1954. 'type' => HTML5_Tokenizer::CHARACTER,
  1955. 'data' => $this->pendingTableCharacters,
  1956. ];
  1957. $this->processWithRulesFor($text_token, self::IN_BODY);
  1958. $this->foster_parent = $old;
  1959. /* Otherwise, insert the characters given by the pending table
  1960. * character tokens list into the current node. */
  1961. } else {
  1962. $this->insertText($this->pendingTableCharacters);
  1963. }
  1964. $this->pendingTableCharacters = null;
  1965. $this->pendingTableCharactersNull = null;
  1966. }
  1967. /* Switch the insertion mode to the original insertion mode and
  1968. * reprocess the token.
  1969. */
  1970. $this->mode = $this->original_mode;
  1971. $this->emitToken($token);
  1972. }
  1973. break;
  1974. case self::IN_CAPTION:
  1975. /* An end tag whose tag name is "caption" */
  1976. if ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
  1977. /* If the stack of open elements does not have an element in table
  1978. scope with the same tag name as the token, this is a parse error.
  1979. Ignore the token. (fragment case) */
  1980. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  1981. $this->ignored = true;
  1982. // Ignore
  1983. /* Otherwise: */
  1984. } else {
  1985. /* Generate implied end tags. */
  1986. $this->generateImpliedEndTags();
  1987. /* Now, if the current node is not a caption element, then this
  1988. is a parse error. */
  1989. // XERROR: implement
  1990. /* Pop elements from this stack until a caption element has
  1991. been popped from the stack. */
  1992. do {
  1993. $node = array_pop($this->stack);
  1994. } while ($node->tagName !== 'caption');
  1995. /* Clear the list of active formatting elements up to the last
  1996. marker. */
  1997. $this->clearTheActiveFormattingElementsUpToTheLastMarker();
  1998. /* Switch the insertion mode to "in table". */
  1999. $this->mode = self::IN_TABLE;
  2000. }
  2001. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2002. "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
  2003. name is "table" */
  2004. } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2005. ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
  2006. 'thead', 'tr'])) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2007. $token['name'] === 'table')) {
  2008. /* Parse error. Act as if an end tag with the tag name "caption"
  2009. had been seen, then, if that token wasn't ignored, reprocess the
  2010. current token. */
  2011. $this->emitToken([
  2012. 'name' => 'caption',
  2013. 'type' => HTML5_Tokenizer::ENDTAG
  2014. ]);
  2015. if (!$this->ignored) {
  2016. $this->emitToken($token);
  2017. }
  2018. /* An end tag whose tag name is one of: "body", "col", "colgroup",
  2019. "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
  2020. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2021. ['body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
  2022. 'thead', 'tr'])) {
  2023. // Parse error. Ignore the token.
  2024. $this->ignored = true;
  2025. } else {
  2026. /* Process the token as if the insertion mode was "in body". */
  2027. $this->processWithRulesFor($token, self::IN_BODY);
  2028. }
  2029. break;
  2030. case self::IN_COLUMN_GROUP:
  2031. /* A character token that is one of U+0009 CHARACTER TABULATION,
  2032. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2033. or U+0020 SPACE */
  2034. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2035. /* Append the character to the current node. */
  2036. $this->insertText($token['data']);
  2037. /* A comment token */
  2038. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2039. /* Append a Comment node to the current node with the data
  2040. attribute set to the data given in the comment token. */
  2041. $this->insertComment($token['data']);
  2042. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2043. // parse error
  2044. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2045. $this->processWithRulesFor($token, self::IN_BODY);
  2046. /* A start tag whose tag name is "col" */
  2047. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
  2048. /* Insert a col element for the token. Immediately pop the current
  2049. node off the stack of open elements. */
  2050. $this->insertElement($token);
  2051. array_pop($this->stack);
  2052. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  2053. /* An end tag whose tag name is "colgroup" */
  2054. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2055. $token['name'] === 'colgroup') {
  2056. /* If the current node is the root html element, then this is a
  2057. parse error, ignore the token. (fragment case) */
  2058. if (end($this->stack)->tagName === 'html') {
  2059. $this->ignored = true;
  2060. /* Otherwise, pop the current node (which will be a colgroup
  2061. element) from the stack of open elements. Switch the insertion
  2062. mode to "in table". */
  2063. } else {
  2064. array_pop($this->stack);
  2065. $this->mode = self::IN_TABLE;
  2066. }
  2067. /* An end tag whose tag name is "col" */
  2068. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
  2069. /* Parse error. Ignore the token. */
  2070. $this->ignored = true;
  2071. /* An end-of-file token */
  2072. /* If the current node is the root html element */
  2073. } elseif ($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
  2074. /* Stop parsing */
  2075. /* Anything else */
  2076. } else {
  2077. /* Act as if an end tag with the tag name "colgroup" had been seen,
  2078. and then, if that token wasn't ignored, reprocess the current token. */
  2079. $this->emitToken([
  2080. 'name' => 'colgroup',
  2081. 'type' => HTML5_Tokenizer::ENDTAG
  2082. ]);
  2083. if (!$this->ignored) {
  2084. $this->emitToken($token);
  2085. }
  2086. }
  2087. break;
  2088. case self::IN_TABLE_BODY:
  2089. $clear = ['tbody', 'tfoot', 'thead', 'html'];
  2090. /* A start tag whose tag name is "tr" */
  2091. if ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
  2092. /* Clear the stack back to a table body context. */
  2093. $this->clearStackToTableContext($clear);
  2094. /* Insert a tr element for the token, then switch the insertion
  2095. mode to "in row". */
  2096. $this->insertElement($token);
  2097. $this->mode = self::IN_ROW;
  2098. /* A start tag whose tag name is one of: "th", "td" */
  2099. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2100. ($token['name'] === 'th' || $token['name'] === 'td')) {
  2101. /* Parse error. Act as if a start tag with the tag name "tr" had
  2102. been seen, then reprocess the current token. */
  2103. $this->emitToken([
  2104. 'name' => 'tr',
  2105. 'type' => HTML5_Tokenizer::STARTTAG,
  2106. 'attr' => []
  2107. ]);
  2108. $this->emitToken($token);
  2109. /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
  2110. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2111. in_array($token['name'], ['tbody', 'tfoot', 'thead'])) {
  2112. /* If the stack of open elements does not have an element in table
  2113. scope with the same tag name as the token, this is a parse error.
  2114. Ignore the token. */
  2115. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2116. // Parse error
  2117. $this->ignored = true;
  2118. /* Otherwise: */
  2119. } else {
  2120. /* Clear the stack back to a table body context. */
  2121. $this->clearStackToTableContext($clear);
  2122. /* Pop the current node from the stack of open elements. Switch
  2123. the insertion mode to "in table". */
  2124. array_pop($this->stack);
  2125. $this->mode = self::IN_TABLE;
  2126. }
  2127. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2128. "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
  2129. } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2130. ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'])) ||
  2131. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
  2132. /* If the stack of open elements does not have a tbody, thead, or
  2133. tfoot element in table scope, this is a parse error. Ignore the
  2134. token. (fragment case) */
  2135. if (!$this->elementInScope(['tbody', 'thead', 'tfoot'], self::SCOPE_TABLE)) {
  2136. // parse error
  2137. $this->ignored = true;
  2138. /* Otherwise: */
  2139. } else {
  2140. /* Clear the stack back to a table body context. */
  2141. $this->clearStackToTableContext($clear);
  2142. /* Act as if an end tag with the same tag name as the current
  2143. node ("tbody", "tfoot", or "thead") had been seen, then
  2144. reprocess the current token. */
  2145. $this->emitToken([
  2146. 'name' => end($this->stack)->tagName,
  2147. 'type' => HTML5_Tokenizer::ENDTAG
  2148. ]);
  2149. $this->emitToken($token);
  2150. }
  2151. /* An end tag whose tag name is one of: "body", "caption", "col",
  2152. "colgroup", "html", "td", "th", "tr" */
  2153. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2154. ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'])) {
  2155. /* Parse error. Ignore the token. */
  2156. $this->ignored = true;
  2157. /* Anything else */
  2158. } else {
  2159. /* Process the token as if the insertion mode was "in table". */
  2160. $this->processWithRulesFor($token, self::IN_TABLE);
  2161. }
  2162. break;
  2163. case self::IN_ROW:
  2164. $clear = ['tr', 'html'];
  2165. /* A start tag whose tag name is one of: "th", "td" */
  2166. if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2167. ($token['name'] === 'th' || $token['name'] === 'td')) {
  2168. /* Clear the stack back to a table row context. */
  2169. $this->clearStackToTableContext($clear);
  2170. /* Insert an HTML element for the token, then switch the insertion
  2171. mode to "in cell". */
  2172. $this->insertElement($token);
  2173. $this->mode = self::IN_CELL;
  2174. /* Insert a marker at the end of the list of active formatting
  2175. elements. */
  2176. $this->a_formatting[] = self::MARKER;
  2177. /* An end tag whose tag name is "tr" */
  2178. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
  2179. /* If the stack of open elements does not have an element in table
  2180. scope with the same tag name as the token, this is a parse error.
  2181. Ignore the token. (fragment case) */
  2182. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2183. // Ignore.
  2184. $this->ignored = true;
  2185. } else {
  2186. /* Clear the stack back to a table row context. */
  2187. $this->clearStackToTableContext($clear);
  2188. /* Pop the current node (which will be a tr element) from the
  2189. stack of open elements. Switch the insertion mode to "in table
  2190. body". */
  2191. array_pop($this->stack);
  2192. $this->mode = self::IN_TABLE_BODY;
  2193. }
  2194. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2195. "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
  2196. } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2197. ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'])) ||
  2198. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
  2199. /* Act as if an end tag with the tag name "tr" had been seen, then,
  2200. if that token wasn't ignored, reprocess the current token. */
  2201. $this->emitToken([
  2202. 'name' => 'tr',
  2203. 'type' => HTML5_Tokenizer::ENDTAG
  2204. ]);
  2205. if (!$this->ignored) {
  2206. $this->emitToken($token);
  2207. }
  2208. /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
  2209. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2210. in_array($token['name'], ['tbody', 'tfoot', 'thead'])) {
  2211. /* If the stack of open elements does not have an element in table
  2212. scope with the same tag name as the token, this is a parse error.
  2213. Ignore the token. */
  2214. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2215. $this->ignored = true;
  2216. /* Otherwise: */
  2217. } else {
  2218. /* Otherwise, act as if an end tag with the tag name "tr" had
  2219. been seen, then reprocess the current token. */
  2220. $this->emitToken([
  2221. 'name' => 'tr',
  2222. 'type' => HTML5_Tokenizer::ENDTAG
  2223. ]);
  2224. $this->emitToken($token);
  2225. }
  2226. /* An end tag whose tag name is one of: "body", "caption", "col",
  2227. "colgroup", "html", "td", "th" */
  2228. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2229. ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'])) {
  2230. /* Parse error. Ignore the token. */
  2231. $this->ignored = true;
  2232. /* Anything else */
  2233. } else {
  2234. /* Process the token as if the insertion mode was "in table". */
  2235. $this->processWithRulesFor($token, self::IN_TABLE);
  2236. }
  2237. break;
  2238. case self::IN_CELL:
  2239. /* An end tag whose tag name is one of: "td", "th" */
  2240. if ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2241. ($token['name'] === 'td' || $token['name'] === 'th')) {
  2242. /* If the stack of open elements does not have an element in table
  2243. scope with the same tag name as that of the token, then this is a
  2244. parse error and the token must be ignored. */
  2245. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2246. $this->ignored = true;
  2247. /* Otherwise: */
  2248. } else {
  2249. /* Generate implied end tags, except for elements with the same
  2250. tag name as the token. */
  2251. $this->generateImpliedEndTags([$token['name']]);
  2252. /* Now, if the current node is not an element with the same tag
  2253. name as the token, then this is a parse error. */
  2254. // XERROR: Implement parse error code
  2255. /* Pop elements from this stack until an element with the same
  2256. tag name as the token has been popped from the stack. */
  2257. do {
  2258. $node = array_pop($this->stack);
  2259. } while ($node->tagName !== $token['name']);
  2260. /* Clear the list of active formatting elements up to the last
  2261. marker. */
  2262. $this->clearTheActiveFormattingElementsUpToTheLastMarker();
  2263. /* Switch the insertion mode to "in row". (The current node
  2264. will be a tr element at this point.) */
  2265. $this->mode = self::IN_ROW;
  2266. }
  2267. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2268. "tbody", "td", "tfoot", "th", "thead", "tr" */
  2269. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2270. ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
  2271. 'thead', 'tr'])) {
  2272. /* If the stack of open elements does not have a td or th element
  2273. in table scope, then this is a parse error; ignore the token.
  2274. (fragment case) */
  2275. if (!$this->elementInScope(['td', 'th'], self::SCOPE_TABLE)) {
  2276. // parse error
  2277. $this->ignored = true;
  2278. /* Otherwise, close the cell (see below) and reprocess the current
  2279. token. */
  2280. } else {
  2281. $this->closeCell();
  2282. $this->emitToken($token);
  2283. }
  2284. /* An end tag whose tag name is one of: "body", "caption", "col",
  2285. "colgroup", "html" */
  2286. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2287. ['body', 'caption', 'col', 'colgroup', 'html'])) {
  2288. /* Parse error. Ignore the token. */
  2289. $this->ignored = true;
  2290. /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
  2291. "thead", "tr" */
  2292. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2293. ['table', 'tbody', 'tfoot', 'thead', 'tr'])) {
  2294. /* If the stack of open elements does not have a td or th element
  2295. in table scope, then this is a parse error; ignore the token.
  2296. (innerHTML case) */
  2297. if (!$this->elementInScope(['td', 'th'], self::SCOPE_TABLE)) {
  2298. // Parse error
  2299. $this->ignored = true;
  2300. /* Otherwise, close the cell (see below) and reprocess the current
  2301. token. */
  2302. } else {
  2303. $this->closeCell();
  2304. $this->emitToken($token);
  2305. }
  2306. /* Anything else */
  2307. } else {
  2308. /* Process the token as if the insertion mode was "in body". */
  2309. $this->processWithRulesFor($token, self::IN_BODY);
  2310. }
  2311. break;
  2312. case self::IN_SELECT:
  2313. /* Handle the token as follows: */
  2314. /* A character token */
  2315. if (
  2316. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  2317. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  2318. ) {
  2319. /* Append the token's character to the current node. */
  2320. $this->insertText($token['data']);
  2321. /* A comment token */
  2322. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2323. /* Append a Comment node to the current node with the data
  2324. attribute set to the data given in the comment token. */
  2325. $this->insertComment($token['data']);
  2326. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2327. // parse error
  2328. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2329. $this->processWithRulesFor($token, self::IN_BODY);
  2330. /* A start tag token whose tag name is "option" */
  2331. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2332. $token['name'] === 'option') {
  2333. /* If the current node is an option element, act as if an end tag
  2334. with the tag name "option" had been seen. */
  2335. if (end($this->stack)->tagName === 'option') {
  2336. $this->emitToken([
  2337. 'name' => 'option',
  2338. 'type' => HTML5_Tokenizer::ENDTAG
  2339. ]);
  2340. }
  2341. /* Insert an HTML element for the token. */
  2342. $this->insertElement($token);
  2343. /* A start tag token whose tag name is "optgroup" */
  2344. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2345. $token['name'] === 'optgroup') {
  2346. /* If the current node is an option element, act as if an end tag
  2347. with the tag name "option" had been seen. */
  2348. if (end($this->stack)->tagName === 'option') {
  2349. $this->emitToken([
  2350. 'name' => 'option',
  2351. 'type' => HTML5_Tokenizer::ENDTAG
  2352. ]);
  2353. }
  2354. /* If the current node is an optgroup element, act as if an end tag
  2355. with the tag name "optgroup" had been seen. */
  2356. if (end($this->stack)->tagName === 'optgroup') {
  2357. $this->emitToken([
  2358. 'name' => 'optgroup',
  2359. 'type' => HTML5_Tokenizer::ENDTAG
  2360. ]);
  2361. }
  2362. /* Insert an HTML element for the token. */
  2363. $this->insertElement($token);
  2364. /* An end tag token whose tag name is "optgroup" */
  2365. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2366. $token['name'] === 'optgroup') {
  2367. /* First, if the current node is an option element, and the node
  2368. immediately before it in the stack of open elements is an optgroup
  2369. element, then act as if an end tag with the tag name "option" had
  2370. been seen. */
  2371. $elements_in_stack = count($this->stack);
  2372. if ($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
  2373. $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
  2374. $this->emitToken([
  2375. 'name' => 'option',
  2376. 'type' => HTML5_Tokenizer::ENDTAG
  2377. ]);
  2378. }
  2379. /* If the current node is an optgroup element, then pop that node
  2380. from the stack of open elements. Otherwise, this is a parse error,
  2381. ignore the token. */
  2382. if (end($this->stack)->tagName === 'optgroup') {
  2383. array_pop($this->stack);
  2384. } else {
  2385. // parse error
  2386. $this->ignored = true;
  2387. }
  2388. /* An end tag token whose tag name is "option" */
  2389. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2390. $token['name'] === 'option') {
  2391. /* If the current node is an option element, then pop that node
  2392. from the stack of open elements. Otherwise, this is a parse error,
  2393. ignore the token. */
  2394. if (end($this->stack)->tagName === 'option') {
  2395. array_pop($this->stack);
  2396. } else {
  2397. // parse error
  2398. $this->ignored = true;
  2399. }
  2400. /* An end tag whose tag name is "select" */
  2401. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2402. $token['name'] === 'select') {
  2403. /* If the stack of open elements does not have an element in table
  2404. scope with the same tag name as the token, this is a parse error.
  2405. Ignore the token. (fragment case) */
  2406. if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2407. $this->ignored = true;
  2408. // parse error
  2409. /* Otherwise: */
  2410. } else {
  2411. /* Pop elements from the stack of open elements until a select
  2412. element has been popped from the stack. */
  2413. do {
  2414. $node = array_pop($this->stack);
  2415. } while ($node->tagName !== 'select');
  2416. /* Reset the insertion mode appropriately. */
  2417. $this->resetInsertionMode();
  2418. }
  2419. /* A start tag whose tag name is "select" */
  2420. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
  2421. /* Parse error. Act as if the token had been an end tag with the
  2422. tag name "select" instead. */
  2423. $this->emitToken([
  2424. 'name' => 'select',
  2425. 'type' => HTML5_Tokenizer::ENDTAG
  2426. ]);
  2427. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2428. ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
  2429. // parse error
  2430. $this->emitToken([
  2431. 'name' => 'select',
  2432. 'type' => HTML5_Tokenizer::ENDTAG
  2433. ]);
  2434. $this->emitToken($token);
  2435. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  2436. $this->processWithRulesFor($token, self::IN_HEAD);
  2437. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2438. // XERROR: If the current node is not the root html element, then this is a parse error.
  2439. /* Stop parsing */
  2440. /* Anything else */
  2441. } else {
  2442. /* Parse error. Ignore the token. */
  2443. $this->ignored = true;
  2444. }
  2445. break;
  2446. case self::IN_SELECT_IN_TABLE:
  2447. if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2448. in_array($token['name'], ['caption', 'table', 'tbody',
  2449. 'tfoot', 'thead', 'tr', 'td', 'th'])) {
  2450. // parse error
  2451. $this->emitToken([
  2452. 'name' => 'select',
  2453. 'type' => HTML5_Tokenizer::ENDTAG,
  2454. ]);
  2455. $this->emitToken($token);
  2456. /* An end tag whose tag name is one of: "caption", "table", "tbody",
  2457. "tfoot", "thead", "tr", "td", "th" */
  2458. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2459. in_array($token['name'], ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'])) {
  2460. /* Parse error. */
  2461. // parse error
  2462. /* If the stack of open elements has an element in table scope with
  2463. the same tag name as that of the token, then act as if an end tag
  2464. with the tag name "select" had been seen, and reprocess the token.
  2465. Otherwise, ignore the token. */
  2466. if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2467. $this->emitToken([
  2468. 'name' => 'select',
  2469. 'type' => HTML5_Tokenizer::ENDTAG
  2470. ]);
  2471. $this->emitToken($token);
  2472. } else {
  2473. $this->ignored = true;
  2474. }
  2475. } else {
  2476. $this->processWithRulesFor($token, self::IN_SELECT);
  2477. }
  2478. break;
  2479. case self::IN_FOREIGN_CONTENT:
  2480. if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
  2481. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2482. $this->insertText($token['data']);
  2483. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2484. $this->insertComment($token['data']);
  2485. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2486. // XERROR: parse error
  2487. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2488. $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
  2489. // XDOM
  2490. end($this->stack)->namespaceURI === self::NS_SVG) {
  2491. array_pop($this->stack);
  2492. // a bunch of script running mumbo jumbo
  2493. } elseif (
  2494. ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2495. ((
  2496. $token['name'] !== 'mglyph' &&
  2497. $token['name'] !== 'malignmark' &&
  2498. // XDOM
  2499. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2500. in_array(end($this->stack)->tagName, ['mi', 'mo', 'mn', 'ms', 'mtext'])
  2501. ) ||
  2502. (
  2503. $token['name'] === 'svg' &&
  2504. // XDOM
  2505. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2506. end($this->stack)->tagName === 'annotation-xml'
  2507. ) ||
  2508. (
  2509. // XDOM
  2510. end($this->stack)->namespaceURI === self::NS_SVG &&
  2511. in_array(end($this->stack)->tagName, ['foreignObject', 'desc', 'title'])
  2512. ) ||
  2513. (
  2514. // XSKETCHY && XDOM
  2515. end($this->stack)->namespaceURI === self::NS_HTML
  2516. ))
  2517. ) || $token['type'] === HTML5_Tokenizer::ENDTAG
  2518. ) {
  2519. $this->processWithRulesFor($token, $this->secondary_mode);
  2520. /* If, after doing so, the insertion mode is still "in foreign
  2521. * content", but there is no element in scope that has a namespace
  2522. * other than the HTML namespace, switch the insertion mode to the
  2523. * secondary insertion mode. */
  2524. if ($this->mode === self::IN_FOREIGN_CONTENT) {
  2525. $found = false;
  2526. // this basically duplicates elementInScope()
  2527. for ($i = count($this->stack) - 1; $i >= 0; $i--) {
  2528. // XDOM
  2529. $node = $this->stack[$i];
  2530. if ($node->namespaceURI !== self::NS_HTML) {
  2531. $found = true;
  2532. break;
  2533. } elseif (in_array($node->tagName, ['table', 'html',
  2534. 'applet', 'caption', 'td', 'th', 'button', 'marquee',
  2535. 'object']) || ($node->tagName === 'foreignObject' &&
  2536. $node->namespaceURI === self::NS_SVG)) {
  2537. break;
  2538. }
  2539. }
  2540. if (!$found) {
  2541. $this->mode = $this->secondary_mode;
  2542. }
  2543. }
  2544. } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
  2545. $token['type'] === HTML5_Tokenizer::STARTTAG &&
  2546. (in_array($token['name'], ['b', "big", "blockquote", "body", "br",
  2547. "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
  2548. "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
  2549. "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
  2550. "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
  2551. "var"]) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
  2552. $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
  2553. // XERROR: parse error
  2554. do {
  2555. $node = array_pop($this->stack);
  2556. // XDOM
  2557. } while ($node->namespaceURI !== self::NS_HTML);
  2558. $this->stack[] = $node;
  2559. $this->mode = $this->secondary_mode;
  2560. $this->emitToken($token);
  2561. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
  2562. static $svg_lookup = [
  2563. 'altglyph' => 'altGlyph',
  2564. 'altglyphdef' => 'altGlyphDef',
  2565. 'altglyphitem' => 'altGlyphItem',
  2566. 'animatecolor' => 'animateColor',
  2567. 'animatemotion' => 'animateMotion',
  2568. 'animatetransform' => 'animateTransform',
  2569. 'clippath' => 'clipPath',
  2570. 'feblend' => 'feBlend',
  2571. 'fecolormatrix' => 'feColorMatrix',
  2572. 'fecomponenttransfer' => 'feComponentTransfer',
  2573. 'fecomposite' => 'feComposite',
  2574. 'feconvolvematrix' => 'feConvolveMatrix',
  2575. 'fediffuselighting' => 'feDiffuseLighting',
  2576. 'fedisplacementmap' => 'feDisplacementMap',
  2577. 'fedistantlight' => 'feDistantLight',
  2578. 'feflood' => 'feFlood',
  2579. 'fefunca' => 'feFuncA',
  2580. 'fefuncb' => 'feFuncB',
  2581. 'fefuncg' => 'feFuncG',
  2582. 'fefuncr' => 'feFuncR',
  2583. 'fegaussianblur' => 'feGaussianBlur',
  2584. 'feimage' => 'feImage',
  2585. 'femerge' => 'feMerge',
  2586. 'femergenode' => 'feMergeNode',
  2587. 'femorphology' => 'feMorphology',
  2588. 'feoffset' => 'feOffset',
  2589. 'fepointlight' => 'fePointLight',
  2590. 'fespecularlighting' => 'feSpecularLighting',
  2591. 'fespotlight' => 'feSpotLight',
  2592. 'fetile' => 'feTile',
  2593. 'feturbulence' => 'feTurbulence',
  2594. 'foreignobject' => 'foreignObject',
  2595. 'glyphref' => 'glyphRef',
  2596. 'lineargradient' => 'linearGradient',
  2597. 'radialgradient' => 'radialGradient',
  2598. 'textpath' => 'textPath',
  2599. ];
  2600. // XDOM
  2601. $current = end($this->stack);
  2602. if ($current->namespaceURI === self::NS_MATHML) {
  2603. $token = $this->adjustMathMLAttributes($token);
  2604. }
  2605. if ($current->namespaceURI === self::NS_SVG &&
  2606. isset($svg_lookup[$token['name']])) {
  2607. $token['name'] = $svg_lookup[$token['name']];
  2608. }
  2609. if ($current->namespaceURI === self::NS_SVG) {
  2610. $token = $this->adjustSVGAttributes($token);
  2611. }
  2612. $token = $this->adjustForeignAttributes($token);
  2613. $this->insertForeignElement($token, $current->namespaceURI);
  2614. if (isset($token['self-closing'])) {
  2615. array_pop($this->stack);
  2616. // XERROR: acknowledge self-closing flag
  2617. }
  2618. }
  2619. break;
  2620. case self::AFTER_BODY:
  2621. /* Handle the token as follows: */
  2622. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2623. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2624. or U+0020 SPACE */
  2625. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2626. /* Process the token as it would be processed if the insertion mode
  2627. was "in body". */
  2628. $this->processWithRulesFor($token, self::IN_BODY);
  2629. /* A comment token */
  2630. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2631. /* Append a Comment node to the first element in the stack of open
  2632. elements (the html element), with the data attribute set to the
  2633. data given in the comment token. */
  2634. // XDOM
  2635. $comment = $this->dom->createComment($token['data']);
  2636. $this->stack[0]->appendChild($comment);
  2637. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2638. // parse error
  2639. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2640. $this->processWithRulesFor($token, self::IN_BODY);
  2641. /* An end tag with the tag name "html" */
  2642. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
  2643. /* If the parser was originally created as part of the HTML
  2644. * fragment parsing algorithm, this is a parse error; ignore
  2645. * the token. (fragment case) */
  2646. $this->ignored = true;
  2647. // XERROR: implement this
  2648. $this->mode = self::AFTER_AFTER_BODY;
  2649. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2650. /* Stop parsing */
  2651. /* Anything else */
  2652. } else {
  2653. /* Parse error. Set the insertion mode to "in body" and reprocess
  2654. the token. */
  2655. $this->mode = self::IN_BODY;
  2656. $this->emitToken($token);
  2657. }
  2658. break;
  2659. case self::IN_FRAMESET:
  2660. /* Handle the token as follows: */
  2661. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2662. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2663. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2664. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2665. /* Append the character to the current node. */
  2666. $this->insertText($token['data']);
  2667. /* A comment token */
  2668. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2669. /* Append a Comment node to the current node with the data
  2670. attribute set to the data given in the comment token. */
  2671. $this->insertComment($token['data']);
  2672. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2673. // parse error
  2674. /* A start tag with the tag name "frameset" */
  2675. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2676. $token['name'] === 'frameset') {
  2677. $this->insertElement($token);
  2678. /* An end tag with the tag name "frameset" */
  2679. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2680. $token['name'] === 'frameset') {
  2681. /* If the current node is the root html element, then this is a
  2682. parse error; ignore the token. (fragment case) */
  2683. if (end($this->stack)->tagName === 'html') {
  2684. $this->ignored = true;
  2685. // Parse error
  2686. } else {
  2687. /* Otherwise, pop the current node from the stack of open
  2688. elements. */
  2689. array_pop($this->stack);
  2690. /* If the parser was not originally created as part of the HTML
  2691. * fragment parsing algorithm (fragment case), and the current
  2692. * node is no longer a frameset element, then switch the
  2693. * insertion mode to "after frameset". */
  2694. $this->mode = self::AFTER_FRAMESET;
  2695. }
  2696. /* A start tag with the tag name "frame" */
  2697. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2698. $token['name'] === 'frame') {
  2699. /* Insert an HTML element for the token. */
  2700. $this->insertElement($token);
  2701. /* Immediately pop the current node off the stack of open elements. */
  2702. array_pop($this->stack);
  2703. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  2704. /* A start tag with the tag name "noframes" */
  2705. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2706. $token['name'] === 'noframes') {
  2707. /* Process the token using the rules for the "in head" insertion mode. */
  2708. $this->processwithRulesFor($token, self::IN_HEAD);
  2709. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2710. // XERROR: If the current node is not the root html element, then this is a parse error.
  2711. /* Stop parsing */
  2712. /* Anything else */
  2713. } else {
  2714. /* Parse error. Ignore the token. */
  2715. $this->ignored = true;
  2716. }
  2717. break;
  2718. case self::AFTER_FRAMESET:
  2719. /* Handle the token as follows: */
  2720. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2721. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2722. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2723. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2724. /* Append the character to the current node. */
  2725. $this->insertText($token['data']);
  2726. /* A comment token */
  2727. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2728. /* Append a Comment node to the current node with the data
  2729. attribute set to the data given in the comment token. */
  2730. $this->insertComment($token['data']);
  2731. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2732. // parse error
  2733. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2734. $this->processWithRulesFor($token, self::IN_BODY);
  2735. /* An end tag with the tag name "html" */
  2736. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2737. $token['name'] === 'html') {
  2738. $this->mode = self::AFTER_AFTER_FRAMESET;
  2739. /* A start tag with the tag name "noframes" */
  2740. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2741. $token['name'] === 'noframes') {
  2742. $this->processWithRulesFor($token, self::IN_HEAD);
  2743. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2744. /* Stop parsing */
  2745. /* Anything else */
  2746. } else {
  2747. /* Parse error. Ignore the token. */
  2748. $this->ignored = true;
  2749. }
  2750. break;
  2751. case self::AFTER_AFTER_BODY:
  2752. /* A comment token */
  2753. if ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2754. /* Append a Comment node to the Document object with the data
  2755. attribute set to the data given in the comment token. */
  2756. // XDOM
  2757. $comment = $this->dom->createComment($token['data']);
  2758. $this->dom->appendChild($comment);
  2759. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2760. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2761. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2762. $this->processWithRulesFor($token, self::IN_BODY);
  2763. /* An end-of-file token */
  2764. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2765. /* OMG DONE!! */
  2766. } else {
  2767. // parse error
  2768. $this->mode = self::IN_BODY;
  2769. $this->emitToken($token);
  2770. }
  2771. break;
  2772. case self::AFTER_AFTER_FRAMESET:
  2773. /* A comment token */
  2774. if ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2775. /* Append a Comment node to the Document object with the data
  2776. attribute set to the data given in the comment token. */
  2777. // XDOM
  2778. $comment = $this->dom->createComment($token['data']);
  2779. $this->dom->appendChild($comment);
  2780. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2781. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2782. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2783. $this->processWithRulesFor($token, self::IN_BODY);
  2784. /* An end-of-file token */
  2785. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  2786. /* OMG DONE!! */
  2787. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
  2788. $this->processWithRulesFor($token, self::IN_HEAD);
  2789. } else {
  2790. // parse error
  2791. }
  2792. break;
  2793. }
  2794. }
  2795. private function insertElement($token, $append = true) {
  2796. $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
  2797. if (!empty($token['attr'])) {
  2798. foreach ($token['attr'] as $attr) {
  2799. if (!$el->hasAttribute($attr['name']) && preg_match("/^[a-zA-Z_:]/", $attr['name'])) {
  2800. $el->setAttribute($attr['name'], $attr['value']);
  2801. }
  2802. }
  2803. }
  2804. if ($append) {
  2805. $this->appendToRealParent($el);
  2806. $this->stack[] = $el;
  2807. }
  2808. return $el;
  2809. }
  2810. /**
  2811. * @param $data
  2812. */
  2813. private function insertText($data) {
  2814. if ($data === '') {
  2815. return;
  2816. }
  2817. if ($this->ignore_lf_token) {
  2818. if ($data[0] === "\n") {
  2819. $data = substr($data, 1);
  2820. if ($data === false) {
  2821. return;
  2822. }
  2823. }
  2824. }
  2825. $text = $this->dom->createTextNode($data);
  2826. $this->appendToRealParent($text);
  2827. }
  2828. /**
  2829. * @param $data
  2830. */
  2831. private function insertComment($data) {
  2832. $comment = $this->dom->createComment($data);
  2833. $this->appendToRealParent($comment);
  2834. }
  2835. /**
  2836. * @param $node
  2837. */
  2838. private function appendToRealParent($node) {
  2839. // this is only for the foster_parent case
  2840. /* If the current node is a table, tbody, tfoot, thead, or tr
  2841. element, then, whenever a node would be inserted into the current
  2842. node, it must instead be inserted into the foster parent element. */
  2843. if (
  2844. !$this->foster_parent ||
  2845. !in_array(
  2846. end($this->stack)->tagName,
  2847. ['table', 'tbody', 'tfoot', 'thead', 'tr']
  2848. )
  2849. ) {
  2850. end($this->stack)->appendChild($node);
  2851. } else {
  2852. $this->fosterParent($node);
  2853. }
  2854. }
  2855. /**
  2856. * @param $el
  2857. * @param int $scope
  2858. * @return bool|null
  2859. */
  2860. private function elementInScope($el, $scope = self::SCOPE) {
  2861. if (is_array($el)) {
  2862. foreach($el as $element) {
  2863. if ($this->elementInScope($element, $scope)) {
  2864. return true;
  2865. }
  2866. }
  2867. return false;
  2868. }
  2869. $leng = count($this->stack);
  2870. for ($n = 0; $n < $leng; $n++) {
  2871. /* 1. Initialise node to be the current node (the bottommost node of
  2872. the stack). */
  2873. $node = $this->stack[$leng - 1 - $n];
  2874. if ($node->tagName === $el) {
  2875. /* 2. If node is the target node, terminate in a match state. */
  2876. return true;
  2877. // We've expanded the logic for these states a little differently;
  2878. // Hixie's refactoring into "specific scope" is more general, but
  2879. // this "gets the job done"
  2880. // these are the common states for all scopes
  2881. } elseif ($node->tagName === 'table' || $node->tagName === 'html') {
  2882. return false;
  2883. // these are valid for "in scope" and "in list item scope"
  2884. } elseif ($scope !== self::SCOPE_TABLE &&
  2885. (in_array($node->tagName, ['applet', 'caption', 'td',
  2886. 'th', 'button', 'marquee', 'object']) ||
  2887. $node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)) {
  2888. return false;
  2889. // these are valid for "in list item scope"
  2890. } elseif ($scope === self::SCOPE_LISTITEM && in_array($node->tagName, ['ol', 'ul'])) {
  2891. return false;
  2892. }
  2893. /* Otherwise, set node to the previous entry in the stack of open
  2894. elements and return to step 2. (This will never fail, since the loop
  2895. will always terminate in the previous step if the top of the stack
  2896. is reached.) */
  2897. }
  2898. // To fix warning. This never happens or should return true/false
  2899. return null;
  2900. }
  2901. /**
  2902. * @return bool
  2903. */
  2904. private function reconstructActiveFormattingElements() {
  2905. /* 1. If there are no entries in the list of active formatting elements,
  2906. then there is nothing to reconstruct; stop this algorithm. */
  2907. $formatting_elements = count($this->a_formatting);
  2908. if ($formatting_elements === 0) {
  2909. return false;
  2910. }
  2911. /* 3. Let entry be the last (most recently added) element in the list
  2912. of active formatting elements. */
  2913. $entry = end($this->a_formatting);
  2914. /* 2. If the last (most recently added) entry in the list of active
  2915. formatting elements is a marker, or if it is an element that is in the
  2916. stack of open elements, then there is nothing to reconstruct; stop this
  2917. algorithm. */
  2918. if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
  2919. return false;
  2920. }
  2921. for ($a = $formatting_elements - 1; $a >= 0; true) {
  2922. /* 4. If there are no entries before entry in the list of active
  2923. formatting elements, then jump to step 8. */
  2924. if ($a === 0) {
  2925. $step_seven = false;
  2926. break;
  2927. }
  2928. /* 5. Let entry be the entry one earlier than entry in the list of
  2929. active formatting elements. */
  2930. $a--;
  2931. $entry = $this->a_formatting[$a];
  2932. /* 6. If entry is neither a marker nor an element that is also in
  2933. thetack of open elements, go to step 4. */
  2934. if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
  2935. break;
  2936. }
  2937. }
  2938. while (true) {
  2939. /* 7. Let entry be the element one later than entry in the list of
  2940. active formatting elements. */
  2941. if (isset($step_seven) && $step_seven === true) {
  2942. $a++;
  2943. $entry = $this->a_formatting[$a];
  2944. }
  2945. /* 8. Perform a shallow clone of the element entry to obtain clone. */
  2946. $clone = $entry->cloneNode();
  2947. /* 9. Append clone to the current node and push it onto the stack
  2948. of open elements so that it is the new current node. */
  2949. $this->appendToRealParent($clone);
  2950. $this->stack[] = $clone;
  2951. /* 10. Replace the entry for entry in the list with an entry for
  2952. clone. */
  2953. $this->a_formatting[$a] = $clone;
  2954. /* 11. If the entry for clone in the list of active formatting
  2955. elements is not the last entry in the list, return to step 7. */
  2956. if (end($this->a_formatting) !== $clone) {
  2957. $step_seven = true;
  2958. } else {
  2959. break;
  2960. }
  2961. }
  2962. // Return value not in use ATM. Would just make sense to also return true here.
  2963. return true;
  2964. }
  2965. /**
  2966. *
  2967. */
  2968. private function clearTheActiveFormattingElementsUpToTheLastMarker() {
  2969. /* When the steps below require the UA to clear the list of active
  2970. formatting elements up to the last marker, the UA must perform the
  2971. following steps: */
  2972. while (true) {
  2973. /* 1. Let entry be the last (most recently added) entry in the list
  2974. of active formatting elements. */
  2975. $entry = end($this->a_formatting);
  2976. /* 2. Remove entry from the list of active formatting elements. */
  2977. array_pop($this->a_formatting);
  2978. /* 3. If entry was a marker, then stop the algorithm at this point.
  2979. The list has been cleared up to the last marker. */
  2980. if ($entry === self::MARKER) {
  2981. break;
  2982. }
  2983. }
  2984. }
  2985. /**
  2986. * @param array $exclude
  2987. */
  2988. private function generateImpliedEndTags($exclude = []) {
  2989. /* When the steps below require the UA to generate implied end tags,
  2990. * then, while the current node is a dc element, a dd element, a ds
  2991. * element, a dt element, an li element, an option element, an optgroup
  2992. * element, a p element, an rp element, or an rt element, the UA must
  2993. * pop the current node off the stack of open elements. */
  2994. $node = end($this->stack);
  2995. $elements = array_diff(['dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'], $exclude);
  2996. while (in_array(end($this->stack)->tagName, $elements)) {
  2997. array_pop($this->stack);
  2998. }
  2999. }
  3000. /**
  3001. * @param $node
  3002. * @return int
  3003. */
  3004. private function getElementCategory($node) {
  3005. if (!is_object($node)) {
  3006. debug_print_backtrace();
  3007. }
  3008. $name = $node->tagName;
  3009. if (in_array($name, $this->special)) {
  3010. return self::SPECIAL;
  3011. } elseif (in_array($name, $this->scoping)) {
  3012. return self::SCOPING;
  3013. } elseif (in_array($name, $this->formatting)) {
  3014. return self::FORMATTING;
  3015. } else {
  3016. return self::PHRASING;
  3017. }
  3018. }
  3019. /**
  3020. * @param $elements
  3021. */
  3022. private function clearStackToTableContext($elements) {
  3023. /* When the steps above require the UA to clear the stack back to a
  3024. table context, it means that the UA must, while the current node is not
  3025. a table element or an html element, pop elements from the stack of open
  3026. elements. */
  3027. while (true) {
  3028. $name = end($this->stack)->tagName;
  3029. if (in_array($name, $elements)) {
  3030. break;
  3031. } else {
  3032. array_pop($this->stack);
  3033. }
  3034. }
  3035. }
  3036. /**
  3037. * @param null $context
  3038. */
  3039. private function resetInsertionMode($context = null) {
  3040. /* 1. Let last be false. */
  3041. $last = false;
  3042. $leng = count($this->stack);
  3043. for ($n = $leng - 1; $n >= 0; $n--) {
  3044. /* 2. Let node be the last node in the stack of open elements. */
  3045. $node = $this->stack[$n];
  3046. /* 3. If node is the first node in the stack of open elements, then
  3047. * set last to true and set node to the context element. (fragment
  3048. * case) */
  3049. if ($this->stack[0]->isSameNode($node)) {
  3050. $last = true;
  3051. $node = $context;
  3052. }
  3053. /* 4. If node is a select element, then switch the insertion mode to
  3054. "in select" and abort these steps. (fragment case) */
  3055. if ($node->tagName === 'select') {
  3056. $this->mode = self::IN_SELECT;
  3057. break;
  3058. /* 5. If node is a td or th element, then switch the insertion mode
  3059. to "in cell" and abort these steps. */
  3060. } elseif ($node->tagName === 'td' || $node->nodeName === 'th') {
  3061. $this->mode = self::IN_CELL;
  3062. break;
  3063. /* 6. If node is a tr element, then switch the insertion mode to
  3064. "in row" and abort these steps. */
  3065. } elseif ($node->tagName === 'tr') {
  3066. $this->mode = self::IN_ROW;
  3067. break;
  3068. /* 7. If node is a tbody, thead, or tfoot element, then switch the
  3069. insertion mode to "in table body" and abort these steps. */
  3070. } elseif (in_array($node->tagName, ['tbody', 'thead', 'tfoot'])) {
  3071. $this->mode = self::IN_TABLE_BODY;
  3072. break;
  3073. /* 8. If node is a caption element, then switch the insertion mode
  3074. to "in caption" and abort these steps. */
  3075. } elseif ($node->tagName === 'caption') {
  3076. $this->mode = self::IN_CAPTION;
  3077. break;
  3078. /* 9. If node is a colgroup element, then switch the insertion mode
  3079. to "in column group" and abort these steps. (innerHTML case) */
  3080. } elseif ($node->tagName === 'colgroup') {
  3081. $this->mode = self::IN_COLUMN_GROUP;
  3082. break;
  3083. /* 10. If node is a table element, then switch the insertion mode
  3084. to "in table" and abort these steps. */
  3085. } elseif ($node->tagName === 'table') {
  3086. $this->mode = self::IN_TABLE;
  3087. break;
  3088. /* 11. If node is an element from the MathML namespace or the SVG
  3089. * namespace, then switch the insertion mode to "in foreign
  3090. * content", let the secondary insertion mode be "in body", and
  3091. * abort these steps. */
  3092. } elseif ($node->namespaceURI === self::NS_SVG ||
  3093. $node->namespaceURI === self::NS_MATHML) {
  3094. $this->mode = self::IN_FOREIGN_CONTENT;
  3095. $this->secondary_mode = self::IN_BODY;
  3096. break;
  3097. /* 12. If node is a head element, then switch the insertion mode
  3098. to "in body" ("in body"! not "in head"!) and abort these steps.
  3099. (fragment case) */
  3100. } elseif ($node->tagName === 'head') {
  3101. $this->mode = self::IN_BODY;
  3102. break;
  3103. /* 13. If node is a body element, then switch the insertion mode to
  3104. "in body" and abort these steps. */
  3105. } elseif ($node->tagName === 'body') {
  3106. $this->mode = self::IN_BODY;
  3107. break;
  3108. /* 14. If node is a frameset element, then switch the insertion
  3109. mode to "in frameset" and abort these steps. (fragment case) */
  3110. } elseif ($node->tagName === 'frameset') {
  3111. $this->mode = self::IN_FRAMESET;
  3112. break;
  3113. /* 15. If node is an html element, then: if the head element
  3114. pointer is null, switch the insertion mode to "before head",
  3115. otherwise, switch the insertion mode to "after head". In either
  3116. case, abort these steps. (fragment case) */
  3117. } elseif ($node->tagName === 'html') {
  3118. $this->mode = ($this->head_pointer === null)
  3119. ? self::BEFORE_HEAD
  3120. : self::AFTER_HEAD;
  3121. break;
  3122. /* 16. If last is true, then set the insertion mode to "in body"
  3123. and abort these steps. (fragment case) */
  3124. } elseif ($last) {
  3125. $this->mode = self::IN_BODY;
  3126. break;
  3127. }
  3128. }
  3129. }
  3130. /**
  3131. *
  3132. */
  3133. private function closeCell() {
  3134. /* If the stack of open elements has a td or th element in table scope,
  3135. then act as if an end tag token with that tag name had been seen. */
  3136. foreach (['td', 'th'] as $cell) {
  3137. if ($this->elementInScope($cell, self::SCOPE_TABLE)) {
  3138. $this->emitToken([
  3139. 'name' => $cell,
  3140. 'type' => HTML5_Tokenizer::ENDTAG
  3141. ]);
  3142. break;
  3143. }
  3144. }
  3145. }
  3146. /**
  3147. * @param $token
  3148. * @param $mode
  3149. */
  3150. private function processWithRulesFor($token, $mode) {
  3151. /* "using the rules for the m insertion mode", where m is one of these
  3152. * modes, the user agent must use the rules described under the m
  3153. * insertion mode's section, but must leave the insertion mode
  3154. * unchanged unless the rules in m themselves switch the insertion mode
  3155. * to a new value. */
  3156. $this->emitToken($token, $mode);
  3157. }
  3158. /**
  3159. * @param $token
  3160. */
  3161. private function insertCDATAElement($token) {
  3162. $this->insertElement($token);
  3163. $this->original_mode = $this->mode;
  3164. $this->mode = self::IN_CDATA_RCDATA;
  3165. $this->content_model = HTML5_Tokenizer::CDATA;
  3166. }
  3167. /**
  3168. * @param $token
  3169. */
  3170. private function insertRCDATAElement($token) {
  3171. $this->insertElement($token);
  3172. $this->original_mode = $this->mode;
  3173. $this->mode = self::IN_CDATA_RCDATA;
  3174. $this->content_model = HTML5_Tokenizer::RCDATA;
  3175. }
  3176. /**
  3177. * @param $token
  3178. * @param $key
  3179. * @return bool
  3180. */
  3181. private function getAttr($token, $key) {
  3182. if (!isset($token['attr'])) {
  3183. return false;
  3184. }
  3185. $ret = false;
  3186. foreach ($token['attr'] as $keypair) {
  3187. if ($keypair['name'] === $key) {
  3188. $ret = $keypair['value'];
  3189. }
  3190. }
  3191. return $ret;
  3192. }
  3193. /**
  3194. * @return mixed
  3195. */
  3196. private function getCurrentTable() {
  3197. /* The current table is the last table element in the stack of open
  3198. * elements, if there is one. If there is no table element in the stack
  3199. * of open elements (fragment case), then the current table is the
  3200. * first element in the stack of open elements (the html element). */
  3201. for ($i = count($this->stack) - 1; $i >= 0; $i--) {
  3202. if ($this->stack[$i]->tagName === 'table') {
  3203. return $this->stack[$i];
  3204. }
  3205. }
  3206. return $this->stack[0];
  3207. }
  3208. /**
  3209. * @return mixed
  3210. */
  3211. private function getFosterParent() {
  3212. /* The foster parent element is the parent element of the last
  3213. table element in the stack of open elements, if there is a
  3214. table element and it has such a parent element. If there is no
  3215. table element in the stack of open elements (innerHTML case),
  3216. then the foster parent element is the first element in the
  3217. stack of open elements (the html element). Otherwise, if there
  3218. is a table element in the stack of open elements, but the last
  3219. table element in the stack of open elements has no parent, or
  3220. its parent node is not an element, then the foster parent
  3221. element is the element before the last table element in the
  3222. stack of open elements. */
  3223. for ($n = count($this->stack) - 1; $n >= 0; $n--) {
  3224. if ($this->stack[$n]->tagName === 'table') {
  3225. $table = $this->stack[$n];
  3226. break;
  3227. }
  3228. }
  3229. if (isset($table) && $table->parentNode !== null) {
  3230. return $table->parentNode;
  3231. } elseif (!isset($table)) {
  3232. return $this->stack[0];
  3233. } elseif (isset($table) && ($table->parentNode === null ||
  3234. $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
  3235. return $this->stack[$n - 1];
  3236. }
  3237. return null;
  3238. }
  3239. /**
  3240. * @param $node
  3241. */
  3242. public function fosterParent($node) {
  3243. $foster_parent = $this->getFosterParent();
  3244. $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
  3245. /* When a node node is to be foster parented, the node node must be
  3246. * be inserted into the foster parent element. */
  3247. /* If the foster parent element is the parent element of the last table
  3248. * element in the stack of open elements, then node must be inserted
  3249. * immediately before the last table element in the stack of open
  3250. * elements in the foster parent element; otherwise, node must be
  3251. * appended to the foster parent element. */
  3252. if ($table->tagName === 'table' && $table->parentNode->isSameNode($foster_parent)) {
  3253. $foster_parent->insertBefore($node, $table);
  3254. } else {
  3255. $foster_parent->appendChild($node);
  3256. }
  3257. }
  3258. /**
  3259. * For debugging, prints the stack
  3260. */
  3261. private function printStack() {
  3262. $names = [];
  3263. foreach ($this->stack as $i => $element) {
  3264. $names[] = $element->tagName;
  3265. }
  3266. echo " -> stack [" . implode(', ', $names) . "]\n";
  3267. }
  3268. /**
  3269. * For debugging, prints active formatting elements
  3270. */
  3271. private function printActiveFormattingElements() {
  3272. if (!$this->a_formatting) {
  3273. return;
  3274. }
  3275. $names = [];
  3276. foreach ($this->a_formatting as $node) {
  3277. if ($node === self::MARKER) {
  3278. $names[] = 'MARKER';
  3279. } else {
  3280. $names[] = $node->tagName;
  3281. }
  3282. }
  3283. echo " -> active formatting [" . implode(', ', $names) . "]\n";
  3284. }
  3285. /**
  3286. * @return bool
  3287. */
  3288. public function currentTableIsTainted() {
  3289. return !empty($this->getCurrentTable()->tainted);
  3290. }
  3291. /**
  3292. * Sets up the tree constructor for building a fragment.
  3293. *
  3294. * @param null $context
  3295. */
  3296. public function setupContext($context = null) {
  3297. $this->fragment = true;
  3298. if ($context) {
  3299. $context = $this->dom->createElementNS(self::NS_HTML, $context);
  3300. /* 4.1. Set the HTML parser's tokenization stage's content model
  3301. * flag according to the context element, as follows: */
  3302. switch ($context->tagName) {
  3303. case 'title': case 'textarea':
  3304. $this->content_model = HTML5_Tokenizer::RCDATA;
  3305. break;
  3306. case 'style': case 'script': case 'xmp': case 'iframe':
  3307. case 'noembed': case 'noframes':
  3308. $this->content_model = HTML5_Tokenizer::CDATA;
  3309. break;
  3310. case 'noscript':
  3311. // XSCRIPT: assuming scripting is enabled
  3312. $this->content_model = HTML5_Tokenizer::CDATA;
  3313. break;
  3314. case 'plaintext':
  3315. $this->content_model = HTML5_Tokenizer::PLAINTEXT;
  3316. break;
  3317. }
  3318. /* 4.2. Let root be a new html element with no attributes. */
  3319. $root = $this->dom->createElementNS(self::NS_HTML, 'html');
  3320. $this->root = $root;
  3321. /* 4.3 Append the element root to the Document node created above. */
  3322. $this->dom->appendChild($root);
  3323. /* 4.4 Set up the parser's stack of open elements so that it
  3324. * contains just the single element root. */
  3325. $this->stack = [$root];
  3326. /* 4.5 Reset the parser's insertion mode appropriately. */
  3327. $this->resetInsertionMode($context);
  3328. /* 4.6 Set the parser's form element pointer to the nearest node
  3329. * to the context element that is a form element (going straight up
  3330. * the ancestor chain, and including the element itself, if it is a
  3331. * form element), or, if there is no such form element, to null. */
  3332. $node = $context;
  3333. do {
  3334. if ($node->tagName === 'form') {
  3335. $this->form_pointer = $node;
  3336. break;
  3337. }
  3338. } while ($node = $node->parentNode);
  3339. }
  3340. }
  3341. /**
  3342. * @param $token
  3343. * @return mixed
  3344. */
  3345. public function adjustMathMLAttributes($token) {
  3346. foreach ($token['attr'] as &$kp) {
  3347. if ($kp['name'] === 'definitionurl') {
  3348. $kp['name'] = 'definitionURL';
  3349. }
  3350. }
  3351. return $token;
  3352. }
  /**
   * Applies the SVG attribute-name adjustment to a start tag token.
   *
   * Attribute names that appear in all-lowercase form in the lookup table
   * are replaced by their mixed-case SVG spelling (for example "viewbox"
   * becomes "viewBox"). Names not present in the table, and all attribute
   * values, are left untouched.
   *
   * @param array $token Token whose optional 'attr' entry is a list of
   *                     arrays, each carrying at least a 'name' key.
   * @return mixed The token with the adjusted attribute names. A token
   *               without attributes is returned unchanged.
   */
  public function adjustSVGAttributes($token) {
      // Lowercase attribute name => canonical mixed-case SVG name.
      static $lookup = [
          'attributename' => 'attributeName',
          'attributetype' => 'attributeType',
          'basefrequency' => 'baseFrequency',
          'baseprofile' => 'baseProfile',
          'calcmode' => 'calcMode',
          'clippathunits' => 'clipPathUnits',
          'contentscripttype' => 'contentScriptType',
          'contentstyletype' => 'contentStyleType',
          'diffuseconstant' => 'diffuseConstant',
          'edgemode' => 'edgeMode',
          'externalresourcesrequired' => 'externalResourcesRequired',
          'filterres' => 'filterRes',
          'filterunits' => 'filterUnits',
          'glyphref' => 'glyphRef',
          'gradienttransform' => 'gradientTransform',
          'gradientunits' => 'gradientUnits',
          'kernelmatrix' => 'kernelMatrix',
          'kernelunitlength' => 'kernelUnitLength',
          'keypoints' => 'keyPoints',
          'keysplines' => 'keySplines',
          'keytimes' => 'keyTimes',
          'lengthadjust' => 'lengthAdjust',
          'limitingconeangle' => 'limitingConeAngle',
          'markerheight' => 'markerHeight',
          'markerunits' => 'markerUnits',
          'markerwidth' => 'markerWidth',
          'maskcontentunits' => 'maskContentUnits',
          'maskunits' => 'maskUnits',
          'numoctaves' => 'numOctaves',
          'pathlength' => 'pathLength',
          'patterncontentunits' => 'patternContentUnits',
          'patterntransform' => 'patternTransform',
          'patternunits' => 'patternUnits',
          'pointsatx' => 'pointsAtX',
          'pointsaty' => 'pointsAtY',
          'pointsatz' => 'pointsAtZ',
          'preservealpha' => 'preserveAlpha',
          'preserveaspectratio' => 'preserveAspectRatio',
          'primitiveunits' => 'primitiveUnits',
          'refx' => 'refX',
          'refy' => 'refY',
          'repeatcount' => 'repeatCount',
          'repeatdur' => 'repeatDur',
          'requiredextensions' => 'requiredExtensions',
          'requiredfeatures' => 'requiredFeatures',
          'specularconstant' => 'specularConstant',
          'specularexponent' => 'specularExponent',
          'spreadmethod' => 'spreadMethod',
          'startoffset' => 'startOffset',
          'stddeviation' => 'stdDeviation',
          'stitchtiles' => 'stitchTiles',
          'surfacescale' => 'surfaceScale',
          'systemlanguage' => 'systemLanguage',
          'tablevalues' => 'tableValues',
          'targetx' => 'targetX',
          'targety' => 'targetY',
          'textlength' => 'textLength',
          'viewbox' => 'viewBox',
          'viewtarget' => 'viewTarget',
          'xchannelselector' => 'xChannelSelector',
          'ychannelselector' => 'yChannelSelector',
          'zoomandpan' => 'zoomAndPan',
      ];
      // Tokens may legitimately carry no attributes (insertForeignElement()
      // tolerates that too). Iterating a missing entry by reference would
      // create a null 'attr' key and raise a warning, so bail out early.
      if (empty($token['attr'])) {
          return $token;
      }
      // Write back through the index instead of a by-reference loop so no
      // reference is left bound to the last element.
      foreach ($token['attr'] as $index => $attribute) {
          if (isset($lookup[$attribute['name']])) {
              $token['attr'][$index]['name'] = $lookup[$attribute['name']];
          }
      }
      return $token;
  }
  /**
   * Applies the foreign (namespaced) attribute adjustment to a start tag
   * token.
   *
   * Every attribute whose name appears in the lookup table has its 'name'
   * entry replaced by a three-element array of
   * [prefix, local name, namespace URI]. Names not present in the table
   * stay plain strings; attribute values are never touched.
   *
   * @param array $token Token whose optional 'attr' entry is a list of
   *                     arrays, each carrying at least a 'name' key.
   * @return mixed The token with the adjusted attribute names. A token
   *               without attributes is returned unchanged.
   */
  public function adjustForeignAttributes($token) {
      // Qualified attribute name => [prefix, local name, namespace URI].
      static $lookup = [
          'xlink:actuate' => ['xlink', 'actuate', self::NS_XLINK],
          'xlink:arcrole' => ['xlink', 'arcrole', self::NS_XLINK],
          'xlink:href' => ['xlink', 'href', self::NS_XLINK],
          'xlink:role' => ['xlink', 'role', self::NS_XLINK],
          'xlink:show' => ['xlink', 'show', self::NS_XLINK],
          'xlink:title' => ['xlink', 'title', self::NS_XLINK],
          'xlink:type' => ['xlink', 'type', self::NS_XLINK],
          'xml:base' => ['xml', 'base', self::NS_XML],
          'xml:lang' => ['xml', 'lang', self::NS_XML],
          'xml:space' => ['xml', 'space', self::NS_XML],
          'xmlns' => [null, 'xmlns', self::NS_XMLNS],
          'xmlns:xlink' => ['xmlns', 'xlink', self::NS_XMLNS],
      ];
      // Tokens may legitimately carry no attributes (insertForeignElement()
      // tolerates that too). Iterating a missing entry by reference would
      // create a null 'attr' key and raise a warning, so bail out early.
      if (empty($token['attr'])) {
          return $token;
      }
      // Write back through the index instead of a by-reference loop so no
      // reference is left bound to the last element.
      foreach ($token['attr'] as $index => $attribute) {
          if (isset($lookup[$attribute['name']])) {
              $token['attr'][$index]['name'] = $lookup[$attribute['name']];
          }
      }
      return $token;
  }
  /**
   * Builds an element in the given namespace from a start tag token,
   * copies the token's attributes onto it, attaches it through
   * appendToRealParent() and pushes it onto $this->stack.
   *
   * An attribute name may be a plain string or a three-element array of
   * [prefix, local name, namespace URI]; in the array form the local name
   * and namespace URI are used.
   *
   * @param $token        Start tag token with 'name' and optional 'attr'.
   * @param $namespaceURI Namespace the new element is created in.
   */
  public function insertForeignElement($token, $namespaceURI) {
      $element = $this->dom->createElementNS($namespaceURI, $token['name']);

      $attributes = empty($token['attr']) ? [] : $token['attr'];
      foreach ($attributes as $attribute) {
          $localName = $attribute['name'];
          if (is_array($localName)) {
              $namespace = $localName[2];
              $localName = $localName[1];
          } else {
              $namespace = self::NS_HTML;
          }

          // Skip attributes that are already present on the element.
          if ($element->hasAttributeNS($namespace, $localName)) {
              continue;
          }

          if ($namespace === self::NS_XLINK) {
              // XSKETCHY: work around a libxml bug by setting the
              // prefixed name without a namespace.
              $element->setAttribute('xlink:'.$localName, $attribute['value']);
          } elseif ($namespace === self::NS_HTML) {
              // Another libxml bug: plain attributes must be set without
              // any namespace.
              $element->setAttribute($localName, $attribute['value']);
          } else {
              $element->setAttributeNS($namespace, $localName, $attribute['value']);
          }
      }

      $this->appendToRealParent($element);
      $this->stack[] = $element;

      // XERROR: the following parse errors are not reported.
      /* If the newly created element has an xmlns attribute in the XMLNS
       * namespace whose value is not exactly the same as the element's
       * namespace, that is a parse error. Similarly, if the newly created
       * element has an xmlns:xlink attribute in the XMLNS namespace whose
       * value is not the XLink Namespace, that is a parse error. */
  }
  /**
   * Normalizes the document and hands back the parse result.
   *
   * @return DOMDocument|DOMNodeList The document itself when not in
   *         fragment mode; otherwise the child nodes of $this->root when
   *         it is set, or of the document when it is not.
   */
  public function save() {
      $this->dom->normalize();

      // Full-document parse: the document is the result.
      if (!$this->fragment) {
          return $this->dom;
      }

      // Fragment parse: expose only the children of the container node.
      $container = $this->root ? $this->root : $this->dom;
      return $container->childNodes;
  }
  3507. }