| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989 |
- <?php
- /*
- Copyright 2007 Jeroen van der Meer <http://jero.net/>
- Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
- // Tags for FIX ME!!!: (in order of priority)
- // XXX - should be fixed NAO!
- // XERROR - with regards to parse errors
- // XSCRIPT - with regards to scripting mode
- // XENCODING - with regards to encoding (for reparsing tests)
- // XDOM - DOM specific code (tagName is explicitly not marked).
- // this is not (yet) in helper functions.
- class HTML5_TreeBuilder {
- public $stack = [];
- public $content_model;
- private $mode;
- private $original_mode;
- private $secondary_mode;
- private $dom;
- // Whether or not normal insertion of nodes should actually foster
- // parent (used in one case in spec)
- private $foster_parent = false;
- private $a_formatting = [];
- private $head_pointer = null;
- private $form_pointer = null;
- private $flag_frameset_ok = true;
- private $flag_force_quirks = false;
- private $ignored = false;
- private $quirks_mode = null;
- // this gets to 2 when we want to ignore the next lf character, and
- // is decrement at the beginning of each processed token (this way,
- // code can check for (bool)$ignore_lf_token, but it phases out
- // appropriately)
- private $ignore_lf_token = 0;
- private $fragment = false;
- private $root;
- private $scoping = ['applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject'];
- private $formatting = ['a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u'];
- // dl and ds are speculative
- private $special = ['address','area','article','aside','base','basefont','bgsound',
- 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
- 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
- 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
- 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
- 'p','param','plaintext','pre','script','select','spacer','style',
- 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'];
- private $pendingTableCharacters;
- private $pendingTableCharactersDirty;
- // Tree construction modes
- const INITIAL = 0;
- const BEFORE_HTML = 1;
- const BEFORE_HEAD = 2;
- const IN_HEAD = 3;
- const IN_HEAD_NOSCRIPT = 4;
- const AFTER_HEAD = 5;
- const IN_BODY = 6;
- const IN_CDATA_RCDATA = 7;
- const IN_TABLE = 8;
- const IN_TABLE_TEXT = 9;
- const IN_CAPTION = 10;
- const IN_COLUMN_GROUP = 11;
- const IN_TABLE_BODY = 12;
- const IN_ROW = 13;
- const IN_CELL = 14;
- const IN_SELECT = 15;
- const IN_SELECT_IN_TABLE= 16;
- const IN_FOREIGN_CONTENT= 17;
- const AFTER_BODY = 18;
- const IN_FRAMESET = 19;
- const AFTER_FRAMESET = 20;
- const AFTER_AFTER_BODY = 21;
- const AFTER_AFTER_FRAMESET = 22;
- /**
- * Converts a magic number to a readable name. Use for debugging.
- */
- private function strConst($number) {
- static $lookup;
- if (!$lookup) {
- $lookup = [];
- $r = new ReflectionClass('HTML5_TreeBuilder');
- $consts = $r->getConstants();
- foreach ($consts as $const => $num) {
- if (!is_int($num)) {
- continue;
- }
- $lookup[$num] = $const;
- }
- }
- return $lookup[$number];
- }
- // The different types of elements.
- const SPECIAL = 100;
- const SCOPING = 101;
- const FORMATTING = 102;
- const PHRASING = 103;
- // Quirks modes in $quirks_mode
- const NO_QUIRKS = 200;
- const QUIRKS_MODE = 201;
- const LIMITED_QUIRKS_MODE = 202;
- // Marker to be placed in $a_formatting
- const MARKER = 300;
- // Namespaces for foreign content
- const NS_HTML = null; // to prevent DOM from requiring NS on everything
- const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
- const NS_SVG = 'http://www.w3.org/2000/svg';
- const NS_XLINK = 'http://www.w3.org/1999/xlink';
- const NS_XML = 'http://www.w3.org/XML/1998/namespace';
- const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
- // Different types of scopes to test for elements
- const SCOPE = 0;
- const SCOPE_LISTITEM = 1;
- const SCOPE_TABLE = 2;
- /**
- * HTML5_TreeBuilder constructor.
- */
- public function __construct() {
- $this->mode = self::INITIAL;
- $this->dom = new DOMDocument;
- $this->dom->encoding = 'UTF-8';
- $this->dom->preserveWhiteSpace = true;
- $this->dom->substituteEntities = true;
- $this->dom->strictErrorChecking = false;
- }
- public function getQuirksMode(){
- return $this->quirks_mode;
- }
- /**
- * Process tag tokens
- *
- * @param $token
- * @param null $mode
- */
- public function emitToken($token, $mode = null) {
- // XXX: ignore parse errors... why are we emitting them, again?
- if ($token['type'] === HTML5_Tokenizer::PARSEERROR) {
- return;
- }
- if ($mode === null) {
- $mode = $this->mode;
- }
- /*
- $backtrace = debug_backtrace();
- if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
- echo $this->strConst($mode);
- if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
- echo "\n ";
- token_dump($token);
- $this->printStack();
- $this->printActiveFormattingElements();
- if ($this->foster_parent) echo " -> this is a foster parent mode\n";
- if ($this->flag_frameset_ok) echo " -> frameset ok\n";
- */
- if ($this->ignore_lf_token) {
- $this->ignore_lf_token--;
- }
- $this->ignored = false;
- switch ($mode) {
- case self::INITIAL:
- /* A character token that is one of U+0009 CHARACTER TABULATION,
- * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Ignore the token. */
- $this->ignored = true;
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- if (
- $token['name'] !== 'html' || !empty($token['public']) ||
- !empty($token['system']) || $token !== 'about:legacy-compat'
- ) {
- /* If the DOCTYPE token's name is not a case-sensitive match
- * for the string "html", or if the token's public identifier
- * is not missing, or if the token's system identifier is
- * neither missing nor a case-sensitive match for the string
- * "about:legacy-compat", then there is a parse error (this
- * is the DOCTYPE parse error). */
- // DOCTYPE parse error
- }
- /* Append a DocumentType node to the Document node, with the name
- * attribute set to the name given in the DOCTYPE token, or the
- * empty string if the name was missing; the publicId attribute
- * set to the public identifier given in the DOCTYPE token, or
- * the empty string if the public identifier was missing; the
- * systemId attribute set to the system identifier given in the
- * DOCTYPE token, or the empty string if the system identifier
- * was missing; and the other attributes specific to
- * DocumentType objects set to null and empty lists as
- * appropriate. Associate the DocumentType node with the
- * Document object so that it is returned as the value of the
- * doctype attribute of the Document object. */
- if (!isset($token['public'])) {
- $token['public'] = null;
- }
- if (!isset($token['system'])) {
- $token['system'] = null;
- }
- // XDOM
- // Yes this is hacky. I'm kind of annoyed that I can't appendChild
- // a doctype to DOMDocument. Maybe I haven't chanted the right
- // syllables.
- $impl = new DOMImplementation();
- // This call can fail for particularly pathological cases (namely,
- // the qualifiedName parameter ($token['name']) could be missing.
- if ($token['name']) {
- $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
- $this->dom->appendChild($doctype);
- } else {
- // It looks like libxml's not actually *able* to express this case.
- // So... don't.
- $this->dom->emptyDoctype = true;
- }
- $public = is_null($token['public']) ? false : strtolower($token['public']);
- $system = is_null($token['system']) ? false : strtolower($token['system']);
- $publicStartsWithForQuirks = [
- "+//silmaril//dtd html pro v0r11 19970101//",
- "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
- "-//as//dtd html 3.0 aswedit + extensions//",
- "-//ietf//dtd html 2.0 level 1//",
- "-//ietf//dtd html 2.0 level 2//",
- "-//ietf//dtd html 2.0 strict level 1//",
- "-//ietf//dtd html 2.0 strict level 2//",
- "-//ietf//dtd html 2.0 strict//",
- "-//ietf//dtd html 2.0//",
- "-//ietf//dtd html 2.1e//",
- "-//ietf//dtd html 3.0//",
- "-//ietf//dtd html 3.2 final//",
- "-//ietf//dtd html 3.2//",
- "-//ietf//dtd html 3//",
- "-//ietf//dtd html level 0//",
- "-//ietf//dtd html level 1//",
- "-//ietf//dtd html level 2//",
- "-//ietf//dtd html level 3//",
- "-//ietf//dtd html strict level 0//",
- "-//ietf//dtd html strict level 1//",
- "-//ietf//dtd html strict level 2//",
- "-//ietf//dtd html strict level 3//",
- "-//ietf//dtd html strict//",
- "-//ietf//dtd html//",
- "-//metrius//dtd metrius presentational//",
- "-//microsoft//dtd internet explorer 2.0 html strict//",
- "-//microsoft//dtd internet explorer 2.0 html//",
- "-//microsoft//dtd internet explorer 2.0 tables//",
- "-//microsoft//dtd internet explorer 3.0 html strict//",
- "-//microsoft//dtd internet explorer 3.0 html//",
- "-//microsoft//dtd internet explorer 3.0 tables//",
- "-//netscape comm. corp.//dtd html//",
- "-//netscape comm. corp.//dtd strict html//",
- "-//o'reilly and associates//dtd html 2.0//",
- "-//o'reilly and associates//dtd html extended 1.0//",
- "-//o'reilly and associates//dtd html extended relaxed 1.0//",
- "-//spyglass//dtd html 2.0 extended//",
- "-//sq//dtd html 2.0 hotmetal + extensions//",
- "-//sun microsystems corp.//dtd hotjava html//",
- "-//sun microsystems corp.//dtd hotjava strict html//",
- "-//w3c//dtd html 3 1995-03-24//",
- "-//w3c//dtd html 3.2 draft//",
- "-//w3c//dtd html 3.2 final//",
- "-//w3c//dtd html 3.2//",
- "-//w3c//dtd html 3.2s draft//",
- "-//w3c//dtd html 4.0 frameset//",
- "-//w3c//dtd html 4.0 transitional//",
- "-//w3c//dtd html experimental 19960712//",
- "-//w3c//dtd html experimental 970421//",
- "-//w3c//dtd w3 html//",
- "-//w3o//dtd w3 html 3.0//",
- "-//webtechs//dtd mozilla html 2.0//",
- "-//webtechs//dtd mozilla html//",
- ];
- $publicSetToForQuirks = [
- "-//w3o//dtd w3 html strict 3.0//",
- "-/w3c/dtd html 4.0 transitional/en",
- "html",
- ];
- $publicStartsWithAndSystemForQuirks = [
- "-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//",
- ];
- $publicStartsWithForLimitedQuirks = [
- "-//w3c//dtd xhtml 1.0 frameset//",
- "-//w3c//dtd xhtml 1.0 transitional//",
- ];
- $publicStartsWithAndSystemForLimitedQuirks = [
- "-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//",
- ];
- // first, do easy checks
- if (
- !empty($token['force-quirks']) ||
- strtolower($token['name']) !== 'html'
- ) {
- $this->quirks_mode = self::QUIRKS_MODE;
- } else {
- do {
- if ($system) {
- foreach ($publicStartsWithAndSystemForQuirks as $x) {
- if (strncmp($public, $x, strlen($x)) === 0) {
- $this->quirks_mode = self::QUIRKS_MODE;
- break;
- }
- }
- if (!is_null($this->quirks_mode)) {
- break;
- }
- foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
- if (strncmp($public, $x, strlen($x)) === 0) {
- $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
- break;
- }
- }
- if (!is_null($this->quirks_mode)) {
- break;
- }
- }
- foreach ($publicSetToForQuirks as $x) {
- if ($public === $x) {
- $this->quirks_mode = self::QUIRKS_MODE;
- break;
- }
- }
- if (!is_null($this->quirks_mode)) {
- break;
- }
- foreach ($publicStartsWithForLimitedQuirks as $x) {
- if (strncmp($public, $x, strlen($x)) === 0) {
- $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
- }
- }
- if (!is_null($this->quirks_mode)) {
- break;
- }
- if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
- $this->quirks_mode = self::QUIRKS_MODE;
- break;
- }
- foreach ($publicStartsWithForQuirks as $x) {
- if (strncmp($public, $x, strlen($x)) === 0) {
- $this->quirks_mode = self::QUIRKS_MODE;
- break;
- }
- }
- if (is_null($this->quirks_mode)) {
- $this->quirks_mode = self::NO_QUIRKS;
- }
- } while (false);
- }
- $this->mode = self::BEFORE_HTML;
- } else {
- // parse error
- /* Switch the insertion mode to "before html", then reprocess the
- * current token. */
- $this->mode = self::BEFORE_HTML;
- $this->quirks_mode = self::QUIRKS_MODE;
- $this->emitToken($token);
- }
- break;
- case self::BEFORE_HTML:
- /* A DOCTYPE token */
- if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // Parse error. Ignore the token.
- $this->ignored = true;
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the Document object with the data
- attribute set to the data given in the comment token. */
- // XDOM
- $comment = $this->dom->createComment($token['data']);
- $this->dom->appendChild($comment);
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Ignore the token. */
- $this->ignored = true;
- /* A start tag whose tag name is "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
- /* Create an element for the token in the HTML namespace. Append it
- * to the Document object. Put this element in the stack of open
- * elements. */
- // XDOM
- $html = $this->insertElement($token, false);
- $this->dom->appendChild($html);
- $this->stack[] = $html;
- $this->mode = self::BEFORE_HEAD;
- } else {
- /* Create an html element. Append it to the Document object. Put
- * this element in the stack of open elements. */
- // XDOM
- $html = $this->dom->createElementNS(self::NS_HTML, 'html');
- $this->dom->appendChild($html);
- $this->stack[] = $html;
- /* Switch the insertion mode to "before head", then reprocess the
- * current token. */
- $this->mode = self::BEFORE_HEAD;
- $this->emitToken($token);
- }
- break;
- case self::BEFORE_HEAD:
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Ignore the token. */
- $this->ignored = true;
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data attribute
- set to the data given in the comment token. */
- $this->insertComment($token['data']);
- /* A DOCTYPE token */
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- /* Parse error. Ignore the token */
- $this->ignored = true;
- // parse error
- /* A start tag token with the tag name "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- /* Process the token using the rules for the "in body"
- * insertion mode. */
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A start tag token with the tag name "head" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
- /* Insert an HTML element for the token. */
- $element = $this->insertElement($token);
- /* Set the head element pointer to this new element node. */
- $this->head_pointer = $element;
- /* Change the insertion mode to "in head". */
- $this->mode = self::IN_HEAD;
- /* An end tag whose tag name is one of: "head", "body", "html", "br" */
- } elseif (
- $token['type'] === HTML5_Tokenizer::ENDTAG && (
- $token['name'] === 'head' || $token['name'] === 'body' ||
- $token['name'] === 'html' || $token['name'] === 'br'
- )) {
- /* Act as if a start tag token with the tag name "head" and no
- * attributes had been seen, then reprocess the current token. */
- $this->emitToken([
- 'name' => 'head',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->emitToken($token);
- /* Any other end tag */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- } else {
- /* Act as if a start tag token with the tag name "head" and no
- * attributes had been seen, then reprocess the current token.
- * Note: This will result in an empty head element being
- * generated, with the current token being reprocessed in the
- * "after head" insertion mode. */
- $this->emitToken([
- 'name' => 'head',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->emitToken($token);
- }
- break;
- case self::IN_HEAD:
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE. */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Insert the character into the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data attribute
- set to the data given in the comment token. */
- $this->insertComment($token['data']);
- /* A DOCTYPE token */
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- // parse error
- /* A start tag whose tag name is "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A start tag whose tag name is one of: "base", "command", "link" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'base' || $token['name'] === 'command' ||
- $token['name'] === 'link')) {
- /* Insert an HTML element for the token. Immediately pop the
- * current node off the stack of open elements. */
- $this->insertElement($token);
- array_pop($this->stack);
- // YYY: Acknowledge the token's self-closing flag, if it is set.
- /* A start tag whose tag name is "meta" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
- /* Insert an HTML element for the token. Immediately pop the
- * current node off the stack of open elements. */
- $this->insertElement($token);
- array_pop($this->stack);
- // XERROR: Acknowledge the token's self-closing flag, if it is set.
- // XENCODING: If the element has a charset attribute, and its value is a
- // supported encoding, and the confidence is currently tentative,
- // then change the encoding to the encoding given by the value of
- // the charset attribute.
- //
- // Otherwise, if the element has a content attribute, and applying
- // the algorithm for extracting an encoding from a Content-Type to
- // its value returns a supported encoding encoding, and the
- // confidence is currently tentative, then change the encoding to
- // the encoding encoding.
- /* A start tag with the tag name "title" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
- $this->insertRCDATAElement($token);
- /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
- * A start tag whose tag name is one of: "noframes", "style" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
- // XSCRIPT: Scripting flag not respected
- $this->insertCDATAElement($token);
- // XSCRIPT: Scripting flag disable not implemented
- /* A start tag with the tag name "script" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
- /* 1. Create an element for the token in the HTML namespace. */
- $node = $this->insertElement($token, false);
- /* 2. Mark the element as being "parser-inserted" */
- // Uhhh... XSCRIPT
- /* 3. If the parser was originally created for the HTML
- * fragment parsing algorithm, then mark the script element as
- * "already executed". (fragment case) */
- // ditto... XSCRIPT
- /* 4. Append the new element to the current node and push it onto
- * the stack of open elements. */
- end($this->stack)->appendChild($node);
- $this->stack[] = $node;
- // I guess we could squash these together
- /* 6. Let the original insertion mode be the current insertion mode. */
- $this->original_mode = $this->mode;
- /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
- $this->mode = self::IN_CDATA_RCDATA;
- /* 5. Switch the tokeniser's content model flag to the CDATA state. */
- $this->content_model = HTML5_Tokenizer::CDATA;
- /* An end tag with the tag name "head" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
- /* Pop the current node (which will be the head element) off the stack of open elements. */
- array_pop($this->stack);
- /* Change the insertion mode to "after head". */
- $this->mode = self::AFTER_HEAD;
- // Slight logic inversion here to minimize duplication
- /* A start tag with the tag name "head". */
- /* An end tag whose tag name is not one of: "body", "html", "br" */
- } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
- ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
- $token['name'] !== 'body' && $token['name'] !== 'br')) {
- // Parse error. Ignore the token.
- $this->ignored = true;
- /* Anything else */
- } else {
- /* Act as if an end tag token with the tag name "head" had been
- * seen, and reprocess the current token. */
- $this->emitToken([
- 'name' => 'head',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- /* Then, reprocess the current token. */
- $this->emitToken($token);
- }
- break;
- case self::IN_HEAD_NOSCRIPT:
- if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
- /* Pop the current node (which will be a noscript element) from the
- * stack of open elements; the new current node will be a head
- * element. */
- array_pop($this->stack);
- $this->mode = self::IN_HEAD;
- } elseif (
- ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
- ($token['type'] === HTML5_Tokenizer::COMMENT) ||
- ($token['type'] === HTML5_Tokenizer::STARTTAG && (
- $token['name'] === 'link' || $token['name'] === 'meta' ||
- $token['name'] === 'noframes' || $token['name'] === 'style'))) {
- $this->processWithRulesFor($token, self::IN_HEAD);
- // inverted logic
- } elseif (
- ($token['type'] === HTML5_Tokenizer::STARTTAG && (
- $token['name'] === 'head' || $token['name'] === 'noscript')) ||
- ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] !== 'br')) {
- // parse error
- } else {
- // parse error
- $this->emitToken([
- 'type' => HTML5_Tokenizer::ENDTAG,
- 'name' => 'noscript',
- ]);
- $this->emitToken($token);
- }
- break;
- case self::AFTER_HEAD:
- /* Handle the token as follows: */
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Append the character to the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data attribute
- set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A start tag token with the tag name "body" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
- $this->insertElement($token);
- /* Set the frameset-ok flag to "not ok". */
- $this->flag_frameset_ok = false;
- /* Change the insertion mode to "in body". */
- $this->mode = self::IN_BODY;
- /* A start tag token with the tag name "frameset" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
- /* Insert a frameset element for the token. */
- $this->insertElement($token);
- /* Change the insertion mode to "in frameset". */
- $this->mode = self::IN_FRAMESET;
- /* A start tag token whose tag name is one of: "base", "link", "meta",
- "script", "style", "title" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['base', 'link', 'meta', 'noframes', 'script', 'style', 'title'])) {
- // parse error
- /* Push the node pointed to by the head element pointer onto the
- * stack of open elements. */
- $this->stack[] = $this->head_pointer;
- $this->processWithRulesFor($token, self::IN_HEAD);
- array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
- // inversion of specification
- } elseif (
- ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
- ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] !== 'body' && $token['name'] !== 'html' &&
- $token['name'] !== 'br')) {
- // parse error
- /* Anything else */
- } else {
- $this->emitToken([
- 'name' => 'body',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->flag_frameset_ok = true;
- $this->emitToken($token);
- }
- break;
- case self::IN_BODY:
- /* Handle the token as follows: */
- switch($token['type']) {
- /* A character token */
- case HTML5_Tokenizer::CHARACTER:
- case HTML5_Tokenizer::SPACECHARACTER:
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Append the token's character to the current node. */
- $this->insertText($token['data']);
- /* If the token is not one of U+0009 CHARACTER TABULATION,
- * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
- * SPACE, then set the frameset-ok flag to "not ok". */
- // i.e., if any of the characters is not whitespace
- if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
- $this->flag_frameset_ok = false;
- }
- break;
- /* A comment token */
- case HTML5_Tokenizer::COMMENT:
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- break;
- case HTML5_Tokenizer::DOCTYPE:
- // parse error
- break;
- case HTML5_Tokenizer::EOF:
- // parse error
- break;
- case HTML5_Tokenizer::STARTTAG:
- switch($token['name']) {
- case 'html':
- // parse error
- /* For each attribute on the token, check to see if the
- * attribute is already present on the top element of the
- * stack of open elements. If it is not, add the attribute
- * and its corresponding value to that element. */
- foreach($token['attr'] as $attr) {
- if (!$this->stack[0]->hasAttribute($attr['name'])) {
- $this->stack[0]->setAttribute($attr['name'], $attr['value']);
- }
- }
- break;
- case 'base': case 'command': case 'link': case 'meta': case 'noframes':
- case 'script': case 'style': case 'title':
- /* Process the token as if the insertion mode had been "in
- head". */
- $this->processWithRulesFor($token, self::IN_HEAD);
- break;
- /* A start tag token with the tag name "body" */
- case 'body':
- /* Parse error. If the second element on the stack of open
- elements is not a body element, or, if the stack of open
- elements has only one node on it, then ignore the token.
- (fragment case) */
- if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
- $this->ignored = true;
- // Ignore
- /* Otherwise, for each attribute on the token, check to see
- if the attribute is already present on the body element (the
- second element) on the stack of open elements. If it is not,
- add the attribute and its corresponding value to that
- element. */
- } else {
- foreach($token['attr'] as $attr) {
- if (!$this->stack[1]->hasAttribute($attr['name'])) {
- $this->stack[1]->setAttribute($attr['name'], $attr['value']);
- }
- }
- }
- break;
- case 'frameset':
- // parse error
- /* If the second element on the stack of open elements is
- * not a body element, or, if the stack of open elements
- * has only one node on it, then ignore the token.
- * (fragment case) */
- if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
- $this->ignored = true;
- // Ignore
- } elseif (!$this->flag_frameset_ok) {
- $this->ignored = true;
- // Ignore
- } else {
- /* 1. Remove the second element on the stack of open
- * elements from its parent node, if it has one. */
- if ($this->stack[1]->parentNode) {
- $this->stack[1]->parentNode->removeChild($this->stack[1]);
- }
- /* 2. Pop all the nodes from the bottom of the stack of
- * open elements, from the current node up to the root
- * html element. */
- array_splice($this->stack, 1);
- $this->insertElement($token);
- $this->mode = self::IN_FRAMESET;
- }
- break;
- // in spec, there is a diversion here
- case 'address': case 'article': case 'aside': case 'blockquote':
- case 'center': case 'datagrid': case 'details': case 'dir':
- case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
- case 'header': case 'hgroup': case 'menu': case 'nav':
- case 'ol': case 'p': case 'section': case 'ul':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- break;
- /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
- "h5", "h6" */
- case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* If the current node is an element whose tag name is one
- * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
- * parse error; pop the current node off the stack of open
- * elements. */
- $peek = array_pop($this->stack);
- if (in_array($peek->tagName, ["h1", "h2", "h3", "h4", "h5", "h6"])) {
- // parse error
- } else {
- $this->stack[] = $peek;
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- break;
- case 'pre': case 'listing':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- $this->insertElement($token);
- /* If the next token is a U+000A LINE FEED (LF) character
- * token, then ignore that token and move on to the next
- * one. (Newlines at the start of pre blocks are ignored as
- * an authoring convenience.) */
- $this->ignore_lf_token = 2;
- $this->flag_frameset_ok = false;
- break;
- /* A start tag whose tag name is "form" */
- case 'form':
- /* If the form element pointer is not null, ignore the
- token with a parse error. */
- if ($this->form_pointer !== null) {
- $this->ignored = true;
- // Ignore.
- /* Otherwise: */
- } else {
- /* If the stack of open elements has a p element in
- scope, then act as if an end tag with the tag name p
- had been seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token, and set the
- form element pointer to point to the element created. */
- $element = $this->insertElement($token);
- $this->form_pointer = $element;
- }
- break;
- // condensed specification
- case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
- /* 1. Set the frameset-ok flag to "not ok". */
- $this->flag_frameset_ok = false;
- $stack_length = count($this->stack) - 1;
- for($n = $stack_length; 0 <= $n; $n--) {
- /* 2. Initialise node to be the current node (the
- bottommost node of the stack). */
- $stop = false;
- $node = $this->stack[$n];
- $cat = $this->getElementCategory($node);
- // for case 'li':
- /* 3. If node is an li element, then act as if an end
- * tag with the tag name "li" had been seen, then jump
- * to the last step. */
- // for case 'dc': case 'dd': case 'ds': case 'dt':
- /* If node is a dc, dd, ds or dt element, then act as if an end
- * tag with the same tag name as node had been seen, then
- * jump to the last step. */
- if (($token['name'] === 'li' && $node->tagName === 'li') ||
- ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
- $this->emitToken([
- 'type' => HTML5_Tokenizer::ENDTAG,
- 'name' => $node->tagName,
- ]);
- break;
- }
- /* 4. If node is not in the formatting category, and is
- not in the phrasing category, and is not an address,
- div or p element, then stop this algorithm. */
- if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
- $node->tagName !== 'address' && $node->tagName !== 'div' &&
- $node->tagName !== 'p') {
- break;
- }
- /* 5. Otherwise, set node to the previous entry in the
- * stack of open elements and return to step 2. */
- }
- /* 6. This is the last step. */
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Finally, insert an HTML element with the same tag
- name as the token's. */
- $this->insertElement($token);
- break;
- /* A start tag token whose tag name is "plaintext" */
- case 'plaintext':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- $this->content_model = HTML5_Tokenizer::PLAINTEXT;
- break;
- // more diversions
- /* A start tag whose tag name is "a" */
- case 'a':
- /* If the list of active formatting elements contains
- an element whose tag name is "a" between the end of the
- list and the last marker on the list (or the start of
- the list if there is no marker on the list), then this
- is a parse error; act as if an end tag with the tag name
- "a" had been seen, then remove that element from the list
- of active formatting elements and the stack of open
- elements if the end tag didn't already remove it (it
- might not have if the element is not in table scope). */
- $leng = count($this->a_formatting);
- for ($n = $leng - 1; $n >= 0; $n--) {
- if ($this->a_formatting[$n] === self::MARKER) {
- break;
- } elseif ($this->a_formatting[$n]->tagName === 'a') {
- $a = $this->a_formatting[$n];
- $this->emitToken([
- 'name' => 'a',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (in_array($a, $this->a_formatting)) {
- $a_i = array_search($a, $this->a_formatting, true);
- if ($a_i !== false) {
- array_splice($this->a_formatting, $a_i, 1);
- }
- }
- if (in_array($a, $this->stack)) {
- $a_i = array_search($a, $this->stack, true);
- if ($a_i !== false) {
- array_splice($this->stack, $a_i, 1);
- }
- }
- break;
- }
- }
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $el = $this->insertElement($token);
- /* Add that element to the list of active formatting
- elements. */
- $this->a_formatting[] = $el;
- break;
- case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
- case 's': case 'small': case 'strike':
- case 'strong': case 'tt': case 'u':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $el = $this->insertElement($token);
- /* Add that element to the list of active formatting
- elements. */
- $this->a_formatting[] = $el;
- break;
- case 'nobr':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* If the stack of open elements has a nobr element in
- * scope, then this is a parse error; act as if an end tag
- * with the tag name "nobr" had been seen, then once again
- * reconstruct the active formatting elements, if any. */
- if ($this->elementInScope('nobr')) {
- $this->emitToken([
- 'name' => 'nobr',
- 'type' => HTML5_Tokenizer::ENDTAG,
- ]);
- $this->reconstructActiveFormattingElements();
- }
- /* Insert an HTML element for the token. */
- $el = $this->insertElement($token);
- /* Add that element to the list of active formatting
- elements. */
- $this->a_formatting[] = $el;
- break;
- // another diversion
- /* A start tag token whose tag name is "button" */
- case 'button':
- /* If the stack of open elements has a button element in scope,
- then this is a parse error; act as if an end tag with the tag
- name "button" had been seen, then reprocess the token. (We don't
- do that. Unnecessary.) (I hope you're right! -- ezyang) */
- if ($this->elementInScope('button')) {
- $this->emitToken([
- 'name' => 'button',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Insert a marker at the end of the list of active
- formatting elements. */
- $this->a_formatting[] = self::MARKER;
- $this->flag_frameset_ok = false;
- break;
- case 'applet': case 'marquee': case 'object':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Insert a marker at the end of the list of active
- formatting elements. */
- $this->a_formatting[] = self::MARKER;
- $this->flag_frameset_ok = false;
- break;
- // spec diversion
- /* A start tag whose tag name is "table" */
- case 'table':
- /* If the Document is not set to quirks mode, and the
- * stack of open elements has a p element in scope, then
- * act as if an end tag with the tag name "p" had been
- * seen. */
- if ($this->quirks_mode !== self::QUIRKS_MODE &&
- $this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- $this->flag_frameset_ok = false;
- /* Change the insertion mode to "in table". */
- $this->mode = self::IN_TABLE;
- break;
- /* A start tag whose tag name is one of: "area", "basefont",
- "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
- case 'area': case 'basefont': case 'bgsound': case 'br':
- case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
- case 'wbr':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- // YYY: Acknowledge the token's self-closing flag, if it is set.
- $this->flag_frameset_ok = false;
- break;
- case 'param': case 'source':
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- // YYY: Acknowledge the token's self-closing flag, if it is set.
- break;
- /* A start tag whose tag name is "hr" */
- case 'hr':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- // YYY: Acknowledge the token's self-closing flag, if it is set.
- $this->flag_frameset_ok = false;
- break;
- /* A start tag whose tag name is "image" */
- case 'image':
- /* Parse error. Change the token's tag name to "img" and
- reprocess it. (Don't ask.) */
- $token['name'] = 'img';
- $this->emitToken($token);
- break;
- /* A start tag whose tag name is "isindex" */
- case 'isindex':
- /* Parse error. */
- /* If the form element pointer is not null,
- then ignore the token. */
- if ($this->form_pointer === null) {
- /* Act as if a start tag token with the tag name "form" had
- been seen. */
- /* If the token has an attribute called "action", set
- * the action attribute on the resulting form
- * element to the value of the "action" attribute of
- * the token. */
- $attr = [];
- $action = $this->getAttr($token, 'action');
- if ($action !== false) {
- $attr[] = ['name' => 'action', 'value' => $action];
- }
- $this->emitToken([
- 'name' => 'form',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => $attr
- ]);
- /* Act as if a start tag token with the tag name "hr" had
- been seen. */
- $this->emitToken([
- 'name' => 'hr',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- /* Act as if a start tag token with the tag name "label"
- had been seen. */
- $this->emitToken([
- 'name' => 'label',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- /* Act as if a stream of character tokens had been seen. */
- $prompt = $this->getAttr($token, 'prompt');
- if ($prompt === false) {
- $prompt = 'This is a searchable index. '.
- 'Insert your search keywords here: ';
- }
- $this->emitToken([
- 'data' => $prompt,
- 'type' => HTML5_Tokenizer::CHARACTER,
- ]);
- /* Act as if a start tag token with the tag name "input"
- had been seen, with all the attributes from the "isindex"
- token, except with the "name" attribute set to the value
- "isindex" (ignoring any explicit "name" attribute). */
- $attr = [];
- foreach ($token['attr'] as $keypair) {
- if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
- $keypair['name'] === 'prompt') {
- continue;
- }
- $attr[] = $keypair;
- }
- $attr[] = ['name' => 'name', 'value' => 'isindex'];
- $this->emitToken([
- 'name' => 'input',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => $attr
- ]);
- /* Act as if an end tag token with the tag name "label"
- had been seen. */
- $this->emitToken([
- 'name' => 'label',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- /* Act as if a start tag token with the tag name "hr" had
- been seen. */
- $this->emitToken([
- 'name' => 'hr',
- 'type' => HTML5_Tokenizer::STARTTAG
- ]);
- /* Act as if an end tag token with the tag name "form" had
- been seen. */
- $this->emitToken([
- 'name' => 'form',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- } else {
- $this->ignored = true;
- }
- break;
- /* A start tag whose tag name is "textarea" */
- case 'textarea':
- $this->insertElement($token);
- /* If the next token is a U+000A LINE FEED (LF)
- * character token, then ignore that token and move on to
- * the next one. (Newlines at the start of textarea
- * elements are ignored as an authoring convenience.)
- * need flag, see also <pre> */
- $this->ignore_lf_token = 2;
- $this->original_mode = $this->mode;
- $this->flag_frameset_ok = false;
- $this->mode = self::IN_CDATA_RCDATA;
- /* Switch the tokeniser's content model flag to the
- RCDATA state. */
- $this->content_model = HTML5_Tokenizer::RCDATA;
- break;
- /* A start tag token whose tag name is "xmp" */
- case 'xmp':
- /* If the stack of open elements has a p element in
- scope, then act as if an end tag with the tag name
- "p" has been seen. */
- if ($this->elementInScope('p')) {
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- $this->flag_frameset_ok = false;
- $this->insertCDATAElement($token);
- break;
- case 'iframe':
- $this->flag_frameset_ok = false;
- $this->insertCDATAElement($token);
- break;
- case 'noembed': case 'noscript':
- // XSCRIPT: should check scripting flag
- $this->insertCDATAElement($token);
- break;
- /* A start tag whose tag name is "select" */
- case 'select':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- $this->flag_frameset_ok = false;
- /* If the insertion mode is one of in table", "in caption",
- * "in column group", "in table body", "in row", or "in
- * cell", then switch the insertion mode to "in select in
- * table". Otherwise, switch the insertion mode to "in
- * select". */
- if (
- $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
- $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
- $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
- ) {
- $this->mode = self::IN_SELECT_IN_TABLE;
- } else {
- $this->mode = self::IN_SELECT;
- }
- break;
- case 'option': case 'optgroup':
- if ($this->elementInScope('option')) {
- $this->emitToken([
- 'name' => 'option',
- 'type' => HTML5_Tokenizer::ENDTAG,
- ]);
- }
- $this->reconstructActiveFormattingElements();
- $this->insertElement($token);
- break;
- case 'rp': case 'rt':
- /* If the stack of open elements has a ruby element in scope, then generate
- * implied end tags. If the current node is not then a ruby element, this is
- * a parse error; pop all the nodes from the current node up to the node
- * immediately before the bottommost ruby element on the stack of open elements.
- */
- if ($this->elementInScope('ruby')) {
- $this->generateImpliedEndTags();
- }
- $peek = false;
- do {
- /*if ($peek) {
- // parse error
- }*/
- $peek = array_pop($this->stack);
- } while ($peek->tagName !== 'ruby');
- $this->stack[] = $peek; // we popped one too many
- $this->insertElement($token);
- break;
- // spec diversion
- case 'math':
- $this->reconstructActiveFormattingElements();
- $token = $this->adjustMathMLAttributes($token);
- $token = $this->adjustForeignAttributes($token);
- $this->insertForeignElement($token, self::NS_MATHML);
- if (isset($token['self-closing'])) {
- // XERROR: acknowledge the token's self-closing flag
- array_pop($this->stack);
- }
- if ($this->mode !== self::IN_FOREIGN_CONTENT) {
- $this->secondary_mode = $this->mode;
- $this->mode = self::IN_FOREIGN_CONTENT;
- }
- break;
- case 'svg':
- $this->reconstructActiveFormattingElements();
- $token = $this->adjustSVGAttributes($token);
- $token = $this->adjustForeignAttributes($token);
- $this->insertForeignElement($token, self::NS_SVG);
- if (isset($token['self-closing'])) {
- // XERROR: acknowledge the token's self-closing flag
- array_pop($this->stack);
- }
- if ($this->mode !== self::IN_FOREIGN_CONTENT) {
- $this->secondary_mode = $this->mode;
- $this->mode = self::IN_FOREIGN_CONTENT;
- }
- break;
- case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
- case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
- // parse error
- break;
- /* A start tag token not covered by the previous entries */
- default:
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
- $this->insertElement($token);
- /* This element will be a phrasing element. */
- break;
- }
- break;
- case HTML5_Tokenizer::ENDTAG:
- switch ($token['name']) {
- /* An end tag with the tag name "body" */
- case 'body':
- /* If the stack of open elements does not have a body
- * element in scope, this is a parse error; ignore the
- * token. */
- if (!$this->elementInScope('body')) {
- $this->ignored = true;
- /* Otherwise, if there is a node in the stack of open
- * elements that is not either a dc element, a dd element,
- * a ds element, a dt element, an li element, an optgroup
- * element, an option element, a p element, an rp element,
- * an rt element, a tbody element, a td element, a tfoot
- * element, a th element, a thead element, a tr element,
- * the body element, or the html element, then this is a
- * parse error.
- */
- } else {
- // XERROR: implement this check for parse error
- }
- /* Change the insertion mode to "after body". */
- $this->mode = self::AFTER_BODY;
- break;
- /* An end tag with the tag name "html" */
- case 'html':
- /* Act as if an end tag with tag name "body" had been seen,
- then, if that token wasn't ignored, reprocess the current
- token. */
- $this->emitToken([
- 'name' => 'body',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (!$this->ignored) {
- $this->emitToken($token);
- }
- break;
- case 'address': case 'article': case 'aside': case 'blockquote':
- case 'center': case 'datagrid': case 'details': case 'dir':
- case 'div': case 'dl': case 'fieldset': case 'footer':
- case 'header': case 'hgroup': case 'listing': case 'menu':
- case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
- /* If the stack of open elements has an element in scope
- with the same tag name as that of the token, then generate
- implied end tags. */
- if ($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags();
- /* Now, if the current node is not an element with
- the same tag name as that of the token, then this
- is a parse error. */
- // XERROR: implement parse error logic
- /* If the stack of open elements has an element in
- scope with the same tag name as that of the token,
- then pop elements from this stack until an element
- with that tag name has been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== $token['name']);
- } else {
- // parse error
- }
- break;
- /* An end tag whose tag name is "form" */
- case 'form':
- /* Let node be the element that the form element pointer is set to. */
- $node = $this->form_pointer;
- /* Set the form element pointer to null. */
- $this->form_pointer = null;
- /* If node is null or the stack of open elements does not
- * have node in scope, then this is a parse error; ignore the token. */
- if ($node === null || !in_array($node, $this->stack)) {
- // parse error
- $this->ignored = true;
- } else {
- /* 1. Generate implied end tags. */
- $this->generateImpliedEndTags();
- /* 2. If the current node is not node, then this is a parse error. */
- if (end($this->stack) !== $node) {
- // parse error
- }
- /* 3. Remove node from the stack of open elements. */
- array_splice($this->stack, array_search($node, $this->stack, true), 1);
- }
- break;
- /* An end tag whose tag name is "p" */
- case 'p':
- /* If the stack of open elements has a p element in scope,
- then generate implied end tags, except for p elements. */
- if ($this->elementInScope('p')) {
- /* Generate implied end tags, except for elements with
- * the same tag name as the token. */
- $this->generateImpliedEndTags(['p']);
- /* If the current node is not a p element, then this is
- a parse error. */
- // XERROR: implement
- /* Pop elements from the stack of open elements until
- * an element with the same tag name as the token has
- * been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== 'p');
- } else {
- // parse error
- $this->emitToken([
- 'name' => 'p',
- 'type' => HTML5_Tokenizer::STARTTAG,
- ]);
- $this->emitToken($token);
- }
- break;
- /* An end tag whose tag name is "li" */
- case 'li':
- /* If the stack of open elements does not have an element
- * in list item scope with the same tag name as that of the
- * token, then this is a parse error; ignore the token. */
- if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
- /* Generate implied end tags, except for elements with the
- * same tag name as the token. */
- $this->generateImpliedEndTags([$token['name']]);
- /* If the current node is not an element with the same tag
- * name as that of the token, then this is a parse error. */
- // XERROR: parse error
- /* Pop elements from the stack of open elements until an
- * element with the same tag name as the token has been
- * popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== $token['name']);
- }
- /*else {
- // XERROR: parse error
- }*/
- break;
- /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
- case 'dc': case 'dd': case 'ds': case 'dt':
- if ($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags([$token['name']]);
- /* If the current node is not an element with the same
- tag name as the token, then this is a parse error. */
- // XERROR: implement parse error
- /* Pop elements from the stack of open elements until
- * an element with the same tag name as the token has
- * been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== $token['name']);
- }
- /*else {
- // XERROR: parse error
- }*/
- break;
- /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
- "h5", "h6" */
- case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
- $elements = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
- /* If the stack of open elements has in scope an element whose
- tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
- generate implied end tags. */
- if ($this->elementInScope($elements)) {
- $this->generateImpliedEndTags();
- /* Now, if the current node is not an element with the same
- tag name as that of the token, then this is a parse error. */
- // XERROR: implement parse error
- /* If the stack of open elements has in scope an element
- whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
- "h6", then pop elements from the stack until an element
- with one of those tag names has been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while (!in_array($node->tagName, $elements));
- }
- /*else {
- // parse error
- }*/
- break;
- /* An end tag whose tag name is one of: "a", "b", "big", "em",
- "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
- case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
- case 'i': case 'nobr': case 's': case 'small': case 'strike':
- case 'strong': case 'tt': case 'u':
- // XERROR: generally speaking this needs parse error logic
- /* 1. Let the formatting element be the last element in
- the list of active formatting elements that:
- * is between the end of the list and the last scope
- marker in the list, if any, or the start of the list
- otherwise, and
- * has the same tag name as the token.
- */
- while (true) {
- for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
- if ($this->a_formatting[$a] === self::MARKER) {
- break;
- } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
- $formatting_element = $this->a_formatting[$a];
- $in_stack = in_array($formatting_element, $this->stack, true);
- $fe_af_pos = $a;
- break;
- }
- }
- /* If there is no such node, or, if that node is
- also in the stack of open elements but the element
- is not in scope, then this is a parse error. Abort
- these steps. The token is ignored. */
- if (
- !isset($formatting_element) || (
- $in_stack &&
- !$this->elementInScope($token['name'])
- )
- ) {
- $this->ignored = true;
- break;
- /* Otherwise, if there is such a node, but that node
- is not in the stack of open elements, then this is a
- parse error; remove the element from the list, and
- abort these steps. */
- } elseif (isset($formatting_element) && !$in_stack) {
- unset($this->a_formatting[$fe_af_pos]);
- $this->a_formatting = array_merge($this->a_formatting);
- break;
- }
- /* Otherwise, there is a formatting element and that
- * element is in the stack and is in scope. If the
- * element is not the current node, this is a parse
- * error. In any case, proceed with the algorithm as
- * written in the following steps. */
- // XERROR: implement me
- /* 2. Let the furthest block be the topmost node in the
- stack of open elements that is lower in the stack
- than the formatting element, and is not an element in
- the phrasing or formatting categories. There might
- not be one. */
- $fe_s_pos = array_search($formatting_element, $this->stack, true);
- $length = count($this->stack);
- for ($s = $fe_s_pos + 1; $s < $length; $s++) {
- $category = $this->getElementCategory($this->stack[$s]);
- if ($category !== self::PHRASING && $category !== self::FORMATTING) {
- $furthest_block = $this->stack[$s];
- break;
- }
- }
- /* 3. If there is no furthest block, then the UA must
- skip the subsequent steps and instead just pop all
- the nodes from the bottom of the stack of open
- elements, from the current node up to the formatting
- element, and remove the formatting element from the
- list of active formatting elements. */
- if (!isset($furthest_block)) {
- for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
- array_pop($this->stack);
- }
- unset($this->a_formatting[$fe_af_pos]);
- $this->a_formatting = array_merge($this->a_formatting);
- break;
- }
- /* 4. Let the common ancestor be the element
- immediately above the formatting element in the stack
- of open elements. */
- $common_ancestor = $this->stack[$fe_s_pos - 1];
- /* 5. Let a bookmark note the position of the
- formatting element in the list of active formatting
- elements relative to the elements on either side
- of it in the list. */
- $bookmark = $fe_af_pos;
- /* 6. Let node and last node be the furthest block.
- Follow these steps: */
- $node = $furthest_block;
- $last_node = $furthest_block;
- while (true) {
- for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
- /* 6.1 Let node be the element immediately
- prior to node in the stack of open elements. */
- $node = $this->stack[$n];
- /* 6.2 If node is not in the list of active
- formatting elements, then remove node from
- the stack of open elements and then go back
- to step 1. */
- if (!in_array($node, $this->a_formatting, true)) {
- array_splice($this->stack, $n, 1);
- } else {
- break;
- }
- }
- /* 6.3 Otherwise, if node is the formatting
- element, then go to the next step in the overall
- algorithm. */
- if ($node === $formatting_element) {
- break;
- /* 6.4 Otherwise, if last node is the furthest
- block, then move the aforementioned bookmark to
- be immediately after the node in the list of
- active formatting elements. */
- } elseif ($last_node === $furthest_block) {
- $bookmark = array_search($node, $this->a_formatting, true) + 1;
- }
- /* 6.5 Create an element for the token for which
- * the element node was created, replace the entry
- * for node in the list of active formatting
- * elements with an entry for the new element,
- * replace the entry for node in the stack of open
- * elements with an entry for the new element, and
- * let node be the new element. */
- // we don't know what the token is anymore
- // XDOM
- $clone = $node->cloneNode();
- $a_pos = array_search($node, $this->a_formatting, true);
- $s_pos = array_search($node, $this->stack, true);
- $this->a_formatting[$a_pos] = $clone;
- $this->stack[$s_pos] = $clone;
- $node = $clone;
- /* 6.6 Insert last node into node, first removing
- it from its previous parent node if any. */
- // XDOM
- if ($last_node->parentNode !== null) {
- $last_node->parentNode->removeChild($last_node);
- }
- // XDOM
- $node->appendChild($last_node);
- /* 6.7 Let last node be node. */
- $last_node = $node;
- /* 6.8 Return to step 1 of this inner set of steps. */
- }
- /* 7. If the common ancestor node is a table, tbody,
- * tfoot, thead, or tr element, then, foster parent
- * whatever last node ended up being in the previous
- * step, first removing it from its previous parent
- * node if any. */
- // XDOM
- if ($last_node->parentNode) { // common step
- $last_node->parentNode->removeChild($last_node);
- }
- if (in_array($common_ancestor->tagName, ['table', 'tbody', 'tfoot', 'thead', 'tr'])) {
- $this->fosterParent($last_node);
- /* Otherwise, append whatever last node ended up being
- * in the previous step to the common ancestor node,
- * first removing it from its previous parent node if
- * any. */
- } else {
- // XDOM
- $common_ancestor->appendChild($last_node);
- }
- /* 8. Create an element for the token for which the
- * formatting element was created. */
- // XDOM
- $clone = $formatting_element->cloneNode();
- /* 9. Take all of the child nodes of the furthest
- block and append them to the element created in the
- last step. */
- // XDOM
- while ($furthest_block->hasChildNodes()) {
- $child = $furthest_block->firstChild;
- $furthest_block->removeChild($child);
- $clone->appendChild($child);
- }
- /* 10. Append that clone to the furthest block. */
- // XDOM
- $furthest_block->appendChild($clone);
- /* 11. Remove the formatting element from the list
- of active formatting elements, and insert the new element
- into the list of active formatting elements at the
- position of the aforementioned bookmark. */
- $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
- array_splice($this->a_formatting, $fe_af_pos, 1);
- $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
- $af_part2 = array_slice($this->a_formatting, $bookmark);
- $this->a_formatting = array_merge($af_part1, [$clone], $af_part2);
- /* 12. Remove the formatting element from the stack
- of open elements, and insert the new element into the stack
- of open elements immediately below the position of the
- furthest block in that stack. */
- $fe_s_pos = array_search($formatting_element, $this->stack, true);
- array_splice($this->stack, $fe_s_pos, 1);
- $fb_s_pos = array_search($furthest_block, $this->stack, true);
- $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
- $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
- $this->stack = array_merge($s_part1, [$clone], $s_part2);
- /* 13. Jump back to step 1 in this series of steps. */
- unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
- }
- break;
- case 'applet': case 'button': case 'marquee': case 'object':
- /* If the stack of open elements has an element in scope whose
- tag name matches the tag name of the token, then generate implied
- tags. */
- if ($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags();
- /* Now, if the current node is not an element with the same
- tag name as the token, then this is a parse error. */
- // XERROR: implement logic
- /* Pop elements from the stack of open elements until
- * an element with the same tag name as the token has
- * been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== $token['name']);
- /* Clear the list of active formatting elements up to the
- * last marker. */
- $keys = array_keys($this->a_formatting, self::MARKER, true);
- $marker = end($keys);
- for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
- array_pop($this->a_formatting);
- }
- }
- /*else {
- // parse error
- }*/
- break;
- case 'br':
- // Parse error
- $this->emitToken([
- 'name' => 'br',
- 'type' => HTML5_Tokenizer::STARTTAG,
- ]);
- break;
- /* An end tag token not covered by the previous entries */
- default:
- for ($n = count($this->stack) - 1; $n >= 0; $n--) {
- /* Initialise node to be the current node (the bottommost
- node of the stack). */
- $node = $this->stack[$n];
- /* If node has the same tag name as the end tag token,
- then: */
- if ($token['name'] === $node->tagName) {
- /* Generate implied end tags. */
- $this->generateImpliedEndTags();
- /* If the tag name of the end tag token does not
- match the tag name of the current node, this is a
- parse error. */
- // XERROR: implement this
- /* Pop all the nodes from the current node up to
- node, including node, then stop these steps. */
- // XSKETCHY
- do {
- $pop = array_pop($this->stack);
- } while ($pop !== $node);
- break;
- } else {
- $category = $this->getElementCategory($node);
- if ($category !== self::FORMATTING && $category !== self::PHRASING) {
- /* Otherwise, if node is in neither the formatting
- category nor the phrasing category, then this is a
- parse error. Stop this algorithm. The end tag token
- is ignored. */
- $this->ignored = true;
- break;
- // parse error
- }
- }
- /* Set node to the previous entry in the stack of open elements. Loop. */
- }
- break;
- }
- break;
- }
- break;
- case self::IN_CDATA_RCDATA:
- if (
- $token['type'] === HTML5_Tokenizer::CHARACTER ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER
- ) {
- $this->insertText($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- // parse error
- /* If the current node is a script element, mark the script
- * element as "already executed". */
- // probably not necessary
- array_pop($this->stack);
- $this->mode = $this->original_mode;
- $this->emitToken($token);
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
- array_pop($this->stack);
- $this->mode = $this->original_mode;
- // we're ignoring all of the execution stuff
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
- array_pop($this->stack);
- $this->mode = $this->original_mode;
- }
- break;
- case self::IN_TABLE:
- $clear = ['html', 'table'];
- /* A character token */
- if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Let the pending table character tokens
- * be an empty list of tokens. */
- $this->pendingTableCharacters = "";
- $this->pendingTableCharactersDirty = false;
- /* Let the original insertion mode be the current
- * insertion mode. */
- $this->original_mode = $this->mode;
- /* Switch the insertion mode to
- * "in table text" and
- * reprocess the token. */
- $this->mode = self::IN_TABLE_TEXT;
- $this->emitToken($token);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- /* A start tag whose tag name is "caption" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'caption') {
- /* Clear the stack back to a table context. */
- $this->clearStackToTableContext($clear);
- /* Insert a marker at the end of the list of active
- formatting elements. */
- $this->a_formatting[] = self::MARKER;
- /* Insert an HTML element for the token, then switch the
- insertion mode to "in caption". */
- $this->insertElement($token);
- $this->mode = self::IN_CAPTION;
- /* A start tag whose tag name is "colgroup" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'colgroup') {
- /* Clear the stack back to a table context. */
- $this->clearStackToTableContext($clear);
- /* Insert an HTML element for the token, then switch the
- insertion mode to "in column group". */
- $this->insertElement($token);
- $this->mode = self::IN_COLUMN_GROUP;
- /* A start tag whose tag name is "col" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'col') {
- $this->emitToken([
- 'name' => 'colgroup',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->emitToken($token);
- /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['tbody', 'tfoot', 'thead'])) {
- /* Clear the stack back to a table context. */
- $this->clearStackToTableContext($clear);
- /* Insert an HTML element for the token, then switch the insertion
- mode to "in table body". */
- $this->insertElement($token);
- $this->mode = self::IN_TABLE_BODY;
- /* A start tag whose tag name is one of: "td", "th", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- in_array($token['name'], ['td', 'th', 'tr'])) {
- /* Act as if a start tag token with the tag name "tbody" had been
- seen, then reprocess the current token. */
- $this->emitToken([
- 'name' => 'tbody',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->emitToken($token);
- /* A start tag whose tag name is "table" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'table') {
- /* Parse error. Act as if an end tag token with the tag name "table"
- had been seen, then, if that token wasn't ignored, reprocess the
- current token. */
- $this->emitToken([
- 'name' => 'table',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (!$this->ignored) {
- $this->emitToken($token);
- }
- /* An end tag whose tag name is "table" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'table') {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. (fragment case) */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->ignored = true;
- } else {
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== 'table');
- /* Reset the insertion mode appropriately. */
- $this->resetInsertionMode();
- }
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
- 'tfoot', 'th', 'thead', 'tr'])) {
- // Parse error. Ignore the token.
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'style' || $token['name'] === 'script')) {
- $this->processWithRulesFor($token, self::IN_HEAD);
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
- // assignment is intentional
- /* If the token does not have an attribute with the name "type", or
- * if it does, but that attribute's value is not an ASCII
- * case-insensitive match for the string "hidden", then: act as
- * described in the "anything else" entry below. */
- ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
- // I.e., if its an input with the type attribute == 'hidden'
- /* Otherwise */
- // parse error
- $this->insertElement($token);
- array_pop($this->stack);
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- /* If the current node is not the root html element, then this is a parse error. */
- if (end($this->stack)->tagName !== 'html') {
- // Note: It can only be the current node in the fragment case.
- // parse error
- }
- /* Stop parsing. */
- /* Anything else */
- } else {
- /* Parse error. Process the token as if the insertion mode was "in
- body", with the following exception: */
- $old = $this->foster_parent;
- $this->foster_parent = true;
- $this->processWithRulesFor($token, self::IN_BODY);
- $this->foster_parent = $old;
- }
- break;
- case self::IN_TABLE_TEXT:
- /* A character token */
- if ($token['type'] === HTML5_Tokenizer::CHARACTER) {
- /* Append the character token to the pending table
- * character tokens list. */
- $this->pendingTableCharacters .= $token['data'];
- $this->pendingTableCharactersDirty = true;
- } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- $this->pendingTableCharacters .= $token['data'];
- /* Anything else */
- } else {
- if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
- /* If any of the tokens in the pending table character tokens list
- * are character tokens that are not one of U+0009 CHARACTER
- * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
- * U+0020 SPACE, then reprocess those character tokens using the
- * rules given in the "anything else" entry in the in table"
- * insertion mode.*/
- if ($this->pendingTableCharactersDirty) {
- /* Parse error. Process the token using the rules for the
- * "in body" insertion mode, except that if the current
- * node is a table, tbody, tfoot, thead, or tr element,
- * then, whenever a node would be inserted into the current
- * node, it must instead be foster parented. */
- // XERROR
- $old = $this->foster_parent;
- $this->foster_parent = true;
- $text_token = [
- 'type' => HTML5_Tokenizer::CHARACTER,
- 'data' => $this->pendingTableCharacters,
- ];
- $this->processWithRulesFor($text_token, self::IN_BODY);
- $this->foster_parent = $old;
- /* Otherwise, insert the characters given by the pending table
- * character tokens list into the current node. */
- } else {
- $this->insertText($this->pendingTableCharacters);
- }
- $this->pendingTableCharacters = null;
- $this->pendingTableCharactersNull = null;
- }
- /* Switch the insertion mode to the original insertion mode and
- * reprocess the token.
- */
- $this->mode = $this->original_mode;
- $this->emitToken($token);
- }
- break;
- case self::IN_CAPTION:
- /* An end tag whose tag name is "caption" */
- if ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. (fragment case) */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->ignored = true;
- // Ignore
- /* Otherwise: */
- } else {
- /* Generate implied end tags. */
- $this->generateImpliedEndTags();
- /* Now, if the current node is not a caption element, then this
- is a parse error. */
- // XERROR: implement
- /* Pop elements from this stack until a caption element has
- been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== 'caption');
- /* Clear the list of active formatting elements up to the last
- marker. */
- $this->clearTheActiveFormattingElementsUpToTheLastMarker();
- /* Switch the insertion mode to "in table". */
- $this->mode = self::IN_TABLE;
- }
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
- name is "table" */
- } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
- 'thead', 'tr'])) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'table')) {
- /* Parse error. Act as if an end tag with the tag name "caption"
- had been seen, then, if that token wasn't ignored, reprocess the
- current token. */
- $this->emitToken([
- 'name' => 'caption',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (!$this->ignored) {
- $this->emitToken($token);
- }
- /* An end tag whose tag name is one of: "body", "col", "colgroup",
- "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
- 'thead', 'tr'])) {
- // Parse error. Ignore the token.
- $this->ignored = true;
- } else {
- /* Process the token as if the insertion mode was "in body". */
- $this->processWithRulesFor($token, self::IN_BODY);
- }
- break;
- case self::IN_COLUMN_GROUP:
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Append the character to the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A start tag whose tag name is "col" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
- /* Insert a col element for the token. Immediately pop the current
- node off the stack of open elements. */
- $this->insertElement($token);
- array_pop($this->stack);
- // XERROR: Acknowledge the token's self-closing flag, if it is set.
- /* An end tag whose tag name is "colgroup" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'colgroup') {
- /* If the current node is the root html element, then this is a
- parse error, ignore the token. (fragment case) */
- if (end($this->stack)->tagName === 'html') {
- $this->ignored = true;
- /* Otherwise, pop the current node (which will be a colgroup
- element) from the stack of open elements. Switch the insertion
- mode to "in table". */
- } else {
- array_pop($this->stack);
- $this->mode = self::IN_TABLE;
- }
- /* An end tag whose tag name is "col" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- /* An end-of-file token */
- /* If the current node is the root html element */
- } elseif ($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
- /* Stop parsing */
- /* Anything else */
- } else {
- /* Act as if an end tag with the tag name "colgroup" had been seen,
- and then, if that token wasn't ignored, reprocess the current token. */
- $this->emitToken([
- 'name' => 'colgroup',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (!$this->ignored) {
- $this->emitToken($token);
- }
- }
- break;
- case self::IN_TABLE_BODY:
- $clear = ['tbody', 'tfoot', 'thead', 'html'];
- /* A start tag whose tag name is "tr" */
- if ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
- /* Clear the stack back to a table body context. */
- $this->clearStackToTableContext($clear);
- /* Insert a tr element for the token, then switch the insertion
- mode to "in row". */
- $this->insertElement($token);
- $this->mode = self::IN_ROW;
- /* A start tag whose tag name is one of: "th", "td" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'th' || $token['name'] === 'td')) {
- /* Parse error. Act as if a start tag with the tag name "tr" had
- been seen, then reprocess the current token. */
- $this->emitToken([
- 'name' => 'tr',
- 'type' => HTML5_Tokenizer::STARTTAG,
- 'attr' => []
- ]);
- $this->emitToken($token);
- /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- in_array($token['name'], ['tbody', 'tfoot', 'thead'])) {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- // Parse error
- $this->ignored = true;
- /* Otherwise: */
- } else {
- /* Clear the stack back to a table body context. */
- $this->clearStackToTableContext($clear);
- /* Pop the current node from the stack of open elements. Switch
- the insertion mode to "in table". */
- array_pop($this->stack);
- $this->mode = self::IN_TABLE;
- }
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
- } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'])) ||
- ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
- /* If the stack of open elements does not have a tbody, thead, or
- tfoot element in table scope, this is a parse error. Ignore the
- token. (fragment case) */
- if (!$this->elementInScope(['tbody', 'thead', 'tfoot'], self::SCOPE_TABLE)) {
- // parse error
- $this->ignored = true;
- /* Otherwise: */
- } else {
- /* Clear the stack back to a table body context. */
- $this->clearStackToTableContext($clear);
- /* Act as if an end tag with the same tag name as the current
- node ("tbody", "tfoot", or "thead") had been seen, then
- reprocess the current token. */
- $this->emitToken([
- 'name' => end($this->stack)->tagName,
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- $this->emitToken($token);
- }
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "td", "th", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'])) {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- /* Anything else */
- } else {
- /* Process the token as if the insertion mode was "in table". */
- $this->processWithRulesFor($token, self::IN_TABLE);
- }
- break;
- case self::IN_ROW:
- $clear = ['tr', 'html'];
- /* A start tag whose tag name is one of: "th", "td" */
- if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'th' || $token['name'] === 'td')) {
- /* Clear the stack back to a table row context. */
- $this->clearStackToTableContext($clear);
- /* Insert an HTML element for the token, then switch the insertion
- mode to "in cell". */
- $this->insertElement($token);
- $this->mode = self::IN_CELL;
- /* Insert a marker at the end of the list of active formatting
- elements. */
- $this->a_formatting[] = self::MARKER;
- /* An end tag whose tag name is "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. (fragment case) */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- // Ignore.
- $this->ignored = true;
- } else {
- /* Clear the stack back to a table row context. */
- $this->clearStackToTableContext($clear);
- /* Pop the current node (which will be a tr element) from the
- stack of open elements. Switch the insertion mode to "in table
- body". */
- array_pop($this->stack);
- $this->mode = self::IN_TABLE_BODY;
- }
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
- } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'])) ||
- ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
- /* Act as if an end tag with the tag name "tr" had been seen, then,
- if that token wasn't ignored, reprocess the current token. */
- $this->emitToken([
- 'name' => 'tr',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- if (!$this->ignored) {
- $this->emitToken($token);
- }
- /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- in_array($token['name'], ['tbody', 'tfoot', 'thead'])) {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->ignored = true;
- /* Otherwise: */
- } else {
- /* Otherwise, act as if an end tag with the tag name "tr" had
- been seen, then reprocess the current token. */
- $this->emitToken([
- 'name' => 'tr',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- $this->emitToken($token);
- }
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "td", "th" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'])) {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- /* Anything else */
- } else {
- /* Process the token as if the insertion mode was "in table". */
- $this->processWithRulesFor($token, self::IN_TABLE);
- }
- break;
- case self::IN_CELL:
- /* An end tag whose tag name is one of: "td", "th" */
- if ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- ($token['name'] === 'td' || $token['name'] === 'th')) {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as that of the token, then this is a
- parse error and the token must be ignored. */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->ignored = true;
- /* Otherwise: */
- } else {
- /* Generate implied end tags, except for elements with the same
- tag name as the token. */
- $this->generateImpliedEndTags([$token['name']]);
- /* Now, if the current node is not an element with the same tag
- name as the token, then this is a parse error. */
- // XERROR: Implement parse error code
- /* Pop elements from this stack until an element with the same
- tag name as the token has been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== $token['name']);
- /* Clear the list of active formatting elements up to the last
- marker. */
- $this->clearTheActiveFormattingElementsUpToTheLastMarker();
- /* Switch the insertion mode to "in row". (The current node
- will be a tr element at this point.) */
- $this->mode = self::IN_ROW;
- }
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
- ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
- 'thead', 'tr'])) {
- /* If the stack of open elements does not have a td or th element
- in table scope, then this is a parse error; ignore the token.
- (fragment case) */
- if (!$this->elementInScope(['td', 'th'], self::SCOPE_TABLE)) {
- // parse error
- $this->ignored = true;
- /* Otherwise, close the cell (see below) and reprocess the current
- token. */
- } else {
- $this->closeCell();
- $this->emitToken($token);
- }
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['body', 'caption', 'col', 'colgroup', 'html'])) {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
- "thead", "tr" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
- ['table', 'tbody', 'tfoot', 'thead', 'tr'])) {
- /* If the stack of open elements does not have a td or th element
- in table scope, then this is a parse error; ignore the token.
- (innerHTML case) */
- if (!$this->elementInScope(['td', 'th'], self::SCOPE_TABLE)) {
- // Parse error
- $this->ignored = true;
- /* Otherwise, close the cell (see below) and reprocess the current
- token. */
- } else {
- $this->closeCell();
- $this->emitToken($token);
- }
- /* Anything else */
- } else {
- /* Process the token as if the insertion mode was "in body". */
- $this->processWithRulesFor($token, self::IN_BODY);
- }
- break;
- case self::IN_SELECT:
- /* Handle the token as follows: */
- /* A character token */
- if (
- $token['type'] === HTML5_Tokenizer::CHARACTER ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER
- ) {
- /* Append the token's character to the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A start tag token whose tag name is "option" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'option') {
- /* If the current node is an option element, act as if an end tag
- with the tag name "option" had been seen. */
- if (end($this->stack)->tagName === 'option') {
- $this->emitToken([
- 'name' => 'option',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* A start tag token whose tag name is "optgroup" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'optgroup') {
- /* If the current node is an option element, act as if an end tag
- with the tag name "option" had been seen. */
- if (end($this->stack)->tagName === 'option') {
- $this->emitToken([
- 'name' => 'option',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* If the current node is an optgroup element, act as if an end tag
- with the tag name "optgroup" had been seen. */
- if (end($this->stack)->tagName === 'optgroup') {
- $this->emitToken([
- 'name' => 'optgroup',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* An end tag token whose tag name is "optgroup" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'optgroup') {
- /* First, if the current node is an option element, and the node
- immediately before it in the stack of open elements is an optgroup
- element, then act as if an end tag with the tag name "option" had
- been seen. */
- $elements_in_stack = count($this->stack);
- if ($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
- $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
- $this->emitToken([
- 'name' => 'option',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- }
- /* If the current node is an optgroup element, then pop that node
- from the stack of open elements. Otherwise, this is a parse error,
- ignore the token. */
- if (end($this->stack)->tagName === 'optgroup') {
- array_pop($this->stack);
- } else {
- // parse error
- $this->ignored = true;
- }
- /* An end tag token whose tag name is "option" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'option') {
- /* If the current node is an option element, then pop that node
- from the stack of open elements. Otherwise, this is a parse error,
- ignore the token. */
- if (end($this->stack)->tagName === 'option') {
- array_pop($this->stack);
- } else {
- // parse error
- $this->ignored = true;
- }
- /* An end tag whose tag name is "select" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'select') {
- /* If the stack of open elements does not have an element in table
- scope with the same tag name as the token, this is a parse error.
- Ignore the token. (fragment case) */
- if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->ignored = true;
- // parse error
- /* Otherwise: */
- } else {
- /* Pop elements from the stack of open elements until a select
- element has been popped from the stack. */
- do {
- $node = array_pop($this->stack);
- } while ($node->tagName !== 'select');
- /* Reset the insertion mode appropriately. */
- $this->resetInsertionMode();
- }
- /* A start tag whose tag name is "select" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
- /* Parse error. Act as if the token had been an end tag with the
- tag name "select" instead. */
- $this->emitToken([
- 'name' => 'select',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
- // parse error
- $this->emitToken([
- 'name' => 'select',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- $this->emitToken($token);
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
- $this->processWithRulesFor($token, self::IN_HEAD);
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- // XERROR: If the current node is not the root html element, then this is a parse error.
- /* Stop parsing */
- /* Anything else */
- } else {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- }
- break;
- case self::IN_SELECT_IN_TABLE:
- if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- in_array($token['name'], ['caption', 'table', 'tbody',
- 'tfoot', 'thead', 'tr', 'td', 'th'])) {
- // parse error
- $this->emitToken([
- 'name' => 'select',
- 'type' => HTML5_Tokenizer::ENDTAG,
- ]);
- $this->emitToken($token);
- /* An end tag whose tag name is one of: "caption", "table", "tbody",
- "tfoot", "thead", "tr", "td", "th" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- in_array($token['name'], ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'])) {
- /* Parse error. */
- // parse error
- /* If the stack of open elements has an element in table scope with
- the same tag name as that of the token, then act as if an end tag
- with the tag name "select" had been seen, and reprocess the token.
- Otherwise, ignore the token. */
- if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
- $this->emitToken([
- 'name' => 'select',
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- $this->emitToken($token);
- } else {
- $this->ignored = true;
- }
- } else {
- $this->processWithRulesFor($token, self::IN_SELECT);
- }
- break;
- case self::IN_FOREIGN_CONTENT:
- if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- $this->insertText($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // XERROR: parse error
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
- // XDOM
- end($this->stack)->namespaceURI === self::NS_SVG) {
- array_pop($this->stack);
- // a bunch of script running mumbo jumbo
- } elseif (
- ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- ((
- $token['name'] !== 'mglyph' &&
- $token['name'] !== 'malignmark' &&
- // XDOM
- end($this->stack)->namespaceURI === self::NS_MATHML &&
- in_array(end($this->stack)->tagName, ['mi', 'mo', 'mn', 'ms', 'mtext'])
- ) ||
- (
- $token['name'] === 'svg' &&
- // XDOM
- end($this->stack)->namespaceURI === self::NS_MATHML &&
- end($this->stack)->tagName === 'annotation-xml'
- ) ||
- (
- // XDOM
- end($this->stack)->namespaceURI === self::NS_SVG &&
- in_array(end($this->stack)->tagName, ['foreignObject', 'desc', 'title'])
- ) ||
- (
- // XSKETCHY && XDOM
- end($this->stack)->namespaceURI === self::NS_HTML
- ))
- ) || $token['type'] === HTML5_Tokenizer::ENDTAG
- ) {
- $this->processWithRulesFor($token, $this->secondary_mode);
- /* If, after doing so, the insertion mode is still "in foreign
- * content", but there is no element in scope that has a namespace
- * other than the HTML namespace, switch the insertion mode to the
- * secondary insertion mode. */
- if ($this->mode === self::IN_FOREIGN_CONTENT) {
- $found = false;
- // this basically duplicates elementInScope()
- for ($i = count($this->stack) - 1; $i >= 0; $i--) {
- // XDOM
- $node = $this->stack[$i];
- if ($node->namespaceURI !== self::NS_HTML) {
- $found = true;
- break;
- } elseif (in_array($node->tagName, ['table', 'html',
- 'applet', 'caption', 'td', 'th', 'button', 'marquee',
- 'object']) || ($node->tagName === 'foreignObject' &&
- $node->namespaceURI === self::NS_SVG)) {
- break;
- }
- }
- if (!$found) {
- $this->mode = $this->secondary_mode;
- }
- }
- } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
- $token['type'] === HTML5_Tokenizer::STARTTAG &&
- (in_array($token['name'], ['b', "big", "blockquote", "body", "br",
- "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
- "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
- "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
- "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
- "var"]) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
- $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
- // XERROR: parse error
- do {
- $node = array_pop($this->stack);
- // XDOM
- } while ($node->namespaceURI !== self::NS_HTML);
- $this->stack[] = $node;
- $this->mode = $this->secondary_mode;
- $this->emitToken($token);
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
- static $svg_lookup = [
- 'altglyph' => 'altGlyph',
- 'altglyphdef' => 'altGlyphDef',
- 'altglyphitem' => 'altGlyphItem',
- 'animatecolor' => 'animateColor',
- 'animatemotion' => 'animateMotion',
- 'animatetransform' => 'animateTransform',
- 'clippath' => 'clipPath',
- 'feblend' => 'feBlend',
- 'fecolormatrix' => 'feColorMatrix',
- 'fecomponenttransfer' => 'feComponentTransfer',
- 'fecomposite' => 'feComposite',
- 'feconvolvematrix' => 'feConvolveMatrix',
- 'fediffuselighting' => 'feDiffuseLighting',
- 'fedisplacementmap' => 'feDisplacementMap',
- 'fedistantlight' => 'feDistantLight',
- 'feflood' => 'feFlood',
- 'fefunca' => 'feFuncA',
- 'fefuncb' => 'feFuncB',
- 'fefuncg' => 'feFuncG',
- 'fefuncr' => 'feFuncR',
- 'fegaussianblur' => 'feGaussianBlur',
- 'feimage' => 'feImage',
- 'femerge' => 'feMerge',
- 'femergenode' => 'feMergeNode',
- 'femorphology' => 'feMorphology',
- 'feoffset' => 'feOffset',
- 'fepointlight' => 'fePointLight',
- 'fespecularlighting' => 'feSpecularLighting',
- 'fespotlight' => 'feSpotLight',
- 'fetile' => 'feTile',
- 'feturbulence' => 'feTurbulence',
- 'foreignobject' => 'foreignObject',
- 'glyphref' => 'glyphRef',
- 'lineargradient' => 'linearGradient',
- 'radialgradient' => 'radialGradient',
- 'textpath' => 'textPath',
- ];
- // XDOM
- $current = end($this->stack);
- if ($current->namespaceURI === self::NS_MATHML) {
- $token = $this->adjustMathMLAttributes($token);
- }
- if ($current->namespaceURI === self::NS_SVG &&
- isset($svg_lookup[$token['name']])) {
- $token['name'] = $svg_lookup[$token['name']];
- }
- if ($current->namespaceURI === self::NS_SVG) {
- $token = $this->adjustSVGAttributes($token);
- }
- $token = $this->adjustForeignAttributes($token);
- $this->insertForeignElement($token, $current->namespaceURI);
- if (isset($token['self-closing'])) {
- array_pop($this->stack);
- // XERROR: acknowledge self-closing flag
- }
- }
- break;
- case self::AFTER_BODY:
- /* Handle the token as follows: */
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Process the token as it would be processed if the insertion mode
- was "in body". */
- $this->processWithRulesFor($token, self::IN_BODY);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the first element in the stack of open
- elements (the html element), with the data attribute set to the
- data given in the comment token. */
- // XDOM
- $comment = $this->dom->createComment($token['data']);
- $this->stack[0]->appendChild($comment);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* An end tag with the tag name "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
- /* If the parser was originally created as part of the HTML
- * fragment parsing algorithm, this is a parse error; ignore
- * the token. (fragment case) */
- $this->ignored = true;
- // XERROR: implement this
- $this->mode = self::AFTER_AFTER_BODY;
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- /* Stop parsing */
- /* Anything else */
- } else {
- /* Parse error. Set the insertion mode to "in body" and reprocess
- the token. */
- $this->mode = self::IN_BODY;
- $this->emitToken($token);
- }
- break;
- case self::IN_FRAMESET:
- /* Handle the token as follows: */
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Append the character to the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- /* A start tag with the tag name "frameset" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'frameset') {
- $this->insertElement($token);
- /* An end tag with the tag name "frameset" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'frameset') {
- /* If the current node is the root html element, then this is a
- parse error; ignore the token. (fragment case) */
- if (end($this->stack)->tagName === 'html') {
- $this->ignored = true;
- // Parse error
- } else {
- /* Otherwise, pop the current node from the stack of open
- elements. */
- array_pop($this->stack);
- /* If the parser was not originally created as part of the HTML
- * fragment parsing algorithm (fragment case), and the current
- * node is no longer a frameset element, then switch the
- * insertion mode to "after frameset". */
- $this->mode = self::AFTER_FRAMESET;
- }
- /* A start tag with the tag name "frame" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'frame') {
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- // XERROR: Acknowledge the token's self-closing flag, if it is set.
- /* A start tag with the tag name "noframes" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'noframes') {
- /* Process the token using the rules for the "in head" insertion mode. */
- $this->processwithRulesFor($token, self::IN_HEAD);
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- // XERROR: If the current node is not the root html element, then this is a parse error.
- /* Stop parsing */
- /* Anything else */
- } else {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- }
- break;
- case self::AFTER_FRAMESET:
- /* Handle the token as follows: */
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
- if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
- /* Append the character to the current node. */
- $this->insertText($token['data']);
- /* A comment token */
- } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the current node with the data
- attribute set to the data given in the comment token. */
- $this->insertComment($token['data']);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
- // parse error
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* An end tag with the tag name "html" */
- } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
- $token['name'] === 'html') {
- $this->mode = self::AFTER_AFTER_FRAMESET;
- /* A start tag with the tag name "noframes" */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
- $token['name'] === 'noframes') {
- $this->processWithRulesFor($token, self::IN_HEAD);
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- /* Stop parsing */
- /* Anything else */
- } else {
- /* Parse error. Ignore the token. */
- $this->ignored = true;
- }
- break;
- case self::AFTER_AFTER_BODY:
- /* A comment token */
- if ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the Document object with the data
- attribute set to the data given in the comment token. */
- // XDOM
- $comment = $this->dom->createComment($token['data']);
- $this->dom->appendChild($comment);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
- ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* An end-of-file token */
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- /* OMG DONE!! */
- } else {
- // parse error
- $this->mode = self::IN_BODY;
- $this->emitToken($token);
- }
- break;
- case self::AFTER_AFTER_FRAMESET:
- /* A comment token */
- if ($token['type'] === HTML5_Tokenizer::COMMENT) {
- /* Append a Comment node to the Document object with the data
- attribute set to the data given in the comment token. */
- // XDOM
- $comment = $this->dom->createComment($token['data']);
- $this->dom->appendChild($comment);
- } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
- $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
- ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
- $this->processWithRulesFor($token, self::IN_BODY);
- /* An end-of-file token */
- } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
- /* OMG DONE!! */
- } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
- $this->processWithRulesFor($token, self::IN_HEAD);
- } else {
- // parse error
- }
- break;
- }
- }
- private function insertElement($token, $append = true) {
- $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
- if (!empty($token['attr'])) {
- foreach ($token['attr'] as $attr) {
- if (!$el->hasAttribute($attr['name']) && preg_match("/^[a-zA-Z_:]/", $attr['name'])) {
- $el->setAttribute($attr['name'], $attr['value']);
- }
- }
- }
- if ($append) {
- $this->appendToRealParent($el);
- $this->stack[] = $el;
- }
- return $el;
- }
- /**
- * @param $data
- */
- private function insertText($data) {
- if ($data === '') {
- return;
- }
- if ($this->ignore_lf_token) {
- if ($data[0] === "\n") {
- $data = substr($data, 1);
- if ($data === false) {
- return;
- }
- }
- }
- $text = $this->dom->createTextNode($data);
- $this->appendToRealParent($text);
- }
- /**
- * @param $data
- */
- private function insertComment($data) {
- $comment = $this->dom->createComment($data);
- $this->appendToRealParent($comment);
- }
- /**
- * @param $node
- */
- private function appendToRealParent($node) {
- // this is only for the foster_parent case
- /* If the current node is a table, tbody, tfoot, thead, or tr
- element, then, whenever a node would be inserted into the current
- node, it must instead be inserted into the foster parent element. */
- if (
- !$this->foster_parent ||
- !in_array(
- end($this->stack)->tagName,
- ['table', 'tbody', 'tfoot', 'thead', 'tr']
- )
- ) {
- end($this->stack)->appendChild($node);
- } else {
- $this->fosterParent($node);
- }
- }
- /**
- * @param $el
- * @param int $scope
- * @return bool|null
- */
- private function elementInScope($el, $scope = self::SCOPE) {
- if (is_array($el)) {
- foreach($el as $element) {
- if ($this->elementInScope($element, $scope)) {
- return true;
- }
- }
- return false;
- }
- $leng = count($this->stack);
- for ($n = 0; $n < $leng; $n++) {
- /* 1. Initialise node to be the current node (the bottommost node of
- the stack). */
- $node = $this->stack[$leng - 1 - $n];
- if ($node->tagName === $el) {
- /* 2. If node is the target node, terminate in a match state. */
- return true;
- // We've expanded the logic for these states a little differently;
- // Hixie's refactoring into "specific scope" is more general, but
- // this "gets the job done"
- // these are the common states for all scopes
- } elseif ($node->tagName === 'table' || $node->tagName === 'html') {
- return false;
- // these are valid for "in scope" and "in list item scope"
- } elseif ($scope !== self::SCOPE_TABLE &&
- (in_array($node->tagName, ['applet', 'caption', 'td',
- 'th', 'button', 'marquee', 'object']) ||
- $node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)) {
- return false;
- // these are valid for "in list item scope"
- } elseif ($scope === self::SCOPE_LISTITEM && in_array($node->tagName, ['ol', 'ul'])) {
- return false;
- }
- /* Otherwise, set node to the previous entry in the stack of open
- elements and return to step 2. (This will never fail, since the loop
- will always terminate in the previous step if the top of the stack
- is reached.) */
- }
- // To fix warning. This never happens or should return true/false
- return null;
- }
- /**
- * @return bool
- */
- private function reconstructActiveFormattingElements() {
- /* 1. If there are no entries in the list of active formatting elements,
- then there is nothing to reconstruct; stop this algorithm. */
- $formatting_elements = count($this->a_formatting);
- if ($formatting_elements === 0) {
- return false;
- }
- /* 3. Let entry be the last (most recently added) element in the list
- of active formatting elements. */
- $entry = end($this->a_formatting);
- /* 2. If the last (most recently added) entry in the list of active
- formatting elements is a marker, or if it is an element that is in the
- stack of open elements, then there is nothing to reconstruct; stop this
- algorithm. */
- if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
- return false;
- }
- for ($a = $formatting_elements - 1; $a >= 0; true) {
- /* 4. If there are no entries before entry in the list of active
- formatting elements, then jump to step 8. */
- if ($a === 0) {
- $step_seven = false;
- break;
- }
- /* 5. Let entry be the entry one earlier than entry in the list of
- active formatting elements. */
- $a--;
- $entry = $this->a_formatting[$a];
- /* 6. If entry is neither a marker nor an element that is also in
- thetack of open elements, go to step 4. */
- if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
- break;
- }
- }
- while (true) {
- /* 7. Let entry be the element one later than entry in the list of
- active formatting elements. */
- if (isset($step_seven) && $step_seven === true) {
- $a++;
- $entry = $this->a_formatting[$a];
- }
- /* 8. Perform a shallow clone of the element entry to obtain clone. */
- $clone = $entry->cloneNode();
- /* 9. Append clone to the current node and push it onto the stack
- of open elements so that it is the new current node. */
- $this->appendToRealParent($clone);
- $this->stack[] = $clone;
- /* 10. Replace the entry for entry in the list with an entry for
- clone. */
- $this->a_formatting[$a] = $clone;
- /* 11. If the entry for clone in the list of active formatting
- elements is not the last entry in the list, return to step 7. */
- if (end($this->a_formatting) !== $clone) {
- $step_seven = true;
- } else {
- break;
- }
- }
- // Return value not in use ATM. Would just make sense to also return true here.
- return true;
- }
- /**
- *
- */
- private function clearTheActiveFormattingElementsUpToTheLastMarker() {
- /* When the steps below require the UA to clear the list of active
- formatting elements up to the last marker, the UA must perform the
- following steps: */
- while (true) {
- /* 1. Let entry be the last (most recently added) entry in the list
- of active formatting elements. */
- $entry = end($this->a_formatting);
- /* 2. Remove entry from the list of active formatting elements. */
- array_pop($this->a_formatting);
- /* 3. If entry was a marker, then stop the algorithm at this point.
- The list has been cleared up to the last marker. */
- if ($entry === self::MARKER) {
- break;
- }
- }
- }
- /**
- * @param array $exclude
- */
- private function generateImpliedEndTags($exclude = []) {
- /* When the steps below require the UA to generate implied end tags,
- * then, while the current node is a dc element, a dd element, a ds
- * element, a dt element, an li element, an option element, an optgroup
- * element, a p element, an rp element, or an rt element, the UA must
- * pop the current node off the stack of open elements. */
- $node = end($this->stack);
- $elements = array_diff(['dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'], $exclude);
- while (in_array(end($this->stack)->tagName, $elements)) {
- array_pop($this->stack);
- }
- }
- /**
- * @param $node
- * @return int
- */
- private function getElementCategory($node) {
- if (!is_object($node)) {
- debug_print_backtrace();
- }
- $name = $node->tagName;
- if (in_array($name, $this->special)) {
- return self::SPECIAL;
- } elseif (in_array($name, $this->scoping)) {
- return self::SCOPING;
- } elseif (in_array($name, $this->formatting)) {
- return self::FORMATTING;
- } else {
- return self::PHRASING;
- }
- }
- /**
- * @param $elements
- */
- private function clearStackToTableContext($elements) {
- /* When the steps above require the UA to clear the stack back to a
- table context, it means that the UA must, while the current node is not
- a table element or an html element, pop elements from the stack of open
- elements. */
- while (true) {
- $name = end($this->stack)->tagName;
- if (in_array($name, $elements)) {
- break;
- } else {
- array_pop($this->stack);
- }
- }
- }
- /**
- * @param null $context
- */
- private function resetInsertionMode($context = null) {
- /* 1. Let last be false. */
- $last = false;
- $leng = count($this->stack);
- for ($n = $leng - 1; $n >= 0; $n--) {
- /* 2. Let node be the last node in the stack of open elements. */
- $node = $this->stack[$n];
- /* 3. If node is the first node in the stack of open elements, then
- * set last to true and set node to the context element. (fragment
- * case) */
- if ($this->stack[0]->isSameNode($node)) {
- $last = true;
- $node = $context;
- }
- /* 4. If node is a select element, then switch the insertion mode to
- "in select" and abort these steps. (fragment case) */
- if ($node->tagName === 'select') {
- $this->mode = self::IN_SELECT;
- break;
- /* 5. If node is a td or th element, then switch the insertion mode
- to "in cell" and abort these steps. */
- } elseif ($node->tagName === 'td' || $node->nodeName === 'th') {
- $this->mode = self::IN_CELL;
- break;
- /* 6. If node is a tr element, then switch the insertion mode to
- "in row" and abort these steps. */
- } elseif ($node->tagName === 'tr') {
- $this->mode = self::IN_ROW;
- break;
- /* 7. If node is a tbody, thead, or tfoot element, then switch the
- insertion mode to "in table body" and abort these steps. */
- } elseif (in_array($node->tagName, ['tbody', 'thead', 'tfoot'])) {
- $this->mode = self::IN_TABLE_BODY;
- break;
- /* 8. If node is a caption element, then switch the insertion mode
- to "in caption" and abort these steps. */
- } elseif ($node->tagName === 'caption') {
- $this->mode = self::IN_CAPTION;
- break;
- /* 9. If node is a colgroup element, then switch the insertion mode
- to "in column group" and abort these steps. (innerHTML case) */
- } elseif ($node->tagName === 'colgroup') {
- $this->mode = self::IN_COLUMN_GROUP;
- break;
- /* 10. If node is a table element, then switch the insertion mode
- to "in table" and abort these steps. */
- } elseif ($node->tagName === 'table') {
- $this->mode = self::IN_TABLE;
- break;
- /* 11. If node is an element from the MathML namespace or the SVG
- * namespace, then switch the insertion mode to "in foreign
- * content", let the secondary insertion mode be "in body", and
- * abort these steps. */
- } elseif ($node->namespaceURI === self::NS_SVG ||
- $node->namespaceURI === self::NS_MATHML) {
- $this->mode = self::IN_FOREIGN_CONTENT;
- $this->secondary_mode = self::IN_BODY;
- break;
- /* 12. If node is a head element, then switch the insertion mode
- to "in body" ("in body"! not "in head"!) and abort these steps.
- (fragment case) */
- } elseif ($node->tagName === 'head') {
- $this->mode = self::IN_BODY;
- break;
- /* 13. If node is a body element, then switch the insertion mode to
- "in body" and abort these steps. */
- } elseif ($node->tagName === 'body') {
- $this->mode = self::IN_BODY;
- break;
- /* 14. If node is a frameset element, then switch the insertion
- mode to "in frameset" and abort these steps. (fragment case) */
- } elseif ($node->tagName === 'frameset') {
- $this->mode = self::IN_FRAMESET;
- break;
- /* 15. If node is an html element, then: if the head element
- pointer is null, switch the insertion mode to "before head",
- otherwise, switch the insertion mode to "after head". In either
- case, abort these steps. (fragment case) */
- } elseif ($node->tagName === 'html') {
- $this->mode = ($this->head_pointer === null)
- ? self::BEFORE_HEAD
- : self::AFTER_HEAD;
- break;
- /* 16. If last is true, then set the insertion mode to "in body"
- and abort these steps. (fragment case) */
- } elseif ($last) {
- $this->mode = self::IN_BODY;
- break;
- }
- }
- }
- /**
- *
- */
- private function closeCell() {
- /* If the stack of open elements has a td or th element in table scope,
- then act as if an end tag token with that tag name had been seen. */
- foreach (['td', 'th'] as $cell) {
- if ($this->elementInScope($cell, self::SCOPE_TABLE)) {
- $this->emitToken([
- 'name' => $cell,
- 'type' => HTML5_Tokenizer::ENDTAG
- ]);
- break;
- }
- }
- }
- /**
- * @param $token
- * @param $mode
- */
- private function processWithRulesFor($token, $mode) {
- /* "using the rules for the m insertion mode", where m is one of these
- * modes, the user agent must use the rules described under the m
- * insertion mode's section, but must leave the insertion mode
- * unchanged unless the rules in m themselves switch the insertion mode
- * to a new value. */
- $this->emitToken($token, $mode);
- }
- /**
- * @param $token
- */
- private function insertCDATAElement($token) {
- $this->insertElement($token);
- $this->original_mode = $this->mode;
- $this->mode = self::IN_CDATA_RCDATA;
- $this->content_model = HTML5_Tokenizer::CDATA;
- }
- /**
- * @param $token
- */
- private function insertRCDATAElement($token) {
- $this->insertElement($token);
- $this->original_mode = $this->mode;
- $this->mode = self::IN_CDATA_RCDATA;
- $this->content_model = HTML5_Tokenizer::RCDATA;
- }
- /**
- * @param $token
- * @param $key
- * @return bool
- */
- private function getAttr($token, $key) {
- if (!isset($token['attr'])) {
- return false;
- }
- $ret = false;
- foreach ($token['attr'] as $keypair) {
- if ($keypair['name'] === $key) {
- $ret = $keypair['value'];
- }
- }
- return $ret;
- }
- /**
- * @return mixed
- */
- private function getCurrentTable() {
- /* The current table is the last table element in the stack of open
- * elements, if there is one. If there is no table element in the stack
- * of open elements (fragment case), then the current table is the
- * first element in the stack of open elements (the html element). */
- for ($i = count($this->stack) - 1; $i >= 0; $i--) {
- if ($this->stack[$i]->tagName === 'table') {
- return $this->stack[$i];
- }
- }
- return $this->stack[0];
- }
- /**
- * @return mixed
- */
- private function getFosterParent() {
- /* The foster parent element is the parent element of the last
- table element in the stack of open elements, if there is a
- table element and it has such a parent element. If there is no
- table element in the stack of open elements (innerHTML case),
- then the foster parent element is the first element in the
- stack of open elements (the html element). Otherwise, if there
- is a table element in the stack of open elements, but the last
- table element in the stack of open elements has no parent, or
- its parent node is not an element, then the foster parent
- element is the element before the last table element in the
- stack of open elements. */
- for ($n = count($this->stack) - 1; $n >= 0; $n--) {
- if ($this->stack[$n]->tagName === 'table') {
- $table = $this->stack[$n];
- break;
- }
- }
- if (isset($table) && $table->parentNode !== null) {
- return $table->parentNode;
- } elseif (!isset($table)) {
- return $this->stack[0];
- } elseif (isset($table) && ($table->parentNode === null ||
- $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
- return $this->stack[$n - 1];
- }
- return null;
- }
- /**
- * @param $node
- */
- public function fosterParent($node) {
- $foster_parent = $this->getFosterParent();
- $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
- /* When a node node is to be foster parented, the node node must be
- * be inserted into the foster parent element. */
- /* If the foster parent element is the parent element of the last table
- * element in the stack of open elements, then node must be inserted
- * immediately before the last table element in the stack of open
- * elements in the foster parent element; otherwise, node must be
- * appended to the foster parent element. */
- if ($table->tagName === 'table' && $table->parentNode->isSameNode($foster_parent)) {
- $foster_parent->insertBefore($node, $table);
- } else {
- $foster_parent->appendChild($node);
- }
- }
- /**
- * For debugging, prints the stack
- */
- private function printStack() {
- $names = [];
- foreach ($this->stack as $i => $element) {
- $names[] = $element->tagName;
- }
- echo " -> stack [" . implode(', ', $names) . "]\n";
- }
- /**
- * For debugging, prints active formatting elements
- */
- private function printActiveFormattingElements() {
- if (!$this->a_formatting) {
- return;
- }
- $names = [];
- foreach ($this->a_formatting as $node) {
- if ($node === self::MARKER) {
- $names[] = 'MARKER';
- } else {
- $names[] = $node->tagName;
- }
- }
- echo " -> active formatting [" . implode(', ', $names) . "]\n";
- }
- /**
- * @return bool
- */
- public function currentTableIsTainted() {
- return !empty($this->getCurrentTable()->tainted);
- }
- /**
- * Sets up the tree constructor for building a fragment.
- *
- * @param null $context
- */
- public function setupContext($context = null) {
- $this->fragment = true;
- if ($context) {
- $context = $this->dom->createElementNS(self::NS_HTML, $context);
- /* 4.1. Set the HTML parser's tokenization stage's content model
- * flag according to the context element, as follows: */
- switch ($context->tagName) {
- case 'title': case 'textarea':
- $this->content_model = HTML5_Tokenizer::RCDATA;
- break;
- case 'style': case 'script': case 'xmp': case 'iframe':
- case 'noembed': case 'noframes':
- $this->content_model = HTML5_Tokenizer::CDATA;
- break;
- case 'noscript':
- // XSCRIPT: assuming scripting is enabled
- $this->content_model = HTML5_Tokenizer::CDATA;
- break;
- case 'plaintext':
- $this->content_model = HTML5_Tokenizer::PLAINTEXT;
- break;
- }
- /* 4.2. Let root be a new html element with no attributes. */
- $root = $this->dom->createElementNS(self::NS_HTML, 'html');
- $this->root = $root;
- /* 4.3 Append the element root to the Document node created above. */
- $this->dom->appendChild($root);
- /* 4.4 Set up the parser's stack of open elements so that it
- * contains just the single element root. */
- $this->stack = [$root];
- /* 4.5 Reset the parser's insertion mode appropriately. */
- $this->resetInsertionMode($context);
- /* 4.6 Set the parser's form element pointer to the nearest node
- * to the context element that is a form element (going straight up
- * the ancestor chain, and including the element itself, if it is a
- * form element), or, if there is no such form element, to null. */
- $node = $context;
- do {
- if ($node->tagName === 'form') {
- $this->form_pointer = $node;
- break;
- }
- } while ($node = $node->parentNode);
- }
- }
- /**
- * @param $token
- * @return mixed
- */
- public function adjustMathMLAttributes($token) {
- foreach ($token['attr'] as &$kp) {
- if ($kp['name'] === 'definitionurl') {
- $kp['name'] = 'definitionURL';
- }
- }
- return $token;
- }
- /**
- * @param $token
- * @return mixed
- */
- public function adjustSVGAttributes($token) {
- static $lookup = [
- 'attributename' => 'attributeName',
- 'attributetype' => 'attributeType',
- 'basefrequency' => 'baseFrequency',
- 'baseprofile' => 'baseProfile',
- 'calcmode' => 'calcMode',
- 'clippathunits' => 'clipPathUnits',
- 'contentscripttype' => 'contentScriptType',
- 'contentstyletype' => 'contentStyleType',
- 'diffuseconstant' => 'diffuseConstant',
- 'edgemode' => 'edgeMode',
- 'externalresourcesrequired' => 'externalResourcesRequired',
- 'filterres' => 'filterRes',
- 'filterunits' => 'filterUnits',
- 'glyphref' => 'glyphRef',
- 'gradienttransform' => 'gradientTransform',
- 'gradientunits' => 'gradientUnits',
- 'kernelmatrix' => 'kernelMatrix',
- 'kernelunitlength' => 'kernelUnitLength',
- 'keypoints' => 'keyPoints',
- 'keysplines' => 'keySplines',
- 'keytimes' => 'keyTimes',
- 'lengthadjust' => 'lengthAdjust',
- 'limitingconeangle' => 'limitingConeAngle',
- 'markerheight' => 'markerHeight',
- 'markerunits' => 'markerUnits',
- 'markerwidth' => 'markerWidth',
- 'maskcontentunits' => 'maskContentUnits',
- 'maskunits' => 'maskUnits',
- 'numoctaves' => 'numOctaves',
- 'pathlength' => 'pathLength',
- 'patterncontentunits' => 'patternContentUnits',
- 'patterntransform' => 'patternTransform',
- 'patternunits' => 'patternUnits',
- 'pointsatx' => 'pointsAtX',
- 'pointsaty' => 'pointsAtY',
- 'pointsatz' => 'pointsAtZ',
- 'preservealpha' => 'preserveAlpha',
- 'preserveaspectratio' => 'preserveAspectRatio',
- 'primitiveunits' => 'primitiveUnits',
- 'refx' => 'refX',
- 'refy' => 'refY',
- 'repeatcount' => 'repeatCount',
- 'repeatdur' => 'repeatDur',
- 'requiredextensions' => 'requiredExtensions',
- 'requiredfeatures' => 'requiredFeatures',
- 'specularconstant' => 'specularConstant',
- 'specularexponent' => 'specularExponent',
- 'spreadmethod' => 'spreadMethod',
- 'startoffset' => 'startOffset',
- 'stddeviation' => 'stdDeviation',
- 'stitchtiles' => 'stitchTiles',
- 'surfacescale' => 'surfaceScale',
- 'systemlanguage' => 'systemLanguage',
- 'tablevalues' => 'tableValues',
- 'targetx' => 'targetX',
- 'targety' => 'targetY',
- 'textlength' => 'textLength',
- 'viewbox' => 'viewBox',
- 'viewtarget' => 'viewTarget',
- 'xchannelselector' => 'xChannelSelector',
- 'ychannelselector' => 'yChannelSelector',
- 'zoomandpan' => 'zoomAndPan',
- ];
- foreach ($token['attr'] as &$kp) {
- if (isset($lookup[$kp['name']])) {
- $kp['name'] = $lookup[$kp['name']];
- }
- }
- return $token;
- }
- /**
- * @param $token
- * @return mixed
- */
- public function adjustForeignAttributes($token) {
- static $lookup = [
- 'xlink:actuate' => ['xlink', 'actuate', self::NS_XLINK],
- 'xlink:arcrole' => ['xlink', 'arcrole', self::NS_XLINK],
- 'xlink:href' => ['xlink', 'href', self::NS_XLINK],
- 'xlink:role' => ['xlink', 'role', self::NS_XLINK],
- 'xlink:show' => ['xlink', 'show', self::NS_XLINK],
- 'xlink:title' => ['xlink', 'title', self::NS_XLINK],
- 'xlink:type' => ['xlink', 'type', self::NS_XLINK],
- 'xml:base' => ['xml', 'base', self::NS_XML],
- 'xml:lang' => ['xml', 'lang', self::NS_XML],
- 'xml:space' => ['xml', 'space', self::NS_XML],
- 'xmlns' => [null, 'xmlns', self::NS_XMLNS],
- 'xmlns:xlink' => ['xmlns', 'xlink', self::NS_XMLNS],
- ];
- foreach ($token['attr'] as &$kp) {
- if (isset($lookup[$kp['name']])) {
- $kp['name'] = $lookup[$kp['name']];
- }
- }
- return $token;
- }
- /**
- * @param $token
- * @param $namespaceURI
- */
- public function insertForeignElement($token, $namespaceURI) {
- $el = $this->dom->createElementNS($namespaceURI, $token['name']);
- if (!empty($token['attr'])) {
- foreach ($token['attr'] as $kp) {
- $attr = $kp['name'];
- if (is_array($attr)) {
- $ns = $attr[2];
- $attr = $attr[1];
- } else {
- $ns = self::NS_HTML;
- }
- if (!$el->hasAttributeNS($ns, $attr)) {
- // XSKETCHY: work around godawful libxml bug
- if ($ns === self::NS_XLINK) {
- $el->setAttribute('xlink:'.$attr, $kp['value']);
- } elseif ($ns === self::NS_HTML) {
- // Another godawful libxml bug
- $el->setAttribute($attr, $kp['value']);
- } else {
- $el->setAttributeNS($ns, $attr, $kp['value']);
- }
- }
- }
- }
- $this->appendToRealParent($el);
- $this->stack[] = $el;
- // XERROR: see below
- /* If the newly created element has an xmlns attribute in the XMLNS
- * namespace whose value is not exactly the same as the element's
- * namespace, that is a parse error. Similarly, if the newly created
- * element has an xmlns:xlink attribute in the XMLNS namespace whose
- * value is not the XLink Namespace, that is a parse error. */
- }
- /**
- * @return DOMDocument|DOMNodeList
- */
- public function save() {
- $this->dom->normalize();
- if (!$this->fragment) {
- return $this->dom;
- } else {
- if ($this->root) {
- return $this->root->childNodes;
- } else {
- return $this->dom->childNodes;
- }
- }
- }
- }
|