Файловый менеджер - Редактировать - /var/www/html/ParserTests.zip
Ðазад
PK ! y�G G TestMode.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; /** * Represents a parser test mode, that is, a certain way of executing a * parser tests and evaluating the result. * * As a trivial example, a parser test will typically have a * "wikitext" section and an "html" section. Two possible modes for * evaluating the test are "wt2html" (where you programatically * convert the "wikitext" section to HTML and verify that the result * matches the "html" section, after normalization) and "html2wt" * (where you programmatically convert the "html" section back to * wikitext and verify that the result matches the "wikitext" section, * after normalization). */ class TestMode { /** Valid test modes, as keys for efficient query/set intersection. */ public const TEST_MODES = [ 'legacy' => true, // wt2html with legacy parser 'wt2html' => true, 'wt2wt' => true, 'html2html' => true, 'html2wt' => true, 'selser' => true, ]; /** * Selected test mode, typically one of the values from self::TEST_MODES. * @var string */ public $mode; /** * The "selser" test mode can operate with an explicit changetree * provided in this property. * @var ?array */ public $changetree; /** * Create a new test mode * @param string $mode The test mode. An external caller should use * one of `self::TEST_MODES`, although ParserTestRunner uses a few * additional values internally. * @param ?array $changetree The specific changes to apply in selser test * mode. */ public function __construct( string $mode, ?array $changetree = null ) { $this->mode = $mode; $this->changetree = $changetree; } /** * Helper function: returns true if this test mode is 'legacy'; that is, * is this test to run with the legacy parser. * @return bool */ public function isLegacy() { return $this->mode === 'legacy'; } /** * Helper function: returns true if we are running this test to cache some info * for use in later tests. 
* @return bool */ public function isCachingMode() { return $this->mode === 'cache'; } /** * Returns a string representation of this test mode, which can also * be used as an array key or for human-friendly output. * @return string */ public function __toString(): string { $s = $this->mode; if ( $this->changetree !== null ) { $s .= ' ' . json_encode( $this->changetree, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE ); } return $s; } /** * Helper function: filter a given set of options against the * TEST_MODES. Optionally ensure that all modes are returned if * none are explicitly set. * * @param string[] $options The user-specified test modes * @param bool $ifEmptySetAll If true, ensure that the result always * includes at least one set test mode by setting all available test * modes if the passed $options array does not contain any. * @return string[] A filtered set of test modes */ public static function requestedTestModes( array $options, bool $ifEmptySetAll = true ) { if ( $ifEmptySetAll ) { $allModes = true; foreach ( self::TEST_MODES as $mode => $ignore ) { if ( $options[$mode] ?? false ) { $allModes = false; } } if ( $allModes ) { return array_keys( self::TEST_MODES ); } } return array_keys( array_intersect_assoc( $options, self::TEST_MODES ) ); } } PK ! C3; ; Article.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; /** * Represents a parser test */ class Article extends Item { /** @var string */ public $title; /** @var string */ public $text; /** * @param array $articleProps key-value mapping of properties * @param ?string $comment Optional comment describing the article */ public function __construct( array $articleProps, ?string $comment = null ) { parent::__construct( $articleProps, $comment ); $this->title = $articleProps['title']; $this->text = $articleProps['text']; } } PK ! 
��!L L Grammar.pegphpnu �Iw�� /** * PEG.js grammar for reading MediaWiki parser tests files * 2011-07-20 Brion Vibber <brion@pobox.com> */ { /* File-scope initializer */ namespace Wikimedia\Parsoid\ParserTests; use Wikimedia\Parsoid\Utils\PHPUtils; } { /** @var string */ private $filename = ''; /** @var int */ private $lineNum = 1; /** * @param string $filename * @return array */ public static function load( string $filename ) { $g = new Grammar(); $g->filename = $filename; $contents = file_get_contents( $filename ) ?: ''; if ( substr( $contents, -1 ) !== "\n" ) { # ensure that the file is terminated with a newline # to match `end_section` rule (and other uses of `eol`) $contents .= "\n"; } return $g->parse( $contents ); } private function addLines( int $lineStart, array $item ) { $item['filename'] = $this->filename; $item['lineNumStart'] = $lineStart; $item['lineNumEnd'] = $this->lineNum; return $item; } } testfile = comment_or_blank_line* format? comment_or_blank_line* testfile_options? lined_chunk+ testfile_options = l:("" { return $this->lineNum; }) sec:option_section end_section { return $this->addLines( $l, $sec ); } /* Line number bookkeeping. * Be careful about backtracking after you successfully match this production. */ eol = nl:"\n" { $this->lineNum++; return $nl; } lined_chunk = l:("" { return $this->lineNum; }) c:chunk { return $this->addLines($l, $c); } whitespace = [ \t]+ ws = whitespace rest_of_line = c:([^\n]*) eol { return implode($c); } line = (!"!!") line:rest_of_line { return $line; } text = lines:line* { return implode("\n", $lines); } chunk = comment_or_blank_line / article / test / hooks / functionhooks /* Final fallback production is a catch-all, since some ancient * parserTest files have garbage text between tests and in the old * hand-coded parser test parser this was just ignored as a comment. * We'll go ahead and parse it, then emit a warning in TestFileReader. 
*/ / l: line { return [ 'type' => 'line', 'text' => $l ]; } format = l:("" { return $this->lineNum; }) "!!" ws? version_keyword ws+ v:$([0-9]+) rest_of_line { return $this->addLines( $l, [ 'type' => 'version', 'text' => $v ] ); } version_keyword = 'version'i comment = "#" text:rest_of_line { return [ 'type' => 'comment', 'text' => $text ]; } comment_or_blank_line = comment / ws? nl:eol { return [ 'type' => 'line', 'text' => $nl ]; } article = start_article title:line start_text text:text ( end_article / end_section ) { return [ 'type' => 'article', 'title' => $title, 'text' => $text ]; } start_article = "!!" ws? "article" ws? eol start_text = "!!" ws? "text" ws? eol end_article = "!!" ws? "endarticle" ws? eol // function hooks functionhooks = start_functionhooks text:text ( end_functionhooks / end_section ) { return [ 'type' => 'functionhooks', 'text' => $text ]; } start_functionhooks = "!!" ws? "functionhooks" ":"? ws? eol end_functionhooks = "!!" ws? "endfunctionhooks" ":"? ws? eol test = start_test testName:text sections:(section / config_section / option_section)* end_section { $test = [ 'type' => 'test', 'testName' => $testName ]; foreach ( $sections as $section ) { $test[$section['name']] = $section['text']; } // pegjs parser handles item options as follows: // item option value of item.options.parsoid // <none> undefined // parsoid "" // parsoid=wt2html "wt2html" // parsoid=wt2html,wt2wt ["wt2html","wt2wt"] // parsoid={"modes":["wt2wt"]} {modes:['wt2wt']} // treat 'parsoid=xxx,yyy' in options section as shorthand for // 'parsoid={modes:["xxx","yyy"]}' if ( isset($test['options']['parsoid'] ) ) { if ($test['options']['parsoid'] === '') { $test['options']['parsoid'] = []; } if ( is_string( $test['options']['parsoid'] ) ) { $test['options']['parsoid'] = [ $test['options']['parsoid'] ]; } if ( is_array( $test['options']['parsoid'] ) && isset( $test['options']['parsoid'][0] ) && !isset( $test['options']['parsoid']['modes'] ) ) { $test['options']['parsoid'] = [ 
'modes' => $test['options']['parsoid'] ]; } } return $test; } section = "!!" ws? (!"test") (!"end") (!"options") (!"config") name:(c:[^ \t\r\n]+ { return implode( $c ); }) rest_of_line text:text { return [ 'name' => $name, 'text' => $text ]; } config_section = "!!" ws? "config" ws? eol items:config_list? { $c = []; if ( $items && count($items) > 0 ) { foreach ( $items as $item ) { $c[$item['k']] = $item['v']; } } return [ 'type' => 'section', 'name' => 'config', 'text' => $c ]; } option_section = "!!" ws? "options" ws? eol opts:option_list? { $o = []; if ( $opts && count($opts) > 0 ) { foreach ( $opts as $opt ) { $o[$opt['k']] = $opt['v']; } } return [ 'type' => 'section', 'name' => 'options', 'text' => $o ]; } config_list = c:a_config_line eol+ rest:config_list? { $result = [ $c ]; if ( $rest && count( $rest ) > 0 ) { $result = array_merge( $result, $rest ); } return $result; } option_list = o:an_option ([ \t] / eol)+ rest:option_list? { $result = [ $o ]; if ( $rest && count( $rest ) > 0 ) { $result = array_merge( $result, $rest ); } return $result; } a_config_line = k:option_name v:config_value { return [ 'k' => $k, 'v' => $v ]; } config_value = ws? "=" ws? v:valid_json_value { return $v; } valid_json_value = v:$(quoted_value / plain_value / array_value / json_value) &{ // validate this as acceptable JSON // (this ensures that wikipeg throws a syntax error if // the JSON is invalid; note that PHP 7.3 would allow us // to use JSON_THROW_ON_ERROR instead of json_last_error()...) $ignore = json_decode($v, true, 100); return (json_last_error() === JSON_ERROR_NONE); } { // The value is valid JSON; return the decoded value. return json_decode($v, true); } // from PHP parser in tests/parser/parserTest.inc:parseOptions() // foo // foo=bar // foo="bar baz" // foo=[[bar baz]] // foo={...json...} // foo=bar,"baz quux",[[bat]] an_option = k:option_name v:option_value? { return [ 'k' => strtolower( $k ), 'v' => $v ?? 
'' ]; } option_name = c:[^ \t\n=!]+ { return implode($c); } option_value = ws? "=" ws? ovl:option_value_list { return count( $ovl ) === 1 ? $ovl[0] : $ovl; } option_value_list = v:an_option_value rest:( ws? "," ws? ovl:option_value_list { return $ovl; })? { $result = [ $v ]; if ( $rest && count( $rest ) > 0 ) { $result = array_merge( $result, $rest ); } return $result; } an_option_value = v:(link_target_value / quoted_value / plain_value / json_value) { if ( $v[0] === '"' || $v[0] === '{' ) { // } is needed to make pegjs happy return PHPUtils::jsonDecode( $v ); } return $v; } link_target_value = "[[" v:[^\]\n]* "]]" { // Perhaps we should canonicalize the title? // Protect with JSON.stringify just in case the link target starts with // double-quote or open-brace. return PHPUtils::jsonEncode( implode( $v ) ); } quoted_value = [\"] v:( [^\\\"\n] / ("\\" c:[^\n] { return "\\" . $c; } ) )* [\"] { return '"' . implode( $v ) . '"'; } plain_value = v:[^ \t\n\"\'\[\]=,!\{]+ { return implode( $v ); } array_value = "[" v:( [^\"\[\]\n] / quoted_value / array_value / eol )* "]" { return "[" . implode( $v ) . "]"; } json_value = "{" v:( [^\"\{\}\n] / quoted_value / json_value / eol )* "}" { return "{" . implode( $v ) . "}"; } start_test = "!!" ws? "test" ws? eol end_section = "!!" ws? "end" ws? eol hooks = start_hooks text:text ( end_hooks / end_section ) { return [ 'type' => 'hooks', 'text' => $text ]; } start_hooks = "!!" ws? "hooks" ":"? ws? eol end_hooks = "!!" ws? "endhooks" ws? eol PK ! 
�7� � RawHTML.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Wikimedia\Parsoid\DOM\DocumentFragment; use Wikimedia\Parsoid\Ext\ExtensionModule; use Wikimedia\Parsoid\Ext\ExtensionTagHandler; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; class RawHTML extends ExtensionTagHandler implements ExtensionModule { /** @inheritDoc */ public function sourceToDom( ParsoidExtensionAPI $extApi, string $content, array $args ): DocumentFragment { return $extApi->htmlToDom( $content ); } /** @inheritDoc */ public function getConfig(): array { return [ 'name' => 'RawHTML', 'tags' => [ [ 'name' => 'html', 'handler' => self::class ], ], ]; } } PK ! �ff� f� TestRunner.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Closure; use Psr\Log\LoggerInterface; use Wikimedia\Assert\Assert; use Wikimedia\Bcp47Code\Bcp47CodeValue; use Wikimedia\Parsoid\Config\Api\DataAccess; use Wikimedia\Parsoid\Config\Api\PageConfig; use Wikimedia\Parsoid\Config\Env; use Wikimedia\Parsoid\Config\StubMetadataCollector; use Wikimedia\Parsoid\Core\SelectiveUpdateData; use Wikimedia\Parsoid\DOM\Document; use Wikimedia\Parsoid\DOM\Element; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; use Wikimedia\Parsoid\Mocks\MockPageConfig; use Wikimedia\Parsoid\Mocks\MockPageContent; use Wikimedia\Parsoid\Utils\ContentUtils; use Wikimedia\Parsoid\Utils\DOMCompat; use Wikimedia\Parsoid\Utils\DOMDataUtils; use Wikimedia\Parsoid\Utils\ScriptUtils; use Wikimedia\Parsoid\Utils\Title; use Wikimedia\Parsoid\Utils\Utils; use Wikimedia\Parsoid\Wt2Html\PageConfigFrame; /** * Test runner for parser tests */ class TestRunner { // Hard-code some interwiki prefixes, as is done // in ParserTestRunner::appendInterwikiSetup() in core // Note that ApiQuerySiteInfo will always expand the URL to include a // protocol, but will set 'protorel' to indicate whether its internal // form included a protocol or not. 
So in this file 'url' will always // have a protocol and we'll include an explicit 'protorel' field; but // in core there is no 'protorel' field and 'url' will not always have // a protocol. private const PARSER_TESTS_IWPS = [ [ 'prefix' => 'wikinvest', 'local' => true, // This url doesn't have a $1 to exercise the fix in // ConfigUtils::computeInterwikiMap 'url' => 'https://meta.wikimedia.org/wiki/Interwiki_map/discontinued#Wikinvest', 'protorel' => false ], [ 'prefix' => 'local', 'url' => 'http://example.org/wiki/$1', 'local' => true, 'localinterwiki' => true ], [ // Local interwiki that matches a namespace name (T228616) 'prefix' => 'project', 'url' => 'http://example.org/wiki/$1', 'local' => true, 'localinterwiki' => true ], [ 'prefix' => 'wikipedia', 'url' => 'http://en.wikipedia.org/wiki/$1' ], [ 'prefix' => 'meatball', // this has been updated in the live wikis, but the parser tests // expect the old value (as set in parserTest.inc:setupInterwikis()) 'url' => 'http://www.usemod.com/cgi-bin/mb.pl?$1' ], [ 'prefix' => 'memoryalpha', 'url' => 'http://www.memory-alpha.org/en/index.php/$1' ], [ 'prefix' => 'zh', 'url' => 'http://zh.wikipedia.org/wiki/$1', 'language' => "中文", 'local' => true ], [ 'prefix' => 'es', 'url' => 'http://es.wikipedia.org/wiki/$1', 'language' => "español", 'local' => true ], [ 'prefix' => 'fr', 'url' => 'http://fr.wikipedia.org/wiki/$1', 'language' => "français", 'local' => true ], [ 'prefix' => 'ru', 'url' => 'http://ru.wikipedia.org/wiki/$1', 'language' => "русский", 'local' => true ], [ 'prefix' => 'mi', 'url' => 'http://example.org/wiki/$1', // better for testing if one of the // localinterwiki prefixes is also a language 'language' => 'Test', 'local' => true, 'localinterwiki' => true ], [ 'prefix' => 'mul', 'url' => 'http://wikisource.org/wiki/$1', 'extralanglink' => true, 'linktext' => 'Multilingual', 'sitename' => 'WikiSource', 'local' => true ], // added to core's ParserTestRunner::appendInterwikiSetup() to support // Parsoid 
tests [T254181] [ 'prefix' => 'en', 'url' => 'http://en.wikipedia.org/wiki/$1', 'language' => 'English', 'local' => true, 'protorel' => true ], [ 'prefix' => 'stats', 'local' => true, 'url' => 'https://stats.wikimedia.org/$1' ], [ 'prefix' => 'gerrit', 'local' => true, 'url' => 'https://gerrit.wikimedia.org/$1' ], ]; /** @var bool */ private $runDisabled; /** @var bool */ private $runPHP; /** @var string */ private $offsetType; /** @var string */ private $testFileName; /** @var string */ private $testFilePath; /** @var ?string */ private $knownFailuresInfix; /** @var string */ private $knownFailuresPath; /** @var array */ private $articles; /** @var LoggerInterface */ private $defaultLogger; /** * Sets one of 'regex' or 'string' properties * - $testFilter['raw'] is the value of the filter * - if $testFilter['regex'] is true, $testFilter['raw'] is used as a regex filter. * - If $testFilter['string'] is true, $testFilter['raw'] is used as a plain string filter. * @var ?array */ private $testFilter; /** @var Test[] */ private $testCases; /** @var Stats */ private $stats; /** @var MockApiHelper */ private $mockApi; /** @var SiteConfig */ private $siteConfig; /** @var DataAccess */ private $dataAccess; /** * Global cross-test env object only to be used for title processing while * reading the parserTests file. * * Every test constructs its own private $env object. 
* * @var Env */ private $dummyEnv; /** * Options needed to construct the per-test private $env object * @var array */ private $envOptions; /** * @param string $testFilePath * @param ?string $knownFailuresInfix * @param string[] $modes */ public function __construct( string $testFilePath, ?string $knownFailuresInfix, array $modes ) { $this->testFilePath = $testFilePath; $this->knownFailuresInfix = $knownFailuresInfix; $testFilePathInfo = pathinfo( $testFilePath ); $this->testFileName = $testFilePathInfo['basename']; $newModes = []; $modes[] = 'metadata'; foreach ( $modes as $mode ) { $newModes[$mode] = new Stats(); $newModes[$mode]->failList = []; $newModes[$mode]->result = ''; // XML reporter uses this. } $this->stats = new Stats(); $this->stats->modes = $newModes; $this->mockApi = new MockApiHelper( null, fn ( $title )=>$this->normalizeTitleKey( $title ) ); $this->siteConfig = new SiteConfig( $this->mockApi, [] ); $this->dataAccess = new DataAccess( $this->mockApi, $this->siteConfig, [ 'stripProto' => false ] ); $this->dummyEnv = new Env( $this->siteConfig, // Unused; needed to satisfy Env signature requirements new MockPageConfig( $this->siteConfig, [], new MockPageContent( [ 'main' => '' ] ) ), // Unused; needed to satisfy Env signature requirements $this->dataAccess, // Unused; needed to satisfy Env signature requirements new StubMetadataCollector( $this->siteConfig ) ); // Init interwiki map to parser tests info. // This suppresses interwiki info from cached configs. $this->siteConfig->setupInterwikiMap( self::PARSER_TESTS_IWPS ); $this->siteConfig->reset(); } private function newEnv( Test $test, string $wikitext ): Env { $title = $this->dummyEnv->makeTitleFromURLDecodedStr( $test->pageName() ); $opts = [ 'title' => $title, 'pageContent' => $wikitext, 'pageLanguage' => $this->siteConfig->langBcp47(), 'pageLanguagedir' => $this->siteConfig->rtl() ? 
'rtl' : 'ltr' ]; $pageConfig = new PageConfig( null, $this->siteConfig, $opts ); $env = new Env( $this->siteConfig, $pageConfig, $this->dataAccess, new StubMetadataCollector( $this->siteConfig ), $this->envOptions ); $env->pageCache = $this->articles; // Set parsing resource limits. // $env->setResourceLimits(); return $env; } private function normalizeTitleKey( string $title ): string { return $this->dummyEnv->normalizedTitleKey( $title, false, true ); } private function addArticle( Article $art ): array { $key = $this->normalizeTitleKey( $art->title ); $oldVal = $this->articles[$key] ?? null; $this->articles[$key] = $art->text; $teardown = [ function () use ( $key, $oldVal ) { $this->articles[$key] = $oldVal; }, $this->mockApi->addArticle( $key, $art ), ]; return $teardown; } /** * Parse the test file and set up articles and test cases * @param array $options */ private function buildTests( array $options ): void { // Startup by loading .txt test file $warnFunc = static function ( string $warnMsg ): void { error_log( $warnMsg ); }; $normFunc = function ( string $title ): string { return $this->normalizeTitleKey( $title ); }; $testReader = TestFileReader::read( $this->testFilePath, $warnFunc, $normFunc, $this->knownFailuresInfix ); $this->knownFailuresPath = $testReader->knownFailuresPath; $this->testCases = $testReader->testCases; $this->articles = []; foreach ( $testReader->articles as $art ) { $this->addArticle( $art ); } if ( !ScriptUtils::booleanOption( $options['quieter'] ?? '' ) ) { if ( $this->knownFailuresPath ) { error_log( 'Loaded known failures from ' . $this->knownFailuresPath ); } else { error_log( 'No known failures found.' ); } } } /** * Convert a wikitext string to an HTML Node * * @param Env $env * @param Test $test * @param string $mode * @param string $wikitext * @return Document */ private function convertWt2Html( Env $env, Test $test, string $mode, string $wikitext ): Document { // FIXME: Ugly! 
Maybe we should switch to using the entrypoint to // the library for parserTests instead of reusing the environment // and touching these internals. $content = $env->getPageConfig()->getRevisionContent(); // @phan-suppress-next-line PhanUndeclaredProperty $content->data['main']['content'] = $wikitext; $env->topFrame = new PageConfigFrame( $env, $env->getPageConfig(), $env->getSiteConfig() ); if ( $mode === 'html2html' ) { // Since this was set when serializing we need to setup a new doc $env->setupTopLevelDoc(); } $handler = $env->getContentHandler(); $extApi = new ParsoidExtensionAPI( $env ); $doc = $handler->toDOM( $extApi ); return $doc; } /** * Convert a DOM to Wikitext. * * @param Env $env * @param Test $test * @param string $mode * @param Document $doc * @return string */ private function convertHtml2Wt( Env $env, Test $test, string $mode, Document $doc ): string { $startsAtWikitext = $mode === 'wt2wt' || $mode === 'wt2html' || $mode === 'selser'; if ( $mode === 'selser' ) { $selserData = new SelectiveUpdateData( $test->wikitext, $test->cachedBODYstr ); } else { $selserData = null; } $env->topLevelDoc = $doc; $extApi = new ParsoidExtensionAPI( $env ); return $env->getContentHandler()->fromDOM( $extApi, $selserData ); } /** * Run test in the requested mode * @param Test $test * @param string $mode * @param array $options */ private function runTest( Test $test, string $mode, array $options ): void { $test->time = []; $testOpts = $test->options; // These changes are for environment options that change between runs of // different modes. See `processTest` for changes per test. // Page language matches "wiki language" (which is set by // the item 'language' option). // Variant conversion is disabled by default $this->envOptions['wtVariantLanguage'] = null; $this->envOptions['htmlVariantLanguage'] = null; // The test can explicitly opt-in to variant conversion with the // 'langconv' option. if ( $testOpts['langconv'] ?? 
null ) { // These test option names are deprecated: // (Note that test options names are lowercased by the reader.) if ( $testOpts['sourcevariant'] ?? false ) { $this->envOptions['wtVariantLanguage'] = Utils::mwCodeToBcp47( $testOpts['sourcevariant'], true, $this->siteConfig->getLogger() ); } if ( $testOpts['variant'] ?? false ) { $this->envOptions['htmlVariantLanguage'] = Utils::mwCodeToBcp47( $testOpts['variant'], true, $this->siteConfig->getLogger() ); } // Preferred option names, which are also specified in bcp-47 codes // (Note that test options names are lowercased by the reader.) if ( $testOpts['wtvariantlanguage'] ?? false ) { $this->envOptions['wtVariantLanguage'] = new Bcp47CodeValue( $testOpts['wtvariantlanguage'] ); } if ( $testOpts['htmlvariantlanguage'] ?? false ) { $this->envOptions['htmlVariantLanguage'] = new Bcp47CodeValue( $testOpts['htmlvariantlanguage'] ); } } $env = $this->newEnv( $test, $test->wikitext ?? '' ); // Some useful booleans $startsAtHtml = $mode === 'html2html' || $mode === 'html2wt'; $endsAtHtml = $mode === 'wt2html' || $mode === 'html2html'; $parsoidOnly = isset( $test->sections['html/parsoid'] ) || isset( $test->sections['html/parsoid+standalone'] ) || ( !empty( $testOpts['parsoid'] ) && !isset( $testOpts['parsoid']['normalizePhp'] ) ); $test->time['start'] = microtime( true ); $doc = null; $wt = null; if ( isset( $test->sections['html/parsoid+standalone'] ) ) { $test->parsoidHtml = $test->sections['html/parsoid+standalone']; } // Source preparation if ( $startsAtHtml ) { $html = $test->parsoidHtml ?? ''; if ( !$parsoidOnly ) { // Strip some php output that has no wikitext representation // (like .mw-editsection) and won't html2html roundtrip and // therefore causes false failures. 
$html = TestUtils::normalizePhpOutput( $html ); } $doc = ContentUtils::createDocument( $html ); $wt = $this->convertHtml2Wt( $env, $test, $mode, $doc ); } else { // startsAtWikitext // Always serialize DOM to string and reparse before passing to wt2wt if ( $test->cachedBODYstr === null ) { $doc = $this->convertWt2Html( $env, $test, $mode, $test->wikitext ); // Cache parsed HTML $test->cachedBODYstr = ContentUtils::toXML( DOMCompat::getBody( $doc ) ); // - In wt2html mode, pass through original DOM // so that it is serialized just once. // - In wt2wt and selser modes, pass through serialized and // reparsed DOM so that fostering/normalization effects // are reproduced. if ( $mode === 'wt2html' ) { // no-op } else { $doc = ContentUtils::createDocument( $test->cachedBODYstr ); } } else { $doc = ContentUtils::createDocument( $test->cachedBODYstr ); } } // Generate and make changes for the selser test mode $testManualChanges = $testOpts['parsoid']['changes'] ?? null; if ( $mode === 'selser' ) { if ( $testManualChanges && $test->changetree === [ 'manual' ] ) { $test->applyManualChanges( $doc ); } else { $changetree = isset( $options['changetree'] ) ? json_decode( $options['changetree'] ) : $test->changetree; if ( !$changetree ) { $changetree = $test->generateChanges( $doc ); } $dumpOpts = [ 'dom:post-changes' => $env->hasDumpFlag( 'dom:post-changes' ), 'logger' => $env->getSiteConfig()->getLogger() ]; $test->applyChanges( $dumpOpts, $doc, $changetree ); } // Save the modified DOM so we can re-test it later. // Always serialize to string and reparse before passing to selser/wt2wt. $test->changedHTMLStr = ContentUtils::toXML( DOMCompat::getBody( $doc ) ); $doc = ContentUtils::createDocument( $test->changedHTMLStr ); } elseif ( $mode === 'wt2wt' ) { // Handle a 'changes' option if present. 
if ( $testManualChanges ) { $test->applyManualChanges( $doc ); } } // Roundtrip stage if ( $mode === 'wt2wt' || $mode === 'selser' ) { $wt = $this->convertHtml2Wt( $env, $test, $mode, $doc ); } elseif ( $mode === 'html2html' ) { $doc = $this->convertWt2Html( $env, $test, $mode, $wt ); } // Result verification stage if ( $endsAtHtml ) { $this->processParsedHTML( $env, $test, $options, $mode, $doc ); } else { $this->processSerializedWT( $env, $test, $options, $mode, $wt ); } } /** * Process test options that impact output. * These are almost always only pertinent in wt2html test modes. * Returns: * - null if there are no applicable output options. * - true if the output matches expected output for the requested option(s). * - false otherwise * * @param Env $env * @param Test $test * @param array $options * @param string $mode * @param Document $doc * @param ?string $metadataExpected A metadata section from the test, * or null if none present. If a metadata section is not present, * the metadata output is added to $doc, otherwise it is returned * in $metadataActual * @param ?string &$metadataActual The "actual" metadata output for * this test. */ private function addParserOutputInfo( Env $env, Test $test, array $options, string $mode, Document $doc, ?string $metadataExpected, ?string &$metadataActual ): void { $output = $env->getMetadata(); $opts = $test->options; '@phan-var StubMetadataCollector $output'; // @var StubMetadataCollector $metadata // See ParserTestRunner::addParserOutputInfo() in core. $before = []; $after = []; // 'showtitle' not yet supported // unlike other link types, this dumps the 'sort' property as well if ( isset( $opts['cat'] ) ) { $defaultSortKey = $output->getPageProperty( 'defaultsort' ) ?? 
''; foreach ( $output->getLinkList( StubMetadataCollector::LINKTYPE_CATEGORY ) as [ 'link' => $link, 'sort' => $sort ] ) { $sortkey = $sort ?: $defaultSortKey; $name = $link->getDBkey(); $after[] = "cat=$name sort=$sortkey"; } } if ( isset( $opts['extlinks'] ) ) { foreach ( $output->getExternalLinks() as $url => $ignore ) { $after[] = "extlink=$url"; } } // Unlike other link types, this is stored as text, not dbkey if ( isset( $opts['ill'] ) ) { foreach ( $output->getLinkList( StubMetadataCollector::LINKTYPE_LANGUAGE ) as [ 'link' => $ll ] ) { $after[] = "ill=" . Title::newFromLinkTarget( $ll, $this->siteConfig )->getFullText(); } } $linkoptions = [ [ 'iwl', 'iwl=', StubMetadataCollector::LINKTYPE_INTERWIKI ], [ 'links', 'link=', StubMetadataCollector::LINKTYPE_LOCAL ], [ 'special', 'special=', StubMetadataCollector::LINKTYPE_SPECIAL ], [ 'templates', 'template=', StubMetadataCollector::LINKTYPE_TEMPLATE ], ]; foreach ( $linkoptions as [ $optName, $prefix, $type ] ) { if ( isset( $opts[$optName] ) ) { foreach ( $output->getLinkList( $type ) as [ 'link' => $ll ] ) { $after[] = $prefix . Title::newFromLinkTarget( $ll, $this->siteConfig )->getPrefixedDBkey(); } } } if ( isset( $opts['extension'] ) ) { $extList = $opts['extension']; if ( !is_array( $extList ) ) { $extList = [ $extList ]; } foreach ( $extList as $ext ) { $after[] = "extension[$ext]=" . // XXX should use JsonCodec json_encode( $output->getExtensionData( $ext ), JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT ); } } if ( isset( $opts['property'] ) ) { $propList = $opts['property']; if ( !is_array( $propList ) ) { $propList = [ $propList ]; } foreach ( $propList as $prop ) { $after[] = "property[$prop]=" . ( $output->getPageProperty( $prop ) ?? '' ); } } if ( isset( $opts['showflags'] ) ) { $actualFlags = $output->getOutputFlags(); sort( $actualFlags ); $after[] = "flags=" . 
implode( ', ', $actualFlags ); } if ( isset( $opts['showtocdata'] ) ) { $tocData = $output->getTOCData(); if ( $tocData !== null ) { $after[] = $tocData->prettyPrint(); } } if ( isset( $opts['showindicators'] ) ) { foreach ( $output->getIndicators() as $name => $content ) { $after[] = "$name=$content"; } } if ( isset( $opts['showmedia'] ) ) { $images = array_map( fn ( $item ) => $item['link']->getDBkey(), $output->getLinkList( StubMetadataCollector::LINKTYPE_MEDIA ) ); $after[] = 'images=' . implode( ', ', $images ); } if ( $metadataExpected === null ) { // legacy format, add $before and $after to $doc $body = DOMCompat::getBody( $doc ); if ( count( $before ) ) { $before = $doc->createTextNode( implode( "\n", $before ) ); $body->insertBefore( $before, $body->firstChild ); } if ( count( $after ) ) { $after = $doc->createTextNode( implode( "\n", $after ) ); $body->appendChild( $after ); } } else { $metadataActual = implode( "\n", array_merge( $before, $after ) ); } } /** * Return the appropriate metadata section for this test, given that * we are running in parsoid "standalone" mode, or 'null' if none is * present. * @param Test $test * @return ?string The expected metadata for this test */ public static function getStandaloneMetadataSection( Test $test ): ?string { return // specific results for parsoid standalone mode $test->sections['metadata/parsoid+standalone'] ?? // specific results for parsoid $test->sections['metadata/parsoid'] ?? // generic for all parsers (even standalone) $test->sections['metadata'] ?? // missing (== use legacy combined output format) null; } /** * Check the given HTML result against the expected result, * and throw an exception if necessary. 
* * @param Env $env * @param Test $test * @param array $options * @param string $mode * @param Document $doc */ private function processParsedHTML( Env $env, Test $test, array $options, string $mode, Document $doc ): void { $modeObj = new TestMode( $mode ); $test->time['end'] = microtime( true ); $metadataExpected = self::getStandaloneMetadataSection( $test ); $metadataActual = null; if ( isset( $test->options['nohtml'] ) ) { $body = DOMCompat::getBody( $doc ); while ( $body->hasChildNodes() ) { $body->removeChild( $body->firstChild ); } } $this->addParserOutputInfo( $env, $test, $options, $mode, $doc, $metadataExpected, $metadataActual ); if ( $test->parsoidHtml !== null ) { $checkPassed = $this->checkHTML( $test, DOMCompat::getBody( $doc ), $options, $mode ); } else { // Running the test for metadata, presumably. $checkPassed = true; } // We could also check metadata in the html2html or wt2wt // modes, but (a) we'd need a separate key for known failures // to avoid overwriting the wt2html metadata results, and (b) // any failures would probably be redundant with html2wt // failures and not indicative of a "real" root cause bug. if ( $metadataExpected !== null && !$modeObj->isCachingMode() && $mode === 'wt2html' ) { $metadataResult = $this->checkMetadata( $test, $metadataExpected, $metadataActual ?? '', $options ); $checkPassed = $checkPassed && $metadataResult; } // Only throw an error if --exit-unexpected was set and there was an error // Otherwise, continue running tests if ( $options['exit-unexpected'] && !$checkPassed ) { throw new UnexpectedException; } } /** * Check the given wikitext result against the expected result, * and throw an exception if necessary. 
*
	 * @param Env $env Environment used when re-serializing the edited HTML.
	 * @param Test $test The test being evaluated; its 'end' timestamp is recorded here.
	 * @param array $options Runner options ('selser' and 'exit-unexpected' are read here).
	 * @param string $mode Name of the test mode that produced $wikitext.
	 * @param string $wikitext The serialized wikitext to check.
	 * @throws UnexpectedException
	 */
	private function processSerializedWT(
		Env $env, Test $test, array $options, string $mode, string $wikitext
	): void {
		$test->time['end'] = microtime( true );

		$autoSelser = $mode === 'selser' && $options['selser'] !== 'noauto';
		if ( $autoSelser ) {
			if ( $test->changetree !== [ 5 ] ) {
				// Serialize the changed HTML in wt2wt fashion to obtain the
				// wikitext that the selser output is compared against.
				$editedDoc = ContentUtils::createDocument( $test->changedHTMLStr );
				$test->resultWT = $this->convertHtml2Wt( $env, $test, 'wt2wt', $editedDoc );
			} else {
				// Changetree [ 5 ]: the original wikitext is the expected result.
				$test->resultWT = $test->wikitext;
			}
		}

		$passed = $this->checkWikitext( $test, $wikitext, $options, $mode );

		// Abort the run only when --exit-unexpected was requested and the
		// check failed; otherwise keep running tests.
		if ( $options['exit-unexpected'] && !$passed ) {
			throw new UnexpectedException;
		}
	}

	/**
	 * Normalize the actual and expected HTML and pass both to the
	 * 'reportResult' callback.
	 *
	 * @param Test $test
	 * @param Element $out Element holding the actual output
	 * @param array $options Runner options; must provide the 'reportResult' callable
	 * @param string $mode
	 * @return bool The value returned by the 'reportResult' callback
	 */
	private function checkHTML(
		Test $test, Element $out, array $options, string $mode
	): bool {
		[ $actualNorm, $expectedNorm ] = $test->normalizeHTML(
			$out, $test->cachedNormalizedHTML
		);

		$expected = [ 'normal' => $expectedNorm, 'raw' => $test->parsoidHtml ];

		$actual = [ 'normal' => $actualNorm ];
		$actual['raw'] = ContentUtils::toXML( $out, [ 'innerXML' => true ] );
		// For html2html the input was the Parsoid HTML, otherwise the wikitext.
		$actual['input'] = ( $mode === 'html2html' ) ? $test->parsoidHtml : $test->wikitext;

		$report = $options['reportResult'];
		return $report( $this->stats, $test, $options, $mode, $expected, $actual );
	}

	/**
	 * Pass expected and actual metadata to the 'reportResult' callback
	 * under the 'metadata' mode. No normalization is applied: the 'normal'
	 * and 'raw' entries carry the same string.
	 *
	 * @param Test $test
	 * @param string $metadataExpected
	 * @param string $metadataActual
	 * @param array $options Runner options; must provide the 'reportResult' callable
	 * @return bool The value returned by the 'reportResult' callback
	 */
	private function checkMetadata(
		Test $test, string $metadataExpected, string $metadataActual, array $options
	): bool {
		$expected = [ 'normal' => $metadataExpected ];
		$expected['raw'] = $metadataExpected;

		$actual = [ 'normal' => $metadataActual ];
		$actual['raw'] = $metadataActual;
		$actual['input'] = $test->wikitext;

		$report = $options['reportResult'];
		return $report( $this->stats, $test, $options, 'metadata', $expected, $actual );
	}

	/**
	 * Removes DSR from data-parsoid for test normalization of a complete document. If
	 * data-parsoid gets subsequently empty, removes it too.
* @param string $raw * @return string */ private function filterDsr( string $raw ): string { $doc = ContentUtils::createAndLoadDocument( $raw ); foreach ( $doc->childNodes as $child ) { if ( $child instanceof Element ) { $this->filterNodeDsr( $child ); } } DOMDataUtils::visitAndStoreDataAttribs( $doc ); $ret = ContentUtils::toXML( DOMCompat::getBody( $doc ), [ 'innerXML' => true ] ); $ret = preg_replace( '/\sdata-parsoid="{}"/', '', $ret ); return $ret; } /** * Removes DSR from data-parsoid for test normalization of an element. */ private function filterNodeDsr( Element $el ) { $dp = DOMDataUtils::getDataParsoid( $el ); unset( $dp->dsr ); foreach ( $el->childNodes as $child ) { if ( $child instanceof Element ) { $this->filterNodeDsr( $child ); } } } private function checkWikitext( Test $test, string $out, array $options, string $mode ): bool { if ( $mode === 'html2wt' ) { $input = $test->parsoidHtml; $testWikitext = $test->wikitext; } elseif ( $mode === 'wt2wt' ) { if ( isset( $test->options['parsoid']['changes'] ) ) { $input = $test->wikitext; $testWikitext = $test->sections['wikitext/edited']; } else { $input = $testWikitext = $test->wikitext; } } else { /* selser */ if ( $test->changetree === [ 5 ] ) { /* selser with oracle */ $input = $test->changedHTMLStr; $testWikitext = $test->wikitext; $out = preg_replace( '/<!--' . Test::STATIC_RANDOM_STRING . 
'-->/', '', $out ); } elseif ( $test->changetree === [ 'manual' ] && isset( $test->options['parsoid']['changes'] ) ) { /* manual changes */ $input = $test->wikitext; $testWikitext = $test->sections['wikitext/edited']; } else { /* automated selser changes, no oracle */ $input = $test->changedHTMLStr; $testWikitext = $test->resultWT; } } [ $normalizedOut, $normalizedExpected ] = $test->normalizeWT( $out, $testWikitext ); $expected = [ 'normal' => $normalizedExpected, 'raw' => $testWikitext ]; $actual = [ 'normal' => $normalizedOut, 'raw' => $out, 'input' => $input ]; return $options['reportResult']( $this->stats, $test, $options, $mode, $expected, $actual ); } private function updateKnownFailures( array $options ): array { // Check in case any tests were removed but we didn't update // the knownFailures $knownFailuresChanged = false; $allModes = $options['wt2html'] && $options['wt2wt'] && $options['html2wt'] && $options['html2html'] && isset( $options['selser'] ) && !( isset( $options['filter'] ) || isset( $options['regex'] ) || isset( $options['maxtests'] ) ); $offsetType = $options['offsetType'] ?? 'byte'; // Update knownFailures, if requested if ( $allModes || ScriptUtils::booleanOption( $options['updateKnownFailures'] ?? null ) ) { if ( $this->knownFailuresPath !== null ) { $old = file_get_contents( $this->knownFailuresPath ); } else { // If file doesn't exist, use the JSON representation of an // empty array, so it compares equal in the case that we // end up with an empty array of known failures below. $old = '{}'; } $testKnownFailures = []; $kfModes = array_merge( $options['modes'], [ 'metadata' ] ); foreach ( $kfModes as $mode ) { foreach ( $this->stats->modes[$mode]->failList as $fail ) { $testKnownFailures[$fail['testName']] ??= []; Assert::invariant( !isset( $testKnownFailures[$fail['testName']][$mode . $fail['suffix']] ), "Overwriting known failures result for " . $fail['testName'] . " " . $mode . 
$fail['suffix'] ); $testKnownFailures[$fail['testName']][$mode . $fail['suffix']] = $fail['raw']; } } // Sort, otherwise, titles get added above based on the first // failing mode, which can make diffs harder to verify when // failing modes change. ksort( $testKnownFailures ); $contents = json_encode( $testKnownFailures, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_FORCE_OBJECT | JSON_UNESCAPED_UNICODE ) . "\n"; if ( ScriptUtils::booleanOption( $options['updateKnownFailures'] ?? null ) ) { if ( $this->knownFailuresPath !== null ) { file_put_contents( $this->knownFailuresPath, $contents ); } else { // To be safe, we don't try to write a file that doesn't // (yet) exist. Create an empty file if you need to, and // then we'll happily update it for you. throw new \RuntimeException( "Known failures file for {$this->testFileName} does not exist, " . "and so won't be updated." ); } } elseif ( $allModes && $offsetType === 'byte' ) { $knownFailuresChanged = $contents !== $old; } } // Write updated tests from failed ones if ( ScriptUtils::booleanOption( $options['update-tests'] ?? null ) || ScriptUtils::booleanOption( $options['update-unexpected'] ?? null ) ) { $updateFormat = $options['update-format']; if ( $updateFormat !== 'raw' && $updateFormat !== 'actualNormalized' ) { $updateFormat = 'noDsr'; } $fileContent = file_get_contents( $this->testFilePath ); foreach ( [ 'wt2html', 'metadata' ] as $mode ) { foreach ( $this->stats->modes[$mode]->failList as $fail ) { if ( $options['update-tests'] || $fail['unexpected'] ) { $exp = '/(!!\s*test\s*' . preg_quote( $fail['testName'], '/' ) . '(?:(?!!!\s*end)[\s\S])*' . ')(' . preg_quote( $fail['expected'], '/' ) . ')/m'; $fail['noDsr'] = $fail['raw']; if ( $updateFormat === 'noDsr' && $mode !== 'metadata' ) { $fail['noDsr'] = $this->filterDsr( $fail['noDsr'] ); } $fileContent = preg_replace_callback( $exp, static function ( array $matches ) use ( $fail, $updateFormat ) { return $matches[1] . 
$fail[$updateFormat]; }, $fileContent ); } } } file_put_contents( $this->testFilePath, $fileContent ); } // print out the summary $options['reportSummary']( $options['modes'], $this->stats, $this->testFileName, $this->testFilter, $knownFailuresChanged, $options ); // we're done! // exit status 1 == uncaught exception $failures = $this->stats->allFailures(); $exitCode = ( $failures > 0 || $knownFailuresChanged ) ? 2 : 0; if ( ScriptUtils::booleanOption( $options['exit-zero'] ?? null ) ) { $exitCode = 0; } return [ 'exitCode' => $exitCode, 'stats' => $this->stats, 'file' => $this->testFileName, 'knownFailuresChanged' => $knownFailuresChanged ]; } /** * Run the test in all requested modes. * * @param Test $test * @param array $options */ private function processTest( Test $test, array $options ): void { if ( !$test->options ) { $test->options = []; } $testOpts = $test->options; // ensure that test is not skipped if it has a wikitext/edited or // html/parsoid+langconv section (but not a parsoid html section) $haveHtml = ( $test->parsoidHtml !== null ) || isset( $test->sections['wikitext/edited'] ) || isset( $test->sections['html/parsoid+standalone'] ) || isset( $test->sections['html/parsoid+langconv'] ) || self::getStandaloneMetadataSection( $test ) !== null; $hasHtmlParsoid = isset( $test->sections['html/parsoid'] ) || isset( $test->sections['html/parsoid+standalone'] ); // Skip test whose title does not match --filter // or which is disabled or php-only if ( $test->wikitext === null || !$haveHtml || ( isset( $testOpts['disabled'] ) && !$this->runDisabled ) || ( isset( $testOpts['php'] ) && !( $hasHtmlParsoid || $this->runPHP ) ) || !$test->matchesFilter( $this->testFilter ) ) { return; } $suppressErrors = !empty( $testOpts['parsoid']['suppressErrors'] ); $this->siteConfig->setLogger( $suppressErrors ? 
$this->siteConfig->suppressLogger : $this->defaultLogger ); $targetModes = $test->computeTestModes( $options['modes'] ); // Filter out html2* tests if we don't have an HTML section // (Most likely there's either a metadata section or a html/php // section but not html/parsoid section.) if ( $test->parsoidHtml === null && !isset( $test->sections['html/parsoid+standalone'] ) ) { $targetModes = array_diff( $targetModes, [ 'html2wt', 'html2html' ] ); } if ( !count( $targetModes ) ) { return; } // Honor language option $prefix = $testOpts['language'] ?? 'enwiki'; if ( !str_contains( $prefix, 'wiki' ) ) { // Convert to our enwiki.. format $prefix .= 'wiki'; } // Switch to requested wiki $this->mockApi->setApiPrefix( $prefix ); $this->siteConfig->reset(); // Add the title associated with the current test as a known title to // be consistent with the test runner in the core repo. $teardown = $this->addArticle( new Article( [ 'title' => $test->pageName(), 'text' => $test->wikitext ?? '', // Fake it 'type' => 'article', 'filename' => 'fake', 'lineNumStart' => 0, 'lineNumEnd' => 0, ] ) ); // We don't do any sanity checking or type casting on $test->config // values here: if you set a bogus value in a parser test it *should* // blow things up, so that you fix your test case. // Update $wgInterwikiMagic flag // default (undefined) setting is true $this->siteConfig->setInterwikiMagic( $test->config['wgInterwikiMagic'] ?? true ); // Update $wgEnableMagicLinks flag // default (undefined) setting is true for all types foreach ( [ "RFC", "ISBN", "PMID" ] as $v ) { $this->siteConfig->setMagicLinkEnabled( $v, ( $test->config['wgEnableMagicLinks'] ?? [] )[$v] ?? 
true ); } if ( isset( $testOpts['pmid-interwiki'] ) ) { $this->siteConfig->setupInterwikiMap( array_merge( self::PARSER_TESTS_IWPS, [ // Added to support T145590#8608455 [ 'prefix' => 'pmid', 'local' => true, 'url' => '//www.ncbi.nlm.nih.gov/pubmed/$1?dopt=Abstract', ] ] ) ); $teardown[] = fn () => $this->siteConfig->setupInterwikiMap( self::PARSER_TESTS_IWPS ); } // FIXME: Cite-specific hack $this->siteConfig->responsiveReferences = [ 'enabled' => $test->config['wgCiteResponsiveReferences'] ?? $this->siteConfig->responsiveReferences['enabled'], 'threshold' => $test->config['wgCiteResponsiveReferencesThreshold'] ?? $this->siteConfig->responsiveReferences['threshold'], ]; if ( isset( $test->config['wgNoFollowLinks'] ) ) { $this->siteConfig->setNoFollowConfig( 'nofollow', $test->config['wgNoFollowLinks'] ); } if ( isset( $test->config['wgNoFollowDomainExceptions'] ) ) { $this->siteConfig->setNoFollowConfig( 'domainexceptions', $test->config['wgNoFollowDomainExceptions'] ); } // FIXME: Redundant with $testOpts['externallinktarget'] below if ( isset( $test->config['wgExternalLinkTarget'] ) ) { $this->siteConfig->setExternalLinkTarget( $test->config['wgExternalLinkTarget'] ); } // Process test-specific options if ( $testOpts ) { Assert::invariant( !isset( $testOpts['extensions'] ), 'Cannot configure extensions in tests' ); $availableParsoidTestOpts = [ 'wrapSections' ]; foreach ( $availableParsoidTestOpts as $opt ) { if ( isset( $testOpts['parsoid'][$opt] ) ) { $this->envOptions[$opt] = $testOpts['parsoid'][$opt]; } } $this->siteConfig->disableSubpagesForNS( 0 ); if ( isset( $testOpts['subpage'] ) ) { $this->siteConfig->enableSubpagesForNS( 0 ); } $allowedPrefixes = [ '' ]; // all allowed if ( isset( $testOpts['wgallowexternalimages'] ) && !preg_match( '/^(1|true|)$/D', $testOpts['wgallowexternalimages'] ) ) { $allowedPrefixes = []; } $this->siteConfig->allowedExternalImagePrefixes = $allowedPrefixes; // Emulate PHP parser's tag hook to tunnel content past the sanitizer 
if ( isset( $testOpts['styletag'] ) ) { $this->siteConfig->registerParserTestExtension( new StyleTag() ); } if ( ( $testOpts['wgrawhtml'] ?? null ) === '1' ) { $this->siteConfig->registerParserTestExtension( new RawHTML() ); } if ( isset( $testOpts['thumbsize'] ) ) { $this->siteConfig->thumbsize = (int)$testOpts['thumbsize']; } if ( isset( $testOpts['annotations'] ) ) { $this->siteConfig->registerParserTestExtension( new DummyAnnotation() ); } if ( isset( $testOpts['i18next'] ) ) { $this->siteConfig->registerParserTestExtension( new I18nTag() ); } if ( isset( $testOpts['externallinktarget'] ) ) { $this->siteConfig->setExternalLinkTarget( $testOpts['externallinktarget'] ); } } // Ensure ParserHook is always registered! $this->siteConfig->registerParserTestExtension( new ParserHook() ); $runner = $this; $test->testAllModes( $targetModes, $options, Closure::fromCallable( [ $this, 'runTest' ] ) ); foreach ( $teardown as $t ) { $t(); } } /** * Run parser tests for the file with the provided options * * @param array $options * @return array */ public function run( array $options ): array { $this->runDisabled = ScriptUtils::booleanOption( $options['run-disabled'] ?? null ); $this->runPHP = ScriptUtils::booleanOption( $options['run-php'] ?? null ); $this->offsetType = $options['offsetType'] ?? 'byte'; // Test case filtering $this->testFilter = null; if ( isset( $options['filter'] ) || isset( $options['regex'] ) ) { $this->testFilter = [ 'raw' => $options['regex'] ?? 
$options['filter'], 'regex' => isset( $options['regex'] ), 'string' => isset( $options['filter'] ) ]; } $this->buildTests( $options ); // Trim test cases to the desired amount if ( isset( $options['maxtests'] ) ) { $n = $options['maxtests']; if ( $n > 0 ) { $this->testCases = array_slice( $this->testCases, 0, $n ); } } $defaultOpts = [ 'wrapSections' => false, 'nativeTemplateExpansion' => true, 'offsetType' => $this->offsetType, ]; ScriptUtils::setDebuggingFlags( $defaultOpts, $options ); ScriptUtils::setTemplatingAndProcessingFlags( $defaultOpts, $options ); if ( ScriptUtils::booleanOption( $options['quiet'] ?? null ) || ScriptUtils::booleanOption( $options['quieter'] ?? null ) ) { $defaultOpts['logLevels'] = [ 'fatal', 'error' ]; } // Save default logger so we can be reset it after temporarily // switching to the suppressLogger to suppress expected error messages. $this->defaultLogger = $this->siteConfig->getLogger(); /** * PORT-FIXME(T238722) * // Enable sampling to assert it's working while testing. * $parsoidConfig->loggerSampling = [ [ '/^warn(\/|$)/', 100 ] ]; * * // Override env's `setLogger` to record if we see `fatal` or `error` * // while running parser tests. (Keep it clean, folks! Use * // "suppressError" option on the test if error is expected.) 
* $env->setLogger = ( ( function ( $parserTests, $superSetLogger ) { * return function ( $_logger ) use ( &$parserTests ) { * call_user_func( 'superSetLogger', $_logger ); * $this->log = function ( $level ) use ( &$_logger, &$parserTests ) { * if ( $_logger !== $parserTests->suppressLogger && * preg_match( '/^(fatal|error)\b/', $level ) * ) { * $parserTests->stats->loggedErrorCount++; * } * return call_user_func_array( [ $_logger, 'log' ], $arguments ); * }; * }; * } ) ); */ $options['reportStart'](); // Run tests foreach ( $this->testCases as $test ) { try { $this->envOptions = $defaultOpts; $this->processTest( $test, $options ); } catch ( UnexpectedException $e ) { // Exit unexpected break; } } // Update knownFailures return $this->updateKnownFailures( $options ); } } PK ! ���� � I18nTag.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Wikimedia\Parsoid\DOM\DocumentFragment; use Wikimedia\Parsoid\Ext\ExtensionModule; use Wikimedia\Parsoid\Ext\ExtensionTagHandler; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; class I18nTag extends ExtensionTagHandler implements ExtensionModule { /** @inheritDoc */ public function sourceToDom( ParsoidExtensionAPI $extApi, string $content, array $args ): DocumentFragment { $tag = $extApi->extTag; if ( $tag->getName() === 'i18ntag' ) { return $extApi->createPageContentI18nFragment( $content, null ); } else { $frag = $extApi->getTopLevelDoc()->createDocumentFragment(); $span = $extApi->getTopLevelDoc()->createElement( 'span' ); $frag->appendChild( $span ); $span->appendChild( $extApi->getTopLevelDoc()->createTextNode( $content ) ); $extApi->addInterfaceI18nAttribute( $span, 'message', $args[0]->v, null ); return $frag; } } /** @inheritDoc */ public function getConfig(): array { return [ 'name' => 'I18nTag', 'tags' => [ [ 'name' => 'i18ntag', 'handler' => self::class ], [ 'name' => 'i18nattr', 'handler' => self::class ], ], ]; } } PK ! 
V��y� � StyleTag.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Wikimedia\Parsoid\Core\Sanitizer; use Wikimedia\Parsoid\DOM\DocumentFragment; use Wikimedia\Parsoid\Ext\ExtensionModule; use Wikimedia\Parsoid\Ext\ExtensionTagHandler; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; use Wikimedia\Parsoid\Utils\DOMCompat; class StyleTag extends ExtensionTagHandler implements ExtensionModule { /** @inheritDoc */ public function sourceToDom( ParsoidExtensionAPI $extApi, string $content, array $args ): DocumentFragment { $domFragment = $extApi->htmlToDom( '' ); $style = $domFragment->ownerDocument->createElement( 'style' ); DOMCompat::setInnerHTML( $style, $content ); Sanitizer::applySanitizedArgs( $extApi->getSiteConfig(), $style, $args ); $domFragment->appendChild( $style ); return $domFragment; } /** @inheritDoc */ public function getConfig(): array { return [ 'name' => 'StyleTag', 'tags' => [ [ 'name' => 'style', 'handler' => self::class ], ], ]; } } PK ! ��g�0@ 0@ TestUtils.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Error; use Exception; use Wikimedia\Parsoid\DOM\Comment; use Wikimedia\Parsoid\DOM\Element; use Wikimedia\Parsoid\DOM\Node; use Wikimedia\Parsoid\DOM\Text; use Wikimedia\Parsoid\Html2Wt\DOMNormalizer; use Wikimedia\Parsoid\Html2Wt\SerializerState; use Wikimedia\Parsoid\Html2Wt\WikitextSerializer; use Wikimedia\Parsoid\Mocks\MockEnv; use Wikimedia\Parsoid\Utils\ContentUtils; use Wikimedia\Parsoid\Utils\DOMCompat; use Wikimedia\Parsoid\Utils\DOMDataUtils; use Wikimedia\Parsoid\Utils\DOMUtils; use Wikimedia\Parsoid\Utils\Utils; use Wikimedia\Parsoid\Utils\WTUtils; /** * This class contains helper functions which should not be directly used * outside of Parsoid. 
*
 * Per T332457, most of the code in Wikimedia\Parsoid\ParserTests is
 * "for use in parser test runners only", including the core parser
 * test runner, but this file is "more internal" than that: core's
 * parser test runner should not use these helpers directly.
 *
 * @internal
 */
class TestUtils {
	/** @var mixed */
	private static $consoleColor;

	/**
	 * Little helper function for encoding XML entities.
	 *
	 * Currently a pass-through: the input is returned unchanged until a
	 * replacement encoder is found (see PORT-FIXME below).
	 *
	 * @param string $str
	 * @return string
	 */
	public static function encodeXml( string $str ): string {
		// PORT-FIXME: Find replacement
		// return entities::encodeXML( $str );
		return $str;
	}

	/**
	 * Strip the actual about id from the string
	 * @param string $str
	 * @return string
	 */
	public static function normalizeAbout( string $str ): string {
		return preg_replace( "/(about=\\\\?[\"']#mwt)\d+/", '$1', $str );
	}

	/**
	 * Specialized normalization of the PHP parser & Parsoid output, to ignore
	 * a few known-ok differences in parser test runs.
	 *
	 * This code is also used by the Parsoid round-trip testing code.
	 *
	 * If parsoidOnly is true-ish, we allow more markup through (like property
	 * and typeof attributes), for better checking of parsoid-only test cases.
	 *
	 * @param Element|string $domBody
	 * @param array $options
	 *  - parsoidOnly (bool) Is this test Parsoid Only? Optional. Default: false
	 *  - preserveIEW (bool) Should inter-element WS be preserved? Optional. Default: false
	 *  - hackyNormalize (bool) Apply the normalizer to the html. Optional. Default: false
	 *  - externallinktarget (mixed) Only read in parsoidOnly mode. When absent
	 *    or falsy, the nofollow / noreferrer noopener rel markers are stripped.
	 *    Optional.
	 * @return string
	 * @throws Error if the serialized DOM does not match the expected
	 *  serialization pattern
	 */
	public static function normalizeOut( $domBody, array $options = [] ): string {
		$parsoidOnly = !empty( $options['parsoidOnly'] );
		$preserveIEW = !empty( $options['preserveIEW'] );

		if ( !empty( $options['hackyNormalize'] ) ) {
			// Mock env obj
			//
			// FIXME: This is ugly.
			// (a) The normalizer shouldn't need the full env.
			//     Pass options and a logger instead?
			// (b) DOM diff code is using page-id for some reason.
			//     That feels like a carryover of 2013 era code.
			//     If possible, get rid of it and diff-mark dependency
			//     on the env object.
			$mockEnv = new MockEnv( [] );
			$mockSerializer = new WikitextSerializer( $mockEnv, [] );
			$mockState = new SerializerState( $mockSerializer, [ 'selserMode' => false ] );
			if ( is_string( $domBody ) ) {
				// Careful about the lifetime of this document
				$doc = ContentUtils::createDocument( $domBody );
				$domBody = DOMCompat::getBody( $doc );
			}
			DOMDataUtils::visitAndLoadDataAttribs( $domBody, [ 'markNew' => true ] );
			( new DOMNormalizer( $mockState ) )->normalize( $domBody );
			DOMDataUtils::visitAndStoreDataAttribs( $domBody );
		} elseif ( is_string( $domBody ) ) {
			$domBody = DOMCompat::getBody( DOMUtils::parseHTML( $domBody ) );
		}

		$stripTypeof = $parsoidOnly ?
			'/^mw:Placeholder$/' :
			'/^mw:(?:DisplaySpace|Placeholder|Nowiki|Transclusion|Entity)$/';
		$domBody = self::unwrapSpansAndNormalizeIEW( $domBody, $stripTypeof, $parsoidOnly, $preserveIEW );
		$out = ContentUtils::toXML( $domBody, [ 'innerXML' => true ] );
		// NOTE that we use a slightly restricted regexp for "attribute"
		// which works for the output of DOM serialization. For example,
		// we know that attribute values will be surrounded with double quotes,
		// not unquoted or quoted with single quotes. The serialization
		// algorithm is given by:
		// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments
		if ( !preg_match( '#[^<]*(<\w+(\s+[^\0-\cZ\s"\'>/=]+(="[^"]*")?)*/?>[^<]*)*#u', $out ) ) {
			throw new Error( 'normalizeOut input is not in standard serialized form' );
		}

		// Eliminate a source of indeterminacy from leaked strip markers
		$out = preg_replace( '/UNIQ-.*?-QINU/u', '', $out );

		// Normalize COINS ids -- they aren't stable
		$out = preg_replace( '/\s?id=[\'"]coins_\d+[\'"]/iu', '', $out );

		// maplink extension
		$out = preg_replace( '/\s?data-overlays=\'[^\']*\'/u', '', $out );

		// unnecessary attributes, we don't need to check these.
		$unnecessaryAttribs = 'data-parsoid|prefix|about|rev|datatype|inlist|usemap|vocab';
		if ( $parsoidOnly ) {
			$unnecessaryAttribs = "/ ($unnecessaryAttribs)=";
			$out = preg_replace( $unnecessaryAttribs . '\\\\?"[^\"]*\\\\?"/u', '', $out );
			$out = preg_replace( $unnecessaryAttribs . "\\\\?'[^\']*\\\\?'/u", '', $out ); // single-quoted variant
			// apos variant: the attribute value is delimited by the literal
			// entity text "&apos;", so the entity must be spelled out here
			// rather than written as a quote character.
			$out = preg_replace( $unnecessaryAttribs . '&apos;.*?&apos;/u', '', $out );
			// empty() rather than a bare read: the key is optional and
			// $options defaults to [], so a direct index would raise an
			// "Undefined array key" warning.
			if ( empty( $options['externallinktarget'] ) ) {
				$out = preg_replace( '/ nofollow/', '', $out );
				$out = str_replace( ' rel="nofollow"', '', $out );
				$out = preg_replace( '/ noreferrer noopener/', '', $out );
			}

			// strip self-closed <nowiki /> because we frequently test WTS
			// <nowiki> insertion by providing an html/parsoid section with the
			// <meta> tags stripped out, allowing the html2wt test to verify that
			// the <nowiki> is correctly added during WTS, while still allowing
			// the html2html and wt2html versions of the test to pass as a
			// validity check. If <meta>s were not stripped, these tests would all
			// have to be modified and split up. Not worth it at this time.
			// (see commit 689b22431ad690302420d049b10e689de6b7d426)
			$out = preg_replace( '#<span typeof="mw:Nowiki"></span>#', '', $out );

			return $out;
		}

		// Normalize headings by stripping out Parsoid-added ids so that we don't
		// have to add these ids to every parser test that uses headings.
		// We will test the id generation scheme separately via mocha tests.
		$out = preg_replace( '/(<h[1-6].*?) id="[^\"]*"([^>]*>)/u', '$1$2', $out );

		// strip meta/link elements
		$out = preg_replace(
			'#</?(?:meta|link)(?: [^\0-\cZ\s"\'>/=]+(?:=(?:"[^"]*"|\'[^\']*\'))?)*/?>#u',
			'', $out );
		// Ignore troublesome attributes.
		// In addition to attributes listed above, strip other Parsoid-inserted attributes
		// since these won't be present in legacy parser output.
		$attribTroubleRE = "/ ($unnecessaryAttribs|data-mw|resource|rel|property|class)=\\\\?";
		$out = preg_replace( $attribTroubleRE . '"[^"]*\\\\?"/u', '', $out );
		$out = preg_replace( $attribTroubleRE . "'[^']*\\\\?'/u", '', $out ); // single-quoted variant
		// strip typeof last
		$out = preg_replace( '/ typeof="[^\"]*"/u', '', $out );
		$out = self::stripParsoidIds( $out );
		$out = preg_replace( '/<span[^>]+about="[^"]*"[^>]*>/u', '', $out );
		$out = preg_replace( '#(\s)<span>\s*</span>\s*#u', '$1', $out );
		$out = preg_replace( '#<span>\s*</span>#u', '', $out );
		$out = preg_replace( '#(href=")(?:\.?\./)+#u', '$1', $out );
		// replace unnecessary URL escaping
		$out = preg_replace_callback( '/ href="[^"]*"/u', static function ( $m ) {
			return Utils::decodeURI( $m[0] );
		}, $out );
		// strip thumbnail size prefixes
		return preg_replace(
			'#(src="[^"]*?)/thumb(/[0-9a-f]/[0-9a-f]{2}/[^/]+)/[0-9]+px-[^"/]+(?=")#u', '$1$2', $out
		);
	}

	/**
	 * Strip Parsoid ID attributes (id="mwXX", used to associate NodeData) from an HTML string
	 * @param string $s
	 * @return string
	 */
	public static function stripParsoidIds( string $s ): string {
		return preg_replace( '/ id="mw([-\w]{2,})"/u', '', $s );
	}

	/**
	 * Unwrap every child <span> of $node whose typeof attribute matches
	 * $stripSpanTypeof. Does nothing when no pattern is given.
	 *
	 * @param Node $node
	 * @param ?string $stripSpanTypeof Regular expression tested against typeof
	 */
	private static function cleanSpans( Node $node, ?string $stripSpanTypeof ): void {
		if ( !$stripSpanTypeof ) {
			return;
		}

		$child = null;
		$next = null;
		for ( $child = $node->firstChild; $child; $child = $next ) {
			// Remember the sibling first: unwrapping removes $child from the tree.
			$next = $child->nextSibling;
			if ( $child instanceof Element && DOMCompat::nodeName( $child ) === 'span' &&
				preg_match( $stripSpanTypeof, DOMCompat::getAttribute( $child, 'typeof' ) ?? '' )
			) {
				self::unwrapSpan( $node, $child, $stripSpanTypeof );
			}
		}
	}

	/**
	 * Remove the span $node from $parent, keeping its children: nested
	 * matching spans are cleaned first, then the children are migrated
	 * into $parent and the span itself is removed.
	 *
	 * @param Node $parent
	 * @param Node $node
	 * @param ?string $stripSpanTypeof
	 */
	private static function unwrapSpan( Node $parent, Node $node, ?string $stripSpanTypeof ): void {
		// first recurse to unwrap any spans in the immediate children.
		self::cleanSpans( $node, $stripSpanTypeof );
		// now unwrap this span.
		DOMUtils::migrateChildren( $node, $parent, $node );
		$parent->removeChild( $node );
	}

	/**
	 * Whether $node is one of the elements that normalizeIEWVisitor()
	 * surrounds with newlines.
	 *
	 * @param ?Node $node
	 * @return bool
	 */
	private static function newlineAround( ?Node $node ): bool {
		return $node && preg_match(
			'/^(body|caption|div|dd|dt|li|p|table|tr|td|th|tbody|dl|ol|ul|h[1-6])$/D',
			DOMCompat::nodeName( $node )
		);
	}

	/**
	 * Recursive worker for unwrapSpansAndNormalizeIEW(). Mutates $node in
	 * place: collapses whitespace in text nodes, unwraps matching spans,
	 * drops comments (unless parsoidOnly), and puts newlines around the
	 * elements selected by newlineAround().
	 *
	 * @param Node $node
	 * @param array $opts Keys read here: preserveIEW, parsoidOnly,
	 *  stripSpanTypeof, stripLeadingWS, stripTrailingWS, inPRE
	 * @return Node The same $node that was passed in
	 */
	private static function normalizeIEWVisitor( Node $node, array $opts ): Node {
		$child = null;
		$next = null;
		$prev = null;
		if ( DOMCompat::nodeName( $node ) === 'pre' ) {
			// Preserve newlines in <pre> tags
			$opts['inPRE'] = true;
		}
		if ( !$opts['preserveIEW'] && $node instanceof Text ) {
			if ( !$opts['inPRE'] ) {
				$node->data = preg_replace( '/\s+/u', ' ', $node->data );
			}
			if ( $opts['stripLeadingWS'] ) {
				$node->data = preg_replace( '/^\s+/u', '', $node->data, 1 );
			}
			if ( $opts['stripTrailingWS'] ) {
				$node->data = preg_replace( '/\s+$/uD', '', $node->data, 1 );
			}
		}
		// unwrap certain SPAN nodes
		self::cleanSpans( $node, $opts['stripSpanTypeof'] );
		// now remove comment nodes
		if ( !$opts['parsoidOnly'] ) {
			for ( $child = $node->firstChild; $child; $child = $next ) {
				$next = $child->nextSibling;
				if ( $child instanceof Comment ) {
					$node->removeChild( $child );
				}
			}
		}
		// reassemble text nodes split by a comment or span, if necessary
		if ( $node instanceof Element ) {
			DOMCompat::normalize( $node );
		}
		// now recurse.
		if ( DOMCompat::nodeName( $node ) === 'pre' ) {
			// hack, since PHP adds a newline before </pre>
			$opts['stripLeadingWS'] = false;
			$opts['stripTrailingWS'] = true;
		} elseif (
			DOMCompat::nodeName( $node ) === 'span' &&
			DOMUtils::matchTypeOf( $node, '/^mw:/' )
		) {
			// SPAN is transparent; pass the strip parameters down to kids
		} else {
			$opts['stripLeadingWS'] = $opts['stripTrailingWS'] = self::newlineAround( $node );
		}
		$child = $node->firstChild;
		// Skip over the empty mw:FallbackId <span> and strip leading WS
		// on the other side of it.
		if ( $child && DOMUtils::isHeading( $node ) && WTUtils::isFallbackIdSpan( $child ) ) {
			$child = $child->nextSibling;
		}
		for ( ; $child; $child = $next ) {
			$next = $child->nextSibling;
			$newOpts = $opts;
			// Only the last child inherits trailing-whitespace stripping.
			$newOpts['stripTrailingWS'] = $opts['stripTrailingWS'] && !$child->nextSibling;
			self::normalizeIEWVisitor( $child, $newOpts );
			// Only the first visited child inherits leading-whitespace stripping.
			$opts['stripLeadingWS'] = false;
		}

		if ( $opts['inPRE'] || $opts['preserveIEW'] ) {
			return $node;
		}

		// now add newlines around appropriate nodes.
		for ( $child = $node->firstChild; $child; $child = $next ) {
			$prev = $child->previousSibling;
			$next = $child->nextSibling;
			if ( self::newlineAround( $child ) ) {
				if ( $prev instanceof Text ) {
					$prev->data = preg_replace( '/\s*$/uD', "\n", $prev->data, 1 );
				} else {
					$prev = $node->ownerDocument->createTextNode( "\n" );
					$node->insertBefore( $prev, $child );
				}
				if ( $next instanceof Text ) {
					$next->data = preg_replace( '/^\s*/u', "\n", $next->data, 1 );
				} else {
					$next = $node->ownerDocument->createTextNode( "\n" );
					$node->insertBefore( $next, $child->nextSibling );
				}
			}
		}
		return $node;
	}

	/**
	 * Normalize newlines in IEW to spaces instead.
	 *
	 * @param Element $body The document body node to normalize.
	 * @param ?string $stripSpanTypeof Regular expression to strip typeof attributes
	 * @param bool $parsoidOnly
	 * @param bool $preserveIEW
	 * @return Element
	 */
	public static function unwrapSpansAndNormalizeIEW(
		Element $body, ?string $stripSpanTypeof = null,
		bool $parsoidOnly = false, bool $preserveIEW = false
	): Element {
		$opts = [
			'preserveIEW' => $preserveIEW,
			'parsoidOnly' => $parsoidOnly,
			'stripSpanTypeof' => $stripSpanTypeof,
			'stripLeadingWS' => true,
			'stripTrailingWS' => true,
			'inPRE' => false
		];
		// clone body first, since we're going to destructively mutate it.
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return self::normalizeIEWVisitor( $body->cloneNode( true ), $opts );
	}

	/**
	 * Strip some php output we aren't generating.
*
     * @param string $html
     * @return string
     */
    public static function normalizePhpOutput( string $html ): string {
        // Keeps only the headline span's inner content ($1); an optional
        // trailing "mw-editsection" block is dropped with the wrapper.
        return preg_replace(
            // do not expect section editing for now
            '/<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span> ' .
            '*(<span class="mw-editsection"><span class="mw-editsection-bracket">' .
            '\[<\/span>.*?<span class="mw-editsection-bracket">\]<\/span><\/span>)?/u',
            '$1',
            $html
        );
    }

    /**
     * Normalize the expected parser output by parsing it using a HTML5 parser and
     * re-serializing it to HTML. Ideally, the parser would normalize inter-tag
     * whitespace for us. For now, we fake that by simply stripping all newlines.
     *
     * If any step throws an Exception, the failure is written to the error
     * log and the input is returned unchanged.
     *
     * @param string $source
     * @return string Normalized HTML, or $source itself if normalization failed.
     */
    public static function normalizeHTML( string $source ): string {
        try {
            $body = self::unwrapSpansAndNormalizeIEW(
                DOMCompat::getBody( DOMUtils::parseHTML( $source ) )
            );
            $html = ContentUtils::toXML( $body, [ 'innerXML' => true ] );

            // a few things we ignore for now..
            //  .replace(/\/wiki\/Main_Page/g, 'Main Page')
            // do not expect a toc for now
            $html = preg_replace(
                '/<div[^>]+?id="toc"[^>]*>\s*<div id="toctitle"[^>]*>[\s\S]+?<\/div>[\s\S]+?<\/div>\s*/u',
                '',
                $html
            );
            $html = self::normalizePhpOutput( $html );
            // remove empty span tags
            $html = preg_replace( '/(\s)<span>\s*<\/span>\s*/u', '$1', $html );
            $html = preg_replace( '/<span>\s*<\/span>/u', '', $html );
            // general class and titles, typically on links
            $html = preg_replace( '/ (class|rel|about|typeof)="[^"]*"/', '', $html );
            // strip red link markup, we do not check if a page exists yet
            $html = preg_replace(
                "#/index.php\\?title=([^']+?)&action=edit&redlink=1#", '/wiki/$1', $html
            );
            // strip red link title info
            $html = preg_replace(
                "/ \\((?:page does not exist|encara no existeix|bet ele jaratılmaǵan|lonkásá ezalí tɛ̂)\\)/",
                '',
                $html
            );
            // the expected html has some extra space in tags, strip it
            $html = preg_replace( '/<a +href/', '<a href', $html );
            $html = preg_replace( '#href="/wiki/#', 'href="', $html );
            $html = preg_replace( '/" +>/', '">', $html );
            // parsoid always add a page name to lonely fragments
            $html = preg_replace( '/href="#/', 'href="Main Page#', $html );
            // replace unnecessary URL escaping
            $html = preg_replace_callback(
                '/ href="[^"]*"/',
                static function ( $m ) {
                    return Utils::decodeURI( $m[0] );
                },
                $html
            );
            // strip empty spans (again: removing attributes above can leave
            // behind spans that have only now become empty)
            $html = preg_replace( '#(\s)<span>\s*</span>\s*#u', '$1', $html );
            return preg_replace( '#<span>\s*</span>#u', '', $html );
        } catch ( Exception $e ) {
            // Fixed: the message used to read "failed on<source>" because the
            // separating space was missing.
            error_log( 'normalizeHTML failed on ' . $source . ' with the following error: ' . $e );
            return $source;
        }
    }

    /**
     * Wrap $string in console color codes when the optional
     * php-console-color library is installed and reports color support;
     * otherwise return $string unchanged.
     *
     * @param string $string
     * @param string $color
     * @param bool $inverse When true, the 'reverse' style is applied as well.
     * @return string
     * @suppress PhanUndeclaredClassMethod
     * @suppress UnusedSuppression
     */
    public static function colorString(
        string $string, string $color, bool $inverse = false
    ): string {
        if ( $inverse ) {
            $color = [ $color, 'reverse' ];
        }
        if ( !self::$consoleColor ) {
            // Attempt to instantiate this class to determine if the
            // (optional) php-console-color library is installed.
            try {
                self::$consoleColor = new \PHP_Parallel_Lint\PhpConsoleColor\ConsoleColor();
            } catch ( Error $e ) {
                /* fall back to no-color mode */
            }
        }
        if ( self::$consoleColor && self::$consoleColor->isSupported() ) {
            return self::$consoleColor->apply( $color, $string );
        } else {
            return $string;
        }
    }
}
PK ! �" Item.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

class Item {
    /** @var string The type of this item. */
    public $type;
    /** @var string The filename containing this item. */
    public $filename;
    /** @var int The line number of the start of this item. */
    public $lineNumStart;
    /** @var int The line number of the end of this item. */
    public $lineNumEnd;
    /** @var ?string An optional comment describing this item. */
    public $comment;

    /**
     * @param array $props Common item properties, including type.
* @param ?string $comment Optional comment describing the item
     */
    public function __construct( array $props, ?string $comment = null ) {
        // Copy the four common properties straight out of $props.
        foreach ( [ 'type', 'filename', 'lineNumStart', 'lineNumEnd' ] as $field ) {
            $this->$field = $props[$field];
        }
        // A falsy comment (null or empty string) is stored as null.
        $this->comment = $comment ? $comment : null;
    }

    /**
     * Build a human-readable error message that points at this item.
     * @param string $desc The error description.
     * @param ?string $text Optional additional context.
     * @return string The error message string, including the line number and
     *   filename of this item.
     */
    public function errorMsg( string $desc, ?string $text = null ): string {
        if ( $this->lineNumEnd > $this->lineNumStart ) {
            $where = "lines {$this->lineNumStart}-{$this->lineNumEnd}";
        } else {
            $where = "line {$this->lineNumStart}";
        }
        // trim path in future?
        $message = "$desc on $where of {$this->filename}";
        if ( $text ) {
            $message .= ": $text";
        }
        return $message;
    }

    /**
     * Raise an \Error whose message is errorMsg( $desc, $text ).
     * @param string $desc The error description.
     * @param ?string $text Optional additional context.
     * @throws \Error
     * @return never
     */
    public function error( string $desc, ?string $text = null ) {
        $message = $this->errorMsg( $desc, $text );
        throw new \Error( $message );
    }
}
PK ! Fz��Lu Lu Test.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

use Error;
use Psr\Log\LogLevel;
use Wikimedia\Alea\Alea;
use Wikimedia\Assert\Assert;
use Wikimedia\Parsoid\DOM\Document;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\Utils\ContentUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;
use Wikimedia\Parsoid\Utils\DOMUtils;
use Wikimedia\Parsoid\Utils\PHPUtils;
use Wikimedia\Parsoid\Utils\Utils;
use Wikimedia\Parsoid\Utils\WTUtils;

/**
 * Represents a parser test
 */
class Test extends Item {

    // 'testAllModes' and 'TestRunner::runTest' assume that test modes are added
    // in this order for caching to work properly (and even when test objects are cloned).
// This ordering is enforced in computeTestModes.
    public const ALL_TEST_MODES = [ 'wt2html', 'wt2wt', 'html2html', 'html2wt', 'selser' ];

    /* --- These are test properties from the test file --- */

    /** @var ?string This is the test name, not page title for the test */
    public $testName = null;

    /** @var array */
    public $options = [];

    /** @var array */
    public $config = [];

    /** @var array */
    public $sections = [];

    /** @var array Known failures for this test, indexed by testing mode. */
    public $knownFailures = [];

    /* --- These next are computed based on an ordered list of preferred
     * section keys --- */

    /** @var ?string */
    public $wikitext = null;

    /** @var ?string */
    public $parsoidHtml = null;

    /** @var ?string */
    public $legacyHtml = null;

    /* --- The rest below are computed by Parsoid while running tests -- */

    /** @var string */
    private $pageName;

    /** @var int */
    private $pageNs;

    /** @var array */
    public $selserChangeTrees = [];

    /** @var ?array */
    public $changetree = null;

    /** @var bool */
    public $duplicateChange = false;

    /** @var ?string */
    public $seed = null;

    /** @var ?string */
    public $resultWT = null;

    /** @var ?bool */
    public $wt2wtPassed = null;

    /** @var ?string */
    public $wt2wtResult = null;

    /** @var ?string */
    public $selser = null;

    /** @var ?string */
    public $changedHTMLStr = null;

    /** @var ?string */
    public $cachedBODYstr = null;

    /** @var ?string */
    public $cachedWTstr = null;

    /** @var ?string */
    public $cachedNormalizedHTML = null;

    /** @var array */
    public $time = [];

    private const DIRECT_KEYS = [
        'type',
        'filename',
        'lineNumStart',
        'lineNumEnd',
        'testName',
        'options',
        'config',
    ];
    private const WIKITEXT_KEYS = [
        'wikitext',
        # deprecated
        'input',
    ];
    private const LEGACY_HTML_KEYS = [
        'html/php',
        'html/*',
        'html',
        # deprecated
        'result',
        'html/php+tidy',
        'html/*+tidy',
        'html+tidy',
    ];
    private const PARSOID_HTML_KEYS = [
        'html/parsoid',
        'html/*',
        'html',
        # deprecated
        'result',
        'html/*+tidy',
        'html+tidy',
    ];
    private const WARN_DEPRECATED_KEYS = [
        'input',
        'result',
        'html/php+tidy',
        'html/*+tidy',
        'html+tidy',
        'html/php+untidy',
        'html+untidy',
    ];

    /**
     * @param array $testProperties key-value mapping of properties
     * @param array $knownFailures Known failures for this test, indexed by testing mode
     * @param ?string $comment Optional comment describing the test
     * @param ?callable $warnFunc Optional callback used to emit
     *   deprecation warnings.
     */
    public function __construct(
        array $testProperties,
        array $knownFailures = [],
        ?string $comment = null,
        ?callable $warnFunc = null
    ) {
        parent::__construct( $testProperties, $comment );
        $this->knownFailures = $knownFailures;

        // Keys listed in DIRECT_KEYS become object properties; every other
        // key is stored as a named test section.
        foreach ( $testProperties as $key => $value ) {
            if ( in_array( $key, self::DIRECT_KEYS, true ) ) {
                $this->$key = $value;
            } else {
                if ( isset( $this->sections[$key] ) ) {
                    $this->error( "Duplicate test section", $key );
                }
                $this->sections[$key] = $value;
            }
        }

        # Priority order for wikitext, legacyHtml, and parsoidHtml properties
        $cats = [
            'wikitext' => self::WIKITEXT_KEYS,
            'legacyHtml' => self::LEGACY_HTML_KEYS,
            'parsoidHtml' => self::PARSOID_HTML_KEYS,
        ];
        foreach ( $cats as $prop => $keys ) {
            // The first section present, in priority order, wins.
            foreach ( $keys as $key ) {
                if ( isset( $this->sections[$key] ) ) {
                    $this->$prop = $this->sections[$key];
                    break;
                }
            }
        }

        # Deprecation warnings
        if ( $warnFunc ) {
            foreach ( self::WARN_DEPRECATED_KEYS as $key ) {
                if ( isset( $this->sections[$key] ) ) {
                    $warnFunc( $this->errorMsg(
                        "Parser test section $key is deprecated"
                    ) );
                }
            }
        }
    }

    /**
     * Check this test's name against a test filter.
     *
     * A 'regex' filter matches with preg_match(), using '/<raw>/' when a
     * 'raw' pattern is present and the 'regex' entry itself otherwise. A
     * 'string' filter matches when 'raw' occurs as a substring of the name.
     * An empty filter, or one with neither entry set, matches everything.
     *
     * @param array $testFilter Test Filter as set in TestRunner
     * @return bool if test matches the filter
     */
    public function matchesFilter( $testFilter ): bool {
        if ( !$testFilter ) {
            return true; // Trivial match
        }
        // $testName is declared ?string. With strict_types enabled, handing
        // null to preg_match()/strpos() throws a TypeError, so an unnamed
        // test is matched as the empty string instead.
        $testName = $this->testName ?? '';
        if ( !empty( $testFilter['regex'] ) ) {
            $regex = isset( $testFilter['raw'] ) ?
                ( '/' . $testFilter['raw'] . '/' ) : $testFilter['regex'];
            return (bool)preg_match( $regex, $testName );
        }
        if ( !empty( $testFilter['string'] ) ) {
            return strpos( $testName, $testFilter['raw'] ) !== false;
        }
        return true; // Trivial match because of a bad test filter
    }

    /**
     * Page title to use for this test: the 'title' option when present,
     * otherwise 'Parser test'. An array-valued 'title' option also falls back
     * to 'Parser test'. The result is memoized in $this->pageName.
     *
     * @return string
     */
    public function pageName(): string {
        if ( !$this->pageName ) {
            $this->pageName = $this->options['title'] ?? 'Parser test';
            if ( is_array( $this->pageName ) ) {
                $this->pageName = 'Parser test';
            }
        }
        return $this->pageName;
    }

    /**
     * Given a test runner that runs in a specific set of test modes ($testRunnerModes)
     * compute the list of valid test modes based on what modes have been enabled on the
     * test itself.
     *
     * @param array $testRunnerModes What test modes is the test runner running with?
     * @return array Modes in ALL_TEST_MODES order; keys are those of
     *   ALL_TEST_MODES (array_intersect() preserves keys), so the result is
     *   not necessarily a 0-based list.
     */
    public function computeTestModes( array $testRunnerModes ): array {
        // Ensure we compute valid modes in the order specified in ALL_TEST_MODES since
        // caching in the presence of test cloning relies on tests running in this order.
        $validModes = array_intersect( self::ALL_TEST_MODES, $testRunnerModes );

        // Filter for modes the test has opted in for
        $testModes = $this->options['parsoid']['modes'] ?? null;
        if ( $testModes ) {
            $selserEnabled = in_array( 'selser', $testRunnerModes, true );
            // Avoid filtering out the selser test
            if ( $selserEnabled &&
                !in_array( 'selser', $testModes, true ) &&
                in_array( 'wt2wt', $testModes, true )
            ) {
                $testModes[] = 'selser';
            }

            $validModes = array_intersect( $validModes, $testModes );
        }

        return $validModes;
    }

    // Random string used as selser comment content
    public const STATIC_RANDOM_STRING = 'ahseeyooxooZ8Oon0boh';

    /**
     * Apply manually-specified changes, which are provided in a pseudo-jQuery
     * format.
     *
     * @param Document $doc
     */
    public function applyManualChanges( Document $doc ) {
        $changes = $this->options['parsoid']['changes'];
        $err = null;
        // changes are specified using jquery methods.
        //  [x,y,z...] becomes $(x)[y](z....)
// that is, ['fig', 'attr', 'width', '120'] is interpreted as
        //   $('fig').attr('width', '120')
        // See http://api.jquery.com/ for documentation of these methods.
        // "contents" as second argument calls the jquery .contents() method
        // on the results of the selector in the first argument, which is
        // a good way to get at the text and comment nodes

        // Shared implementation of 'after' and 'before': parse $html in a
        // container that suits $node's parent, then move the parsed nodes into
        // that parent in front of $ref (a null $ref is passed through as-is).
        $insertSiblings = static function ( Node $node, string $html, ?Node $ref ): void {
            $parent = $node->parentNode;
            $parentName = DOMCompat::nodeName( $parent );
            if ( $parentName === 'tbody' ) {
                $tbl = $node->ownerDocument->createElement( 'table' );
                DOMCompat::setInnerHTML( $tbl, $html );
                // <tbody> is implicitly added when inner html is set to <tr>..</tr>
                $source = $tbl->firstChild;
            } elseif ( $parentName === 'tr' ) {
                $tbl = $node->ownerDocument->createElement( 'table' );
                DOMCompat::setInnerHTML( $tbl, '<tbody><tr></tr></tbody>' );
                $tr = $tbl->firstChild->firstChild;
                '@phan-var Element $tr'; // @var Element $tr
                DOMCompat::setInnerHTML( $tr, $html );
                $source = $tr;
            } else {
                $div = $node->ownerDocument->createElement( 'div' );
                DOMCompat::setInnerHTML( $div, $html );
                $source = $div;
            }
            DOMUtils::migrateChildren( $source, $parent, $ref );
        };

        $jquery = [
            'after' => static function ( Node $node, string $html ) use ( $insertSiblings ) {
                $insertSiblings( $node, $html, $node->nextSibling );
            },
            'append' => static function ( Node $node, string $html ) {
                if ( DOMCompat::nodeName( $node ) === 'tr' ) {
                    $tbl = $node->ownerDocument->createElement( 'table' );
                    DOMCompat::setInnerHTML( $tbl, $html );
                    // <tbody> is implicitly added when inner html is set to <tr>..</tr>
                    DOMUtils::migrateChildren( $tbl->firstChild, $node );
                } else {
                    $div = $node->ownerDocument->createElement( 'div' );
                    DOMCompat::setInnerHTML( $div, $html );
                    DOMUtils::migrateChildren( $div, $node );
                }
            },
            'attr' => static function ( Node $node, string $name, string $val ) {
                '@phan-var Element $node'; // @var Element $node
                $node->setAttribute( $name, $val );
            },
            'before' => static function ( Node $node, string $html ) use ( $insertSiblings ) {
                $insertSiblings( $node, $html, $node );
            },
            'removeAttr' => static function ( Node $node, string $name ) {
                '@phan-var Element $node'; // @var Element $node
                $node->removeAttribute( $name );
            },
            'removeClass' => static function ( Node $node, string $c ) {
                '@phan-var Element $node'; // @var Element $node
                DOMCompat::getClassList( $node )->remove( $c );
            },
            'addClass' => static function ( Node $node, string $c ) {
                '@phan-var Element $node'; // @var Element $node
                DOMCompat::getClassList( $node )->add( $c );
            },
            'text' => static function ( Node $node, string $t ) {
                $node->textContent = $t;
            },
            'html' => static function ( Node $node, string $h ) {
                '@phan-var Element $node'; // @var Element $node
                DOMCompat::setInnerHTML( $node, $h );
            },
            'remove' => static function ( Node $node, ?string $optSelector = null ) {
                // jquery lets us specify an optional selector to further
                // restrict the removed elements.
                // text nodes don't have the "querySelectorAll" method, so
                // just include them by default (jquery excludes them, which
                // is less useful)
                if ( !$optSelector ) {
                    $what = [ $node ];
                } elseif ( !( $node instanceof Element ) ) {
                    $what = [ $node ];/* text node hack! */
                } else {
                    '@phan-var Element $node'; // @var Element $node
                    $what = DOMCompat::querySelectorAll( $node, $optSelector );
                }
                foreach ( $what as $target ) {
                    if ( $target->parentNode ) {
                        $target->parentNode->removeChild( $target );
                    }
                }
            },
            'empty' => static function ( Node $node ) {
                '@phan-var Element $node'; // @var Element $node
                DOMCompat::replaceChildren( $node );
            },
            'wrap' => static function ( Node $node, string $w ) {
                $frag = $node->ownerDocument->createElement( 'div' );
                DOMCompat::setInnerHTML( $frag, $w );
                $first = $frag->firstChild;
                $node->parentNode->replaceChild( $first, $node );
                // Descend to the innermost first child of the wrapper and
                // re-attach the wrapped node there.
                while ( $first->firstChild ) {
                    $first = $first->firstChild;
                }
                $first->appendChild( $node );
            }
        ];

        $body = DOMCompat::getBody( $doc );

        // Changes are applied in order; the first bad one is recorded in $err
        // and stops processing (later changes were always skipped once an
        // error had been recorded).
        foreach ( $changes as $change ) {
            if ( !is_array( $change ) || count( $change ) < 2 ) {
                // json_encode() renders the offending change; concatenating
                // the array directly only produced the word "Array".
                $err = new Error( 'bad change: ' . json_encode( $change ) );
                break;
            }
            // use document.querySelectorAll as a poor man's $(...)
            $els = PHPUtils::iterable_to_array(
                DOMCompat::querySelectorAll( $body, $change[0] )
            );
            if ( !count( $els ) ) {
                $err = new Error(
                    $change[0] . ' did not match any elements: ' . DOMCompat::getOuterHTML( $body )
                );
                break;
            }
            if ( $change[1] === 'contents' ) {
                // Drop the selector so the mutator name moves to index 1, and
                // operate on the child nodes of the matched elements.
                $change = array_slice( $change, 1 );
                $acc = [];
                foreach ( $els as $el ) {
                    PHPUtils::pushArray( $acc, iterator_to_array( $el->childNodes ) );
                }
                $els = $acc;
            }
            // After a 'contents' shift there may be no mutator name left.
            $mutator = $change[1] ?? '';
            $fn = is_string( $mutator ) ? ( $jquery[$mutator] ?? null ) : null;
            if ( !$fn ) {
                $err = new Error(
                    'bad mutator function: ' .
                    ( is_string( $mutator ) ? $mutator : json_encode( $mutator ) )
                );
                break;
            }
            $args = array_slice( $change, 2 );
            foreach ( $els as $el ) {
                $fn( $el, ...$args );
            }
        }

        if ( $err ) {
            print TestUtils::colorString( (string)$err, "red" ) . "\n";
            throw $err;
        }
    }

    /**
     * Make changes to a DOM in order to run a selser test on it.
*
     * Change codes handled below: 0 = no change, 1 = set a random
     * 'data-foobar' attribute on the child, 2 = insert a new node before the
     * child, 3 = remove the child, 4 = insert a new node and then remove the
     * child. A nested array recurses into the child's own children. The
     * special tree [ 5 ] appends a comment to the body instead, and an empty
     * tree leaves the DOM untouched.
     *
     * @param array $dumpOpts Reads the 'logger' and 'dom:post-changes' entries.
     * @param Document $doc Document whose body is edited in place.
     * @param array $changelist Change tree to apply; also stored in $this->changetree.
     */
    public function applyChanges( array $dumpOpts, Document $doc, array $changelist ) {
        $logger = $dumpOpts['logger'] ?? null;
        // Seed the random-number generator based on the item title and changelist
        $alea = new Alea( ( json_encode( $changelist ) ) . ( $this->testName ?? '' ) );

        // Keep the changes in the test object
        // to check for duplicates while building tasks
        $this->changetree = $changelist;

        // Helper function for getting a random string
        $randomString = static function () use ( &$alea ): string {
            return base_convert( (string)$alea->uint32(), 10, 36 );
        };

        $insertNewNode = static function ( Node $n ) use ( $randomString ): void {
            // Insert a text node, if not in a fosterable position.
            // If in foster position, enter a comment.
            // In either case, dom-diff should register a new node
            $str = $randomString();
            $ownerDoc = $n->ownerDocument;
            $wrapperName = null;
            $newNode = null;

            // Don't separate legacy IDs from their H? node.
            if ( WTUtils::isFallbackIdSpan( $n ) ) {
                $n = $n->nextSibling ?? $n->parentNode;
            }

            // For these container nodes, it would be buggy
            // to insert text nodes as children
            switch ( DOMCompat::nodeName( $n->parentNode ) ) {
                case 'ol':
                case 'ul':
                    $wrapperName = 'li';
                    break;
                case 'dl':
                    $wrapperName = 'dd';
                    break;
                case 'tr':
                    // Reuse the cell type of a neighbouring cell, defaulting to 'td'.
                    $prev = DOMCompat::getPreviousElementSibling( $n );
                    if ( $prev ) {
                        // TH or TD
                        $wrapperName = DOMCompat::nodeName( $prev );
                    } else {
                        $next = DOMCompat::getNextElementSibling( $n );
                        if ( $next ) {
                            // TH or TD
                            $wrapperName = DOMCompat::nodeName( $next );
                        } else {
                            $wrapperName = 'td';
                        }
                    }
                    break;
                case 'body':
                    $wrapperName = 'p';
                    break;
                default:
                    if ( WTUtils::isBlockNodeWithVisibleWT( $n ) ) {
                        $wrapperName = 'p';
                    }
                    break;
            }

            if ( DOMUtils::isFosterablePosition( $n ) && DOMCompat::nodeName( $n->parentNode ) !== 'tr' ) {
                $newNode = $ownerDoc->createComment( $str );
            } elseif ( $wrapperName ) {
                $newNode = $ownerDoc->createElement( $wrapperName );
                $newNode->appendChild( $ownerDoc->createTextNode( $str ) );
            } else {
                $newNode = $ownerDoc->createTextNode( $str );
            }

            $n->parentNode->insertBefore( $newNode, $n );
        };

        $removeNode = static function ( Node $n ): void {
            $n->parentNode->removeChild( $n );
        };

        $applyChangesInternal = static function ( Node $node, array $changes ) use (
            &$applyChangesInternal, $removeNode, $insertNewNode,
            $randomString, $logger
        ): void {
            if ( count( $node->childNodes ) < count( $changes ) ) {
                throw new Error( "Error: more changes than nodes to apply them to!" );
            }

            // Clone array since we are mutating the children in the changes loop below
            $nodeArray = [];
            foreach ( $node->childNodes as $n ) {
                $nodeArray[] = $n;
            }

            // $changes[$i] applies to the i-th child as it was before any edits.
            foreach ( $changes as $i => $change ) {
                $child = $nodeArray[$i];

                if ( is_array( $change ) ) {
                    $applyChangesInternal( $child, $change );
                } else {
                    switch ( $change ) {
                        // No change
                        case 0:
                            break;

                        // Change node wrapper
                        // (sufficient to insert a random attr)
                        case 1:
                            if ( $child instanceof Element ) {
                                $child->setAttribute( 'data-foobar', $randomString() );
                            } elseif ( $logger ) {
                                $logger->log(
                                    LogLevel::ERROR,
                                    'Buggy changetree. changetype 1 (modify attribute)' .
                                    ' cannot be applied on text/comment nodes.'
                                );
                            }
                            break;

                        // Insert new node before child
                        case 2:
                            $insertNewNode( $child );
                            break;

                        // Delete tree rooted at child
                        case 3:
                            $removeNode( $child );
                            break;

                        // Change tree rooted at child
                        case 4:
                            $insertNewNode( $child );
                            $removeNode( $child );
                            break;
                    }
                }
            }
        };

        $body = DOMCompat::getBody( $doc );

        if ( $logger && ( $dumpOpts['dom:post-changes'] ?? false ) ) {
            $logger->log( LogLevel::ERROR, "----- Original DOM -----" );
            $logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
        }

        if ( $this->changetree === [ 5 ] ) {
            // Hack so that we can work on the parent node rather than just the
            // children: Append a comment with known content. This is later
            // stripped from the output, and the result is compared to the
            // original wikitext rather than the non-selser wt2wt result.
            $body->appendChild( $doc->createComment( self::STATIC_RANDOM_STRING ) );
        } elseif ( $this->changetree !== [] ) {
            $applyChangesInternal( $body, $this->changetree );
        }

        if ( $logger && ( $dumpOpts['dom:post-changes'] ?? false ) ) {
            $logger->log( LogLevel::ERROR, "----- Change Tree -----" );
            $logger->log( LogLevel::ERROR, json_encode( $this->changetree ) );
            $logger->log( LogLevel::ERROR, "----- Edited DOM -----" );
            $logger->log( LogLevel::ERROR, ContentUtils::dumpDOM( $body, '', [ 'quiet' => true ] ) );
        }
    }

    /**
     * For a selser test, check if a change we could make has already been
     * tested in this round.
     * Used for generating unique tests.
     *
     * The candidate is compared against each entry of $this->selserChangeTrees
     * with loose (==) array equality.
     *
     * @param array $change Candidate change.
     * @return bool
     */
    public function isDuplicateChangeTree( array $change ): bool {
        $allChanges = $this->selserChangeTrees;
        foreach ( $allChanges as $c ) {
            if ( $c == $change ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Generate a change object for a document, so we can apply it during a selser test.
     *
     * The generator is seeded from $this->seed and $this->testName.
     *
     * @param Document $doc
     * @return array The list of changes.
     */
    public function generateChanges( Document $doc ): array {
        $alea = new Alea( ( $this->seed ?? '' ) . ( $this->testName ?? '' ) );

        /**
         * If no node in the DOM subtree rooted at 'node' is editable in the VE,
         * this function should return false.
         *
         * Currently true for template and extension content, and for entities.
         */
        $domSubtreeIsEditable = static function ( Node $node ): bool {
            return !( $node instanceof Element ) ||
                ( !WTUtils::isEncapsulationWrapper( $node ) &&
                    // These wrappers can only be edited in restricted ways.
                    // Simpler to just block all editing on them.
                    !DOMUtils::matchTypeOf( $node,
                        '#^mw:(Entity|Placeholder|DisplaySpace|Annotation|ExtendedAnnRange)(/|$)#'
                    ) &&
                    // Deleting these wrappers is tantamount to removing the
                    // references-tag encapsulation wrappers, which results in errors.
                    !DOMUtils::hasClass( $node, 'mw-references-wrap' )
                );
        };

        /**
         * Even if a DOM subtree might be editable in the VE,
         * certain nodes in the DOM might not be directly editable.
         *
         * Currently, this restriction is only applied to DOMs generated for images.
         * Possibly, there are other candidates.
*/
        $nodeIsUneditable = static function ( Node $node ) use ( &$nodeIsUneditable ): bool {
            // Text and comment nodes are always editable
            if ( !( $node instanceof Element ) ) {
                return false;
            }

            if ( WTUtils::isMarkerAnnotation( $node ) ) {
                return true;
            }

            // - File wrapper is an uneditable elt.
            // - Any node nested in a file wrapper that is not a figcaption
            //   is an uneditable elt.
            // - Entity spans are uneditable as well
            // - Placeholder is defined to be uneditable in the spec
            // - ExtendedAnnRange is an "unknown" type in the spec, and hence uneditable
            return DOMUtils::matchTypeOf( $node,
                    '#^mw:(File|Entity|Placeholder|DisplaySpace|ExtendedAnnRange)(/|$)#' ) || (
                DOMCompat::nodeName( $node ) !== 'figcaption' &&
                $node->parentNode &&
                DOMCompat::nodeName( $node->parentNode ) !== 'body' &&
                $nodeIsUneditable( $node->parentNode )
            );
        };

        $defaultChangeType = 0;

        $hasChangeMarkers = static function ( array $list ) use (
            &$hasChangeMarkers, $defaultChangeType
        ): bool {
            // If all recorded changes are 0, then nothing has been modified
            foreach ( $list as $c ) {
                if ( ( is_array( $c ) && $hasChangeMarkers( $c ) ) ||
                    ( !is_array( $c ) && $c !== $defaultChangeType )
                ) {
                    return true;
                }
            }
            return false;
        };

        $genChangesInternal = static function ( Node $node ) use (
            &$genChangesInternal, &$hasChangeMarkers,
            $domSubtreeIsEditable, $nodeIsUneditable, $alea,
            $defaultChangeType
        ): array {
            // Seed the random-number generator based on the item title
            $changelist = [];
            $children = $node->childNodes ? iterator_to_array( $node->childNodes ) : [];
            foreach ( $children as $child ) {
                $changeType = $defaultChangeType;
                if ( $domSubtreeIsEditable( $child ) ) {
                    if ( $nodeIsUneditable( $child ) || $alea->random() < 0.5 ) {
                        // This call to random is a hack to preserve the current
                        // determined state of our knownFailures entries after a
                        // refactor.
                        $alea->uint32();
                        $changeType = $genChangesInternal( $child );
                        // `$genChangesInternal` returns an array, which can be
                        // empty. Revert to the `$defaultChangeType` if that's
                        // the case.
                        if ( count( $changeType ) === 0 ) {
                            $changeType = $defaultChangeType;
                        }
                    } else {
                        // NOTE: floor() yields floats here. This is left as-is
                        // on purpose: the values feed the seed computed in
                        // applyChanges() via json_encode(), so changing their
                        // type could change the generated tests.
                        if ( !( $child instanceof Element ) ) {
                            // Text or comment node -- valid changes: 2, 3, 4
                            // since we cannot set attributes on these
                            $changeType = floor( $alea->random() * 3 ) + 2;
                        } else {
                            $changeType = floor( $alea->random() * 4 ) + 1;
                        }
                    }
                }

                $changelist[] = $changeType;
            }

            return $hasChangeMarkers( $changelist ) ? $changelist : [];
        };

        $body = DOMCompat::getBody( $doc );

        // Retry (at most 1000 times) until a non-empty tree that has not
        // already been tested in this round is produced.
        $changetree = null;
        $numAttempts = 0;
        do {
            $numAttempts++;
            $changetree = $genChangesInternal( $body );
        } while (
            $numAttempts < 1000 &&
            ( count( $changetree ) === 0 ||
                $this->isDuplicateChangeTree( $changetree ) )
        );

        // The loop ends either because the tree is usable or because the
        // attempts ran out. Test the tree itself rather than the attempt
        // counter, so a usable tree found on the final attempt is not
        // wrongly flagged as a failure to generate a change.
        if ( count( $changetree ) === 0 || $this->isDuplicateChangeTree( $changetree ) ) {
            // couldn't generate a change ... marking as such
            $this->duplicateChange = true;
        }

        return $changetree;
    }

    /**
     * FIXME: clean up this mess!
     * - generate all changes at once (generateChanges should return a tree really)
     *   rather than going to all these lengths of interleaving change
     *   generation with tests
     * - set up the changes in item directly rather than juggling around with
     *   indexes etc
     * - indicate whether to compare to wt2wt or the original input
     * - maybe make a full selser test one method that uses others rather than the
     *   current chain of methods that sometimes do something for selser
     *
     * @param array $targetModes
     * @param array $runnerOpts
     * @param callable $runTest
     */
    public function testAllModes( // phpcs:ignore MediaWiki.Commenting.MissingCovers.MissingCovers
        array $targetModes, array $runnerOpts, callable $runTest
    ): void {
        if ( !$this->testName ) {
            throw new Error( 'Missing title from test case.' );
        }
        $selserNoAuto = ( ( $runnerOpts['selser'] ?? false ) === 'noauto' );

        foreach ( $targetModes as $targetMode ) {
            if (
                $targetMode === 'selser' &&
                !( $selserNoAuto || isset( $runnerOpts['changetree'] ) )
            ) {
                // Run selser tests in the following order:
                // 1. Manual changes (if provided)
                // 2. changetree 5 (oracle exists for verifying output)
                // 3. All other change trees (no oracle exists for verifying output)

                if ( isset( $this->options['parsoid']['changes'] ) ) {
                    // Mutating the item here is necessary to output 'manual' in
                    // the test's title and to differentiate it for knownFailures.
                    $this->changetree = [ 'manual' ];
                    $runTest( $this, 'selser', $runnerOpts );
                }

                // Skip the rest if the test doesn't want changetrees
                if ( ( $this->options['parsoid']['selser'] ?? '' ) === 'noauto' ) {
                    continue;
                }

                // Changetree 5 (append a comment to the root node)
                $this->changetree = [ 5 ];
                $runTest( $this, 'selser', $runnerOpts );

                // Automatically generated changed trees
                $this->selserChangeTrees = [];
                for ( $j = 0; $j < $runnerOpts['numchanges']; $j++ ) {
                    // Set changetree to null to ensure we don't assume [ 5 ] in $runTest
                    $this->changetree = null;
                    $this->seed = $j . '';
                    $runTest( $this, 'selser', $runnerOpts );
                    if ( $this->isDuplicateChangeTree( $this->changetree ) ) {
                        // Once we get a duplicate change tree, we can no longer
                        // generate and run new tests. So, be done now!
                        break;
                    } else {
                        $this->selserChangeTrees[$j] = $this->changetree;
                    }
                }
            } elseif ( $targetMode === 'selser' && $selserNoAuto ) {
                // Manual changes were requested on the command line,
                // check that the item does have them.
if ( isset( $this->options['parsoid']['changes'] ) ) {
                    $this->changetree = [ 'manual' ];
                    $runTest( $this, 'selser', $runnerOpts );
                }
                continue;
            } else {
                if ( $targetMode === 'wt2html' && isset( $this->sections['html/parsoid+langconv'] ) ) {
                    // Since we are clobbering options and parsoidHtml, clone the test object
                    $testClone = Utils::clone( $this );
                    $testClone->options['langconv'] = true;
                    $testClone->parsoidHtml = $this->sections['html/parsoid+langconv'];
                    $runTest( $testClone, $targetMode, $runnerOpts );
                    if ( $this->parsoidHtml === null ) {
                        // Don't run the same test in non-langconv mode
                        // unless we have a non-langconv section
                        continue;
                    }
                }

                Assert::invariant(
                    $targetMode !== 'selser' ||
                    ( isset( $runnerOpts['changetree'] ) && !$selserNoAuto ),
                    "Unexpected target mode $targetMode"
                );

                $runTest( $this, $targetMode, $runnerOpts );
            }
        }
    }

    /**
     * Normalize the actual HTML and, if not already supplied, the expected
     * HTML, so that irrelevant differences do not fail the comparison.
     * Which normalizer is used depends on the HTML sections present in the
     * test and on Parsoid-specific test options.
     *
     * When $normExpected is null it is computed from the matching expected
     * section and also stored in $this->cachedNormalizedHTML.
     *
     * @param Element|string $actual
     * @param ?string $normExpected
     * @param bool $standalone
     * @return array Two entries: normalized actual output, normalized expected output.
     */
    public function normalizeHTML( $actual, ?string $normExpected, bool $standalone = true ): array {
        $standaloneKey = 'html/parsoid+standalone';
        $integratedKey = 'html/parsoid+integrated';

        // Mode-specific expected sections only count in the matching mode.
        $useStandalone = $standalone && isset( $this->sections[$standaloneKey] );
        $useIntegrated = !$standalone && isset( $this->sections[$integratedKey] );

        $isParsoidOnly = $useStandalone
            || $useIntegrated
            || isset( $this->sections['html/parsoid'] )
            || isset( $this->sections['html/parsoid+langconv'] )
            || (
                isset( $this->options['parsoid'] )
                && !isset( $this->options['parsoid']['normalizePhp'] )
            );

        $normOpts = [
            'parsoidOnly' => $isParsoidOnly,
            'preserveIEW' => isset( $this->options['parsoid']['preserveIEW'] ),
            'externallinktarget' => ( $this->options['externallinktarget'] ?? false )
                || isset( $this->config['wgExternalLinkTarget'] )
                || isset( $this->config['wgNoFollowLinks'] )
                || isset( $this->config['wgNoFollowDomainExceptions'] ),
        ];

        if ( $normExpected === null ) {
            // Pick the expected HTML: integrated, then standalone, then the default.
            if ( $useIntegrated ) {
                $expectedHtml = $this->sections[$integratedKey];
            } elseif ( $useStandalone ) {
                $expectedHtml = $this->sections[$standaloneKey];
            } else {
                $expectedHtml = $this->parsoidHtml;
            }
            $normExpected = $isParsoidOnly
                ? TestUtils::normalizeOut( $expectedHtml, $normOpts )
                : TestUtils::normalizeHTML( $expectedHtml );
            $this->cachedNormalizedHTML = $normExpected;
        }

        $normActual = TestUtils::normalizeOut( $actual, $normOpts );
        return [ $normActual, $normExpected ];
    }

    /**
     * Normalize "known failure" output.
     *
     * Deliberately minimal: the known failure file exists to catch changes
     * in output even when the "correct" output is unknown. Only 'about'
     * numbering is normalized, since that is not guaranteed consistent from
     * run to run.
     */
    public function normalizeKnownFailure( string $out ): string {
        $normalized = TestUtils::normalizeAbout( $out );
        return $normalized;
    }

    /**
     * Normalize expected and actual wikitext to suppress irrelevant differences.
     *
     * Because of selser as well as manual edit trees, expected wikitext isn't always
     * found in the same section for all tests ending in WT (unlike normalizeHTML).
* Hence,
     * (a) this code has a different structure than normalizeHTML
     * (b) we cannot cache normalized wikitext
     *
     * @param string $actual
     * @param string $expected
     * @param bool $standalone Not used by the current implementation.
     * @return array Two entries: $actual and $expected, each with trailing newlines removed.
     */
    public function normalizeWT( string $actual, string $expected, bool $standalone = true ): array {
        // No other normalizations at this time
        $normalizedActual = rtrim( $actual, "\n" );
        $normalizedExpected = rtrim( $expected, "\n" );

        return [ $normalizedActual, $normalizedExpected ];
    }
}
PK ! WZU�� �� Grammar.phpnu �Iw�� <?php
/*
 * Generated by WikiPEG
 */

/* File-scope initializer */
namespace Wikimedia\Parsoid\ParserTests;

use Wikimedia\Parsoid\Utils\PHPUtils;

class Grammar extends \Wikimedia\WikiPEG\PEGParserBase {
    // initializer

    /** @var string */
    private $filename = '';
    /** @var int */
    private $lineNum = 1;

    /**
     * Read $filename and parse its contents with this grammar.
     *
     * A failed or empty read is parsed as a file containing a single
     * newline, because file_get_contents()'s falsy result is replaced by ''.
     *
     * @param string $filename
     * @return array
     */
    public static function load( string $filename ) {
        $g = new Grammar();
        $g->filename = $filename;
        $contents = file_get_contents( $filename ) ?: '';
        if ( substr( $contents, -1 ) !== "\n" ) {
            # ensure that the file is terminated with a newline
            # to match `end_section` rule (and other uses of `eol`)
            $contents .= "\n";
        }
        return $g->parse( $contents );
    }

    // Stamp $item with the file name, the given start line and the current
    // line counter (as the end line), and return it.
    private function addLines( int $lineStart, array $item ) {
        $item['filename'] = $this->filename;
        $item['lineNumStart'] = $lineStart;
        $item['lineNumEnd'] = $this->lineNum;
        return $item;
    }

    // cache init

    // expectations
    protected $expectations = [
        0 => ["type" => "end", "description" => "end of input"],
        1 => ["type" => "literal", "value" => "!!", "description" => "\"!!\""],
        2 => ["type" => "class", "value" => "[0-9]", "description" => "[0-9]"],
        3 => ["type" => "literal", "value" => "#", "description" => "\"#\""],
        4 => ["type" => "class", "value" => "[ \\t]", "description" => "[ \\t]"],
        5 => ["type" => "literal", "value" => "\x0a", "description" => "\"\\n\""],
        6 => ["type" => "literal", "value" => "version", "description" => "\"version\""],
        7 => ["type" => "class", "value" => "[^\\n]", "description" => "[^\\n]"],
        8 => ["type" => "literal", "value" => "options", "description" => "\"options\""],
        9 => ["type" => "literal", "value" => "end", "description" => "\"end\""],
        10 => ["type" => "literal", "value" => "article", "description" => "\"article\""],
        11 => ["type" => "literal", "value" => "text", "description" => "\"text\""],
        12 => ["type" => "literal", "value" => "endarticle", "description" => "\"endarticle\""],
        13 => ["type" => "literal", "value" => "test", "description" => "\"test\""],
        14 => ["type" => "class", "value" => "[^ \\t\\r\\n]", "description" => "[^ \\t\\r\\n]"],
        15 => ["type" => "literal", "value" => "config", "description" => "\"config\""],
        16 => ["type" => "literal", "value" => "hooks", "description" => "\"hooks\""],
        17 => ["type" => "literal", "value" => ":", "description" => "\":\""],
        18 => ["type" => "literal", "value" => "endhooks", "description" => "\"endhooks\""],
        19 => ["type" => "literal", "value" => "functionhooks", "description" => "\"functionhooks\""],
        20 => ["type" => "literal", "value" => "endfunctionhooks", "description" => "\"endfunctionhooks\""],
        21 => ["type" => "class", "value" => "[^ \\t\\n=!]", "description" => "[^ \\t\\n=!]"],
        22 => ["type" => "literal", "value" => "=", "description" => "\"=\""],
        23 => ["type" => "literal", "value" => ",", "description" => "\",\""],
        24 => ["type" => "literal", "value" => "[[", "description" => "\"[[\""],
        25 => ["type" => "class", "value" => "[^\\]\\n]", "description" => "[^\\]\\n]"],
        26 => ["type" => "literal", "value" => "]]", "description" => "\"]]\""],
        27 => ["type" => "class", "value" => "[\\\"]", "description" => "[\\\"]"],
        28 => ["type" => "class", "value" => "[^\\\\\\\"\\n]", "description" => "[^\\\\\\\"\\n]"],
        29 => ["type" => "literal", "value" => "\\", "description" => "\"\\\\\""],
        30 => ["type" => "class", "value" => "[^ \\t\\n\\\"\\'\\[\\]=,!\\{]", "description" => "[^ \\t\\n\\\"\\'\\[\\]=,!\\{]"],
        31 => ["type" => "literal", "value" => "{", "description" => "\"{\""],
        32 => ["type" => "class", "value" => "[^\\\"\\{\\}\\n]", "description" => "[^\\\"\\{\\}\\n]"],
        33 => ["type" => "literal", "value" => "}", "description" => "\"}\""],
        34 => ["type" => "literal", "value" => "[", "description" => "\"[\""],
        35 => ["type" => "class", "value" => "[^\\\"\\[\\]\\n]", "description" => "[^\\\"\\[\\]\\n]"],
        36 => ["type" => "literal", "value" => "]", "description" => "\"]\""],
    ];

    // actions

    // Wrap the argument as a 'line' item.
    private function a0($nl) {
        return [ 'type' => 'line', 'text' => $nl ];
    }

    // Return the current line counter.
    private function a1() {
        return $this->lineNum;
    }

    // Build a 'version' item with file and line information attached.
    private function a2($l, $v) {
        return $this->addLines( $l, [ 'type' => 'version', 'text' => $v ] );
    }

    // Attach file and line information to $sec.
    private function a3($l, $sec) {
        return $this->addLines( $l, $sec );
    }

    // Attach file and line information to $c.
    private function a4($l, $c) {
        return $this->addLines($l, $c);
    }

    // Wrap the argument as a 'comment' item.
    private function a5($text) {
        return [ 'type' => 'comment', 'text' => $text ];
    }

    // Increment the line counter and pass the argument through.
    private function a6($nl) {
        $this->lineNum++;
        return $nl;
    }

    // Concatenate the elements of $c into one string.
    private function a7($c) {
        return implode($c);
    }

    // Build the 'options' section: each entry's 'k' maps to its 'v'.
    private function a8($opts) {
        $o = [];
        if ( $opts && count($opts) > 0 ) {
            foreach ( $opts as $opt ) {
                $o[$opt['k']] = $opt['v'];
            }
        }

        return [ 'type' => 'section', 'name' => 'options', 'text' => $o ];
    }

    // Wrap the argument as a 'line' item.
    private function a9($l) {
        return [ 'type' => 'line', 'text' => $l ];
    }

    // Return a list starting with $o followed by the entries of $rest, if any.
    private function a10($o, $rest) {
        $result = [ $o ];
        if ( $rest && count( $rest ) > 0 ) {
            $result = array_merge( $result, $rest );
        }
        return $result;
    }

    // Build an 'article' item from its title and text.
    private function a11($title, $text) {
        return [ 'type' => 'article', 'title' => $title, 'text' => $text ];
    }

    private function a12($testName, $sections) {
        $test = [ 'type' => 'test', 'testName' => $testName ];

        foreach ( $sections as $section ) {
            $test[$section['name']] = $section['text'];
        }
        // pegjs parser handles item options as follows:
        //   item option             value of item.options.parsoid
        //    <none>                          undefined
        //    parsoid                             ""
        //    parsoid=wt2html                  "wt2html"
        //    parsoid=wt2html,wt2wt        ["wt2html","wt2wt"]
        //    parsoid={"modes":["wt2wt"]}    {modes:['wt2wt']}

        // treat 'parsoid=xxx,yyy' in options section as
shorthand for // 'parsoid={modes:["xxx","yyy"]}' if ( isset($test['options']['parsoid'] ) ) { if ($test['options']['parsoid'] === '') { $test['options']['parsoid'] = []; } if ( is_string( $test['options']['parsoid'] ) ) { $test['options']['parsoid'] = [ $test['options']['parsoid'] ]; } if ( is_array( $test['options']['parsoid'] ) && isset( $test['options']['parsoid'][0] ) && !isset( $test['options']['parsoid']['modes'] ) ) { $test['options']['parsoid'] = [ 'modes' => $test['options']['parsoid'] ]; } } return $test; } private function a13($text) { return [ 'type' => 'hooks', 'text' => $text ]; } private function a14($text) { return [ 'type' => 'functionhooks', 'text' => $text ]; } private function a15($line) { return $line; } private function a16($k, $v) { return [ 'k' => strtolower( $k ), 'v' => $v ?? '' ]; } private function a17($lines) { return implode("\n", $lines); } private function a18($c) { return implode( $c ); } private function a19($name, $text) { return [ 'name' => $name, 'text' => $text ]; } private function a20($items) { $c = []; if ( $items && count($items) > 0 ) { foreach ( $items as $item ) { $c[$item['k']] = $item['v']; } } return [ 'type' => 'section', 'name' => 'config', 'text' => $c ]; } private function a21($ovl) { return count( $ovl ) === 1 ? $ovl[0] : $ovl; } private function a22($c, $rest) { $result = [ $c ]; if ( $rest && count( $rest ) > 0 ) { $result = array_merge( $result, $rest ); } return $result; } private function a23($v, $ovl) { return $ovl; } private function a24($v, $rest) { $result = [ $v ]; if ( $rest && count( $rest ) > 0 ) { $result = array_merge( $result, $rest ); } return $result; } private function a25($k, $v) { return [ 'k' => $k, 'v' => $v ]; } private function a26($v) { if ( $v[0] === '"' || $v[0] === '{' ) { // } is needed to make pegjs happy return PHPUtils::jsonDecode( $v ); } return $v; } private function a27($v) { return $v; } private function a28($v) { // Perhaps we should canonicalize the title? 
// Protect with JSON.stringify just in case the link target starts with // double-quote or open-brace. return PHPUtils::jsonEncode( implode( $v ) ); } private function a29($c) { return "\\" . $c; } private function a30($v) { return '"' . implode( $v ) . '"'; } private function a31($v) { return implode( $v ); } private function a32($v) { return "{" . implode( $v ) . "}"; } private function a33($v) { // validate this as acceptable JSON // (this ensures that wikipeg throws a syntax error if // the JSON is invalid; note that PHP 7.3 would allow us // to use JSON_THROW_ON_ERROR instead of json_last_error()...) $ignore = json_decode($v, true, 100); return (json_last_error() === JSON_ERROR_NONE); } private function a34($v) { // The value is valid JSON; return the decoded value. return json_decode($v, true); } private function a35($v) { return "[" . implode( $v ) . "]"; } // generated private function parsetestfile($silence) { // start seq_1 $p1 = $this->currPos; $r3 = []; for (;;) { $r4 = $this->parsecomment_or_blank_line($silence); if ($r4!==self::$FAILED) { $r3[] = $r4; } else { break; } } // free $r4 $r4 = $this->parseformat($silence); if ($r4===self::$FAILED) { $r4 = null; } $r5 = []; for (;;) { $r6 = $this->parsecomment_or_blank_line($silence); if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // free $r6 $r6 = $this->parsetestfile_options($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = []; for (;;) { $r8 = $this->parselined_chunk($silence); if ($r8!==self::$FAILED) { $r7[] = $r8; } else { break; } } if (count($r7) === 0) { $r7 = self::$FAILED; } if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } // free $r8 $r2 = [$r3,$r4,$r5,$r6,$r7]; seq_1: // free $r2,$p1 return $r2; } private function parsecomment_or_blank_line($silence) { // start choice_1 $r1 = $this->parsecomment($silence); if ($r1!==self::$FAILED) { goto choice_1; } $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = 
$this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } $r5 = $this->parseeol($silence); // nl <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a0($r5); } // free $p3 choice_1: return $r1; } private function parseformat($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $p5 = $this->currPos; $r4 = ''; // l <- $r4 if ($r4!==self::$FAILED) { $this->savedPos = $p5; $r4 = $this->a1(); } else { $r1 = self::$FAILED; goto seq_1; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r6 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardversion_keyword($silence); if ($r8===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r9 = self::$FAILED; for (;;) { $r10 = $this->discardwhitespace($silence); if ($r10!==self::$FAILED) { $r9 = true; } else { break; } } if ($r9===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $r10 $p11 = $this->currPos; $r10 = self::$FAILED; for (;;) { $r12 = $this->input[$this->currPos] ?? 
''; if (preg_match("/^[0-9]/", $r12)) { $this->currPos++; $r10 = true; } else { $r12 = self::$FAILED; if (!$silence) {$this->fail(2);} break; } } // v <- $r10 if ($r10!==self::$FAILED) { $r10 = substr($this->input, $p11, $this->currPos - $p11); } else { $r10 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $r12 // free $p11 $r12 = $this->discardrest_of_line($silence); if ($r12===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a2($r4, $r10); } // free $p3 return $r1; } private function parsetestfile_options($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $p5 = $this->currPos; $r4 = ''; // l <- $r4 if ($r4!==self::$FAILED) { $this->savedPos = $p5; $r4 = $this->a1(); } else { $r1 = self::$FAILED; goto seq_1; } $r6 = $this->parseoption_section($silence); // sec <- $r6 if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r7 = $this->discardend_section($silence); if ($r7===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a3($r4, $r6); } // free $p3 return $r1; } private function parselined_chunk($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $p5 = $this->currPos; $r4 = ''; // l <- $r4 if ($r4!==self::$FAILED) { $this->savedPos = $p5; $r4 = $this->a1(); } else { $r1 = self::$FAILED; goto seq_1; } $r6 = $this->parsechunk($silence); // c <- $r6 if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a4($r4, $r6); } // free $p3 return $r1; } private function parsecomment($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if (($this->input[$this->currPos] ?? 
null) === "#") { $this->currPos++; $r4 = "#"; } else { if (!$silence) {$this->fail(3);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parserest_of_line($silence); // text <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a5($r5); } // free $p3 return $r1; } private function discardwhitespace($silence) { $r1 = self::$FAILED; for (;;) { $r2 = $this->input[$this->currPos] ?? ''; if ($r2 === " " || $r2 === "\x09") { $this->currPos++; $r1 = true; } else { $r2 = self::$FAILED; if (!$silence) {$this->fail(4);} break; } } // free $r2 return $r1; } private function parseeol($silence) { $p2 = $this->currPos; // nl <- $r3 if (($this->input[$this->currPos] ?? null) === "\x0a") { $this->currPos++; $r3 = "\x0a"; } else { if (!$silence) {$this->fail(5);} $r3 = self::$FAILED; } $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a6($r3); } return $r1; } private function discardversion_keyword($silence) { if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "version", $this->currPos, 7, true) === 0) { $r1 = substr($this->input, $this->currPos, 7); $this->currPos += 7; } else { if (!$silence) {$this->fail(6);} $r1 = self::$FAILED; } return $r1; } private function discardrest_of_line($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = []; for (;;) { $r5 = self::charAt($this->input, $this->currPos); if ($r5 !== '' && !($r5 === "\x0a")) { $this->currPos += strlen($r5); $r4[] = $r5; } else { $r5 = self::$FAILED; if (!$silence) {$this->fail(7);} break; } } // c <- $r4 // free $r5 $r5 = $this->discardeol($silence); if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a7($r4); } // free $p3 return $r1; } private function parseoption_section($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r4 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = $this->discardwhitespace($silence); if ($r5===self::$FAILED) { $r5 = null; } if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "options", $this->currPos, 7, false) === 0) { $r6 = "options"; $this->currPos += 7; } else { if (!$silence) {$this->fail(8);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardeol($silence); if ($r8===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r9 = $this->parseoption_list($silence); if ($r9===self::$FAILED) { $r9 = null; } // opts <- $r9 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a8($r9); } // free $p3 return $r1; } private function discardend_section($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "end", $this->currPos, 3, false) === 0) { $r5 = "end"; $this->currPos += 3; } else { if (!$silence) {$this->fail(9);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function parsechunk($silence) { // start choice_1 $r1 = $this->parsecomment_or_blank_line($silence); if ($r1!==self::$FAILED) { goto choice_1; } $r1 = $this->parsearticle($silence); if ($r1!==self::$FAILED) { goto choice_1; } $r1 = $this->parsetest($silence); if ($r1!==self::$FAILED) { goto choice_1; } $r1 = $this->parsehooks($silence); if ($r1!==self::$FAILED) { goto choice_1; } $r1 = $this->parsefunctionhooks($silence); if ($r1!==self::$FAILED) { goto choice_1; } $p2 = $this->currPos; $r3 = $this->parseline($silence); // l <- $r3 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a9($r3); } choice_1: return $r1; } private function parserest_of_line($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = []; for (;;) { $r5 = self::charAt($this->input, $this->currPos); if ($r5 !== '' && !($r5 === "\x0a")) { $this->currPos += strlen($r5); $r4[] = $r5; } else { $r5 = self::$FAILED; if (!$silence) {$this->fail(7);} break; } } // c <- $r4 // free $r5 $r5 = $this->discardeol($silence); if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a7($r4); } // free $p3 return $r1; } private function discardeol($silence) { $p2 = $this->currPos; // nl <- $r3 if (($this->input[$this->currPos] ?? 
null) === "\x0a") { $this->currPos++; $r3 = "\x0a"; } else { if (!$silence) {$this->fail(5);} $r3 = self::$FAILED; } $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a6($r3); } return $r1; } private function parseoption_list($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->parsean_option($silence); // o <- $r4 if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = self::$FAILED; for (;;) { // start choice_1 $r6 = $this->input[$this->currPos] ?? ''; if ($r6 === " " || $r6 === "\x09") { $this->currPos++; goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(4);} } $r6 = $this->discardeol($silence); choice_1: if ($r6!==self::$FAILED) { $r5 = true; } else { break; } } if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $r6 $r6 = $this->parseoption_list($silence); if ($r6===self::$FAILED) { $r6 = null; } // rest <- $r6 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a10($r4, $r6); } // free $p3 return $r1; } private function parsearticle($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardstart_article($silence); if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parseline($silence); // title <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r6 = $this->discardstart_text($silence); if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r7 = $this->parsetext($silence); // text <- $r7 if ($r7===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // start choice_1 $r8 = $this->discardend_article($silence); if ($r8!==self::$FAILED) { goto choice_1; } $r8 = $this->discardend_section($silence); choice_1: if ($r8===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 
= $this->a11($r5, $r7); } // free $p3 return $r1; } private function parsetest($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardstart_test($silence); if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parsetext($silence); // testName <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r6 = []; for (;;) { // start choice_1 $r7 = $this->parsesection($silence); if ($r7!==self::$FAILED) { goto choice_1; } $r7 = $this->parseconfig_section($silence); if ($r7!==self::$FAILED) { goto choice_1; } $r7 = $this->parseoption_section($silence); choice_1: if ($r7!==self::$FAILED) { $r6[] = $r7; } else { break; } } // sections <- $r6 // free $r7 $r7 = $this->discardend_section($silence); if ($r7===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a12($r5, $r6); } // free $p3 return $r1; } private function parsehooks($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardstart_hooks($silence); if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parsetext($silence); // text <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // start choice_1 $r6 = $this->discardend_hooks($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->discardend_section($silence); choice_1: if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a13($r5); } // free $p3 return $r1; } private function parsefunctionhooks($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardstart_functionhooks($silence); if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parsetext($silence); // text <- $r5 if ($r5===self::$FAILED) { 
$this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // start choice_1 $r6 = $this->discardend_functionhooks($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->discardend_section($silence); choice_1: if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a14($r5); } // free $p3 return $r1; } private function parseline($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $p4 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r5 = "!!"; $this->currPos += 2; } else { $r5 = self::$FAILED; } if ($r5 === self::$FAILED) { $r5 = false; } else { $r5 = self::$FAILED; $this->currPos = $p4; $r1 = self::$FAILED; goto seq_1; } // free $p4 $r6 = $this->parserest_of_line($silence); // line <- $r6 if ($r6===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a15($r6); } // free $p3 return $r1; } private function parsean_option($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->parseoption_name($silence); // k <- $r4 if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parseoption_value($silence); if ($r5===self::$FAILED) { $r5 = null; } // v <- $r5 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a16($r4, $r5); } // free $p3 return $r1; } private function discardstart_article($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "article", $this->currPos, 7, false) === 0) { $r5 = "article"; $this->currPos += 7; } else { if (!$silence) {$this->fail(10);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function discardstart_text($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "text", $this->currPos, 4, false) === 0) { $r5 = "text"; $this->currPos += 4; } else { if (!$silence) {$this->fail(11);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function parsetext($silence) { $p2 = $this->currPos; $r3 = []; for (;;) { $r4 = $this->parseline($silence); if ($r4!==self::$FAILED) { $r3[] = $r4; } else { break; } } // lines <- $r3 // free $r4 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a17($r3); } return $r1; } private function discardend_article($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "endarticle", $this->currPos, 10, false) === 0) { $r5 = "endarticle"; $this->currPos += 10; } else { if (!$silence) {$this->fail(12);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function discardstart_test($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "test", $this->currPos, 4, false) === 0) { $r5 = "test"; $this->currPos += 4; } else { if (!$silence) {$this->fail(13);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function parsesection($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r4 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = $this->discardwhitespace($silence); if ($r5===self::$FAILED) { $r5 = null; } $p6 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "test", $this->currPos, 4, false) === 0) { $r7 = "test"; $this->currPos += 4; } else { $r7 = self::$FAILED; } if ($r7 === self::$FAILED) { $r7 = false; } else { $r7 = self::$FAILED; $this->currPos = $p6; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $p6 $p6 = $this->currPos; if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "end", $this->currPos, 3, false) === 0) { $r8 = "end"; $this->currPos += 3; } else { $r8 = self::$FAILED; } if ($r8 === self::$FAILED) { $r8 = false; } else { $r8 = self::$FAILED; $this->currPos = $p6; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $p6 $p6 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "options", $this->currPos, 7, false) === 0) { $r9 = "options"; $this->currPos += 7; } else { $r9 = self::$FAILED; } if ($r9 === self::$FAILED) { $r9 = false; } else { $r9 = self::$FAILED; $this->currPos = $p6; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $p6 $p6 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "config", $this->currPos, 6, false) === 0) { $r10 = "config"; $this->currPos += 6; } else { $r10 = self::$FAILED; } if ($r10 === self::$FAILED) { $r10 = false; } else { $r10 = self::$FAILED; $this->currPos = $p6; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $p6 $p6 = $this->currPos; $r12 = []; for (;;) { if (strcspn($this->input, " \x09\x0d\x0a", $this->currPos, 1) !== 0) { $r13 = self::consumeChar($this->input, $this->currPos); $r12[] = $r13; } else { $r13 = self::$FAILED; if (!$silence) {$this->fail(14);} break; } } if (count($r12) === 0) { $r12 = self::$FAILED; } // c <- $r12 // free $r13 $r11 = $r12; // name <- $r11 if ($r11!==self::$FAILED) { $this->savedPos = $p6; $r11 = $this->a18($r12); } else { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r13 = $this->discardrest_of_line($silence); if ($r13===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r14 = $this->parsetext($silence); // text <- $r14 if ($r14===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a19($r11, $r14); } // free $p3 return $r1; } private function 
parseconfig_section($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r4 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = $this->discardwhitespace($silence); if ($r5===self::$FAILED) { $r5 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "config", $this->currPos, 6, false) === 0) { $r6 = "config"; $this->currPos += 6; } else { if (!$silence) {$this->fail(15);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardeol($silence); if ($r8===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r9 = $this->parseconfig_list($silence); if ($r9===self::$FAILED) { $r9 = null; } // items <- $r9 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a20($r9); } // free $p3 return $r1; } private function discardstart_hooks($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "hooks", $this->currPos, 5, false) === 0) { $r5 = "hooks"; $this->currPos += 5; } else { if (!$silence) {$this->fail(16);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } if (($this->input[$this->currPos] ?? 
null) === ":") { $this->currPos++; $r6 = ":"; } else { if (!$silence) {$this->fail(17);} $r6 = self::$FAILED; $r6 = null; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardeol($silence); if ($r8===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function discardend_hooks($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "endhooks", $this->currPos, 8, false) === 0) { $r5 = "endhooks"; $this->currPos += 8; } else { if (!$silence) {$this->fail(18);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->discardeol($silence); if ($r7===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function discardstart_functionhooks($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? 
false : substr_compare($this->input, "functionhooks", $this->currPos, 13, false) === 0) { $r5 = "functionhooks"; $this->currPos += 13; } else { if (!$silence) {$this->fail(19);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } if (($this->input[$this->currPos] ?? null) === ":") { $this->currPos++; $r6 = ":"; } else { if (!$silence) {$this->fail(17);} $r6 = self::$FAILED; $r6 = null; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardeol($silence); if ($r8===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function discardend_functionhooks($silence) { // start seq_1 $p1 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "!!", $this->currPos, 2, false) === 0) { $r3 = "!!"; $this->currPos += 2; } else { if (!$silence) {$this->fail(1);} $r3 = self::$FAILED; $r2 = self::$FAILED; goto seq_1; } $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "endfunctionhooks", $this->currPos, 16, false) === 0) { $r5 = "endfunctionhooks"; $this->currPos += 16; } else { if (!$silence) {$this->fail(20);} $r5 = self::$FAILED; $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } if (($this->input[$this->currPos] ?? 
null) === ":") { $this->currPos++; $r6 = ":"; } else { if (!$silence) {$this->fail(17);} $r6 = self::$FAILED; $r6 = null; } $r7 = $this->discardwhitespace($silence); if ($r7===self::$FAILED) { $r7 = null; } $r8 = $this->discardeol($silence); if ($r8===self::$FAILED) { $this->currPos = $p1; $r2 = self::$FAILED; goto seq_1; } $r2 = true; seq_1: // free $r2,$p1 return $r2; } private function parseoption_name($silence) { $p2 = $this->currPos; $r3 = []; for (;;) { if (strcspn($this->input, " \x09\x0a=!", $this->currPos, 1) !== 0) { $r4 = self::consumeChar($this->input, $this->currPos); $r3[] = $r4; } else { $r4 = self::$FAILED; if (!$silence) {$this->fail(21);} break; } } if (count($r3) === 0) { $r3 = self::$FAILED; } // c <- $r3 // free $r4 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a7($r3); } return $r1; } private function parseoption_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if (($this->input[$this->currPos] ?? 
null) === "=") { $this->currPos++; $r5 = "="; } else { if (!$silence) {$this->fail(22);} $r5 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->parseoption_value_list($silence); // ovl <- $r7 if ($r7===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a21($r7); } // free $p3 return $r1; } private function parseconfig_list($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->parsea_config_line($silence); // c <- $r4 if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = self::$FAILED; for (;;) { $r6 = $this->discardeol($silence); if ($r6!==self::$FAILED) { $r5 = true; } else { break; } } if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } // free $r6 $r6 = $this->parseconfig_list($silence); if ($r6===self::$FAILED) { $r6 = null; } // rest <- $r6 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a22($r4, $r6); } // free $p3 return $r1; } private function parseoption_value_list($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->parsean_option_value($silence); // v <- $r4 if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $p6 = $this->currPos; // start seq_2 $p7 = $this->currPos; $r8 = $this->discardwhitespace($silence); if ($r8===self::$FAILED) { $r8 = null; } if (($this->input[$this->currPos] ?? 
null) === ",") { $this->currPos++; $r9 = ","; } else { if (!$silence) {$this->fail(23);} $r9 = self::$FAILED; $this->currPos = $p7; $r5 = self::$FAILED; goto seq_2; } $r10 = $this->discardwhitespace($silence); if ($r10===self::$FAILED) { $r10 = null; } $r11 = $this->parseoption_value_list($silence); // ovl <- $r11 if ($r11===self::$FAILED) { $this->currPos = $p7; $r5 = self::$FAILED; goto seq_2; } $r5 = true; seq_2: if ($r5!==self::$FAILED) { $this->savedPos = $p6; $r5 = $this->a23($r4, $r11); } else { $r5 = null; } // free $p7 // rest <- $r5 $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a24($r4, $r5); } // free $p3 return $r1; } private function parsea_config_line($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->parseoption_name($silence); // k <- $r4 if ($r4===self::$FAILED) { $r1 = self::$FAILED; goto seq_1; } $r5 = $this->parseconfig_value($silence); // v <- $r5 if ($r5===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a25($r4, $r5); } // free $p3 return $r1; } private function parsean_option_value($silence) { $p2 = $this->currPos; // start choice_1 $r3 = $this->parselink_target_value($silence); if ($r3!==self::$FAILED) { goto choice_1; } $r3 = $this->parsequoted_value($silence); if ($r3!==self::$FAILED) { goto choice_1; } $r3 = $this->parseplain_value($silence); if ($r3!==self::$FAILED) { goto choice_1; } $r3 = $this->parsejson_value($silence); choice_1: // v <- $r3 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a26($r3); } return $r1; } private function parseconfig_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->discardwhitespace($silence); if ($r4===self::$FAILED) { $r4 = null; } if (($this->input[$this->currPos] ?? 
null) === "=") { $this->currPos++; $r5 = "="; } else { if (!$silence) {$this->fail(22);} $r5 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r6 = $this->discardwhitespace($silence); if ($r6===self::$FAILED) { $r6 = null; } $r7 = $this->parsevalid_json_value($silence); // v <- $r7 if ($r7===self::$FAILED) { $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a27($r7); } // free $p3 return $r1; } private function parselink_target_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "[[", $this->currPos, 2, false) === 0) { $r4 = "[["; $this->currPos += 2; } else { if (!$silence) {$this->fail(24);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { $r6 = self::charAt($this->input, $this->currPos); if ($r6 !== '' && !($r6 === "]" || $r6 === "\x0a")) { $this->currPos += strlen($r6); $r5[] = $r6; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(25);} break; } } // v <- $r5 // free $r6 if ($this->currPos >= $this->inputLength ? false : substr_compare($this->input, "]]", $this->currPos, 2, false) === 0) { $r6 = "]]"; $this->currPos += 2; } else { if (!$silence) {$this->fail(26);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a28($r5); } // free $p3 return $r1; } private function parsequoted_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->input[$this->currPos] ?? 
''; if ($r4 === "\"") { $this->currPos++; } else { $r4 = self::$FAILED; if (!$silence) {$this->fail(27);} $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\\\"\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(28);} } $p7 = $this->currPos; // start seq_2 $p8 = $this->currPos; if (($this->input[$this->currPos] ?? null) === "\\") { $this->currPos++; $r9 = "\\"; } else { if (!$silence) {$this->fail(29);} $r9 = self::$FAILED; $r6 = self::$FAILED; goto seq_2; } $r10 = self::charAt($this->input, $this->currPos); // c <- $r10 if ($r10 !== '' && !($r10 === "\x0a")) { $this->currPos += strlen($r10); } else { $r10 = self::$FAILED; if (!$silence) {$this->fail(7);} $this->currPos = $p8; $r6 = self::$FAILED; goto seq_2; } $r6 = true; seq_2: if ($r6!==self::$FAILED) { $this->savedPos = $p7; $r6 = $this->a29($r10); } // free $p8 choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 $r6 = $this->input[$this->currPos] ?? 
''; if ($r6 === "\"") { $this->currPos++; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(27);} $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a30($r5); } // free $p3 return $r1; } private function parseplain_value($silence) { $p2 = $this->currPos; $r3 = []; for (;;) { if (strcspn($this->input, " \x09\x0a\"'[]=,!{", $this->currPos, 1) !== 0) { $r4 = self::consumeChar($this->input, $this->currPos); $r3[] = $r4; } else { $r4 = self::$FAILED; if (!$silence) {$this->fail(30);} break; } } if (count($r3) === 0) { $r3 = self::$FAILED; } // v <- $r3 // free $r4 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a31($r3); } return $r1; } private function parsejson_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if (($this->input[$this->currPos] ?? null) === "{") { $this->currPos++; $r4 = "{"; } else { if (!$silence) {$this->fail(31);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\"{}\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(32);} } $r6 = $this->parsequoted_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parsejson_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parseeol($silence); choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 if (($this->input[$this->currPos] ?? 
null) === "}") { $this->currPos++; $r6 = "}"; } else { if (!$silence) {$this->fail(33);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a32($r5); } // free $p3 return $r1; } private function parsevalid_json_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $p5 = $this->currPos; // start choice_1 $r4 = $this->discardquoted_value($silence); if ($r4!==self::$FAILED) { goto choice_1; } $r4 = $this->discardplain_value($silence); if ($r4!==self::$FAILED) { goto choice_1; } $r4 = $this->discardarray_value($silence); if ($r4!==self::$FAILED) { goto choice_1; } $r4 = $this->discardjson_value($silence); choice_1: // v <- $r4 if ($r4!==self::$FAILED) { $r4 = substr($this->input, $p5, $this->currPos - $p5); } else { $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } // free $p5 $this->savedPos = $this->currPos; $r6 = $this->a33($r4); if ($r6) { $r6 = false; } else { $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a34($r4); } // free $p3 return $r1; } private function discardquoted_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; $r4 = $this->input[$this->currPos] ?? ''; if ($r4 === "\"") { $this->currPos++; } else { $r4 = self::$FAILED; if (!$silence) {$this->fail(27);} $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\\\"\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(28);} } $p7 = $this->currPos; // start seq_2 $p8 = $this->currPos; if (($this->input[$this->currPos] ?? 
null) === "\\") { $this->currPos++; $r9 = "\\"; } else { if (!$silence) {$this->fail(29);} $r9 = self::$FAILED; $r6 = self::$FAILED; goto seq_2; } $r10 = self::charAt($this->input, $this->currPos); // c <- $r10 if ($r10 !== '' && !($r10 === "\x0a")) { $this->currPos += strlen($r10); } else { $r10 = self::$FAILED; if (!$silence) {$this->fail(7);} $this->currPos = $p8; $r6 = self::$FAILED; goto seq_2; } $r6 = true; seq_2: if ($r6!==self::$FAILED) { $this->savedPos = $p7; $r6 = $this->a29($r10); } // free $p8 choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 $r6 = $this->input[$this->currPos] ?? ''; if ($r6 === "\"") { $this->currPos++; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(27);} $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a30($r5); } // free $p3 return $r1; } private function discardplain_value($silence) { $p2 = $this->currPos; $r3 = []; for (;;) { if (strcspn($this->input, " \x09\x0a\"'[]=,!{", $this->currPos, 1) !== 0) { $r4 = self::consumeChar($this->input, $this->currPos); $r3[] = $r4; } else { $r4 = self::$FAILED; if (!$silence) {$this->fail(30);} break; } } if (count($r3) === 0) { $r3 = self::$FAILED; } // v <- $r3 // free $r4 $r1 = $r3; if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a31($r3); } return $r1; } private function discardarray_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if (($this->input[$this->currPos] ?? 
null) === "[") { $this->currPos++; $r4 = "["; } else { if (!$silence) {$this->fail(34);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\"[]\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(35);} } $r6 = $this->parsequoted_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parsearray_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parseeol($silence); choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 if (($this->input[$this->currPos] ?? null) === "]") { $this->currPos++; $r6 = "]"; } else { if (!$silence) {$this->fail(36);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a35($r5); } // free $p3 return $r1; } private function discardjson_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if (($this->input[$this->currPos] ?? null) === "{") { $this->currPos++; $r4 = "{"; } else { if (!$silence) {$this->fail(31);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\"{}\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(32);} } $r6 = $this->parsequoted_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parsejson_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parseeol($silence); choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 if (($this->input[$this->currPos] ?? 
null) === "}") { $this->currPos++; $r6 = "}"; } else { if (!$silence) {$this->fail(33);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a32($r5); } // free $p3 return $r1; } private function parsearray_value($silence) { $p2 = $this->currPos; // start seq_1 $p3 = $this->currPos; if (($this->input[$this->currPos] ?? null) === "[") { $this->currPos++; $r4 = "["; } else { if (!$silence) {$this->fail(34);} $r4 = self::$FAILED; $r1 = self::$FAILED; goto seq_1; } $r5 = []; for (;;) { // start choice_1 if (strcspn($this->input, "\"[]\x0a", $this->currPos, 1) !== 0) { $r6 = self::consumeChar($this->input, $this->currPos); goto choice_1; } else { $r6 = self::$FAILED; if (!$silence) {$this->fail(35);} } $r6 = $this->parsequoted_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parsearray_value($silence); if ($r6!==self::$FAILED) { goto choice_1; } $r6 = $this->parseeol($silence); choice_1: if ($r6!==self::$FAILED) { $r5[] = $r6; } else { break; } } // v <- $r5 // free $r6 if (($this->input[$this->currPos] ?? null) === "]") { $this->currPos++; $r6 = "]"; } else { if (!$silence) {$this->fail(36);} $r6 = self::$FAILED; $this->currPos = $p3; $r1 = self::$FAILED; goto seq_1; } $r1 = true; seq_1: if ($r1!==self::$FAILED) { $this->savedPos = $p2; $r1 = $this->a35($r5); } // free $p3 return $r1; } public function parse( $input, $options = [] ) { $this->initInternal( $input, $options ); $startRule = $options['startRule'] ?? '(DEFAULT)'; $result = null; if ( !empty( $options['stream'] ) ) { switch ( $startRule ) { default: throw new \Wikimedia\WikiPEG\InternalError( "Can't stream rule $startRule." ); } } else { switch ( $startRule ) { case '(DEFAULT)': case "testfile": $result = $this->parsetestfile(false); break; default: throw new \Wikimedia\WikiPEG\InternalError( "Can't start parsing from rule $startRule." 
); } } if ( $result !== self::$FAILED && $this->currPos === $this->inputLength ) { return $result; } else { if ( $result !== self::$FAILED && $this->currPos < $this->inputLength ) { $this->fail( 0 ); } throw $this->buildParseException(); } } }
PK ! �D � � ParserHookProcessor.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

use stdClass;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\DOMProcessor as ExtDOMProcessor;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Utils\DOMUtils;

/**
 * See tests/parser/ParserTestParserHook.php in core.
 */
class ParserHookProcessor extends ExtDOMProcessor {

	/**
	 * DOM visitor callback for `mw:Extension/statictag` elements.
	 *
	 * A tag whose `action` attribute is 'flush' receives the text buffered
	 * so far as a new text-node child and the buffer is reset; any other
	 * statictag appends its extension source to the buffer. Nodes that are
	 * not statictag elements are left untouched.
	 *
	 * @param Node $node Node currently being visited
	 * @param stdClass $obj Shared state; its `buf` property holds the
	 *   text accumulated across visited nodes
	 */
	public function staticTagPostProcessor( Node $node, stdClass $obj ): void {
		if ( !( $node instanceof Element ) ) {
			return;
		}
		if ( !DOMUtils::hasTypeOf( $node, 'mw:Extension/statictag' ) ) {
			return;
		}

		$dataMw = DOMDataUtils::getDataMw( $node );
		// T367616: ->attrs-> should be renamed to extAttrs
		if ( ( $dataMw->attrs->action ?? null ) === 'flush' ) {
			$node->appendChild( $node->ownerDocument->createTextNode( $obj->buf ) );
			$obj->buf = '';
			return;
		}

		// Guard the body lookup the same way as the attrs lookup above, so
		// a tag without body/extsrc contributes nothing instead of raising
		// a warning.
		$obj->buf .= $dataMw->body->extsrc ?? '';
	}

	/**
	 * @inheritDoc
	 */
	public function wtPostprocess(
		ParsoidExtensionAPI $extApi, Node $node, array $options
	): void {
		// Pass an object since we want the data to be carried around across
		// nodes in the DOM. Passing an array won't work since visitDOM doesn't
		// use a reference on its end. Maybe we could fix that separately.
		DOMUtils::visitDOM(
			$node,
			[ $this, 'staticTagPostProcessor' ],
			(object)[ 'buf' => '' ]
		);
	}
}
PK ! �&4Ώ � UnexpectedException.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

use Exception;

class UnexpectedException extends Exception {
}
PK ! �R! 
TestFileReader.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

/**
 * Loads a parserTests file via Grammar::load() and exposes its file-level
 * options, its articles and its test cases.
 */
class TestFileReader {
	/** @var array File-level options and requirements for these parser tests */
	public $fileOptions = [];

	/** @var Test[] */
	public $testCases = [];

	/** @var Article[] */
	public $articles = [];

	/**
	 * @var ?string Path to known failures file, or null if does not exist
	 * or is not readable.
	 */
	public $knownFailuresPath;

	/**
	 * Read and parse a parserTest file.
	 * @param string $testFilePath The parserTest file to read
	 * @param ?callable(string) $warnFunc An optional function to use to
	 *   report the use of deprecated test section names
	 * @param ?callable(string):string $normalizeFunc An optional function
	 *   to use to normalize article titles for uniqueness testing
	 * @param ?string $knownFailuresInfix qualifier for the known failures file
	 *   (usually "standalone" to distinguish from the failures from the default
	 *   integrated test run)
	 * @return TestFileReader
	 */
	public static function read(
		string $testFilePath,
		?callable $warnFunc = null,
		?callable $normalizeFunc = null,
		?string $knownFailuresInfix = null
	): TestFileReader {
		$info = pathinfo( $testFilePath );
		// <dir>/<basename>[-<infix>]-knownFailures.json
		$knownFailuresPath = $info['dirname'] . '/' . $info['filename'] .
			( $knownFailuresInfix ? "-$knownFailuresInfix" : '' ) .
			'-knownFailures.json';
		return new self(
			$testFilePath, $knownFailuresPath, $warnFunc, $normalizeFunc
		);
	}

	/**
	 * @param string $testFilePath The parserTest file to read
	 * @param ?string $knownFailuresPath The known failures file to read
	 *   (or null, if there is no readable known failures file)
	 * @param ?callable(string) $warnFunc An optional function to use to
	 *   report the use of deprecated test section names
	 * @param ?callable(string):string $normalizeFunc An optional function
	 *   to use to normalize article titles for uniqueness testing
	 */
	private function __construct(
		string $testFilePath, ?string $knownFailuresPath,
		?callable $warnFunc = null, ?callable $normalizeFunc = null
	) {
		$this->knownFailuresPath =
			$knownFailuresPath && is_readable( $knownFailuresPath ) ?
			$knownFailuresPath : null;

		$parsedTests = Grammar::load( $testFilePath );
		// Start off with any comments before `!! format`
		$rawTestItems = $parsedTests[0];
		$testFormat = $parsedTests[1];
		if ( $testFormat != null ) {
			// If `!!format` was present, existing comments applied to the
			// format declaration, not the first item.
			$rawTestItems = [];
		}
		// Add any comments after `!! format`
		array_splice( $rawTestItems, count( $rawTestItems ), 0, $parsedTests[2] );
		if ( $parsedTests[3] == null ) {
			$this->fileOptions = [];
		} else {
			$this->fileOptions = $parsedTests[3]['text'];
			// If `!!options` was present, existing comments applied to the
			// file options, not the first item.
			$rawTestItems = [];
		}
		// Add the rest of the comments and items appearing after `!!options`
		array_splice( $rawTestItems, count( $rawTestItems ), 0, $parsedTests[4] );

		if ( $testFormat !== null ) {
			if ( isset( $this->fileOptions['version'] ) ) {
				( new Item( $parsedTests[3] ) )->error( 'Duplicate version specification' );
			} else {
				$this->fileOptions['version'] = $testFormat['text'];
			}
		}
		$this->fileOptions['version'] ??= '1';

		// Expand 'requirements' *before* walking the items: the hooks and
		// functionhooks sections append to it with `[]`, which is a fatal
		// error if the file option is still a bare string at that point.
		$this->normalizeRequirements();

		$knownFailures = $this->knownFailuresPath !== null ?
			json_decode( file_get_contents( $knownFailuresPath ), true ) :
			null;

		$testNames = [];
		$articleTitles = [];
		// Comment text accumulated since the last non-comment item; handed
		// to the next article/test and then reset.
		$lastComment = '';
		foreach ( $rawTestItems as $item ) {
			if ( $item['type'] === 'article' ) {
				$art = new Article( $item, $lastComment );
				$key = $normalizeFunc ? $normalizeFunc( $art->title ) : $art->title;
				if ( isset( $articleTitles[$key] ) ) {
					$art->error( 'Duplicate article', $art->title );
				}
				$articleTitles[$key] = true;
				$this->articles[] = $art;
				$lastComment = '';
			} elseif ( $item['type'] === 'test' ) {
				$test = new Test(
					$item,
					$knownFailures[$item['testName']] ?? [],
					$lastComment,
					$warnFunc
				);
				if ( isset( $testNames[$test->testName] ) ) {
					$test->error( 'Duplicate test name', $test->testName );
				}
				$testNames[$test->testName] = true;
				$this->testCases[] = $test;
				$lastComment = '';
			} elseif ( $item['type'] === 'comment' ) {
				$lastComment .= $item['text'];
			} elseif ( $item['type'] === 'hooks' ) {
				$this->addHookRequirements( 'hook', $item['text'] );
				$lastComment = '';
			} elseif ( $item['type'] === 'functionhooks' ) {
				$this->addHookRequirements( 'functionHook', $item['text'] );
				$lastComment = '';
			} elseif ( $item['type'] === 'line' ) {
				// Compare against '' explicitly: empty() also treats the
				// string "0" as empty, which would let a stray "0" line
				// slip through unreported.
				if ( trim( $item['text'] ) !== '' ) {
					( new Item( $item ) )->error( 'Invalid line', $item['text'] );
				}
			} else {
				( new Item( $item ) )->error( 'Unknown item type', $item['type'] );
			}
		}
	}

	/**
	 * Append one requirement of the given type per line of $text.
	 * @param string $type Requirement type ('hook' or 'functionHook')
	 * @param string $text Newline-separated list of names
	 */
	private function addHookRequirements( string $type, string $text ): void {
		foreach ( explode( "\n", $text ) as $line ) {
			$this->fileOptions['requirements'][] = [
				'type' => $type,
				'name' => trim( $line ),
			];
		}
	}

	/**
	 * Convenience function to expand 'requirements': a scalar becomes a
	 * one-element list, and every string entry becomes a 'hook'
	 * requirement record.
	 */
	private function normalizeRequirements(): void {
		if ( !isset( $this->fileOptions['requirements'] ) ) {
			return;
		}
		if ( !is_array( $this->fileOptions['requirements'] ) ) {
			$this->fileOptions['requirements'] = [
				$this->fileOptions['requirements']
			];
		}
		foreach ( $this->fileOptions['requirements'] as &$req ) {
			if ( is_string( $req ) ) {
				$req = [
					'type' => 'hook',
					'name' => "$req",
				];
			}
		}
		// Break the reference so later writes cannot clobber the last entry.
		unset( $req );
	}
}
PK ! 
�e護 � Stats.phpnu �Iw�� <?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\ParserTests;

/**
 * Counters for a parser test run, plus the time the run started.
 */
class Stats {
	/** @var float */
	public $startTime;

	/** @var Stats[] */
	public $modes;

	/** @var int */
	public $passedTests = 0;

	/** @var int */
	public $passedTestsUnexpected = 0;

	/** @var int */
	public $failedTests = 0;

	/** @var int */
	public $failedTestsUnexpected = 0;

	/** @var int */
	public $loggedErrorCount = 0;

	/** @var int */
	public $failures = 0;

	/** @var array Array of elements representing test failures */
	public $failList;

	/** @var string result */
	public $result;

	public function __construct() {
		$this->startTime = microtime( true );
	}

	/**
	 * Recompute and store the failure total: unexpected passes plus
	 * unexpected failures plus logged errors.
	 * @return int The freshly stored total
	 */
	public function allFailures(): int {
		$total = $this->passedTestsUnexpected;
		$total += $this->failedTestsUnexpected;
		$total += $this->loggedErrorCount;
		$this->failures = $total;
		return $total;
	}

	/**
	 * Add every counter of $other onto the matching counter of this object.
	 * @param Stats $other
	 */
	public function accum( Stats $other ): void {
		$counters = [
			'passedTests',
			'passedTestsUnexpected',
			'failedTests',
			'failedTestsUnexpected',
			'loggedErrorCount',
			'failures',
		];
		foreach ( $counters as $counter ) {
			$this->$counter += $other->$counter;
		}
	}
}
PK ! =�dr dr MockApiHelper.phpnu �Iw�� <?php
declare( strict_types = 1 );
// phpcs:disable Generic.Files.LineLength.TooLong

namespace Wikimedia\Parsoid\ParserTests;

use Error;
use Wikimedia\Parsoid\Config\Api\ApiHelper;

/**
 * This class supports the implementation of Parser Tests in a standalone mode
 * and without network access.
 *
 * In standalone mode, the config and data transformations needed by Parsoid
 * cannot come from MediaWiki's database or its core classes.
 *
 * Without network access, we cannot fetch site configs or do data transformations
 * on a remote wiki.
This class supports this by intercepting network requests * and returning mock responses based on cached site configs, hardcoded network * responses and config, * * So, this API helper should be used with the Parsoid\Config\Api* set of config classes * (and any subclasses derived from them). * * A lot of the responses here are tuned to what ParserTests needed. But, presumably * this can be used by PHP Unit tests as long as the specific set of mocked responses * satisfies the needs of those tests. Ideally, this class should NOT be updated for * anything but the needs of running parser tests. * * Alternatively, PHP Unit tests could bypass the Api* classes altogether and use * a (sub)set of mocked classes (Env, SiteConfig, PageConfig, DataAccess) if those * classes and the data they provide satisfies the needs of those tests. */ class MockApiHelper extends ApiHelper { // configuration to match PHP parserTests private const IMAGE_BASE_URL = 'http://example.com/images'; private const IMAGE_DESC_URL = self::IMAGE_BASE_URL; private const FILE_PROPS = [ 'Foobar.jpg' => [ 'size' => 7881, 'width' => 1941, 'height' => 220, 'bits' => 8, 'mime' => 'image/jpeg' ], 'Thumb.png' => [ 'size' => 22589, 'width' => 135, 'height' => 135, 'bits' => 8, 'mime' => 'image/png' ], 'Foobar.svg' => [ 'size' => 12345, 'width' => 240, 'height' => 180, 'bits' => 24, 'mime' => 'image/svg+xml' ], 'Bad.jpg' => [ 'size' => 12345, 'width' => 320, 'height' => 240, 'bits' => 24, 'mime' => 'image/jpeg', ], 'LoremIpsum.djvu' => [ 'size' => 3249, 'width' => 2480, 'height' => 3508, 'bits' => 8, 'mime' => 'image/vnd.djvu', 'mediatype' => 'OFFICE', 'pagecount' => 5, ], 'Video.ogv' => [ 'size' => 12345, 'width' => 320, 'height' => 240, 'bits' => 0, # duration comes from # TimedMediaHandler/tests/phpunit/mocks/MockOggHandler::getLength() 'duration' => 4.3666666666667, 'mime' => 'video/ogg; codecs="theora"', 'mediatype' => 'VIDEO', # hacky way to get seek parameters to return the correct info 'extraParams' => [ 
'seek=1.2' => 'seek%3D1.2', 'seek=85' => 'seek%3D3.3666666666667', # hard limited by duration ], ], 'Transcode.webm' => [ 'size' => 12345, 'width' => 492, 'height' => 360, 'bits' => 0, 'duration' => 4, 'mime' => 'video/webm; codecs="vp8, vorbis"', 'mediatype' => 'VIDEO', 'derivatives' => [ [ 'type' => 'video/webm; codecs="vp9, opus"', 'transcodekey' => '240p.vp9.webm', 'width' => 328, 'height' => 240, ], ], ], 'Audio.oga' => [ 'size' => 12345, 'width' => 0, 'height' => 0, 'bits' => 0, # duration comes from # TimedMediaHandler/tests/phpunit/mocks/MockOggHandler::getLength() 'duration' => 0.99875, 'mime' => 'audio/ogg; codecs="vorbis"', 'mediatype' => 'AUDIO', ], 'Hi-ho.jpg' => [ 'size' => 7881, 'width' => 1941, 'height' => 220, 'bits' => 8, 'mime' => 'image/jpeg' ], ]; private $articleCache = []; private $cachedConfigs = []; private static $MAIN_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 1, 'ns' => 0, 'title' => 'Main Page', 'revisions' => [ [ 'revid' => 1, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => "<strong>MediaWiki has been successfully installed.</strong>\n\nConsult the [//meta.wikimedia.org/wiki/Help:Contents User's Guide] for information on using the wiki software.\n\n== Getting started ==\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:Configuration_settings Configuration settings list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:FAQ MediaWiki FAQ]\n* [https://lists.wikimedia.org/mailman/listinfo/mediawiki-announce MediaWiki release mailing list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Localisation#Translation_resources Localise MediaWiki for your language]" ] ] ] ] ] ] ] ]; // Old response structure, pre-mcr private static $OLD_RESPONSE = [ 'query' => [ 'pages' => [ [ 'pageid' => 999, 'ns' => 0, 'title' => 'Old Response', 'revisions' => [ [ 'revid' => 999, 'parentid' => 0, 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', '*' => 
"<strong>MediaWiki was successfully installed.</strong>\n\nConsult the [//meta.wikimedia.org/wiki/Help:Contents User's Guide] for information on using the wiki software.\n\n== Getting started ==\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:Configuration_settings Configuration settings list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Manual:FAQ MediaWiki FAQ]\n* [https://lists.wikimedia.org/mailman/listinfo/mediawiki-announce MediaWiki release mailing list]\n* [//www.mediawiki.org/wiki/Special:MyLanguage/Localisation#Translation_resources Localise MediaWiki for your language]" ] ] ] ] ] ]; private static $JUNK_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 2, 'ns' => 0, 'title' => 'Junk Page', 'revisions' => [ [ 'revid' => 2, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => '2. This is just some junk. See the comment above.' ] ] ] ] ] ] ] ]; private static $LARGE_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 3, 'ns' => 0, 'title' => 'Large_Page', 'revisions' => [ [ 'revid' => 3, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', /* content will be set separately */ ] ] ] ] ] ] ] ]; private static $REUSE_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 100, 'ns' => 0, 'title' => 'Reuse_Page', 'revisions' => [ [ 'revid' => 100, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => '{{colours of the rainbow}}' ] ] ] ] ] ] ] ]; private static $JSON_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 101, 'ns' => 0, 'title' => 'JSON_Page', 'revisions' => [ [ 'revid' => 101, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'json', 'contentformat' => 'text/json', 'content' => '[1]' ] ] ] ] ] ] ] ]; private static $LINT_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 102, 'ns' => 0, 'title' => 'Lint Page', 'revisions' => [ [ 'revid' => 102, 'parentid' => 0, 
'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => "{|\nhi\n|ho\n|}" ] ] ] ] ] ] ] ]; private static $REDLINKS_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 103, 'ns' => 0, 'title' => 'Redlinks Page', 'revisions' => [ [ 'revid' => 103, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => '[[Special:Version]] [[Doesnotexist]] [[Redirected]]' ] ] ] ] ] ] ] ]; private static $VARIANT_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 104, 'ns' => 0, 'pagelanguage' => 'sr', 'pagelanguagedir' => 'ltr', 'title' => 'Variant Page', 'revisions' => [ [ 'revid' => 104, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => "абвг abcd" ] ] ] ] ] ] ] ]; private static $NOVARIANT_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 105, 'ns' => 0, 'pagelanguage' => 'sr', 'pagelanguagedir' => 'ltr', 'title' => 'No Variant Page', 'revisions' => [ [ 'revid' => 105, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => "абвг abcd\n__NOCONTENTCONVERT__" ] ] ] ] ] ] ] ]; private static $REVISION_PAGE = [ 'query' => [ 'pages' => [ [ 'pageid' => 63, 'ns' => 0, 'title' => 'Revision ID', 'revisions' => [ [ 'revid' => 63, 'parentid' => 0, 'slots' => [ 'main' => [ 'contentmodel' => 'wikitext', 'contentformat' => 'text/x-wiki', 'content' => '{{REVISIONID}}' ] ] ] ] ] ] ] ]; private static $missingTitles = [ 'Doesnotexist' ]; private static $specialTitles = [ 'Special:Version', 'Special:BookSources', 'Special:BookSources/isbn=4-00-026157-6', 'Special:BookSources/0978739256', ]; private static $redirectTitles = [ 'Redirected' ]; private static $disambigTitles = [ 'Disambiguation' ]; private const FNAMES = [ 'Image:Foobar.jpg' => 'Foobar.jpg', 'Datei:Foobar.jpg' => 'Foobar.jpg', 'File:Foobar.jpg' => 'Foobar.jpg', 'Archivo:Foobar.jpg' => 'Foobar.jpg', 
'Mynd:Foobar.jpg' => 'Foobar.jpg', "Датотека:Foobar.jpg" => 'Foobar.jpg', 'Dosiero:Foobar.jpg' => 'Foobar.jpg', 'Image:Foobar.svg' => 'Foobar.svg', 'File:Foobar.svg' => 'Foobar.svg', 'Файл:Foobar.svg' => 'Foobar.svg', 'Datei:Foobar.svg' => 'Foobar.svg', 'Image:Thumb.png' => 'Thumb.png', 'File:Thumb.png' => 'Thumb.png', 'File:LoremIpsum.djvu' => 'LoremIpsum.djvu', 'File:Video.ogv' => 'Video.ogv', 'File:Transcode.webm' => 'Transcode.webm', 'File:Audio.oga' => 'Audio.oga', 'File:Bad.jpg' => 'Bad.jpg', 'File:Hi-ho.jpg' => 'Hi-ho.jpg', ]; private const PNAMES = [ 'Image:Foobar.jpg' => 'File:Foobar.jpg', 'Image:Foobar.svg' => 'File:Foobar.svg', 'Image:Thumb.png' => 'File:Thumb.png' ]; // FIXME: Get this info from pagelanguage of a revision for these pages private const PAGELANGS = [ 'Rupage' => 'ru', 'Depage' => 'de', ]; // File is present in these langs private const FILELANGS = [ 'Foobar.svg' => [ 'en', 'ru' ], ]; // This templatedata description only provides a subset of fields // that mediawiki API returns. Parsoid only uses the format and // paramOrder fields at this point, so keeping these lean. 
private static $templateData = [ 'Template:NoFormatWithParamOrder' => [ 'paramOrder' => [ 'f0', 'f1', 'unused2', 'f2', 'unused3' ] ], 'Template:InlineTplNoParamOrder' => [ 'format' => 'inline' ], 'Template:BlockTplNoParamOrder' => [ 'format' => 'block' ], 'Template:InlineTplWithParamOrder' => [ 'format' => 'inline', 'paramOrder' => [ 'f1', 'f2' ] ], 'Template:BlockTplWithParamOrder' => [ 'format' => 'block', 'paramOrder' => [ 'f1', 'f2' ] ], 'Template:WithParamOrderAndAliases' => [ 'params' => [ 'f1' => [ 'aliases' => [ 'f4', 'f3' ] ] ], 'paramOrder' => [ 'f1', 'f2' ] ], 'Template:InlineFormattedTpl_1' => [ 'format' => '{{_|_=_}}' ], 'Template:InlineFormattedTpl_2' => [ 'format' => "\n{{_ | _ = _}}" ], 'Template:InlineFormattedTpl_3' => [ 'format' => '{{_| _____ = _}}' ], 'Template:BlockFormattedTpl_1' => [ 'format' => "{{_\n| _ = _\n}}" ], 'Template:BlockFormattedTpl_2' => [ 'format' => "\n{{_\n| _ = _\n}}\n" ], 'Template:BlockFormattedTpl_3' => [ 'format' => "{{_|\n _____ = _}}" ] ]; /** @var string wiki prefix for which we are mocking the api access */ private $prefix = 'enwiki'; /** @var callable(string):string A helper to normalize titles. */ private $normalizeTitle = null; public function __construct( ?string $prefix = null, ?callable $normalizeTitleFunc = null ) { $this->prefix = $prefix ?? $this->prefix; $this->normalizeTitle = $normalizeTitleFunc ?? // poor man's normalization ( fn ( $t ) => str_replace( ' ', '_', $t ) ); // PORT-FIXME: Need to get this value // $wtSizeLimit = $parsoidOptions->limits->wt2html->maxWikitextSize; $wtSizeLimit = 1000000; $mainSlot = &self::$LARGE_PAGE['query']['pages'][0]['revisions'][0]['slots']['main']; $mainSlot['content'] = str_repeat( 'a', $wtSizeLimit + 1 ); } /** * Update prefix * @param string $prefix */ public function setApiPrefix( string $prefix ): void { $this->prefix = $prefix; } /** * Register an article defined in parsertests so that we can return * the proper known/missing information about that title. 
* @param string $key The normalized title of the article * @param Article $article The contents of the article * @return callable */ public function addArticle( string $key, Article $article ): callable { $oldVal = $this->articleCache[$key] ?? null; $this->articleCache[$key] = $article; return function () use ( $key, $oldVal ) { $this->articleCache[$key] = $oldVal; }; } public function makeRequest( array $params ): array { switch ( $params['action'] ?? null ) { case 'query': return $this->processQuery( $params ); case 'parse': return $this->parse( $params['text'], !empty( $params['onlypst'] ) ); case 'templatedata': return $this->fetchTemplateData( $params ); case 'expandtemplates': $ret = $this->preProcess( $params['titles'] ?? $params['title'], $params['text'], $params['revid'] ?? null ); if ( $ret ) { $ret += [ 'categories' => [], 'modules' => [], 'modulestyles' => [] ]; } return $ret; default: return []; // FIXME: Maybe some error } } /** * Image scaling computation helper. * * Linker.php in core calls File::transform(...) for each dimension (1x, * 1.5x, 2x) which then scales the image dimensions, using round/ceil/floor * as appropriate to yield integer dimensions. Note that the results * may be unintuitive due to the conversion to integer: eg, a 442px width * image may become 883px in 2x mode. Resist the temptation to "optimize" * this by computing the transformed size once and then scaling that; * always scale the input dimensions instead. * @see ImageHandler::normaliseParams, MediaHandler::fitBoxWidth, * File::scaleHeight, etc, in core. * * Either $twidth or $theight or both will be set when called; both * will be set when this function returns. 
* * @param int $width Original image width * @param int $height Original image height * @param int|float|null &$twidth Thumbnail width (inout parameter) * @param int|float|null &$theight Thumbnail height (inout parameter) */ public static function transformHelper( $width, $height, &$twidth, &$theight ) { if ( $theight === null ) { // File::scaleHeight in PHP $theight = round( $height * $twidth / $width ); } elseif ( $twidth === null || // Match checks in ImageHandler.php::normaliseParams in core ( $twidth * $height > $theight * $width ) ) { // MediaHandler::fitBoxWidth in PHP // This is crazy! $idealWidth = $width * $theight / $height; $roundedUp = ceil( $idealWidth ); if ( round( $roundedUp * $height / $width ) > $theight ) { $twidth = floor( $idealWidth ); } else { $twidth = $roundedUp; } } else { if ( round( $height * $twidth / $width ) > $theight ) { $twidth = ceil( $width * $theight / $height ); } else { $theight = round( $height * $twidth / $width ); } } } /** * @param string $filename * @param ?int $twidth * @param ?int $theight * @param ?string $extraParam optional iiurlparam, used for video/pdf/etc * @param ?string $contexttitle optional iibadfilecontexttitle * @return ?array */ private function imageInfo( string $filename, ?int $twidth, ?int $theight, ?string $extraParam, ?string $contexttitle ): ?array { $normPageName = self::PNAMES[$filename] ?? $filename; $normFileName = self::FNAMES[$filename] ?? $filename; $props = self::FILE_PROPS[$normFileName] ?? null; if ( $props === null ) { // We don't have info for this file return null; } $md5 = md5( $normFileName ); $md5prefix = $md5[0] . '/' . $md5[0] . $md5[1] . '/'; $baseurl = self::IMAGE_BASE_URL . '/' . $md5prefix . $normFileName; $height = $props['height']; $width = $props['width']; $turl = self::IMAGE_BASE_URL . '/thumb/' . $md5prefix . $normFileName; $durl = self::IMAGE_DESC_URL . '/' . $normFileName; $mediatype = $props['mediatype'] ?? ( $props['mime'] === 'image/svg+xml' ? 
'DRAWING' : 'BITMAP' ); $info = [ 'size' => $props['size'], 'height' => $height, 'width' => $width, 'url' => $baseurl, 'descriptionurl' => $durl, 'mediatype' => $mediatype, 'mime' => $props['mime'] ]; if ( isset( $props['duration'] ) ) { $info['duration'] = $props['duration']; } if ( isset( $props['pagecount'] ) ) { $info['pagecount'] = $props['pagecount']; } if ( ( $mediatype === 'VIDEO' || $mediatype === 'DRAWING' ) && !$twidth && !$theight ) { $twidth = $width; $theight = $height; } preg_match( '/^lang([a-z]+(?:-[a-z]+)*)-(\d+)px$/i', $extraParam ?? '', $matches ); $lang = $matches[1] ?? null; $pagelang = self::PAGELANGS[$contexttitle] ?? 'en'; $filelangs = self::FILELANGS[$normFileName] ?? [ 'en' ]; // Set $lang based on the targetlang, if the file is present in that lang if ( $lang === null && $mediatype === 'DRAWING' && $pagelang !== 'en' && in_array( $pagelang, $filelangs, true ) ) { $lang = $pagelang; $extraParam = "lang{$lang}-{$twidth}px"; } if ( $theight || $twidth ) { // Save $twidth and $theight $origThumbHeight = $theight; $origThumbWidth = $twidth; // Set $twidth and $theight self::transformHelper( $width, $height, $twidth, $theight ); $urlWidth = $twidth; if ( $twidth > $width ) { // The PHP api won't enlarge a bitmap ... but the batch api will. // But, to match the PHP sections, don't scale. if ( $mediatype !== 'DRAWING' ) { $urlWidth = $width; } } $thumbBaseUrl = $turl; $page = null; if ( $urlWidth !== $width || $mediatype === 'AUDIO' || $mediatype === 'VIDEO' || $mediatype === 'OFFICE' || $mediatype === 'DRAWING' ) { $turl .= '/'; if ( preg_match( '/^page(\d+)-(\d+)px$/', $extraParam ?? '', $matches ) ) { $turl .= $extraParam; $page = (int)$matches[1]; } elseif ( $mediatype === 'OFFICE' ) { $turl .= 'page1-' . $urlWidth . 'px'; $page = 1; } elseif ( $lang !== null ) { // Explicit English just gets the default path if ( $lang === 'en' ) { $turl .= $urlWidth . 'px'; $lang = null; } else { $turl .= $extraParam; } } else { $turl .= $urlWidth . 
'px'; } $turl .= '-'; if ( $mediatype === 'VIDEO' ) { // Hack in a 'seek' option, if provided (T258767) if ( str_starts_with( $extraParam ?? '', 'seek' ) ) { $turl .= $props['extraParams'][$extraParam] ?? ''; } $turl .= '-'; } $turl .= $normFileName; switch ( $mediatype ) { case 'AUDIO': // No thumbs are generated for audio $turl = self::IMAGE_BASE_URL . '/w/resources/assets/file-type-icons/fileicon-ogg.png'; break; case 'VIDEO': case 'OFFICE': $turl .= '.jpg'; break; case 'DRAWING': $turl .= '.png'; break; } } else { $turl = $baseurl; } $info['thumbwidth'] = $twidth; $info['thumbheight'] = $theight; $info['thumburl'] = $turl; // src set info; added to core API result as part of T226683 // See Linker.php::processResponsiveImages() in core foreach ( [ 1.5, 2 ] as $scale ) { $stwidth = $stheight = null; if ( $origThumbWidth !== null ) { $stwidth = round( $origThumbWidth * $scale ); } if ( $origThumbHeight !== null ) { $stheight = round( $origThumbHeight * $scale ); } self::transformHelper( $width, $height, $stwidth, $stheight ); $turl = $baseurl; if ( $stwidth < $width || $mediatype === 'DRAWING' || $mediatype === 'OFFICE' ) { $turl = $thumbBaseUrl . '/'; if ( $page !== null ) { $turl .= "page{$page}-"; } if ( $lang !== null ) { $turl .= "lang{$lang}-"; } $turl .= $stwidth . 'px-' . $normFileName; if ( $mediatype === 'VIDEO' || $mediatype === 'OFFICE' ) { $turl .= '.jpg'; } elseif ( $mediatype === 'DRAWING' ) { $turl .= '.png'; } } if ( $info['thumburl'] !== $turl && $mediatype !== 'AUDIO' ) { $info['responsiveUrls']["$scale"] = $turl; } } } if ( isset( $props['derivatives'] ) ) { $info['derivatives'] = [ [ 'src' => $info['url'], 'type' => $info['mime'], 'width' => strval( $info['width'] ), 'height' => strval( $info['height'] ), ], ]; foreach ( $props['derivatives'] as $derivative ) { $info['derivatives'][] = [ 'src' => self::IMAGE_BASE_URL . '/transcoded/' . $md5prefix . $normFileName . '/' . $normFileName . '.' . 
$derivative['transcodekey'], 'type' => $derivative['type'], 'transcodekey' => $derivative['transcodekey'], 'width' => strval( $derivative['width'] ), 'height' => strval( $derivative['height'] ), ]; } } return [ 'result' => $info, 'normPageName' => $normPageName ]; } private const TRACKING_CATEGORIES = [ 'broken-file-category' => 'Pages with broken file links', 'magiclink-tracking-rfc' => 'Pages using RFC magic links', 'magiclink-tracking-isbn' => 'Pages using ISBN magic links', 'magiclink-tracking-pmid' => 'Pages using PMID magic links', ]; private function processQuery( array $params ): array { if ( ( $params['meta'] ?? null ) === 'siteinfo' ) { if ( !isset( $this->cachedConfigs[$this->prefix] ) ) { $this->cachedConfigs[$this->prefix] = json_decode( file_get_contents( __DIR__ . "/../../baseconfig/$this->prefix.json" ), true ); } return $this->cachedConfigs[$this->prefix]; } if ( ( $params['meta'] ?? null ) === 'allmessages' ) { $allmessages = []; if ( isset( self::TRACKING_CATEGORIES[$params['ammessages']] ) ) { $allmessages[] = [ 'content' => self::TRACKING_CATEGORIES[$params['ammessages']] ]; } else { $allmessages[] = [ 'missing' => true ]; } return [ 'query' => [ 'allmessages' => $allmessages ] ]; } $revid = $params['revids'] ?? null; if ( ( $params['prop'] ?? 
null ) === 'revisions' ) { if ( $revid === '1' || $params['titles'] === 'Main_Page' ) { return self::$MAIN_PAGE; } elseif ( $revid === '2' || $params['titles'] === 'Junk_Page' ) { return self::$JUNK_PAGE; } elseif ( $revid === '3' || $params['titles'] === 'Large_Page' ) { return self::$LARGE_PAGE; } elseif ( $revid === '63' || $params['titles'] === 'Revision_ID' ) { return self::$REVISION_PAGE; } elseif ( $revid === '100' || $params['titles'] === 'Reuse_Page' ) { return self::$REUSE_PAGE; } elseif ( $revid === '101' || $params['titles'] === 'JSON_Page' ) { return self::$JSON_PAGE; } elseif ( $revid === '102' || $params['titles'] === 'Lint_Page' ) { return self::$LINT_PAGE; } elseif ( $revid === '103' || $params['titles'] === 'Redlinks_Page' ) { return self::$REDLINKS_PAGE; } elseif ( $revid === '104' || $params['titles'] === 'Variant_Page' ) { return self::$VARIANT_PAGE; } elseif ( $revid === '105' || $params['titles'] === 'No_Variant_Page' ) { return self::$NOVARIANT_PAGE; } elseif ( $revid === '999' || $params['titles'] === 'Old_Response' ) { return self::$OLD_RESPONSE; } else { return [ 'query' => [ 'pages' => [ [ 'ns' => 6, 'title' => json_encode( $params['titles'] ), 'missing' => true, 'imagerepository' => true ] ] ] ]; } } if ( ( $params['prop'] ?? 
null ) === 'info' ) { $ret = []; $titles = preg_split( '/\|/', $params['titles'] ); foreach ( $titles as $t ) { $props = [ 'title' => $t ]; $normalizeTitle = $this->normalizeTitle; $key = $normalizeTitle( $t ); $definedInPt = isset( $this->articleCache[$key] ); if ( in_array( $t, self::$missingTitles, true ) || !$definedInPt ) { $props['missing'] = true; } if ( in_array( $t, self::$specialTitles, true ) ) { $props['special'] = true; $props['missing'] = false; } if ( in_array( $t, self::$redirectTitles, true ) ) { $props['redirect'] = true; $props['missing'] = false; } if ( in_array( $t, self::$disambigTitles, true ) ) { $props['linkclasses'] = [ 'mw-disambig' ]; $props['missing'] = false; } $ret[] = $props; } return [ 'query' => [ 'pages' => $ret ] ]; } if ( ( $params['prop'] ?? null ) === 'imageinfo' ) { $response = [ 'query' => [] ]; $filename = $params['titles']; // assumes this is a single file $tonum = static function ( $x ) { return $x ? (int)$x : null; }; $ii = self::imageInfo( $filename, isset( $params['iiurlwidth'] ) ? $tonum( $params['iiurlwidth'] ) : null, isset( $params['iiurlheight'] ) ? $tonum( $params['iiurlheight'] ) : null, $params['iiurlparam'] ?? null, $params['iibadfilecontexttitle'] ?? null ); if ( $ii === null ) { $p = [ 'ns' => 6, 'title' => $filename, 'imagerepository' => true, 'imageinfo' => [ [ 'size' => 0, 'width' => 0, 'height' => 0, 'filemissing' => true, 'mime' => null, 'mediatype' => null ] ] ]; $p['missing'] = $p['imageinfo']['filemissing'] = true; $p['badfile'] = false; } else { if ( $filename !== $ii['normPageName'] ) { $response['query']['normalized'] = [ [ 'from' => $filename, 'to' => $ii['normPageName'] ] ]; } $p = [ 'pageid' => 1, 'ns' => 6, 'title' => $ii['normPageName'], 'imageinfo' => [ $ii['result'] ] ]; $p['badfile'] = ( $filename === 'File:Bad.jpg' ); } $response['query']['pages'] = [ $p ]; return $response; } return [ "error" => new Error( 'Uh oh!' 
) ]; } private function parse( string $text, bool $onlypst ): array { // We're performing a subst if ( $onlypst ) { return [ 'text' => preg_replace( '/\{\{subst:1x\|([^}]+)\}\}/', '$1', $text, 1 ) ]; } $res = null; // Render to html the contents of known extension tags // These are the only known extensions (besides native extensions) // used in parser tests currently. This would need to be updated // as more templates are added OR we need to rely on true parsing. preg_match( '#<([A-Za-z][^\t\n\v />\0]*)#', $text, $match ); switch ( $match[1] ?? '' ) { // FIXME: this isn't really used by the mocha tests // since some mocha tests hit the production db, but // when we fix that, they should go through this. case 'templatestyles': $res = "<style data-mw-deduplicate='TemplateStyles:r123456'>small { font-size: 120% } big { font-size: 80% }</style>"; // Silliness break; case 'translate': $res = $text; break; case 'indicator': case 'section': $res = ""; break; default: throw new Error( 'Unhandled extension type encountered in: ' . $text ); } $parse = [ 'text' => $res, 'categories' => [], 'modules' => [], 'modulestyles' => [] ]; return [ 'parse' => $parse ]; } private function preProcess( string $title, string $text, ?int $revid ): ?array { // These are the only known templates in current parser tests. // This would need to be updated as more templates are added OR we need // to rely on true (instead of mock) preprocessing. preg_match( '/{{1x\|(.*?)}}/', $text, $match ); if ( $match ) { return [ 'wikitext' => $match[1] ]; } elseif ( $text === '{{colours of the rainbow}}' ) { return [ 'wikitext' => 'purple' ]; } elseif ( $text === '{{REVISIONID}}' ) { return [ 'wikitext' => (string)$revid ]; } else { error_log( "UNKNOWN TEMPLATE: $text for $title\n" ); return null; } } private function fetchTemplateData( array $params ): array { return [ // Assumes that titles is a single title // (which is how Parsoid uses this) 'pages' => [ '1' => self::$templateData[$params['titles'] ?? 
''] ?? [] ] ]; } } PK ! $S\K� � ParserHook.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Closure; use Error; use Wikimedia\Parsoid\DOM\DocumentFragment; use Wikimedia\Parsoid\DOM\Element; use Wikimedia\Parsoid\Ext\ExtensionModule; use Wikimedia\Parsoid\Ext\ExtensionTagHandler; use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; use Wikimedia\Parsoid\Utils\DOMCompat; use Wikimedia\Parsoid\Utils\DOMDataUtils; use Wikimedia\Parsoid\Utils\WTUtils; /** * See tests/parser/ParserTestParserHook.php in core. */ class ParserHook extends ExtensionTagHandler implements ExtensionModule { /** @inheritDoc */ public function sourceToDom( ParsoidExtensionAPI $extApi, string $content, array $args ): DocumentFragment { $extName = $extApi->extTag->getName(); if ( $extApi->extTag->isSelfClosed() ) { $content = null; } switch ( $extName ) { case 'tag': case 'tåg': return $extApi->htmlToDom( "<pre>\n" . var_export( $content, true ) . "\n" . var_export( $extApi->extArgsToArray( $args ), true ) . "\n" . "</pre>" ); case 'statictag': // FIXME: Choose a better DOM representation that doesn't mess with // newline constraints. return $extApi->htmlToDom( '<span />' ); case 'asidetag': // T278565 return $extApi->htmlToDom( '<aside>Some aside content</aside>' ); case 'pwraptest': return $extApi->htmlToDom( '<!--CMT--><style>p{}</style>' ); case 'spantag': // "Transparent" tag which wraps wikitext in a <span>; // useful in testing various parsoid wrapping scenarios // (we used to use <ref> for this) // NOTE: This tag disables p-wrapping and indent-pre transforms. 
return $extApi->extTagToDOM( $args, $content, [ 'wrapperTag' => 'span', 'parseOpts' => [ 'extTag' => $extName, 'context' => 'inline', ], ] ); default: throw new Error( "Unexpected tag name: $extName in ParserHook" ); } } /** @inheritDoc */ public function processAttributeEmbeddedHTML( ParsoidExtensionAPI $extApi, Element $elt, Closure $proc ): void { $dataMw = DOMDataUtils::getDataMw( $elt ); if ( isset( $dataMw->body->html ) ) { $dataMw->body->html = $proc( $dataMw->body->html ); } } /** @inheritDoc */ public function domToWikitext( ParsoidExtensionAPI $extApi, Element $node, bool $wrapperUnmodified ) { $dataMw = DOMDataUtils::getDataMw( $node ); $extName = WTUtils::getExtTagName( $node ) ?? $dataMw->name; if ( $extName !== 'spantag' ) { return false; // use default serialization } $html2wtOpts = [ 'extName' => $extName, // FIXME: One-off PHP parser state leak. This needs a better solution. 'inPHPBlock' => true ]; $src = ''; if ( $wrapperUnmodified && isset( $dataMw->body->extsrc ) ) { $src = $dataMw->body->extsrc; } elseif ( isset( $dataMw->body->html ) ) { // First look for the extension's content in data-mw.body.html $src = $extApi->htmlToWikitext( $html2wtOpts, $dataMw->body->html ); } else { $src = $extApi->htmlToWikitext( $html2wtOpts, DOMCompat::getInnerHTML( $node ) ); } return "<$extName>" . $src . "</$extName>"; } /** @inheritDoc */ public function getConfig(): array { return [ 'name' => 'ParserHook', 'tags' => [ [ 'name' => 'tag', 'handler' => self::class ], [ 'name' => 'tåg', 'handler' => self::class ], [ 'name' => 'statictag', 'handler' => self::class ], [ 'name' => 'asidetag', 'handler' => self::class ], [ 'name' => 'pwraptest', 'handler' => self::class ], [ 'name' => 'spantag', 'handler' => self::class, 'options' => [ 'wt2html' => [ 'embedsHTMLInAttributes' => true, ], 'outputHasCoreMwDomSpecMarkup' => true, ], ], ], 'domProcessors' => [ ParserHookProcessor::class ] ]; } } PK ! 
�(�� � DummyAnnotation.phpnu �Iw�� <?php declare( strict_types = 1 ); namespace Wikimedia\Parsoid\ParserTests; use Wikimedia\Parsoid\Ext\ExtensionModule; use Wikimedia\Parsoid\Ext\ExtensionTagHandler; /** * Dummy annotation to test the annotation mechanisms outside of any extension-specific * considerations. */ class DummyAnnotation extends ExtensionTagHandler implements ExtensionModule { /** @inheritDoc */ public function getConfig(): array { return [ 'name' => 'DummyAnnotation', // If these are not the same length as "translate" and "tvar" // respectively, it requires adjusting wtOffsets in the (large) test file. 'annotations' => [ 'dummyanno', 'ann2' ] ]; } } PK ! ���C"