Файловый менеджер - Редактировать - /var/www/html/mediawiki-1.43.1/extensions/AbuseFilter/includes/Parser/FilterEvaluator.php
Назад
<?php namespace MediaWiki\Extension\AbuseFilter\Parser; use Exception; use InvalidArgumentException; use MediaWiki\Extension\AbuseFilter\KeywordsManager; use MediaWiki\Extension\AbuseFilter\Parser\Exception\ConditionLimitException; use MediaWiki\Extension\AbuseFilter\Parser\Exception\ExceptionBase; use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException; use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException; use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleWarning; use MediaWiki\Extension\AbuseFilter\Variables\VariableHolder; use MediaWiki\Extension\AbuseFilter\Variables\VariablesManager; use MediaWiki\Language\Language; use MediaWiki\Parser\Sanitizer; use Psr\Log\LoggerInterface; use Wikimedia\Equivset\Equivset; use Wikimedia\IPUtils; use Wikimedia\ObjectCache\BagOStuff; use Wikimedia\Stats\IBufferingStatsdDataFactory; /** * This class evaluates an AST generated by the filter parser. * * @todo Override checkSyntax and make it only try to build the AST. That would mean faster results, * and no need to mess with DUNDEFINED and the like. However, we must first try to reduce the * amount of runtime-only exceptions, and try to detect them in the AFPTreeParser instead. * Otherwise, people may be able to save a broken filter without the syntax check reporting that. 
*/ class FilterEvaluator { private const CACHE_VERSION = 1; public const FUNCTIONS = [ 'lcase' => 'funcLc', 'ucase' => 'funcUc', 'length' => 'funcLen', 'string' => 'castString', 'int' => 'castInt', 'float' => 'castFloat', 'bool' => 'castBool', 'norm' => 'funcNorm', 'ccnorm' => 'funcCCNorm', 'ccnorm_contains_any' => 'funcCCNormContainsAny', 'ccnorm_contains_all' => 'funcCCNormContainsAll', 'specialratio' => 'funcSpecialRatio', 'rmspecials' => 'funcRMSpecials', 'rmdoubles' => 'funcRMDoubles', 'rmwhitespace' => 'funcRMWhitespace', 'count' => 'funcCount', 'rcount' => 'funcRCount', 'get_matches' => 'funcGetMatches', 'ip_in_range' => 'funcIPInRange', 'ip_in_ranges' => 'funcIPInRanges', 'contains_any' => 'funcContainsAny', 'contains_all' => 'funcContainsAll', 'equals_to_any' => 'funcEqualsToAny', 'substr' => 'funcSubstr', 'strlen' => 'funcLen', 'strpos' => 'funcStrPos', 'str_replace' => 'funcStrReplace', 'str_replace_regexp' => 'funcStrReplaceRegexp', 'rescape' => 'funcStrRegexEscape', 'set' => 'funcSetVar', 'set_var' => 'funcSetVar', 'sanitize' => 'funcSanitize', ]; /** * The minimum and maximum amount of arguments required by each function. 
* @var int[][] */ public const FUNC_ARG_COUNT = [ 'lcase' => [ 1, 1 ], 'ucase' => [ 1, 1 ], 'length' => [ 1, 1 ], 'string' => [ 1, 1 ], 'int' => [ 1, 1 ], 'float' => [ 1, 1 ], 'bool' => [ 1, 1 ], 'norm' => [ 1, 1 ], 'ccnorm' => [ 1, 1 ], 'ccnorm_contains_any' => [ 2, INF ], 'ccnorm_contains_all' => [ 2, INF ], 'specialratio' => [ 1, 1 ], 'rmspecials' => [ 1, 1 ], 'rmdoubles' => [ 1, 1 ], 'rmwhitespace' => [ 1, 1 ], 'count' => [ 1, 2 ], 'rcount' => [ 1, 2 ], 'get_matches' => [ 2, 2 ], 'ip_in_range' => [ 2, 2 ], 'ip_in_ranges' => [ 2, INF ], 'contains_any' => [ 2, INF ], 'contains_all' => [ 2, INF ], 'equals_to_any' => [ 2, INF ], 'substr' => [ 2, 3 ], 'strlen' => [ 1, 1 ], 'strpos' => [ 2, 3 ], 'str_replace' => [ 3, 3 ], 'str_replace_regexp' => [ 3, 3 ], 'rescape' => [ 1, 1 ], 'set' => [ 2, 2 ], 'set_var' => [ 2, 2 ], 'sanitize' => [ 1, 1 ], ]; // Functions that affect parser state, and shouldn't be cached. private const ACTIVE_FUNCTIONS = [ 'funcSetVar', ]; public const KEYWORDS = [ 'in' => 'keywordIn', 'like' => 'keywordLike', 'matches' => 'keywordLike', 'contains' => 'keywordContains', 'rlike' => 'keywordRegex', 'irlike' => 'keywordRegexInsensitive', 'regex' => 'keywordRegex', ]; /** * @var bool Are we allowed to use short-circuit evaluation? */ private $mAllowShort; /** * @var VariableHolder */ private $mVariables; /** * @var int The current amount of conditions being consumed */ private $mCondCount; /** * @var bool Whether the condition limit is enabled. */ private $condLimitEnabled = true; /** * @var string|null The ID of the filter being parsed, if available. 
Can also be "global-$ID" */ private $mFilter; /** * @var bool Whether we can allow retrieving _builtin_ variables not included in $this->mVariables */ private $allowMissingVariables = false; /** * @var BagOStuff Used to cache the AST and the tokens */ private $cache; /** * @var bool Whether the AST was retrieved from cache */ private $fromCache = false; /** * @var LoggerInterface Used for debugging */ private $logger; /** * @var Language Content language, used for language-dependent functions */ private $contLang; /** * @var IBufferingStatsdDataFactory */ private $statsd; /** @var KeywordsManager */ private $keywordsManager; /** @var VariablesManager */ private $varManager; /** @var int */ private $conditionsLimit; /** @var UserVisibleWarning[] */ private $warnings = []; /** * @var array Cached results of functions */ private $funcCache = []; /** * @var Equivset */ private $equivset; /** * @var array AFPToken::TID values found during node evaluation */ private $usedVars = []; /** * Create a new instance * * @param Language $contLang Content language, used for language-dependent function * @param BagOStuff $cache Used to cache the AST and the tokens * @param LoggerInterface $logger Used for debugging * @param KeywordsManager $keywordsManager * @param VariablesManager $varManager * @param IBufferingStatsdDataFactory $statsdDataFactory * @param Equivset $equivset * @param int $conditionsLimit * @param VariableHolder|null $vars */ public function __construct( Language $contLang, BagOStuff $cache, LoggerInterface $logger, KeywordsManager $keywordsManager, VariablesManager $varManager, IBufferingStatsdDataFactory $statsdDataFactory, Equivset $equivset, int $conditionsLimit, ?VariableHolder $vars = null ) { $this->contLang = $contLang; $this->cache = $cache; $this->logger = $logger; $this->statsd = $statsdDataFactory; $this->keywordsManager = $keywordsManager; $this->varManager = $varManager; $this->equivset = $equivset; $this->conditionsLimit = $conditionsLimit; 
$this->resetState();
		if ( $vars ) {
			$this->mVariables = $vars;
		}
	}

	/**
	 * Turn enforcement of the condition limit on or off.
	 * For use in batch scripts and the like.
	 *
	 * @param bool $enable True to enable the limit, false to disable it
	 */
	public function toggleConditionLimit( $enable ) {
		$this->condLimitEnabled = $enable;
	}

	/**
	 * Record one more consumed condition.
	 *
	 * @throws ConditionLimitException If the limit is enabled and the counter now exceeds it
	 */
	private function raiseCondCount() {
		$this->mCondCount++;

		if ( $this->condLimitEnabled && $this->mCondCount > $this->conditionsLimit ) {
			throw new ConditionLimitException();
		}
	}

	/**
	 * Set the variable holder that subsequent evaluations read from and write to.
	 *
	 * @param VariableHolder $vars
	 */
	public function setVariables( VariableHolder $vars ) {
		$this->mVariables = $vars;
	}

	/**
	 * Return the generated version of the parser for cache invalidation
	 * purposes. Automatically tracks list of all functions and invalidates the
	 * cache if it is changed.
	 *
	 * The value is memoized in a static local, so the hash is computed at most once.
	 *
	 * @return string SHA-256 hex digest of the version constants and the function/keyword names
	 */
	private static function getCacheVersion() {
		static $version = null;
		if ( $version !== null ) {
			return $version;
		}

		$versionKey = [
			self::CACHE_VERSION,
			AFPTreeParser::CACHE_VERSION,
			AbuseFilterTokenizer::CACHE_VERSION,
			SyntaxChecker::CACHE_VERSION,
			array_keys( self::FUNCTIONS ),
			array_keys( self::KEYWORDS ),
		];
		$version = hash( 'sha256', serialize( $versionKey ) );

		return $version;
	}

	/**
	 * Resets the state of the parser: fresh variable holder, zeroed condition counter,
	 * short-circuit evaluation allowed, no filter ID, no warnings and no recorded variables.
	 */
	private function resetState() {
		$this->mVariables = new VariableHolder();
		$this->mCondCount = 0;
		$this->mAllowShort = true;
		$this->mFilter = null;
		$this->warnings = [];
		$this->usedVars = [];
	}

	/**
	 * Check the syntax of $filter, throwing an exception if invalid.
	 *
	 * The filter is evaluated with short-circuiting disabled and with missing variables
	 * allowed; both settings are undone in the `finally` block even if evaluation throws.
	 *
	 * @param string $filter
	 * @return true When successful
	 * @throws UserVisibleException
	 */
	public function checkSyntaxThrow( string $filter ): bool {
		$this->allowMissingVariables = true;
		$origAS = $this->mAllowShort;
		try {
			$this->mAllowShort = false;
			$this->evalTree( $this->getTree( $filter ) );
		} finally {
			$this->mAllowShort = $origAS;
			$this->allowMissingVariables = false;
		}

		return true;
	}

	/**
	 * Check the syntax of $filter, without throwing
	 *
	 * @param string $filter
	 * @return ParserStatus Carries the caught exception (or null), the accumulated warnings and
	 *  the number of conditions consumed by this call
	 */
	public function checkSyntax( string $filter ): ParserStatus {
		$initialConds = $this->mCondCount;

		try {
			$this->checkSyntaxThrow( $filter );
		} catch ( UserVisibleException $excep ) {
			// Deliberately empty: the exception is reported through the returned status.
		}

		return new ParserStatus(
			$excep ?? null,
			$this->warnings,
			$this->mCondCount - $initialConds
		);
	}

	/**
	 * This is the main entry point. It checks the given conditions and returns whether
	 * they match. Parser errors are always logged.
	 *
	 * Any ExceptionBase raised while parsing is caught: the result is then treated as
	 * "no match" and the exception is attached to the returned status.
	 *
	 * @param string $conds
	 * @param string|null $filter The ID of the filter being parsed
	 * @return RuleCheckerStatus
	 */
	public function checkConditions( string $conds, $filter = null ): RuleCheckerStatus {
		$this->mFilter = $filter;
		$excep = null;
		$initialConds = $this->mCondCount;
		$startTime = microtime( true );

		try {
			$res = $this->parse( $conds );
		} catch ( ExceptionBase $excep ) {
			$res = false;
		}

		$this->statsd->timing( 'abusefilter_cachingParser_full', microtime( true ) - $startTime );

		$result = new RuleCheckerStatus(
			$res,
			$this->fromCache,
			$excep,
			$this->warnings,
			$this->mCondCount - $initialConds
		);

		if ( $excep !== null ) {
			if ( $excep instanceof UserVisibleException ) {
				$msg = $excep->getMessageForLogs();
			} else {
				$msg = $excep->getMessage();
			}

			$this->logger->warning(
				"AbuseFilter parser error: {parser_error}",
				[
					'parser_error' => $msg,
					'broken_filter' => $filter ?: 'none'
				]
			);
		}

		return $result;
	}

	/**
	 * Evaluate the given code and convert the result to a boolean.
	 * A DUNDEFINED result counts as false.
	 *
	 * @param string $code
	 * @return bool
	 */
	public function parse( $code ) {
		$res = $this->evalTree( $this->getTree( $code ) );

		return $res->getType() === AFPData::DUNDEFINED
			? false
			: $res->toBool();
	}

	/**
	 * Evaluate the given code and return the result converted to a native PHP value.
	 *
	 * @param string $filter
	 * @return mixed
	 */
	public function evaluateExpression( $filter ) {
		return $this->evalTree( $this->getTree( $filter ) )->toNative();
	}

	/**
	 * Get the AST for the given code, going through the cache.
	 *
	 * The cache key combines the class name, the parser cache version and a SHA-256 of the
	 * code. $this->fromCache is set to true up front and flipped back to false only if the
	 * callback runs, i.e. if the tree actually had to be built.
	 *
	 * @param string $code
	 * @return AFPSyntaxTree
	 */
	private function getTree( $code ): AFPSyntaxTree {
		$this->fromCache = true;

		return $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey(
				__CLASS__,
				self::getCacheVersion(),
				hash( 'sha256', $code )
			),
			BagOStuff::TTL_DAY,
			function () use ( $code ) {
				$this->fromCache = false;

				$tokenizer = new AbuseFilterTokenizer( $this->cache );
				$tokens = $tokenizer->getTokens( $code );

				$parser = new AFPTreeParser( $this->logger, $this->statsd, $this->keywordsManager );
				$parser->setFilter( $this->mFilter );
				$tree = $parser->parse( $tokens );

				$checker = new SyntaxChecker(
					$tree,
					$this->keywordsManager,
					SyntaxChecker::MCONSERVATIVE,
					false
				);
				$checker->start();

				return $tree;
			}
		);
	}

	/**
	 * Evaluate a whole syntax tree, starting from its root node.
	 * A tree without a root evaluates to DNULL (and no timing is recorded for it).
	 *
	 * @param AFPSyntaxTree $tree
	 * @return AFPData
	 */
	private function evalTree( AFPSyntaxTree $tree ): AFPData {
		$startTime = microtime( true );
		$root = $tree->getRoot();

		if ( !$root ) {
			return new AFPData( AFPData::DNULL );
		}

		$ret = $this->evalNode( $root );

		$this->statsd->timing( 'abusefilter_cachingParser_eval', microtime( true ) - $startTime );

		return $ret;
	}

	/**
	 * Parse a filter and return the variables used.
	 * All variables are AFPToken::TID and are found during the node stepthrough in evaluation
	 * and saved to self::usedVars to be returned to the caller in this function.
	 *
	 * @param string $filter
	 * @return string[] Unique variable names used by $filter, as a plain list
	 */
	public function getUsedVars( string $filter ): array {
		// Start from a clean slate: $this->usedVars is otherwise only cleared by resetState(),
		// so names recorded by earlier evaluations on this instance would leak into the result.
		$this->usedVars = [];

		$this->checkSyntax( $filter );

		// array_unique() preserves the original keys; reindex so that callers get a list.
		return array_values( array_unique( $this->usedVars ) );
	}

	/**
	 * Evaluate the value of the specified AST node.
	 *
	 * @param AFPTreeNode $node The node to evaluate.
* @return AFPData|AFPTreeNode|string * @throws ExceptionBase * @throws UserVisibleException */ private function evalNode( AFPTreeNode $node ) { switch ( $node->type ) { case AFPTreeNode::ATOM: $tok = $node->children; switch ( $tok->type ) { case AFPToken::TID: return $this->getVarValue( strtolower( $tok->value ) ); case AFPToken::TSTRING: return new AFPData( AFPData::DSTRING, $tok->value ); case AFPToken::TFLOAT: return new AFPData( AFPData::DFLOAT, $tok->value ); case AFPToken::TINT: return new AFPData( AFPData::DINT, $tok->value ); /** @noinspection PhpMissingBreakStatementInspection */ case AFPToken::TKEYWORD: switch ( $tok->value ) { case "true": return new AFPData( AFPData::DBOOL, true ); case "false": return new AFPData( AFPData::DBOOL, false ); case "null": return new AFPData( AFPData::DNULL ); } // Fallthrough intended default: // @codeCoverageIgnoreStart throw new InternalException( "Unknown token provided in the ATOM node" ); // @codeCoverageIgnoreEnd } // Unreachable line case AFPTreeNode::ARRAY_DEFINITION: $items = []; // Foreach is usually faster than array_map // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here foreach ( $node->children as $el ) { $items[] = $this->evalNode( $el ); } return new AFPData( AFPData::DARRAY, $items ); case AFPTreeNode::FUNCTION_CALL: $functionName = $node->children[0]; $args = array_slice( $node->children, 1 ); $dataArgs = []; // Foreach is usually faster than array_map foreach ( $args as $arg ) { $dataArgs[] = $this->evalNode( $arg ); } return $this->callFunc( $functionName, $dataArgs, $node->position ); case AFPTreeNode::ARRAY_INDEX: [ $array, $offset ] = $node->children; $array = $this->evalNode( $array ); // Note: we MUST evaluate the offset to ensure it is valid, regardless // of $array! $offset = $this->evalNode( $offset ); // @todo If $array has no elements we could already throw an outofbounds. We don't // know what the index is, though. 
if ( $offset->getType() === AFPData::DUNDEFINED ) { return new AFPData( AFPData::DUNDEFINED ); } $offset = $offset->toInt(); if ( $array->getType() === AFPData::DUNDEFINED ) { return new AFPData( AFPData::DUNDEFINED ); } if ( $array->getType() !== AFPData::DARRAY ) { throw new UserVisibleException( 'notarray', $node->position, [] ); } $array = $array->toArray(); if ( count( $array ) <= $offset ) { throw new UserVisibleException( 'outofbounds', $node->position, [ $offset, count( $array ) ] ); } elseif ( $offset < 0 ) { throw new UserVisibleException( 'negativeindex', $node->position, [ $offset ] ); } return $array[$offset]; case AFPTreeNode::UNARY: [ $operation, $argument ] = $node->children; $argument = $this->evalNode( $argument ); if ( $operation === '-' ) { return $argument->unaryMinus(); } return $argument; case AFPTreeNode::KEYWORD_OPERATOR: [ $keyword, $leftOperand, $rightOperand ] = $node->children; $leftOperand = $this->evalNode( $leftOperand ); $rightOperand = $this->evalNode( $rightOperand ); return $this->callKeyword( $keyword, $leftOperand, $rightOperand, $node->position ); case AFPTreeNode::BOOL_INVERT: [ $argument ] = $node->children; $argument = $this->evalNode( $argument ); return $argument->boolInvert(); case AFPTreeNode::POW: [ $base, $exponent ] = $node->children; $base = $this->evalNode( $base ); $exponent = $this->evalNode( $exponent ); return $base->pow( $exponent ); case AFPTreeNode::MUL_REL: [ $op, $leftOperand, $rightOperand ] = $node->children; $leftOperand = $this->evalNode( $leftOperand ); $rightOperand = $this->evalNode( $rightOperand ); return $leftOperand->mulRel( $rightOperand, $op, $node->position ); case AFPTreeNode::SUM_REL: [ $op, $leftOperand, $rightOperand ] = $node->children; $leftOperand = $this->evalNode( $leftOperand ); $rightOperand = $this->evalNode( $rightOperand ); switch ( $op ) { case '+': return $leftOperand->sum( $rightOperand ); case '-': return $leftOperand->sub( $rightOperand ); default: // 
@codeCoverageIgnoreStart throw new InternalException( "Unknown sum-related operator: {$op}" ); // @codeCoverageIgnoreEnd } // Unreachable line case AFPTreeNode::COMPARE: [ $op, $leftOperand, $rightOperand ] = $node->children; $leftOperand = $this->evalNode( $leftOperand ); $rightOperand = $this->evalNode( $rightOperand ); $this->raiseCondCount(); return $leftOperand->compareOp( $rightOperand, $op ); case AFPTreeNode::LOGIC: [ $op, $leftOperand, $rightOperand ] = $node->children; $leftOperand = $this->evalNode( $leftOperand ); $value = $leftOperand->getType() === AFPData::DUNDEFINED ? false : $leftOperand->toBool(); // Short-circuit. if ( ( !$value && $op === '&' ) || ( $value && $op === '|' ) ) { if ( $rightOperand instanceof AFPTreeNode ) { $this->maybeDiscardNode( $rightOperand ); } return $leftOperand; } $rightOperand = $this->evalNode( $rightOperand ); return $leftOperand->boolOp( $rightOperand, $op ); case AFPTreeNode::CONDITIONAL: [ $condition, $valueIfTrue, $valueIfFalse ] = $node->children; $condition = $this->evalNode( $condition ); $isTrue = $condition->getType() === AFPData::DUNDEFINED ? false : $condition->toBool(); if ( $isTrue ) { if ( $valueIfFalse !== null ) { $this->maybeDiscardNode( $valueIfFalse ); } return $this->evalNode( $valueIfTrue ); } else { $this->maybeDiscardNode( $valueIfTrue ); return $valueIfFalse !== null ? 
$this->evalNode( $valueIfFalse ) // We assume null as default if the else is missing : new AFPData( AFPData::DNULL ); } case AFPTreeNode::ASSIGNMENT: [ $varName, $value ] = $node->children; $value = $this->evalNode( $value ); $this->setUserVariable( $varName, $value ); return $value; case AFPTreeNode::INDEX_ASSIGNMENT: [ $varName, $offset, $value ] = $node->children; $array = $this->getVarValue( $varName ); if ( $array->getType() !== AFPData::DARRAY && $array->getType() !== AFPData::DUNDEFINED ) { throw new UserVisibleException( 'notarray', $node->position, [] ); } $offset = $this->evalNode( $offset ); // @todo If $array has no elements we could already throw an outofbounds. We don't // know what the index is, though. if ( $array->getType() !== AFPData::DUNDEFINED ) { // If it's a DUNDEFINED, leave it as is if ( $offset->getType() !== AFPData::DUNDEFINED ) { $offset = $offset->toInt(); $array = $array->toArray(); if ( count( $array ) <= $offset ) { throw new UserVisibleException( 'outofbounds', $node->position, [ $offset, count( $array ) ] ); } elseif ( $offset < 0 ) { throw new UserVisibleException( 'negativeindex', $node->position, [ $offset ] ); } $value = $this->evalNode( $value ); $array[$offset] = $value; $array = new AFPData( AFPData::DARRAY, $array ); } else { $value = $this->evalNode( $value ); $array = new AFPData( AFPData::DUNDEFINED ); } $this->setUserVariable( $varName, $array ); } else { $value = $this->evalNode( $value ); } return $value; case AFPTreeNode::ARRAY_APPEND: [ $varName, $value ] = $node->children; $array = $this->getVarValue( $varName ); $value = $this->evalNode( $value ); if ( $array->getType() !== AFPData::DUNDEFINED ) { // If it's a DUNDEFINED, leave it as is if ( $array->getType() !== AFPData::DARRAY ) { throw new UserVisibleException( 'notarray', $node->position, [] ); } $array = $array->toArray(); $array[] = $value; $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) ); } return $value; case 
AFPTreeNode::SEMICOLON: $lastValue = null; // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here foreach ( $node->children as $statement ) { $lastValue = $this->evalNode( $statement ); } // @phan-suppress-next-next-line PhanTypeMismatchReturnNullable Can never be null because // empty statements are discarded in AFPTreeParser return $lastValue; default: // @codeCoverageIgnoreStart throw new InternalException( "Unknown node type passed: {$node->type}" ); // @codeCoverageIgnoreEnd } } /** * Helper to call a built-in function. * * @param string $fname The name of the function as found in the filter code * @param AFPData[] $args Arguments for the function * @param int $position * @return AFPData The return value of the function * @throws InvalidArgumentException if given an invalid func */ private function callFunc( $fname, array $args, int $position ): AFPData { if ( !array_key_exists( $fname, self::FUNCTIONS ) ) { // @codeCoverageIgnoreStart throw new InvalidArgumentException( "$fname is not a valid function." ); // @codeCoverageIgnoreEnd } $funcHandler = self::FUNCTIONS[$fname]; $funcHash = md5( $funcHandler . serialize( $args ) ); if ( isset( $this->funcCache[$funcHash] ) && !in_array( $funcHandler, self::ACTIVE_FUNCTIONS ) ) { $result = $this->funcCache[$funcHash]; } else { $this->raiseCondCount(); // Any undefined argument should be special-cased by the function, but that would be too // much overhead. We also cannot skip calling the handler in case it's making further // validation (T234339). So temporarily replace the DUNDEFINED with a DNULL. // @todo This is subpar. 
$hasUndefinedArg = false; foreach ( $args as $i => $arg ) { if ( $arg->hasUndefined() ) { $args[$i] = $arg->cloneAsUndefinedReplacedWithNull(); $hasUndefinedArg = true; } } if ( $hasUndefinedArg ) { // @phan-suppress-next-line PhanParamTooMany Not every function needs the position $this->$funcHandler( $args, $position ); $result = new AFPData( AFPData::DUNDEFINED ); } else { // @phan-suppress-next-line PhanParamTooMany Not every function needs the position $result = $this->$funcHandler( $args, $position ); } $this->funcCache[$funcHash] = $result; } if ( count( $this->funcCache ) > 1000 ) { // @codeCoverageIgnoreStart $this->funcCache = []; // @codeCoverageIgnoreEnd } return $result; } /** * Helper to invoke a built-in keyword. Note that this assumes that $kname is * a valid keyword name. * * @param string $kname * @param AFPData $lhs * @param AFPData $rhs * @param int $position * @return AFPData */ private function callKeyword( $kname, AFPData $lhs, AFPData $rhs, int $position ): AFPData { $func = self::KEYWORDS[$kname]; $this->raiseCondCount(); $hasUndefinedOperand = false; if ( $lhs->hasUndefined() ) { $lhs = $lhs->cloneAsUndefinedReplacedWithNull(); $hasUndefinedOperand = true; } if ( $rhs->hasUndefined() ) { $rhs = $rhs->cloneAsUndefinedReplacedWithNull(); $hasUndefinedOperand = true; } if ( $hasUndefinedOperand ) { // We need to run the handler with bogus args, see the comment in self::callFunc (T234339) // @todo Likewise, this is subpar. // @phan-suppress-next-line PhanParamTooMany Not every function needs the position $this->$func( $lhs, $rhs, $position ); $result = new AFPData( AFPData::DUNDEFINED ); } else { // @phan-suppress-next-line PhanParamTooMany Not every function needs the position $result = $this->$func( $lhs, $rhs, $position ); } return $result; } /** * Check whether a variable exists, being either built-in or user-defined. Doesn't include * disabled variables. 
*
	 * @param string $varname
	 * @return bool
	 */
	private function varExists( $varname ) {
		if ( $this->keywordsManager->isVarInUse( $varname ) ) {
			return true;
		}

		return (bool)$this->mVariables->varIsSet( $varname );
	}

	/**
	 * Look up the value of a variable by name. The name is lowercased and, if it is a
	 * deprecated name, mapped to its replacement before the lookup. Every name looked up
	 * is also appended to $this->usedVars.
	 *
	 * @param string $var
	 * @return AFPData
	 * @throws UserVisibleException
	 */
	private function getVarValue( $var ) {
		$name = strtolower( $var );

		$renamed = $this->keywordsManager->getDeprecatedVariables();
		if ( array_key_exists( $name, $renamed ) ) {
			$name = $renamed[$name];
		}

		// With check syntax, all unbound variables will be caught
		// already. So we do not error unbound variables at runtime,
		// allowing it to result in DUNDEFINED.
		$lax = !$this->varExists( $name ) || $this->allowMissingVariables;

		$this->usedVars[] = $name;

		// It's a built-in, non-disabled variable (either set or unset), or a set custom variable
		if ( $lax ) {
			$mode = VariablesManager::GET_LAX;
		} else {
			// TODO: This should be GET_STRICT, but that's going to be very hard (see T230256)
			$mode = VariablesManager::GET_BC;
		}

		return $this->varManager->getVar( $this->mVariables, $name, $mode );
	}

	/**
	 * Store a user-defined variable in the current variable holder.
	 *
	 * @param string $name
	 * @param mixed $value
	 * @throws UserVisibleException
	 */
	private function setUserVariable( $name, $value ) {
		$this->mVariables->setVar( $name, $value );
	}

	// Built-in functions

	/**
	 * Lowercase the first argument using the content language.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcLc( $args ) {
		return new AFPData(
			AFPData::DSTRING,
			$this->contLang->lc( $args[0]->toString() )
		);
	}

	/**
	 * Uppercase the first argument using the content language.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcUc( $args ) {
		return new AFPData(
			AFPData::DSTRING,
			$this->contLang->uc( $args[0]->toString() )
		);
	}

	/**
	 * Length of the first argument: element count for arrays, character count otherwise.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcLen( $args ) {
		$subject = $args[0];

		// Don't use toString on arrays, but count
		$length = $subject->type === AFPData::DARRAY
			? count( $subject->data )
			: mb_strlen( $subject->toString(), 'utf-8' );

		return new AFPData( AFPData::DINT, $length );
	}

	/**
	 * Fraction of the string's characters that rmspecials() strips, as a float.
	 * The empty string yields 0.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcSpecialRatio( $args ) {
		$text = $args[0]->toString();

		if ( strlen( $text ) === 0 ) {
			return new AFPData( AFPData::DFLOAT, 0 );
		}

		$stripped = $this->rmspecials( $text );
		$keptShare = mb_strlen( $stripped ) / mb_strlen( $text );

		return new AFPData( AFPData::DFLOAT, 1. - $keptShare );
	}

	/**
	 * With one argument: element count of an array, or the number of comma-separated
	 * pieces of a string. With two arguments: occurrences of the first inside the second.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcCount( $args ) {
		if ( count( $args ) === 1 ) {
			$only = $args[0];
			if ( $only->type === AFPData::DARRAY ) {
				return new AFPData( AFPData::DINT, count( $only->data ) );
			}

			return new AFPData( AFPData::DINT, count( explode( ',', $only->toString() ) ) );
		}

		$needle = $args[0]->toString();
		$haystack = $args[1]->toString();

		// T62203: Keep empty parameters from causing PHP warnings
		$occurrences = $needle === '' ? 0 : substr_count( $haystack, $needle );

		return new AFPData( AFPData::DINT, $occurrences );
	}

	/**
	 * With one argument: the number of comma-separated pieces of the string.
	 * With two arguments: the number of matches of the first (a regex) inside the second.
	 *
	 * @param array $args
	 * @param int $position
	 * @return AFPData
	 * @throws UserVisibleException
	 */
	private function funcRCount( $args, int $position ) {
		if ( count( $args ) === 1 ) {
			$pieces = explode( ',', $args[0]->toString() );

			return new AFPData( AFPData::DINT, count( $pieces ) );
		}

		$rawPattern = $args[0]->toString();
		$text = $args[1]->toString();

		$regex = $this->mungeRegexp( $rawPattern );
		$this->checkRegexMatchesEmpty( $args[0], $regex, $position );

		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$matchCount = @preg_match_all( $regex, $text );
		if ( $matchCount === false ) {
			throw new UserVisibleException(
				'regexfailure',
				$position,
				[ $regex ]
			);
		}

		return new AFPData( AFPData::DINT, $matchCount );
	}

	/**
	 * Returns an array of matches of needle in the haystack, the first one for the whole regex,
	 * the other ones for every capturing group.
	 *
	 * @param array $args
	 * @param int $position
	 * @return AFPData An array of matches.
* @throws UserVisibleException */ private function funcGetMatches( $args, int $position ) { $needle = $args[0]->toString(); $haystack = $args[1]->toString(); // Count the amount of capturing groups in the submitted pattern. // This way we can return a fixed-dimension array, much easier to manage. // ToDo: Find a better way to do this. // First, strip away escaped parentheses $sanitized = preg_replace( '/((\\\\\\\\)*)\\\\\(/', '$1', $needle ); // Then strip starting parentheses of non-capturing groups, including // atomics, lookaheads and so on, even if not every of them is supported. // Avoid stripping named capturing groups: (?P<name>), (?<name>) and (?'name') $sanitized = preg_replace( '/\(\?(?!P?<[a-zA-Z_][a-zA-Z0-9_]*>|\'[a-zA-Z_][a-zA-Z0-9_]*\')/', '', $sanitized ); // And also strip "(*", used with backtracking verbs like (*FAIL) $sanitized = str_replace( '(*', '', $sanitized ); // Finally create an array of falses with dimension = # of capturing groups + 1 // (as there is also the 0 element, which contains the whole match) $groupscount = substr_count( $sanitized, '(' ) + 1; $falsy = array_fill( 0, $groupscount, false ); $needle = $this->mungeRegexp( $needle ); $this->checkRegexMatchesEmpty( $args[0], $needle, $position ); // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged $check = @preg_match( $needle, $haystack, $matches ); if ( $check === false ) { throw new UserVisibleException( 'regexfailure', $position, [ $needle ] ); } // Named capturing groups add the capture twice: with a numeric key and with a string key. // AF doesn't provide associative arrays, thus we have to filter out the elements with string keys, // else AFPData::newFromPHPVar would erroneously insert them into the final array, with numeric keys. $matches = array_filter( $matches, 'is_int', ARRAY_FILTER_USE_KEY ); // Returned array has non-empty positions identical to the ones returned // by the third parameter of a standard preg_match call ($matches in this case). 
// We want an union with falsy to return a fixed-dimension array. return AFPData::newFromPHPVar( $matches + $falsy ); } /** * @param array $args * @param int $position * @return AFPData * @throws UserVisibleException */ private function funcIPInRange( $args, int $position ) { $ip = $args[0]->toString(); $range = $args[1]->toString(); if ( !IPUtils::isValidRange( $range ) && !IPUtils::isIPAddress( $range ) ) { throw new UserVisibleException( 'invalidiprange', $position, [ $range ] ); } $result = IPUtils::isInRange( $ip, $range ); return new AFPData( AFPData::DBOOL, $result ); } /** * @param array $args * @param int $position * @return AFPData * @throws UserVisibleException */ private function funcIPInRanges( $args, int $position ) { $ip = array_shift( $args )->toString(); $strRanges = []; foreach ( $args as $range ) { $range = $range->toString(); if ( !IPUtils::isValidRange( $range ) && !IPUtils::isIPAddress( $range ) ) { throw new UserVisibleException( 'invalidiprange', $position, [ $range ] ); } $strRanges[] = $range; } return new AFPData( AFPData::DBOOL, IPUtils::isInRanges( $ip, $strRanges ) ); } /** * @param array $args * @return AFPData */ private function funcCCNorm( $args ) { $s = $args[0]->toString(); $s = html_entity_decode( $s, ENT_QUOTES, 'UTF-8' ); $s = $this->ccnorm( $s ); return new AFPData( AFPData::DSTRING, $s ); } /** * @param array $args * @return AFPData */ private function funcSanitize( $args ) { $s = $args[0]->toString(); $s = html_entity_decode( $s, ENT_QUOTES, 'UTF-8' ); $s = Sanitizer::decodeCharReferences( $s ); return new AFPData( AFPData::DSTRING, $s ); } /** * @param array $args * @return AFPData */ private function funcContainsAny( $args ) { $s = array_shift( $args ); return new AFPData( AFPData::DBOOL, $this->contains( $s, $args, true ) ); } /** * @param array $args * @return AFPData */ private function funcContainsAll( $args ) { $s = array_shift( $args ); return new AFPData( AFPData::DBOOL, $this->contains( $s, $args, false, false ) 
); } /** * Normalize and search a string for multiple substrings in OR mode * * @param array $args * @return AFPData */ private function funcCCNormContainsAny( $args ) { $s = array_shift( $args ); return new AFPData( AFPData::DBOOL, $this->contains( $s, $args, true, true ) ); } /** * Normalize and search a string for multiple substrings in AND mode * * @param array $args * @return AFPData */ private function funcCCNormContainsAll( $args ) { $s = array_shift( $args ); return new AFPData( AFPData::DBOOL, $this->contains( $s, $args, false, true ) ); } /** * Search for substrings in a string * * Use is_any to determine whether to use logic OR (true) or AND (false). * * Use normalize = true to make use of ccnorm and * normalize both sides of the search. * * @param AFPData $string * @param AFPData[] $values * @param bool $is_any * @param bool $normalize * * @return bool */ private function contains( $string, $values, $is_any = true, $normalize = false ) { $string = $string->toString(); if ( $string === '' ) { return false; } if ( $normalize ) { $string = $this->ccnorm( $string ); } foreach ( $values as $needle ) { $needle = $needle->toString(); if ( $normalize ) { $needle = $this->ccnorm( $needle ); } if ( $needle === '' ) { // T62203: Keep empty parameters from causing PHP warnings continue; } $is_found = strpos( $string, $needle ) !== false; if ( $is_found === $is_any ) { // If I'm here and it's ANY (OR) => something is found. // If I'm here and it's ALL (AND) => nothing is found. // In both cases, we've had enough. 
return $is_found;
			}
		}

		// If I'm here and it's ANY (OR) => nothing was found: return false ($is_any is true)
		// If I'm here and it's ALL (AND) => everything was found: return true ($is_any is false)
		return !$is_any;
	}

	/**
	 * Whether the first argument equals any of the remaining ones.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcEqualsToAny( $args ) {
		$s = array_shift( $args );

		return new AFPData( AFPData::DBOOL, self::equalsToAny( $s, $args ) );
	}

	/**
	 * Check if the given string is equal to any of the following strings
	 *
	 * @param AFPData $string
	 * @param AFPData[] $values
	 *
	 * @return bool
	 */
	private static function equalsToAny( $string, $values ) {
		foreach ( $values as $needle ) {
			if ( $string->equals( $needle, true ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Normalize a string through the injected Equivset.
	 *
	 * @param string $s
	 * @return string
	 */
	private function ccnorm( $s ): string {
		return $this->equivset->normalize( $s );
	}

	/**
	 * Run preg_replace() with the PCRE JIT turned off for the duration of the call.
	 *
	 * The previous runtime value of pcre.jit is put back afterwards. ini_restore() is not
	 * used because it resets the setting to its startup value, which would discard a value
	 * that was changed earlier at runtime.
	 *
	 * @param string $pattern
	 * @param string $replacement
	 * @param string|null $subject
	 * @return string|null Null if preg_replace() fails
	 */
	private function pregReplaceWithoutJit( string $pattern, string $replacement, $subject ) {
		// ini_set() returns the old value as a string, or false if the setting could not be changed
		$previousJit = ini_set( 'pcre.jit', '0' );
		try {
			return preg_replace( $pattern, $replacement, $subject );
		} finally {
			if ( $previousJit !== false ) {
				ini_set( 'pcre.jit', $previousJit );
			}
		}
	}

	/**
	 * Remove every character that is not a letter, a number or whitespace.
	 *
	 * @param string $s
	 * @return string|null Null if the regex engine fails
	 */
	private function rmspecials( $s ) {
		// (T385452) Disable JIT for this call, as it breaks sometimes
		return $this->pregReplaceWithoutJit( '/[^\p{L}\p{N}\s]/u', '', $s );
	}

	/**
	 * Collapse every run of the same character into a single occurrence.
	 *
	 * @param string $s
	 * @return string|null Null if the regex engine fails
	 */
	private function rmdoubles( $s ) {
		// (T385452) Disable JIT for this call, as it breaks sometimes
		return $this->pregReplaceWithoutJit( '/(.)\1+/us', '\1', $s );
	}

	/**
	 * Remove all whitespace.
	 *
	 * @param string $s
	 * @return string|null Null if the regex engine fails
	 */
	private function rmwhitespace( $s ) {
		return preg_replace( '/\s+/u', '', $s );
	}

	/**
	 * Apply rmspecials() to the first argument.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcRMSpecials( $args ) {
		$s = $args[0]->toString();

		return new AFPData( AFPData::DSTRING, $this->rmspecials( $s ) );
	}

	/**
	 * Apply rmwhitespace() to the first argument.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcRMWhitespace( $args ) {
		$s = $args[0]->toString();

		return new AFPData( AFPData::DSTRING, $this->rmwhitespace( $s ) );
	}

	/**
	 * Apply rmdoubles() to the first argument.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcRMDoubles( $args ) {
		$s = $args[0]->toString();

		return new AFPData( AFPData::DSTRING, $this->rmdoubles( $s ) );
	}

	/**
	 * Apply, in this order, ccnorm(), rmdoubles(), rmspecials() and rmwhitespace()
	 * to the first argument.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcNorm( $args ) {
		$s = $args[0]->toString();

		$s = $this->ccnorm( $s );
		$s = $this->rmdoubles( $s );
		$s = $this->rmspecials( $s );
		$s = $this->rmwhitespace( $s );

		return new AFPData( AFPData::DSTRING, $s );
	}

	/**
	 * Multibyte-safe substring: args are the string, the offset and an optional length.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcSubstr( $args ) {
		$s = $args[0]->toString();
		$offset = $args[1]->toInt();
		$length = isset( $args[2] ) ? $args[2]->toInt() : null;

		$result = mb_substr( $s, $offset, $length );

		return new AFPData( AFPData::DSTRING, $result );
	}

	/**
	 * Position (in characters) of the first occurrence of a needle in a haystack, or -1.
	 * Args are the haystack, the needle and an optional offset (default 0).
	 *
	 * -1 is returned if the needle is empty, if it is not found, or if the offset falls
	 * outside the haystack in either direction.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcStrPos( $args ) {
		$haystack = $args[0]->toString();
		$needle = $args[1]->toString();
		$offset = isset( $args[2] ) ? $args[2]->toInt() : 0;

		// T62203: Keep empty parameters from causing PHP warnings
		if ( $needle === '' ) {
			return new AFPData( AFPData::DINT, -1 );
		}

		// Special handling for when the offset is not contained in $haystack. PHP can emit a warning
		// or throw an error depending on the version (T285978). TODO Should we also throw?
		// Negative offsets count from the end of the string, so they are out of range as soon as
		// they reach further back than the start of $haystack.
		$haystackLength = mb_strlen( $haystack );
		if ( $offset > $haystackLength || $offset < -$haystackLength ) {
			return new AFPData( AFPData::DINT, -1 );
		}

		$result = mb_strpos( $haystack, $needle, $offset );

		if ( $result === false ) {
			$result = -1;
		}

		return new AFPData( AFPData::DINT, $result );
	}

	/**
	 * Plain string replacement: args are the subject, the search string and the replacement.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcStrReplace( $args ) {
		$subject = $args[0]->toString();
		$search = $args[1]->toString();
		$replace = $args[2]->toString();

		return new AFPData( AFPData::DSTRING, str_replace( $search, $replace, $subject ) );
	}

	/**
	 * Regex replacement: args are the subject, the pattern and the replacement.
	 *
	 * @param array $args
	 * @param int $position
	 * @return AFPData
	 * @throws UserVisibleException If preg_replace() fails
	 */
	private function funcStrReplaceRegexp( $args, int $position ) {
		$subject = $args[0]->toString();
		$search = $args[1]->toString();
		$replace = $args[2]->toString();

		// Munge once and use the result for both the empty-match check and the replacement,
		// like funcRCount and funcGetMatches do.
		// NOTE(review): assumes checkRegexMatchesEmpty expects the munged pattern, as its
		// other callers pass it — confirm against its definition.
		$pattern = $this->mungeRegexp( $search );
		$this->checkRegexMatchesEmpty( $args[1], $pattern, $position );

		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$result = @preg_replace( $pattern, $replace, $subject );

		if ( $result === null ) {
			throw new UserVisibleException(
				'regexfailure',
				$position,
				[ $search ]
			);
		}

		return new AFPData( AFPData::DSTRING, $result );
	}

	/**
	 * Escape the regex special characters in the first argument.
	 *
	 * @param array $args
	 * @return AFPData
	 */
	private function funcStrRegexEscape( $args ) {
		$string = $args[0]->toString();

		// preg_quote does not need the second parameter, since rlike takes
		// care of the delimiter symbol itself
		return new AFPData( AFPData::DSTRING, preg_quote( $string ) );
	}

	/**
	 * Assign the second argument to the user variable named by the first one.
	 *
	 * @param array $args
	 * @return AFPData The assigned value, i.e. the second argument as passed in
	 */
	private function funcSetVar( $args ) {
		$varName = $args[0]->toString();
		$value = $args[1];

		$this->setUserVariable( $varName, $value );

		return $value;
	}

	/**
	 * Checks if $a contains $b. If either string is empty, the result is false.
	 *
	 * @param AFPData $a
	 * @param AFPData $b
	 * @return AFPData
	 */
	private function containmentKeyword( AFPData $a, AFPData $b ) {
		$a = $a->toString();
		$b = $b->toString();

		if ( $a === '' || $b === '' ) {
			return new AFPData( AFPData::DBOOL, false );
		}

		return new AFPData( AFPData::DBOOL, strpos( $a, $b ) !== false );
	}

	/**
	 * @param AFPData $a
	 * @param AFPData $b
	 * @return AFPData
	 */
	private
function keywordIn( AFPData $a, AFPData $b ) { return $this->containmentKeyword( $b, $a ); } /** * @param AFPData $a * @param AFPData $b * @return AFPData */ private function keywordContains( AFPData $a, AFPData $b ) { return $this->containmentKeyword( $a, $b ); } /** * @param AFPData $str * @param AFPData $pattern * @return AFPData */ private function keywordLike( AFPData $str, AFPData $pattern ) { $str = $str->toString(); $pattern = '#^' . strtr( preg_quote( $pattern->toString(), '#' ), AFPData::WILDCARD_MAP ) . '$#u'; // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged $result = @preg_match( $pattern, $str ); return new AFPData( AFPData::DBOOL, (bool)$result ); } /** * @param AFPData $str * @param AFPData $regex * @param int $pos * @param bool $insensitive * @return AFPData * @throws Exception */ private function keywordRegex( AFPData $str, AFPData $regex, $pos, $insensitive = false ) { $str = $str->toString(); $pattern = $regex->toString(); $pattern = $this->mungeRegexp( $pattern ); if ( $insensitive ) { $pattern .= 'i'; } $this->checkRegexMatchesEmpty( $regex, $pattern, $pos ); // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged $result = @preg_match( $pattern, $str ); if ( $result === false ) { throw new UserVisibleException( 'regexfailure', // Coverage bug // @codeCoverageIgnoreStart $pos, // @codeCoverageIgnoreEnd [ $pattern ] ); } return new AFPData( AFPData::DBOOL, (bool)$result ); } /** * @param AFPData $str * @param AFPData $regex * @param int $pos * @return AFPData */ private function keywordRegexInsensitive( AFPData $str, AFPData $regex, $pos ) { return $this->keywordRegex( $str, $regex, $pos, true ); } /** * @param array $args * @return AFPData */ private function castString( $args ) { return AFPData::castTypes( $args[0], AFPData::DSTRING ); } /** * @param array $args * @return AFPData */ private function castInt( $args ) { return AFPData::castTypes( $args[0], AFPData::DINT ); } /** * @param array $args * @return AFPData */ private function 
castFloat( $args ) { return AFPData::castTypes( $args[0], AFPData::DFLOAT ); } /** * @param array $args * @return AFPData */ private function castBool( $args ) { return AFPData::castTypes( $args[0], AFPData::DBOOL ); } /** * Given a node that we don't need to evaluate, decide what to do with it. * The nodes passed in will usually be discarded by short-circuit * evaluation. If we don't allow it, we fully evaluate the node. * * @param AFPTreeNode $node */ private function maybeDiscardNode( AFPTreeNode $node ) { if ( !$this->mAllowShort ) { $this->evalNode( $node ); } } /** * Given a regexp in the AF syntax, make it PCRE-compliant (i.e. we need to escape slashes, add * delimiters and modifiers). * * @param string $rawRegexp * @return string */ private function mungeRegexp( string $rawRegexp ): string { $needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $rawRegexp ); return "/$needle/u"; } /** * Check whether the provided regex matches the empty string. * @note This method can generate a PHP notice if the regex is invalid * * @param AFPData $regex TODO Can we avoid passing this in? * @param string $pattern Already munged * @param int $position */ private function checkRegexMatchesEmpty( AFPData $regex, string $pattern, int $position ): void { if ( $regex->getType() === AFPData::DUNDEFINED ) { // We can't tell, and toString() would return the empty string (T273809) return; } // @phan-suppress-next-next-line PhanParamSuspiciousOrder // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged if ( @preg_match( $pattern, '' ) === 1 ) { $this->warnings[] = new UserVisibleWarning( 'match-empty-regex', $position, [] ); } } }
| ver. 1.1 | |
.
| PHP 8.4.18 | Генерация страницы: 0 |
proxy
|
phpinfo
|
Настройка