<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Reads PHP code and returns the FQCN of every class defined within it.
 */
class ClassCollector {

	/**
	 * @var string Current namespace prefix: either '' (global namespace) or a
	 *  namespace name followed by a single backslash
	 */
	protected $namespace = '';

	/**
	 * @var array List of FQCN detected in this pass
	 */
	protected $classes;

	/**
	 * @var array|null Token from token_get_all() that started an expect sequence
	 */
	protected $startToken;

	/**
	 * @var array[]|string[] List of tokens that are members of the current expect sequence
	 */
	protected $tokens;

	/**
	 * @var array|null Class alias with target/name fields. Null when no
	 *  class_alias() call is being parsed.
	 */
	protected $alias;

	/**
	 * Detect the classes, interfaces and traits declared in a piece of PHP code,
	 * as well as the alias names registered through class_alias() calls whose
	 * second argument is a string literal.
	 *
	 * All parser state is reset on entry, so one instance can be reused for
	 * several inputs.
	 *
	 * @param string $code PHP code (including <?php) to detect class names from
	 * @return array List of FQCN detected within the tokens
	 */
	public function getClasses( $code ) {
		$this->namespace = '';
		$this->classes = [];
		$this->startToken = null;
		$this->alias = null;
		$this->tokens = [];

		// HACK: The PHP tokenizer is slow (T225730).
		// Speed it up by reducing the input to the three kinds of statement we care about:
		// - namespace X;
		// - [final] [abstract] class X … {}
		// - class_alias( … );
		$lines = [];
		$matches = null;
		preg_match_all(
			// phpcs:ignore Generic.Files.LineLength.TooLong
			'#^\t*(?:namespace |(final )?(abstract )?(class|interface|trait) |class_alias\()[^;{]+[;{]\s*\}?#m',
			$code,
			$matches
		);
		foreach ( $matches[0] ?? [] as $match ) {
			$match = trim( $match );
			if ( str_ends_with( $match, '{' ) ) {
				// Keep it balanced
				$match .= '}';
			}
			$lines[] = $match;
		}
		$code = '<?php ' . implode( "\n", $lines ) . "\n";

		foreach ( token_get_all( $code ) as $token ) {
			if ( $this->startToken === null ) {
				$this->tryBeginExpect( $token );
			} else {
				$this->tryEndExpect( $token );
			}
		}

		return $this->classes;
	}

	/**
	 * Determine if $token begins the next expect sequence.
	 *
	 * Single-character tokens (plain strings) never start a sequence.
	 *
	 * @param array|string $token Token as returned by token_get_all()
	 */
	protected function tryBeginExpect( $token ) {
		if ( is_string( $token ) ) {
			return;
		}
		// Note: When changing class name discovery logic,
		// AutoLoaderStructureTest.php may also need to be updated.
		switch ( $token[0] ) {
			case T_NAMESPACE:
			case T_CLASS:
			case T_INTERFACE:
			case T_TRAIT:
			case T_DOUBLE_COLON:
			case T_NEW:
				$this->startToken = $token;
				break;
			case T_STRING:
				if ( $token[1] === 'class_alias' ) {
					$this->startToken = $token;
					// Start with the complete shape so that every later read of
					// 'target' or 'name' is defined, whatever token comes next.
					$this->alias = [ 'target' => false, 'name' => false ];
				}
		}
	}

	/**
	 * Accepts the next token in an expect sequence
	 *
	 * @param array|string $token Token as returned by token_get_all()
	 */
	protected function tryEndExpect( $token ) {
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
		switch ( $this->startToken[0] ) {
			case T_DOUBLE_COLON:
				// Skip over T_CLASS after T_DOUBLE_COLON because this is something like
				// "ClassName::class" that evaluates to a fully qualified class name. It
				// doesn't define a new class.
				$this->startToken = null;
				break;
			case T_NEW:
				// Skip over T_CLASS after T_NEW because this is an anonymous class.
				if ( !is_array( $token ) || $token[0] !== T_WHITESPACE ) {
					$this->startToken = null;
				}
				break;
			case T_NAMESPACE:
				if ( $token === ';' || $token === '{' ) {
					$name = $this->implodeTokens();
					// A bracketed global namespace ("namespace { … }") has no name.
					// Don't turn it into a bare backslash prefix, which would yield
					// class names such as "\Foo".
					$this->namespace = $name === '' ? '' : $name . '\\';
				} else {
					$this->tokens[] = $token;
				}
				break;

			case T_STRING:
				if ( $this->alias !== null ) {
					$this->handleClassAliasToken( $token );
				}
				break;

			case T_CLASS:
			case T_INTERFACE:
			case T_TRAIT:
				$this->tokens[] = $token;
				if ( is_array( $token ) && $token[0] === T_STRING ) {
					// The first T_STRING after the keyword is the declared name
					$this->classes[] = $this->namespace . $this->implodeTokens();
				}
		}
	}

	/**
	 * Advance the class_alias() state machine by one token.
	 *
	 * Flow 1 - Two string literals:
	 * - T_STRING  class_alias
	 * - '('
	 * - T_CONSTANT_ENCAPSED_STRING 'TargetClass'
	 * - ','
	 * - T_WHITESPACE
	 * - T_CONSTANT_ENCAPSED_STRING 'AliasName'
	 * - ')'
	 *
	 * Flow 2 - Use of ::class syntax for first parameter
	 * - T_STRING  class_alias
	 * - '('
	 * - T_STRING TargetClass
	 * - T_DOUBLE_COLON ::
	 * - T_CLASS class
	 * - ','
	 * - T_WHITESPACE
	 * - T_CONSTANT_ENCAPSED_STRING 'AliasName'
	 * - ')'
	 *
	 * @param array|string $token Token as returned by token_get_all()
	 */
	private function handleClassAliasToken( $token ) {
		if ( $token === '(' ) {
			// Start of a function call to class_alias()
			$this->alias = [ 'target' => false, 'name' => false ];
		} elseif ( $token === ',' ) {
			// Record that we're past the first parameter
			$this->alias['target'] = true;
		} elseif ( is_array( $token ) && $token[0] === T_CONSTANT_ENCAPSED_STRING ) {
			if ( $this->alias['target'] === true ) {
				// We already saw a first argument, this must be the second.
				// Strip quotes from the string literal.
				$this->alias['name'] = self::stripQuotes( $token[1] );
			}
		} elseif ( $token === ')' ) {
			// End of function call. Only record the alias when the second argument
			// really was a string literal; otherwise 'name' is still false (e.g.
			// class_alias( Foo::class, Bar::class )) and must not be reported as
			// a class name.
			$name = $this->alias['name'];
			if ( is_string( $name ) && $name !== '' ) {
				$this->classes[] = $name;
			}
			$this->alias = null;
			$this->startToken = null;
		} elseif ( !is_array( $token ) || (
				$token[0] !== T_STRING &&
				$token[0] !== T_DOUBLE_COLON &&
				$token[0] !== T_CLASS &&
				$token[0] !== T_WHITESPACE
			) ) {
			// Ignore this call to class_alias() - compat/Timestamp.php
			$this->alias = null;
			$this->startToken = null;
		}
	}

	/**
	 * Decode a quoted PHP string, interpreting escape sequences, like eval($str).
	 * The implementation is half-baked, but the character set allowed in class
	 * names is pretty small. This could be replaced by a call to a fully-baked
	 * utility function.
	 *
	 * Only the surrounding quote characters are removed and doubled backslashes
	 * are collapsed; no other escape sequence is interpreted.
	 *
	 * @param string $str String literal including its quotes
	 * @return string
	 */
	private static function stripQuotes( $str ) {
		return str_replace( '\\\\', '\\', substr( $str, 1, -1 ) );
	}

	/**
	 * Returns the string representation of the tokens within the
	 * current expect sequence and resets the sequence.
	 *
	 * Resetting clears both the collected tokens and the start token, so the
	 * next token is offered to tryBeginExpect() again.
	 *
	 * @return string Concatenated token text with surrounding spaces, newlines
	 *  and tabs trimmed
	 */
	protected function implodeTokens() {
		$content = [];
		foreach ( $this->tokens as $token ) {
			$content[] = is_string( $token ) ? $token : $token[1];
		}

		$this->tokens = [];
		$this->startToken = null;

		return trim( implode( '', $content ), " \n\t" );
	}
}
