Файловый менеджер - Редактировать - /var/www/html/language.zip
Ðазад
PK ! hգ�0 0 LazyLocalizationContext.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ namespace MediaWiki\Language; /** * Wrapper for injecting a LocalizationContext with lazy initialization. * * @since 1.42 * @ingroup Language */ class LazyLocalizationContext implements LocalizationContext { /** @var callable */ private $instantiator; private ?LocalizationContext $context = null; /** * @param callable $instantiator */ public function __construct( callable $instantiator ) { $this->instantiator = $instantiator; } private function resolve(): LocalizationContext { if ( !$this->context ) { $this->context = ( $this->instantiator )(); } return $this->context; } public function getLanguageCode() { return $this->resolve()->getLanguageCode(); } public function msg( $key, ...$params ) { return $this->resolve()->msg( $key, ...$params ); } } PK ! 5��kX kX Language.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ /** * @defgroup Language Internationalisation * * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information. */ /** * @defgroup Languages Languages * @ingroup Language */ namespace MediaWiki\Language; use CLDRPluralRuleParser\Evaluator; use DateTime; use DateTimeImmutable; use DateTimeZone; use InvalidArgumentException; use Locale; use LocalisationCache; use MediaWiki\Config\Config; use MediaWiki\Context\RequestContext; use MediaWiki\HookContainer\HookContainer; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Html\Html; use MediaWiki\Json\FormatJson; use MediaWiki\Languages\Data\NormalizeAr; use MediaWiki\Languages\Data\NormalizeMl; use MediaWiki\Languages\LanguageConverterFactory; use MediaWiki\Languages\LanguageFallback; use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MainConfigNames; use MediaWiki\MediaWikiServices; use MediaWiki\Message\Message; use MediaWiki\Parser\MagicWord; use MediaWiki\Title\NamespaceInfo; use MediaWiki\User\User; use MediaWiki\User\UserIdentity; use MediaWiki\User\UserTimeCorrection; use MediaWiki\Utils\MWTimestamp; use MediaWiki\Xml\XmlSelect; use NumberFormatter; use RuntimeException; use StringUtils; use UtfNormal\Validator as UtfNormalValidator; use Wikimedia\Assert\Assert; use Wikimedia\AtEase\AtEase; use Wikimedia\Bcp47Code\Bcp47Code; use Wikimedia\DebugInfo\DebugInfoTrait; /** * Base class for language-specific code. 
* * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information. * * @ingroup Language */ class Language implements Bcp47Code { use DebugInfoTrait; /** @var string */ public $mCode; /** * @deprecated since 1.35, use LocalisationCache with custom language config */ public $mMagicExtensions = []; /** @var string|null */ private $mHtmlCode = null; /** * memoize * @var string[][] * @deprecated since 1.35, must be private */ public $dateFormatStrings = []; /** * memoize * @var string[][]|null * @deprecated since 1.35, must be protected */ public $mExtendedSpecialPageAliases; /** @var array<int,string>|null Indexed by numeric namespace ID */ protected $namespaceNames; /** @var array<string,int>|null Indexed by localized lower-cased namespace name */ protected $mNamespaceIds; /** @var array<string,int>|null Map from alias to namespace ID */ protected $namespaceAliases; /** * @var ReplacementArray[] * @noVarDump */ private $transformData = []; /** * @var NamespaceInfo * @noVarDump */ private $namespaceInfo; /** * @var LocalisationCache * @noVarDump */ private $localisationCache; /** * @var LanguageNameUtils * @noVarDump */ private $langNameUtils; /** * @var LanguageFallback * @noVarDump */ private $langFallback; /** * @var array[]|null * @noVarDump */ private $grammarTransformCache; /** * @var LanguageConverterFactory * @noVarDump */ private $converterFactory; /** * @var HookContainer * @noVarDump */ private $hookContainer; /** * @var HookRunner * @noVarDump */ private $hookRunner; /** * @var Config * @noVarDump */ private $config; /** * @var array|null */ private $overrideUcfirstCharacters; /** * @var NumberFormatter|null * @noVarDump */ private $numberFormatter = null; /** * @since 1.35 */ public const WEEKDAY_MESSAGES = [ 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday' ]; /** * @since 1.35 */ public const WEEKDAY_ABBREVIATED_MESSAGES = [ 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat' ]; /** * @since 1.35 */ 
public const MONTH_MESSAGES = [ 'january', 'february', 'march', 'april', 'may_long', 'june', 'july', 'august', 'september', 'october', 'november', 'december' ]; /** * @deprecated since 1.35, use the MONTH_MESSAGES constant */ public static $mMonthMsgs = self::MONTH_MESSAGES; /** * @since 1.35 */ public const MONTH_GENITIVE_MESSAGES = [ 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen', 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen', 'december-gen' ]; /** * @since 1.35 */ public const MONTH_ABBREVIATED_MESSAGES = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]; /** * @deprecated since 1.35, use the MONTH_ABBREVIATED_MESSAGES constant */ public static $mMonthAbbrevMsgs = self::MONTH_ABBREVIATED_MESSAGES; /** * @since 1.35 */ public const IRANIAN_CALENDAR_MONTHS_MESSAGES = [ 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3', 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6', 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9', 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12' ]; /** * @since 1.35 */ public const HEBREW_CALENDAR_MONTHS_MESSAGES = [ 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3', 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6', 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9', 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12', 'hebrew-calendar-m6a', 'hebrew-calendar-m6b' ]; /** * @since 1.35 */ public const HEBREW_CALENDAR_MONTH_GENITIVE_MESSAGES = [ 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen', 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen', 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen', 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen', 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen' ]; /** * 
@since 1.35 */ public const HIJRI_CALENDAR_MONTH_MESSAGES = [ 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3', 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6', 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9', 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12' ]; /** * @since 1.35 */ protected const DURATION_INTERVALS = [ 'millennia' => 1000 * 31_556_952, 'centuries' => 100 * 31_556_952, 'decades' => 10 * 31_556_952, // The average year is 365.2425 days (365 + (24 * 3 + 25) / 400) 'years' => 31_556_952, // 365.2425 * 24 * 3600 // To simplify, we consider a month to be 1/12 of a year 'months' => 365.2425 * 24 * 3600 / 12, 'days' => 24 * 3600, 'hours' => 3600, 'minutes' => 60, 'seconds' => 1, ]; /** * @deprecated since 1.35, use the DURATION_INTERVALS constant * @since 1.20 * @var int[] */ public static $durationIntervals = self::DURATION_INTERVALS; /** * Unicode directional formatting characters */ private const LRM = "\u{200E}"; // U+200E LEFT-TO-RIGHT MARK private const RLM = "\u{200F}"; // U+200F RIGHT-TO-LEFT MARK private const LRE = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING private const RLE = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING private const PDF = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING /** * Directionality test regex for embedBidi(). Matches the first strong directionality codepoint: * - in group 1 if it is LTR * - in group 2 if it is RTL * Does not match if there is no strong directionality codepoint. * * The form is '/(?:([strong ltr codepoint])|([strong rtl codepoint]))/u'. * * Generated by UnicodeJS (see tools/strongDir) from the UCD; see * https://gerrit.wikimedia.org/g/unicodejs . 
* @var string */ // @codeCoverageIgnoreStart // phpcs:ignore Generic.Files.LineLength,MediaWiki.Commenting.PropertyDocumentation.MissingDocumentationPrivate private static $strongDirRegex = '/(?:([\x{41}-\x{5a}\x{61}-\x{7a}\x{aa}\x{b5}\x{ba}\x{c0}-\x{d6}\x{d8}-\x{f6}\x{f8}-\x{2b8}\x{2bb}-\x{2c1}\x{2d0}\x{2d1}\x{2e0}-\x{2e4}\x{2ee}\x{370}-\x{373}\x{376}\x{377}\x{37a}-\x{37d}\x{37f}\x{386}\x{388}-\x{38a}\x{38c}\x{38e}-\x{3a1}\x{3a3}-\x{3f5}\x{3f7}-\x{482}\x{48a}-\x{52f}\x{531}-\x{556}\x{559}-\x{55f}\x{561}-\x{587}\x{589}\x{903}-\x{939}\x{93b}\x{93d}-\x{940}\x{949}-\x{94c}\x{94e}-\x{950}\x{958}-\x{961}\x{964}-\x{980}\x{982}\x{983}\x{985}-\x{98c}\x{98f}\x{990}\x{993}-\x{9a8}\x{9aa}-\x{9b0}\x{9b2}\x{9b6}-\x{9b9}\x{9bd}-\x{9c0}\x{9c7}\x{9c8}\x{9cb}\x{9cc}\x{9ce}\x{9d7}\x{9dc}\x{9dd}\x{9df}-\x{9e1}\x{9e6}-\x{9f1}\x{9f4}-\x{9fa}\x{a03}\x{a05}-\x{a0a}\x{a0f}\x{a10}\x{a13}-\x{a28}\x{a2a}-\x{a30}\x{a32}\x{a33}\x{a35}\x{a36}\x{a38}\x{a39}\x{a3e}-\x{a40}\x{a59}-\x{a5c}\x{a5e}\x{a66}-\x{a6f}\x{a72}-\x{a74}\x{a83}\x{a85}-\x{a8d}\x{a8f}-\x{a91}\x{a93}-\x{aa8}\x{aaa}-\x{ab0}\x{ab2}\x{ab3}\x{ab5}-\x{ab9}\x{abd}-\x{ac0}\x{ac9}\x{acb}\x{acc}\x{ad0}\x{ae0}\x{ae1}\x{ae6}-\x{af0}\x{af9}\x{b02}\x{b03}\x{b05}-\x{b0c}\x{b0f}\x{b10}\x{b13}-\x{b28}\x{b2a}-\x{b30}\x{b32}\x{b33}\x{b35}-\x{b39}\x{b3d}\x{b3e}\x{b40}\x{b47}\x{b48}\x{b4b}\x{b4c}\x{b57}\x{b5c}\x{b5d}\x{b5f}-\x{b61}\x{b66}-\x{b77}\x{b83}\x{b85}-\x{b8a}\x{b8e}-\x{b90}\x{b92}-\x{b95}\x{b99}\x{b9a}\x{b9c}\x{b9e}\x{b9f}\x{ba3}\x{ba4}\x{ba8}-\x{baa}\x{bae}-\x{bb9}\x{bbe}\x{bbf}\x{bc1}\x{bc2}\x{bc6}-\x{bc8}\x{bca}-\x{bcc}\x{bd0}\x{bd7}\x{be6}-\x{bf2}\x{c01}-\x{c03}\x{c05}-\x{c0c}\x{c0e}-\x{c10}\x{c12}-\x{c28}\x{c2a}-\x{c39}\x{c3d}\x{c41}-\x{c44}\x{c58}-\x{c5a}\x{c60}\x{c61}\x{c66}-\x{c6f}\x{c7f}\x{c82}\x{c83}\x{c85}-\x{c8c}\x{c8e}-\x{c90}\x{c92}-\x{ca8}\x{caa}-\x{cb3}\x{cb5}-\x{cb9}\x{cbd}-\x{cc4}\x{cc6}-\x{cc8}\x{cca}\x{ccb}\x{cd5}\x{cd6}\x{cde}\x{ce0}\x{ce1}\x{ce6}-\x{cef}\x{cf1}\x{cf2}\x{d02}\x{d03}\x{d05}-\x{d0c}\x{d0e}-\x{d10}\x{d12}-\
x{d3a}\x{d3d}-\x{d40}\x{d46}-\x{d48}\x{d4a}-\x{d4c}\x{d4e}\x{d57}\x{d5f}-\x{d61}\x{d66}-\x{d75}\x{d79}-\x{d7f}\x{d82}\x{d83}\x{d85}-\x{d96}\x{d9a}-\x{db1}\x{db3}-\x{dbb}\x{dbd}\x{dc0}-\x{dc6}\x{dcf}-\x{dd1}\x{dd8}-\x{ddf}\x{de6}-\x{def}\x{df2}-\x{df4}\x{e01}-\x{e30}\x{e32}\x{e33}\x{e40}-\x{e46}\x{e4f}-\x{e5b}\x{e81}\x{e82}\x{e84}\x{e87}\x{e88}\x{e8a}\x{e8d}\x{e94}-\x{e97}\x{e99}-\x{e9f}\x{ea1}-\x{ea3}\x{ea5}\x{ea7}\x{eaa}\x{eab}\x{ead}-\x{eb0}\x{eb2}\x{eb3}\x{ebd}\x{ec0}-\x{ec4}\x{ec6}\x{ed0}-\x{ed9}\x{edc}-\x{edf}\x{f00}-\x{f17}\x{f1a}-\x{f34}\x{f36}\x{f38}\x{f3e}-\x{f47}\x{f49}-\x{f6c}\x{f7f}\x{f85}\x{f88}-\x{f8c}\x{fbe}-\x{fc5}\x{fc7}-\x{fcc}\x{fce}-\x{fda}\x{1000}-\x{102c}\x{1031}\x{1038}\x{103b}\x{103c}\x{103f}-\x{1057}\x{105a}-\x{105d}\x{1061}-\x{1070}\x{1075}-\x{1081}\x{1083}\x{1084}\x{1087}-\x{108c}\x{108e}-\x{109c}\x{109e}-\x{10c5}\x{10c7}\x{10cd}\x{10d0}-\x{1248}\x{124a}-\x{124d}\x{1250}-\x{1256}\x{1258}\x{125a}-\x{125d}\x{1260}-\x{1288}\x{128a}-\x{128d}\x{1290}-\x{12b0}\x{12b2}-\x{12b5}\x{12b8}-\x{12be}\x{12c0}\x{12c2}-\x{12c5}\x{12c8}-\x{12d6}\x{12d8}-\x{1310}\x{1312}-\x{1315}\x{1318}-\x{135a}\x{1360}-\x{137c}\x{1380}-\x{138f}\x{13a0}-\x{13f5}\x{13f8}-\x{13fd}\x{1401}-\x{167f}\x{1681}-\x{169a}\x{16a0}-\x{16f8}\x{1700}-\x{170c}\x{170e}-\x{1711}\x{1720}-\x{1731}\x{1735}\x{1736}\x{1740}-\x{1751}\x{1760}-\x{176c}\x{176e}-\x{1770}\x{1780}-\x{17b3}\x{17b6}\x{17be}-\x{17c5}\x{17c7}\x{17c8}\x{17d4}-\x{17da}\x{17dc}\x{17e0}-\x{17e9}\x{1810}-\x{1819}\x{1820}-\x{1877}\x{1880}-\x{18a8}\x{18aa}\x{18b0}-\x{18f5}\x{1900}-\x{191e}\x{1923}-\x{1926}\x{1929}-\x{192b}\x{1930}\x{1931}\x{1933}-\x{1938}\x{1946}-\x{196d}\x{1970}-\x{1974}\x{1980}-\x{19ab}\x{19b0}-\x{19c9}\x{19d0}-\x{19da}\x{1a00}-\x{1a16}\x{1a19}\x{1a1a}\x{1a1e}-\x{1a55}\x{1a57}\x{1a61}\x{1a63}\x{1a64}\x{1a6d}-\x{1a72}\x{1a80}-\x{1a89}\x{1a90}-\x{1a99}\x{1aa0}-\x{1aad}\x{1b04}-\x{1b33}\x{1b35}\x{1b3b}\x{1b3d}-\x{1b41}\x{1b43}-\x{1b4b}\x{1b50}-\x{1b6a}\x{1b74}-\x{1b7c}\x{1b82}-\x{1ba1}\x{1ba6}\x{1ba7}\x{1baa}\x{1
bae}-\x{1be5}\x{1be7}\x{1bea}-\x{1bec}\x{1bee}\x{1bf2}\x{1bf3}\x{1bfc}-\x{1c2b}\x{1c34}\x{1c35}\x{1c3b}-\x{1c49}\x{1c4d}-\x{1c7f}\x{1cc0}-\x{1cc7}\x{1cd3}\x{1ce1}\x{1ce9}-\x{1cec}\x{1cee}-\x{1cf3}\x{1cf5}\x{1cf6}\x{1d00}-\x{1dbf}\x{1e00}-\x{1f15}\x{1f18}-\x{1f1d}\x{1f20}-\x{1f45}\x{1f48}-\x{1f4d}\x{1f50}-\x{1f57}\x{1f59}\x{1f5b}\x{1f5d}\x{1f5f}-\x{1f7d}\x{1f80}-\x{1fb4}\x{1fb6}-\x{1fbc}\x{1fbe}\x{1fc2}-\x{1fc4}\x{1fc6}-\x{1fcc}\x{1fd0}-\x{1fd3}\x{1fd6}-\x{1fdb}\x{1fe0}-\x{1fec}\x{1ff2}-\x{1ff4}\x{1ff6}-\x{1ffc}\x{200e}\x{2071}\x{207f}\x{2090}-\x{209c}\x{2102}\x{2107}\x{210a}-\x{2113}\x{2115}\x{2119}-\x{211d}\x{2124}\x{2126}\x{2128}\x{212a}-\x{212d}\x{212f}-\x{2139}\x{213c}-\x{213f}\x{2145}-\x{2149}\x{214e}\x{214f}\x{2160}-\x{2188}\x{2336}-\x{237a}\x{2395}\x{249c}-\x{24e9}\x{26ac}\x{2800}-\x{28ff}\x{2c00}-\x{2c2e}\x{2c30}-\x{2c5e}\x{2c60}-\x{2ce4}\x{2ceb}-\x{2cee}\x{2cf2}\x{2cf3}\x{2d00}-\x{2d25}\x{2d27}\x{2d2d}\x{2d30}-\x{2d67}\x{2d6f}\x{2d70}\x{2d80}-\x{2d96}\x{2da0}-\x{2da6}\x{2da8}-\x{2dae}\x{2db0}-\x{2db6}\x{2db8}-\x{2dbe}\x{2dc0}-\x{2dc6}\x{2dc8}-\x{2dce}\x{2dd0}-\x{2dd6}\x{2dd8}-\x{2dde}\x{3005}-\x{3007}\x{3021}-\x{3029}\x{302e}\x{302f}\x{3031}-\x{3035}\x{3038}-\x{303c}\x{3041}-\x{3096}\x{309d}-\x{309f}\x{30a1}-\x{30fa}\x{30fc}-\x{30ff}\x{3105}-\x{312d}\x{3131}-\x{318e}\x{3190}-\x{31ba}\x{31f0}-\x{321c}\x{3220}-\x{324f}\x{3260}-\x{327b}\x{327f}-\x{32b0}\x{32c0}-\x{32cb}\x{32d0}-\x{32fe}\x{3300}-\x{3376}\x{337b}-\x{33dd}\x{33e0}-\x{33fe}\x{3400}-\x{4db5}\x{4e00}-\x{9fd5}\x{a000}-\x{a48c}\x{a4d0}-\x{a60c}\x{a610}-\x{a62b}\x{a640}-\x{a66e}\x{a680}-\x{a69d}\x{a6a0}-\x{a6ef}\x{a6f2}-\x{a6f7}\x{a722}-\x{a787}\x{a789}-\x{a7ad}\x{a7b0}-\x{a7b7}\x{a7f7}-\x{a801}\x{a803}-\x{a805}\x{a807}-\x{a80a}\x{a80c}-\x{a824}\x{a827}\x{a830}-\x{a837}\x{a840}-\x{a873}\x{a880}-\x{a8c3}\x{a8ce}-\x{a8d9}\x{a8f2}-\x{a8fd}\x{a900}-\x{a925}\x{a92e}-\x{a946}\x{a952}\x{a953}\x{a95f}-\x{a97c}\x{a983}-\x{a9b2}\x{a9b4}\x{a9b5}\x{a9ba}\x{a9bb}\x{a9bd}-\x{a9cd}\x{a9cf}-\x{a9d9}\x{a9de}-\x{a9e4}\x
{a9e6}-\x{a9fe}\x{aa00}-\x{aa28}\x{aa2f}\x{aa30}\x{aa33}\x{aa34}\x{aa40}-\x{aa42}\x{aa44}-\x{aa4b}\x{aa4d}\x{aa50}-\x{aa59}\x{aa5c}-\x{aa7b}\x{aa7d}-\x{aaaf}\x{aab1}\x{aab5}\x{aab6}\x{aab9}-\x{aabd}\x{aac0}\x{aac2}\x{aadb}-\x{aaeb}\x{aaee}-\x{aaf5}\x{ab01}-\x{ab06}\x{ab09}-\x{ab0e}\x{ab11}-\x{ab16}\x{ab20}-\x{ab26}\x{ab28}-\x{ab2e}\x{ab30}-\x{ab65}\x{ab70}-\x{abe4}\x{abe6}\x{abe7}\x{abe9}-\x{abec}\x{abf0}-\x{abf9}\x{ac00}-\x{d7a3}\x{d7b0}-\x{d7c6}\x{d7cb}-\x{d7fb}\x{e000}-\x{fa6d}\x{fa70}-\x{fad9}\x{fb00}-\x{fb06}\x{fb13}-\x{fb17}\x{ff21}-\x{ff3a}\x{ff41}-\x{ff5a}\x{ff66}-\x{ffbe}\x{ffc2}-\x{ffc7}\x{ffca}-\x{ffcf}\x{ffd2}-\x{ffd7}\x{ffda}-\x{ffdc}\x{10000}-\x{1000b}\x{1000d}-\x{10026}\x{10028}-\x{1003a}\x{1003c}\x{1003d}\x{1003f}-\x{1004d}\x{10050}-\x{1005d}\x{10080}-\x{100fa}\x{10100}\x{10102}\x{10107}-\x{10133}\x{10137}-\x{1013f}\x{101d0}-\x{101fc}\x{10280}-\x{1029c}\x{102a0}-\x{102d0}\x{10300}-\x{10323}\x{10330}-\x{1034a}\x{10350}-\x{10375}\x{10380}-\x{1039d}\x{1039f}-\x{103c3}\x{103c8}-\x{103d5}\x{10400}-\x{1049d}\x{104a0}-\x{104a9}\x{10500}-\x{10527}\x{10530}-\x{10563}\x{1056f}\x{10600}-\x{10736}\x{10740}-\x{10755}\x{10760}-\x{10767}\x{11000}\x{11002}-\x{11037}\x{11047}-\x{1104d}\x{11066}-\x{1106f}\x{11082}-\x{110b2}\x{110b7}\x{110b8}\x{110bb}-\x{110c1}\x{110d0}-\x{110e8}\x{110f0}-\x{110f9}\x{11103}-\x{11126}\x{1112c}\x{11136}-\x{11143}\x{11150}-\x{11172}\x{11174}-\x{11176}\x{11182}-\x{111b5}\x{111bf}-\x{111c9}\x{111cd}\x{111d0}-\x{111df}\x{111e1}-\x{111f4}\x{11200}-\x{11211}\x{11213}-\x{1122e}\x{11232}\x{11233}\x{11235}\x{11238}-\x{1123d}\x{11280}-\x{11286}\x{11288}\x{1128a}-\x{1128d}\x{1128f}-\x{1129d}\x{1129f}-\x{112a9}\x{112b0}-\x{112de}\x{112e0}-\x{112e2}\x{112f0}-\x{112f9}\x{11302}\x{11303}\x{11305}-\x{1130c}\x{1130f}\x{11310}\x{11313}-\x{11328}\x{1132a}-\x{11330}\x{11332}\x{11333}\x{11335}-\x{11339}\x{1133d}-\x{1133f}\x{11341}-\x{11344}\x{11347}\x{11348}\x{1134b}-\x{1134d}\x{11350}\x{11357}\x{1135d}-\x{11363}\x{11480}-\x{114b2}\x{114b9}\x{114bb}-\x{114be
}\x{114c1}\x{114c4}-\x{114c7}\x{114d0}-\x{114d9}\x{11580}-\x{115b1}\x{115b8}-\x{115bb}\x{115be}\x{115c1}-\x{115db}\x{11600}-\x{11632}\x{1163b}\x{1163c}\x{1163e}\x{11641}-\x{11644}\x{11650}-\x{11659}\x{11680}-\x{116aa}\x{116ac}\x{116ae}\x{116af}\x{116b6}\x{116c0}-\x{116c9}\x{11700}-\x{11719}\x{11720}\x{11721}\x{11726}\x{11730}-\x{1173f}\x{118a0}-\x{118f2}\x{118ff}\x{11ac0}-\x{11af8}\x{12000}-\x{12399}\x{12400}-\x{1246e}\x{12470}-\x{12474}\x{12480}-\x{12543}\x{13000}-\x{1342e}\x{14400}-\x{14646}\x{16800}-\x{16a38}\x{16a40}-\x{16a5e}\x{16a60}-\x{16a69}\x{16a6e}\x{16a6f}\x{16ad0}-\x{16aed}\x{16af5}\x{16b00}-\x{16b2f}\x{16b37}-\x{16b45}\x{16b50}-\x{16b59}\x{16b5b}-\x{16b61}\x{16b63}-\x{16b77}\x{16b7d}-\x{16b8f}\x{16f00}-\x{16f44}\x{16f50}-\x{16f7e}\x{16f93}-\x{16f9f}\x{1b000}\x{1b001}\x{1bc00}-\x{1bc6a}\x{1bc70}-\x{1bc7c}\x{1bc80}-\x{1bc88}\x{1bc90}-\x{1bc99}\x{1bc9c}\x{1bc9f}\x{1d000}-\x{1d0f5}\x{1d100}-\x{1d126}\x{1d129}-\x{1d166}\x{1d16a}-\x{1d172}\x{1d183}\x{1d184}\x{1d18c}-\x{1d1a9}\x{1d1ae}-\x{1d1e8}\x{1d360}-\x{1d371}\x{1d400}-\x{1d454}\x{1d456}-\x{1d49c}\x{1d49e}\x{1d49f}\x{1d4a2}\x{1d4a5}\x{1d4a6}\x{1d4a9}-\x{1d4ac}\x{1d4ae}-\x{1d4b9}\x{1d4bb}\x{1d4bd}-\x{1d4c3}\x{1d4c5}-\x{1d505}\x{1d507}-\x{1d50a}\x{1d50d}-\x{1d514}\x{1d516}-\x{1d51c}\x{1d51e}-\x{1d539}\x{1d53b}-\x{1d53e}\x{1d540}-\x{1d544}\x{1d546}\x{1d54a}-\x{1d550}\x{1d552}-\x{1d6a5}\x{1d6a8}-\x{1d6da}\x{1d6dc}-\x{1d714}\x{1d716}-\x{1d74e}\x{1d750}-\x{1d788}\x{1d78a}-\x{1d7c2}\x{1d7c4}-\x{1d7cb}\x{1d800}-\x{1d9ff}\x{1da37}-\x{1da3a}\x{1da6d}-\x{1da74}\x{1da76}-\x{1da83}\x{1da85}-\x{1da8b}\x{1f110}-\x{1f12e}\x{1f130}-\x{1f169}\x{1f170}-\x{1f19a}\x{1f1e6}-\x{1f202}\x{1f210}-\x{1f23a}\x{1f240}-\x{1f248}\x{1f250}\x{1f251}\x{20000}-\x{2a6d6}\x{2a700}-\x{2b734}\x{2b740}-\x{2b81d}\x{2b820}-\x{2cea1}\x{2f800}-\x{2fa1d}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}])|([\x{590}\x{5be}\x{5c0}\x{5c3}\x{5c6}\x{5c8}-\x{5ff}\x{7c0}-\x{7ea}\x{7f4}\x{7f5}\x{7fa}-\x{815}\x{81a}\x{824}\x{828}\x{82e}-\x{858}\x{85c}-\x{89f}\x{200f}\x
{fb1d}\x{fb1f}-\x{fb28}\x{fb2a}-\x{fb4f}\x{10800}-\x{1091e}\x{10920}-\x{10a00}\x{10a04}\x{10a07}-\x{10a0b}\x{10a10}-\x{10a37}\x{10a3b}-\x{10a3e}\x{10a40}-\x{10ae4}\x{10ae7}-\x{10b38}\x{10b40}-\x{10e5f}\x{10e7f}-\x{10fff}\x{1e800}-\x{1e8cf}\x{1e8d7}-\x{1edff}\x{1ef00}-\x{1efff}\x{608}\x{60b}\x{60d}\x{61b}-\x{64a}\x{66d}-\x{66f}\x{671}-\x{6d5}\x{6e5}\x{6e6}\x{6ee}\x{6ef}\x{6fa}-\x{710}\x{712}-\x{72f}\x{74b}-\x{7a5}\x{7b1}-\x{7bf}\x{8a0}-\x{8e2}\x{fb50}-\x{fd3d}\x{fd40}-\x{fdcf}\x{fdf0}-\x{fdfc}\x{fdfe}\x{fdff}\x{fe70}-\x{fefe}\x{1ee00}-\x{1eeef}\x{1eef2}-\x{1eeff}]))/u'; // @codeCoverageIgnoreEnd /** * @internal Calling this directly is deprecated. Use LanguageFactory instead. * * @param string|null $code Which code to use. Passing null is deprecated in 1.35, hard-deprecated since 1.43. * @param NamespaceInfo|null $namespaceInfo * @param LocalisationCache|null $localisationCache * @param LanguageNameUtils|null $langNameUtils * @param LanguageFallback|null $langFallback * @param LanguageConverterFactory|null $converterFactory * @param HookContainer|null $hookContainer * @param Config|null $config */ public function __construct( $code = null, ?NamespaceInfo $namespaceInfo = null, ?LocalisationCache $localisationCache = null, ?LanguageNameUtils $langNameUtils = null, ?LanguageFallback $langFallback = null, ?LanguageConverterFactory $converterFactory = null, ?HookContainer $hookContainer = null, ?Config $config = null ) { if ( !func_num_args() ) { // Old calling convention, deprecated wfDeprecatedMsg( __METHOD__ . 
' without providing all services is deprecated', '1.35' ); if ( static::class === 'Language' ) { $this->mCode = 'en'; } else { $this->mCode = str_replace( '_', '-', strtolower( substr( static::class, 8 ) ) ); } $services = MediaWikiServices::getInstance(); $this->namespaceInfo = $services->getNamespaceInfo(); $this->localisationCache = $services->getLocalisationCache(); $this->langNameUtils = $services->getLanguageNameUtils(); $this->langFallback = $services->getLanguageFallback(); $this->converterFactory = $services->getLanguageConverterFactory(); $this->hookContainer = $services->getHookContainer(); $this->hookRunner = new HookRunner( $this->hookContainer ); $this->config = $services->getMainConfig(); return; } Assert::parameter( $code !== null, '$code', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $namespaceInfo !== null, '$namespaceInfo', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $localisationCache !== null, '$localisationCache', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $langNameUtils !== null, '$langNameUtils', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $langFallback !== null, '$langFallback', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $converterFactory !== null, '$converterFactory', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $hookContainer !== null, '$hookContainer', 'Parameters cannot be null unless all are omitted' ); Assert::parameter( $config !== null, '$config', 'Parameters cannot be null unless all are omitted' ); $this->mCode = $code; $this->namespaceInfo = $namespaceInfo; $this->localisationCache = $localisationCache; $this->langNameUtils = $langNameUtils; $this->langFallback = $langFallback; $this->converterFactory = $converterFactory; $this->hookContainer = $hookContainer; $this->hookRunner = new HookRunner( $hookContainer ); $this->config = $config; } /** * @return array 
* @since 1.19 */ public function getFallbackLanguages() { return $this->langFallback->getAll( $this->mCode ); } /** * Exports $wgBookstoreListEn * @return array */ public function getBookstoreList() { return $this->localisationCache->getItem( $this->mCode, 'bookstoreList' ); } /** * Returns an array of localised namespaces indexed by their numbers. If the namespace is not * available in localised form, it will be included in English. * * @return array<int,string> List of localized namespace names, indexed by numeric namespace ID. */ public function getNamespaces() { if ( $this->namespaceNames === null ) { $metaNamespace = $this->config->get( MainConfigNames::MetaNamespace ); $metaNamespaceTalk = $this->config->get( MainConfigNames::MetaNamespaceTalk ); $extraNamespaces = $this->config->get( MainConfigNames::ExtraNamespaces ); $validNamespaces = $this->namespaceInfo->getCanonicalNamespaces(); // @phan-suppress-next-line PhanTypeMismatchProperty $this->namespaceNames = $extraNamespaces + $this->localisationCache->getItem( $this->mCode, 'namespaceNames' ); // @phan-suppress-next-line PhanTypeInvalidLeftOperand $this->namespaceNames += $validNamespaces; $this->namespaceNames[NS_PROJECT] = $metaNamespace; if ( $metaNamespaceTalk ) { $this->namespaceNames[NS_PROJECT_TALK] = $metaNamespaceTalk; } else { $talk = $this->namespaceNames[NS_PROJECT_TALK]; $this->namespaceNames[NS_PROJECT_TALK] = $this->fixVariableInNamespace( $talk ); } # Sometimes a language will be localised but not actually exist on this wiki. foreach ( $this->namespaceNames as $key => $text ) { if ( !isset( $validNamespaces[$key] ) ) { unset( $this->namespaceNames[$key] ); } } # The above mixing may leave namespaces out of canonical order. # Re-order by namespace ID number... ksort( $this->namespaceNames ); $this->getHookRunner()->onLanguageGetNamespaces( $this->namespaceNames ); } return $this->namespaceNames; } /** * Arbitrarily set all the namespace names at once. 
Mainly used for testing * @param string[] $namespaces Array of namespaces (id => name) */ public function setNamespaces( array $namespaces ) { $this->namespaceNames = $namespaces; $this->mNamespaceIds = null; } /** * Resets all the namespace caches. Mainly used for testing * @deprecated since 1.39 Use MediaWikiServices::resetServiceForTesting() instead. */ public function resetNamespaces() { $this->namespaceNames = null; $this->mNamespaceIds = null; $this->namespaceAliases = null; } /** * A convenience function that returns getNamespaces() with spaces instead of underscores * in values. Useful for producing output to be displayed e.g. in `<select>` forms. * * @return string[] */ public function getFormattedNamespaces() { $ns = $this->getNamespaces(); foreach ( $ns as $k => $v ) { $ns[$k] = strtr( $v, '_', ' ' ); } return $ns; } /** * Get a namespace value by key * * Namespace name uses underscores (not spaces), e.g. 'MediaWiki_talk'. * * <code> * $mw_ns = $lang->getNsText( NS_MEDIAWIKI_TALK ); * echo $mw_ns; // prints 'MediaWiki_talk' * </code> * * @param int $index The array key of the namespace to return * @return string|false String if the namespace value exists, otherwise false */ public function getNsText( $index ) { $ns = $this->getNamespaces(); return $ns[$index] ?? false; } /** * A convenience function that returns the same thing as * getNsText() except with '_' changed to ' ', useful for * producing output. * * <code> * $mw_ns = $lang->getFormattedNsText( NS_MEDIAWIKI_TALK ); * echo $mw_ns; // prints 'MediaWiki talk' * </code> * * @param int $index The array key of the namespace to return * @return string Namespace name without underscores (empty string if namespace does not exist) */ public function getFormattedNsText( $index ) { $ns = $this->getNsText( $index ); return $ns === false ? '' : strtr( $ns, '_', ' ' ); } /** * Returns gender-dependent namespace alias if available. 
* See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces * @param int $index Namespace index * @param string $gender Gender key (male, female... ) * @return string|false * @since 1.18 */ public function getGenderNsText( $index, $gender ) { $extraGenderNamespaces = $this->config->get( MainConfigNames::ExtraGenderNamespaces ); $ns = $extraGenderNamespaces + (array)$this->localisationCache->getItem( $this->mCode, 'namespaceGenderAliases' ); return $ns[$index][$gender] ?? $this->getNsText( $index ); } /** * Whether this language uses gender-dependent namespace aliases. * See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces * @return bool * @since 1.18 */ public function needsGenderDistinction() { $extraGenderNamespaces = $this->config->get( MainConfigNames::ExtraGenderNamespaces ); $extraNamespaces = $this->config->get( MainConfigNames::ExtraNamespaces ); if ( count( $extraGenderNamespaces ) > 0 ) { // $wgExtraGenderNamespaces overrides everything return true; } elseif ( isset( $extraNamespaces[NS_USER] ) && isset( $extraNamespaces[NS_USER_TALK] ) ) { // @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future // $wgExtraNamespaces overrides any gender aliases specified in i18n files return false; } else { // Check what is in i18n files $aliases = $this->localisationCache->getItem( $this->mCode, 'namespaceGenderAliases' ); return count( $aliases ) > 0; } } /** * Get a namespace key by case-insensitive value. * Only matches namespace names for the current language, not the * canonical ones defined in Namespace.php. * * @param string $text * @return int|false An integer if $text is a valid value otherwise false */ public function getLocalNsIndex( $text ) { $lctext = $this->lc( $text ); $ids = $this->getNamespaceIds(); return $ids[$lctext] ?? false; } /** * @return array<string,int> Map from names to namespace IDs. Note that each * namespace ID can have multiple alias. 
*/
	public function getNamespaceAliases() {
		if ( $this->namespaceAliases !== null ) {
			return $this->namespaceAliases;
		}

		$aliasMap = $this->localisationCache->getItem( $this->mCode, 'namespaceAliases' ) ?: [];

		// Re-key every project-talk alias through fixVariableInNamespace().
		// foreach iterates over a copy, so the re-added keys are not visited again.
		foreach ( $aliasMap as $aliasName => $nsIndex ) {
			if ( $nsIndex === NS_PROJECT_TALK ) {
				unset( $aliasMap[$aliasName] );
				$aliasMap[$this->fixVariableInNamespace( $aliasName )] = $nsIndex;
			}
		}

		// Gender-specific namespace names: site configuration takes precedence
		// over the localisation cache for the same namespace index.
		$genderAliases = $this->config->get( MainConfigNames::ExtraGenderNamespaces )
			+ (array)$this->localisationCache->getItem( $this->mCode, 'namespaceGenderAliases' );
		foreach ( $genderAliases as $nsIndex => $genderForms ) {
			foreach ( $genderForms as $genderAlias ) {
				$aliasMap[$genderAlias] = $nsIndex;
			}
		}

		# Also add converted namespace names as aliases, to avoid confusion.
		$converter = $this->getConverterInternal();
		$variantNames = [];
		foreach ( $converter->getVariants() as $variantCode ) {
			if ( $variantCode !== $this->mCode ) {
				foreach ( $this->getNamespaces() as $nsIndex => $_ ) {
					$variantNames[$converter->convertNamespace( $nsIndex, $variantCode )] = $nsIndex;
				}
			}
		}

		$this->namespaceAliases = $aliasMap + $variantNames;

		// In the case of conflicts between $wgNamespaceAliases and other sources
		// of aliasing, $wgNamespaceAliases wins.
		$this->namespaceAliases = $this->config->get( MainConfigNames::NamespaceAliases )
			+ $this->namespaceAliases;

		# Filter out aliases to namespaces that don't exist, e.g. from extensions
		# that aren't loaded here but are included in the l10n cache.
		# (array_intersect preserves keys from its first argument)
		$this->namespaceAliases = array_intersect(
			$this->namespaceAliases,
			array_keys( $this->getNamespaces() )
		);

		return $this->namespaceAliases;
	}

	/**
	 * Build (once) and return the lookup table from lower-cased namespace
	 * names and aliases to namespace indexes.
	 *
	 * @return array<string,int> indexed by localized lower-cased namespace name
	 */
	public function getNamespaceIds() {
		if ( $this->mNamespaceIds !== null ) {
			return $this->mNamespaceIds;
		}

		# Put namespace names and aliases into a hashtable.
		# If this is too slow, then we should arrange it so that it is done
		# before caching. The catch is that at pre-cache time, the above
		# class-specific fixup hasn't been done.
		$this->mNamespaceIds = [];
		foreach ( $this->getNamespaces() as $nsIndex => $nsName ) {
			$this->mNamespaceIds[$this->lc( $nsName )] = $nsIndex;
		}
		// Aliases are written second, so they override a name with the same lower-cased key.
		foreach ( $this->getNamespaceAliases() as $aliasName => $nsIndex ) {
			$this->mNamespaceIds[$this->lc( $aliasName )] = $nsIndex;
		}

		return $this->mNamespaceIds;
	}

	/**
	 * Get a namespace key by case-insensitive value. Canonical namespace
	 * names override custom ones defined for the current language.
	 *
	 * @param string $text
	 * @return int|false An integer if $text is a valid value otherwise false
	 */
	public function getNsIndex( $text ) {
		$lookupKey = $this->lc( $text );

		// The localized table is only consulted when there is no canonical match.
		return $this->namespaceInfo->getCanonicalIndex( $lookupKey )
			?? ( $this->getNamespaceIds()[$lookupKey] ?? false );
	}

	/**
	 * Short names for language variants used for language conversion links.
	 *
	 * Resolution order: the "variantname-xyz" message (when $usemsg is set and
	 * the message exists), then the language name for $code, then $code itself.
	 *
	 * @param string $code
	 * @param bool $usemsg Use the "variantname-xyz" message if it exists
	 * @return string
	 */
	public function getVariantname( $code, $usemsg = true ) {
		if ( $usemsg ) {
			$variantMsg = $this->msg( "variantname-$code" );
			if ( $variantMsg->exists() ) {
				return $variantMsg->text();
			}
		}

		# If it's defined as a language name, show that; otherwise, output the language code
		return $this->langNameUtils->getLanguageName( $code ) ?: $code;
	}

	/**
	 * @return string[]|false List of date format preference keys, or false if disabled.
	 */
	public function getDatePreferences() {
		$item = $this->localisationCache->getItem( $this->mCode, 'datePreferences' );
		return $item;
	}

	/**
	 * @return string[] The 'dateFormats' item of the localisation cache for this language
	 */
	public function getDateFormats() {
		$item = $this->localisationCache->getItem( $this->mCode, 'dateFormats' );
		return $item;
	}

	/**
	 * Get the default date format of this language. The special value
	 * 'dmy or mdy' is resolved through the AmericanDates configuration setting.
	 *
	 * @return string
	 */
	public function getDefaultDateFormat() {
		$defaultFormat = $this->localisationCache->getItem( $this->mCode, 'defaultDateFormat' );
		if ( $defaultFormat !== 'dmy or mdy' ) {
			return $defaultFormat;
		}

		return $this->config->get( MainConfigNames::AmericanDates ) ? 'mdy' : 'dmy';
	}

	/**
	 * @return string[] The 'datePreferenceMigrationMap' item of the localisation cache
	 */
	public function getDatePreferenceMigrationMap() {
		$item = $this->localisationCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
		return $item;
	}

	/**
	 * Get a message from the MediaWiki namespace.
	 *
	 * @param string $msg Message name
	 * @return string
	 */
	public function getMessageFromDB( $msg ) {
		$message = $this->msg( $msg );
		return $message->text();
	}

	/**
	 * Gets the Message object from this language. Only for use inside this class.
	 *
	 * @param string $msg Message name
	 * @param mixed ...$params Message parameters
	 * @return Message
	 */
	protected function msg( $msg, ...$params ) {
		$message = wfMessage( $msg, ...$params );
		return $message->inLanguage( $this );
	}

	/**
	 * @param int $key Number from 1 to 12
	 * @return string
	 */
	public function getMonthName( $key ) {
		$messageKey = self::MONTH_MESSAGES[$key - 1];
		return $this->getMessageFromDB( $messageKey );
	}

	/**
	 * @return string[] Index 0 holds an empty string; indexes 1 to 12 hold the month names
	 */
	public function getMonthNamesArray() {
		$names = [ '' ];
		foreach ( range( 1, 12 ) as $month ) {
			$names[] = $this->getMonthName( $month );
		}
		return $names;
	}

	/**
	 * @param int $key Number from 1 to 12
	 * @return string
	 */
	public function getMonthNameGen( $key ) {
		$messageKey = self::MONTH_GENITIVE_MESSAGES[$key - 1];
		return $this->getMessageFromDB( $messageKey );
	}

	/**
	 * @param int $key Number from 1 to 12
	 * @return string
	 */
	public function getMonthAbbreviation( $key ) {
		$messageKey = self::MONTH_ABBREVIATED_MESSAGES[$key - 1];
		return $this->getMessageFromDB( $messageKey );
	}

	/**
	 * @return string[] Index 0 holds an empty string; indexes 1 to 12 hold the abbreviations
	 */
	public function getMonthAbbreviationsArray() {
		$abbreviations = [ '' ];
		foreach ( range( 1, 12 ) as $month ) {
			$abbreviations[] = $this->getMonthAbbreviation( $month );
		}
		return $abbreviations;
	}

	/**
	 * @param int $key Number from 1 to 7
	 * @return string
	 */
	public function getWeekdayName( $key ) {
		$messageKey = self::WEEKDAY_MESSAGES[$key - 1];
		return $this->getMessageFromDB( $messageKey );
	}

	/**
	 * @param int $key Number from 1 to 7
	 * @return string
	 */
	public function getWeekdayAbbreviation( $key ) {
		$messageKey = self::WEEKDAY_ABBREVIATED_MESSAGES[$key - 1];
		return $this->getMessageFromDB( $messageKey );
	}

	/**
	 * Pass through the result from $dateTimeObj->format()
	 *
	 * The DateTime object is created from $ts on first use and handed back to
	 * the caller through the reference, so later calls can reuse it.
	 *
	 * @param DateTime|false|null &$dateTimeObj
	 * @param string $ts
	 * @param DateTimeZone|false|null $zone Falls back to UTC when empty
	 * @param string $code
	 * @return string
	 */
	private static function dateTimeObjFormat( &$dateTimeObj, $ts, $zone, $code ) {
		$dateTimeObj = $dateTimeObj ?: DateTime::createFromFormat(
			'YmdHis',
			$ts,
			$zone ?: new DateTimeZone( 'UTC' )
		);

		return $dateTimeObj->format( $code );
	}

	/**
	 * This is a workalike of PHP's date() function, but with better
	 * internationalisation, a reduced set of format characters, and a better
	 * escaping format.
	 *
	 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrUeIOPTZ. See
	 * the PHP manual for definitions. There are a number of extensions, which
	 * start with "x":
	 *
	 *    xn   Do not translate digits of the next numeric format character
	 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
	 *    xr   Use roman numerals for the next numeric format character
	 *    xh   Use hebrew numerals for the next numeric format character
	 *    xx   Literal x
	 *    xg   Genitive month name
	 *
	 *    xij  j (day number) in Iranian calendar
	 *    xiF  F (month name) in Iranian calendar
	 *    xin  n (month number) in Iranian calendar
	 *    xiy  y (two digit year) in Iranian calendar
	 *    xiY  Y (full year) in Iranian calendar
	 *    xit  t (days in month) in Iranian calendar
	 *    xiz  z (day of the year) in Iranian calendar
	 *
	 *    xjj  j (day number) in Hebrew calendar
	 *    xjF  F (month name) in Hebrew calendar
	 *    xjt  t (days in month) in Hebrew calendar
	 *    xjx  xg (genitive month name) in Hebrew calendar
	 *    xjn  n (month number) in Hebrew calendar
	 *    xjY  Y (full year) in Hebrew calendar
	 *
	 *    xmj  j (day number) in Hijri calendar
	 *    xmF  F (month name) in Hijri calendar
	 *    xmn  n (month number) in Hijri calendar
	 *    xmY  Y (full year) in Hijri calendar
	 *
	 *    xkY  Y (full year) in Thai solar calendar. Months and days are
	 *                       identical to the Gregorian calendar
	 *    xoY  Y (full year) in Minguo calendar or Juche year.
* Months and days are identical to the * Gregorian calendar * xtY Y (full year) in Japanese nengo. Months and days are * identical to the Gregorian calendar * * Characters enclosed in double quotes will be considered literal (with * the quotes themselves removed). Unmatched quotes will be considered * literal quotes. Example: * * "The month is" F => The month is January * i's" => 20'11" * * Backslash escaping is also supported. * * Input timestamp is assumed to be pre-normalized to the desired local * time zone, if any. Note that the format characters crUeIOPTZ will assume * $ts is UTC if $zone is not given. * * @param string $format * @param string $ts 14-character timestamp * YYYYMMDDHHMMSS * 01234567890123 * @param DateTimeZone|null $zone Timezone of $ts * @param int|null &$ttl The amount of time (in seconds) the output may be cached for. * Only makes sense if $ts is the current time. * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai? * * @return string * @return-taint tainted */ public function sprintfDate( $format, $ts, ?DateTimeZone $zone = null, &$ttl = 'unused' ) { // @phan-suppress-previous-line PhanTypeMismatchDefault Type mismatch on pass-by-ref args $s = ''; $raw = false; $roman = false; $hebrewNum = false; $dateTimeObj = false; $rawToggle = false; $iranian = false; $hebrew = false; $hijri = false; $thai = false; $minguo = false; $tenno = false; $usedSecond = false; $usedMinute = false; $usedHour = false; $usedAMPM = false; $usedDay = false; $usedWeek = false; $usedMonth = false; $usedYear = false; $usedISOYear = false; $usedIsLeapYear = false; $usedHebrewMonth = false; $usedIranianMonth = false; $usedHijriMonth = false; $usedHebrewYear = false; $usedIranianYear = false; $usedHijriYear = false; $usedTennoYear = false; if ( strlen( $ts ) !== 14 ) { throw new InvalidArgumentException( __METHOD__ . ": The timestamp $ts should have 14 characters" ); } if ( !ctype_digit( $ts ) ) { throw new InvalidArgumentException( __METHOD__ . 
": The timestamp $ts should be a number" ); } $formatLength = strlen( $format ); for ( $p = 0; $p < $formatLength; $p++ ) { $num = false; $code = $format[$p]; if ( $code == 'x' && $p < $formatLength - 1 ) { $code .= $format[++$p]; } if ( ( $code === 'xi' || $code === 'xj' || $code === 'xk' || $code === 'xm' || $code === 'xo' || $code === 'xt' ) && $p < $formatLength - 1 ) { $code .= $format[++$p]; } switch ( $code ) { case 'xx': $s .= 'x'; break; case 'xn': $raw = true; break; case 'xN': $rawToggle = !$rawToggle; break; case 'xr': $roman = true; break; case 'xh': $hebrewNum = true; break; case 'xg': $usedMonth = true; $s .= $this->getMonthNameGen( (int)substr( $ts, 4, 2 ) ); break; case 'xjx': $usedHebrewMonth = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $s .= $this->getMessageFromDB( self::HEBREW_CALENDAR_MONTH_GENITIVE_MESSAGES[$hebrew[1] - 1] ); break; case 'd': $usedDay = true; $num = substr( $ts, 6, 2 ); break; case 'D': $usedDay = true; $s .= $this->getWeekdayAbbreviation( (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'w' ) + 1 ); break; case 'j': $usedDay = true; $num = intval( substr( $ts, 6, 2 ) ); break; case 'xij': $usedDay = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = $iranian[2]; break; case 'xmj': $usedDay = true; if ( !$hijri ) { $hijri = self::tsToHijri( $ts ); } $num = $hijri[2]; break; case 'xjj': $usedDay = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $num = $hebrew[2]; break; case 'l': $usedDay = true; $s .= $this->getWeekdayName( (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'w' ) + 1 ); break; case 'F': $usedMonth = true; $s .= $this->getMonthName( (int)substr( $ts, 4, 2 ) ); break; case 'xiF': $usedIranianMonth = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $s .= $this->getMessageFromDB( self::IRANIAN_CALENDAR_MONTHS_MESSAGES[$iranian[1] - 1] ); break; case 'xmF': $usedHijriMonth = true; if ( !$hijri ) { $hijri = self::tsToHijri( $ts ); } $s .= 
$this->getMessageFromDB( self::HIJRI_CALENDAR_MONTH_MESSAGES[$hijri[1] - 1] ); break; case 'xjF': $usedHebrewMonth = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $s .= $this->getMessageFromDB( self::HEBREW_CALENDAR_MONTHS_MESSAGES[$hebrew[1] - 1] ); break; case 'm': $usedMonth = true; $num = substr( $ts, 4, 2 ); break; case 'M': $usedMonth = true; $s .= $this->getMonthAbbreviation( (int)substr( $ts, 4, 2 ) ); break; case 'n': $usedMonth = true; $num = intval( substr( $ts, 4, 2 ) ); break; case 'xin': $usedIranianMonth = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = $iranian[1]; break; case 'xmn': $usedHijriMonth = true; if ( !$hijri ) { $hijri = self::tsToHijri( $ts ); } $num = $hijri[1]; break; case 'xjn': $usedHebrewMonth = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $num = $hebrew[1]; break; case 'xjt': $usedHebrewMonth = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $num = $hebrew[3]; break; case 'Y': $usedYear = true; $num = substr( $ts, 0, 4 ); break; case 'xiY': $usedIranianYear = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = $iranian[0]; break; case 'xmY': $usedHijriYear = true; if ( !$hijri ) { $hijri = self::tsToHijri( $ts ); } $num = $hijri[0]; break; case 'xjY': $usedHebrewYear = true; if ( !$hebrew ) { $hebrew = self::tsToHebrew( $ts ); } $num = $hebrew[0]; break; case 'xkY': $usedYear = true; if ( !$thai ) { $thai = self::tsToYear( $ts, 'thai' ); } $num = $thai[0]; break; case 'xoY': $usedYear = true; if ( !$minguo ) { $minguo = self::tsToYear( $ts, 'minguo' ); } $num = $minguo[0]; break; case 'xtY': $usedTennoYear = true; if ( !$tenno ) { $tenno = self::tsToJapaneseGengo( $ts ); } $num = $tenno; break; case 'y': $usedYear = true; $num = substr( $ts, 2, 2 ); break; case 'xiy': $usedIranianYear = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = substr( (string)$iranian[0], -2 ); break; case 'xit': $usedIranianYear = true; if ( 
!$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = self::IRANIAN_DAYS[$iranian[1] - 1]; break; case 'xiz': $usedIranianYear = true; if ( !$iranian ) { $iranian = self::tsToIranian( $ts ); } $num = $iranian[3]; break; case 'a': $usedAMPM = true; $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm'; break; case 'A': $usedAMPM = true; $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM'; break; case 'g': $usedHour = true; $h = (int)substr( $ts, 8, 2 ); $num = $h % 12 ?: 12; break; case 'G': $usedHour = true; $num = intval( substr( $ts, 8, 2 ) ); break; case 'h': $usedHour = true; $h = (int)substr( $ts, 8, 2 ); $num = sprintf( '%02d', $h % 12 ?: 12 ); break; case 'H': $usedHour = true; $num = substr( $ts, 8, 2 ); break; case 'i': $usedMinute = true; $num = substr( $ts, 10, 2 ); break; case 's': $usedSecond = true; $num = substr( $ts, 12, 2 ); break; case 'c': case 'r': $usedSecond = true; // fall through case 'e': case 'O': case 'P': case 'T': $s .= self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 'w': case 'N': case 'z': $usedDay = true; $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 'W': $usedWeek = true; $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 't': $usedMonth = true; $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 'L': $usedIsLeapYear = true; $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 'o': $usedISOYear = true; $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case 'U': $usedSecond = true; // fall through case 'I': case 'Z': $num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code ); break; case '\\': # Backslash escaping if ( $p < $formatLength - 1 ) { $s .= $format[++$p]; } else { $s .= '\\'; } break; case '"': # Quoted literal if ( $p < $formatLength - 1 ) { $endQuote = strpos( $format, '"', $p + 1 ); if ( $endQuote === false ) { # No terminating quote, assume 
literal " $s .= '"'; } else { $s .= substr( $format, $p + 1, $endQuote - $p - 1 ); $p = $endQuote; } } else { # Quote at the end of the string, assume literal " $s .= '"'; } break; default: $s .= $format[$p]; } if ( $num !== false ) { if ( $rawToggle || $raw ) { $s .= $num; $raw = false; } elseif ( $roman ) { $s .= self::romanNumeral( $num ); $roman = false; } elseif ( $hebrewNum ) { $s .= self::hebrewNumeral( $num ); $hebrewNum = false; } elseif ( preg_match( '/^[\d.]+$/', $num ) ) { $s .= $this->formatNumNoSeparators( $num ); } else { $s .= $num; } } } if ( $ttl === 'unused' ) { // No need to calculate the TTL, the caller won't use it anyway. } elseif ( $usedSecond ) { $ttl = 1; } elseif ( $usedMinute ) { $ttl = 60 - (int)substr( $ts, 12, 2 ); } elseif ( $usedHour ) { $ttl = 3600 - (int)substr( $ts, 10, 2 ) * 60 - (int)substr( $ts, 12, 2 ); } elseif ( $usedAMPM ) { $ttl = 43200 - ( (int)substr( $ts, 8, 2 ) % 12 ) * 3600 - (int)substr( $ts, 10, 2 ) * 60 - (int)substr( $ts, 12, 2 ); } elseif ( $usedDay || $usedHebrewMonth || $usedIranianMonth || $usedHijriMonth || $usedHebrewYear || $usedIranianYear || $usedHijriYear || $usedTennoYear ) { // @todo Someone who understands the non-Gregorian calendars // should write proper logic for them so that they don't need purged every day. $ttl = 86400 - (int)substr( $ts, 8, 2 ) * 3600 - (int)substr( $ts, 10, 2 ) * 60 - (int)substr( $ts, 12, 2 ); } else { $possibleTtls = []; $timeRemainingInDay = 86400 - (int)substr( $ts, 8, 2 ) * 3600 - (int)substr( $ts, 10, 2 ) * 60 - (int)substr( $ts, 12, 2 ); if ( $usedWeek ) { $possibleTtls[] = ( 7 - (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'N' ) ) * 86400 + $timeRemainingInDay; } elseif ( $usedISOYear ) { // December 28th falls on the last ISO week of the year, every year. // The last ISO week of a year can be 52 or 53. $lastWeekOfISOYear = (int)DateTime::createFromFormat( 'Ymd', (int)substr( $ts, 0, 4 ) . 
'1228', $zone ?: new DateTimeZone( 'UTC' ) )->format( 'W' ); $currentISOWeek = (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'W' ); $weeksRemaining = $lastWeekOfISOYear - $currentISOWeek; $timeRemainingInWeek = ( 7 - (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'N' ) ) * 86400 + $timeRemainingInDay; $possibleTtls[] = $weeksRemaining * 604800 + $timeRemainingInWeek; } if ( $usedMonth ) { $possibleTtls[] = ( (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 't' ) - (int)substr( $ts, 6, 2 ) ) * 86400 + $timeRemainingInDay; } elseif ( $usedYear ) { $possibleTtls[] = ( (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'L' ) + 364 - (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'z' ) ) * 86400 + $timeRemainingInDay; } elseif ( $usedIsLeapYear ) { $year = (int)substr( $ts, 0, 4 ); $timeRemainingInYear = ( (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'L' ) + 364 - (int)self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'z' ) ) * 86400 + $timeRemainingInDay; $mod = $year % 4; if ( $mod || ( !( $year % 100 ) && $year % 400 ) ) { // this isn't a leap year. see when the next one starts $nextCandidate = $year - $mod + 4; if ( $nextCandidate % 100 || !( $nextCandidate % 400 ) ) { $possibleTtls[] = ( $nextCandidate - $year - 1 ) * 365 * 86400 + $timeRemainingInYear; } else { $possibleTtls[] = ( $nextCandidate - $year + 3 ) * 365 * 86400 + $timeRemainingInYear; } } else { // this is a leap year, so the next year isn't $possibleTtls[] = $timeRemainingInYear; } } if ( $possibleTtls ) { $ttl = min( $possibleTtls ); } } return $s; } /** * Number of days in each month of the Gregorian calendar */ private const GREG_DAYS = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ]; /** * Number of days in each month of the Iranian calendar */ private const IRANIAN_DAYS = [ 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 ]; /** * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert * Gregorian dates to Iranian dates. 
Originally written in C, it
	 * is released under the terms of GNU Lesser General Public
	 * License. Conversion to PHP was performed by Niklas Laxström.
	 *
	 * Link: http://www.farsiweb.info/jalali/jalali.c
	 *
	 * @param string $ts Timestamp; only the leading YYYYMMDD digits are read
	 *
	 * @return int[] [ year, month number, day of month, zero-based day of year ]
	 */
	private static function tsToIranian( $ts ) {
		// Work relative to the year 1600 with zero-based month and day
		$yearsSince1600 = (int)substr( $ts, 0, 4 ) - 1600;
		$monthIndex = (int)substr( $ts, 4, 2 ) - 1;
		$dayIndex = (int)substr( $ts, 6, 2 ) - 1;

		# Days passed from the beginning (including leap years)
		$gregorianDayNo = 365 * $yearsSince1600
			+ floor( ( $yearsSince1600 + 3 ) / 4 )
			- floor( ( $yearsSince1600 + 99 ) / 100 )
			+ floor( ( $yearsSince1600 + 399 ) / 400 );

		// Add the number of days for the past months of this year
		for ( $m = 0; $m < $monthIndex; $m++ ) {
			$gregorianDayNo += self::GREG_DAYS[$m];
		}

		// Leap years: one extra day once February has passed
		$isLeapYear = ( $yearsSince1600 % 4 === 0 && $yearsSince1600 % 100 !== 0 )
			|| $yearsSince1600 % 400 == 0;
		if ( $monthIndex > 1 && $isLeapYear ) {
			$gregorianDayNo++;
		}

		// Days passed in the current month
		$gregorianDayNo += $dayIndex;

		$iranianDayNo = $gregorianDayNo - 79;

		// Peel off whole blocks of 12053 days, each worth 33 years
		$blocksOf33Years = (int)floor( $iranianDayNo / 12053 );
		$iranianDayNo %= 12053;

		// ... then whole blocks of 1461 days, each worth 4 years
		$iranianYear = 979 + 33 * $blocksOf33Years + 4 * (int)floor( $iranianDayNo / 1461 );
		$iranianDayNo %= 1461;

		if ( $iranianDayNo >= 366 ) {
			$remaining = $iranianDayNo - 1;
			$iranianYear += (int)floor( $remaining / 365 );
			$iranianDayNo = $remaining % 365;
		}

		$dayOfYear = $iranianDayNo;

		// Walk through the month lengths until the remainder fits into a month
		$m = 0;
		while ( $m < 11 && $iranianDayNo >= self::IRANIAN_DAYS[$m] ) {
			$iranianDayNo -= self::IRANIAN_DAYS[$m];
			$m++;
		}

		return [ $iranianYear, $m + 1, $iranianDayNo + 1, $dayOfYear ];
	}

	/**
	 * Converting Gregorian dates to Hijri dates.
*
	 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
	 *
	 * @see https://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
	 *
	 * @param string $ts Timestamp; only the leading YYYYMMDD digits are read
	 *
	 * @return int[] [ year, month number, day of month ]
	 */
	private static function tsToHijri( $ts ) {
		$gYear = (int)substr( $ts, 0, 4 );
		$gMonth = (int)substr( $ts, 4, 2 );
		$gDay = (int)substr( $ts, 6, 2 );

		// Dates after 14 October 1582 use the first formula, earlier dates the second
		$afterCutover = $gYear > 1582
			|| ( $gYear == 1582 && ( $gMonth > 10 || ( $gMonth == 10 && $gDay > 14 ) ) );

		// All divisions below truncate toward zero, like an (int) cast of the quotient.
		if ( $afterCutover ) {
			$monthShift = intdiv( $gMonth - 14, 12 );
			$julianDay = intdiv( 1461 * ( $gYear + 4800 + $monthShift ), 4 )
				+ intdiv( 367 * ( $gMonth - 2 - 12 * $monthShift ), 12 )
				- intdiv( 3 * intdiv( $gYear + 4900 + $monthShift, 100 ), 4 )
				+ $gDay - 32075;
		} else {
			$julianDay = 367 * $gYear
				- intdiv( 7 * ( $gYear + 5001 + intdiv( $gMonth - 9, 7 ) ), 4 )
				+ intdiv( 275 * $gMonth, 9 )
				+ $gDay + 1729777;
		}

		$l = $julianDay - 1948440 + 10632;
		// Number of whole 10631-day blocks; each contributes 30 years to the result
		$cycles = intdiv( $l - 1, 10631 );
		$l = $l - 10631 * $cycles + 354;

		$j = intdiv( 10985 - $l, 5316 ) * intdiv( 50 * $l, 17719 )
			+ intdiv( $l, 5670 ) * intdiv( 43 * $l, 15238 );
		$l = $l
			- intdiv( 30 - $j, 15 ) * intdiv( 17719 * $j, 50 )
			- intdiv( $j, 16 ) * intdiv( 15238 * $j, 43 )
			+ 29;

		$hijriMonth = intdiv( 24 * $l, 709 );
		$hijriDay = $l - intdiv( 709 * $hijriMonth, 24 );
		$hijriYear = 30 * $cycles + $j - 30;

		return [ $hijriYear, $hijriMonth, $hijriDay ];
	}

	/**
	 * Converting Gregorian dates to Hebrew dates.
	 *
	 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
	 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
	 * to translate the relevant functions into PHP and release them under
	 * GNU GPL.
	 *
	 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
	 * and Adar II is 14. In a non-leap year, Adar is 6.
*
	 * @param string $ts Timestamp; only the leading YYYYMMDD digits are read
	 *
	 * @return int[] [ Hebrew year, Hebrew month, day of month, days counted for that month ]
	 */
	private static function tsToHebrew( $ts ) {
		// Gregorian leap-year rule, needed at three points below
		$isGregorianLeap = static function ( $y ) {
			return $y % 400 == 0 || ( $y % 4 == 0 && $y % 100 != 0 );
		};

		# Parse date
		$gregYear = (int)substr( $ts, 0, 4 );
		$gregMonth = (int)substr( $ts, 4, 2 );
		$gregDay = (int)substr( $ts, 6, 2 );

		# Calculate Hebrew year
		$hebrewYear = $gregYear + 3760;

		# Month number when September = 1, August = 12
		$shiftedMonth = $gregMonth + 4;
		if ( $shiftedMonth > 12 ) {
			# Next year
			$shiftedMonth -= 12;
			$gregYear++;
			$hebrewYear++;
		}

		# Calculate day of year from 1 September
		$dayOfYear = $gregDay;
		for ( $m = 1; $m < $shiftedMonth; $m++ ) {
			if ( $m == 6 ) {
				# February; one day longer in a leap year
				$dayOfYear += $isGregorianLeap( $gregYear ) ? 29 : 28;
			} elseif ( in_array( $m, [ 1, 3, 8, 10 ], true ) ) {
				# September, November, April, June
				$dayOfYear += 30;
			} else {
				$dayOfYear += 31;
			}
		}

		# Calculate the start of the Hebrew year
		$yearStart = self::hebrewYearStart( $hebrewYear );

		# Calculate next year's start
		if ( $dayOfYear > $yearStart ) {
			# Next year's start
			$nextYearStart = self::hebrewYearStart( $hebrewYear + 1 );
		} else {
			# Day is before the start of the year - it is the previous year
			# Next year's start
			$nextYearStart = $yearStart;
			# Previous year
			$gregYear--;
			$hebrewYear--;
			# Add days since the previous year's 1 September (366 in a leap year)
			$dayOfYear += $isGregorianLeap( $gregYear ) ? 366 : 365;
			# Start of the new (previous) year
			$yearStart = self::hebrewYearStart( $hebrewYear );
		}

		# Calculate Hebrew day of year
		$hebrewDayOfYear = $dayOfYear - $yearStart;

		# Difference between year's days
		$startDiff = $nextYearStart - $yearStart;
		# Add 12 (or 13 for leap years) days to ignore the difference between
		# Hebrew and Gregorian year (353 at least vs. 365/6) - now the
		# difference is only about the year type
		$startDiff += $isGregorianLeap( $gregYear ) ? 13 : 12;

		# Check the year pattern, and is leap year
		# 0 means an incomplete year, 1 means a regular year, 2 means a complete year
		# This is mod 30, to work on both leap years (which add 30 days of Adar I)
		# and non-leap years
		$yearPattern = $startDiff % 30;
		# Check if leap year
		$isHebrewLeap = $startDiff >= 30;

		# Calculate day in the month from number of day in the Hebrew year
		# Don't check Adar - if the day is not in Adar, we will stop before;
		# if it is in Adar, we will use it to check if it is Adar I or Adar II
		$dayInMonth = $hebrewDayOfYear;
		$monthLength = 0;
		for ( $hebrewMonth = 1; $hebrewMonth <= 12; $hebrewMonth++ ) {
			# Calculate days in this month
			if ( $isHebrewLeap && $hebrewMonth == 6 ) {
				# Leap year - has Adar I, with 30 days, and Adar II, with 29 days
				$monthLength = 30;
				if ( $dayInMonth <= $monthLength ) {
					# Day in Adar I
					$hebrewMonth = 13;
				} else {
					# Subtract the days of Adar I
					$dayInMonth -= $monthLength;
					# Try Adar II
					$monthLength = 29;
					if ( $dayInMonth <= $monthLength ) {
						# Day in Adar II
						$hebrewMonth = 14;
					}
				}
			} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
				# Cheshvan in a complete year (otherwise as the rule below)
				$monthLength = 30;
			} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
				# Kislev in an incomplete year (otherwise as the rule below)
				$monthLength = 29;
			} else {
				# Odd months have 30 days, even have 29
				$monthLength = 30 - ( $hebrewMonth - 1 ) % 2;
			}

			if ( $dayInMonth <= $monthLength ) {
				# In the current month
				break;
			}

			# Subtract the days of the current month and try the next one
			$dayInMonth -= $monthLength;
		}

		return [ $hebrewYear, $hebrewMonth, $dayInMonth, $monthLength ];
	}

	/**
	 * This calculates the Hebrew year start, as days since 1 September.
	 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
	 * Used for Hebrew date.
*
	 * @param int $year Hebrew year
	 *
	 * @return int
	 */
	private static function hebrewYearStart( $year ) {
		$a = ( 12 * ( $year - 1 ) + 17 ) % 19;
		$b = ( $year - 1 ) % 4;
		$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * ( $year - 1 );

		// Split $m into an integer part ($Mar) and a fractional part (kept in $m).
		// NOTE(review): the two "$m < 0" adjustments look like a floor() emulation
		// for negative values - confirm before simplifying.
		if ( $m < 0 ) {
			$m--;
		}
		$Mar = intval( $m );
		if ( $m < 0 ) {
			$m++;
		}
		$m -= $Mar;

		// Weekday-dependent corrections of the start day
		$c = ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7;
		if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
			$Mar++;
		} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
			$Mar += 2;
		} elseif ( $c == 2 || $c == 4 || $c == 6 ) {
			$Mar++;
		}

		$Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
		return $Mar;
	}

	/**
	 * Algorithm to convert Gregorian dates to Thai solar dates,
	 * Minguo dates or Juche dates.
	 *
	 * Link: https://en.wikipedia.org/wiki/Thai_solar_calendar
	 *       https://en.wikipedia.org/wiki/Minguo_calendar
	 *
	 * @param string $ts 14-character timestamp
	 * @param string $cName Calendar name: 'thai', 'minguo' or 'juche'; any other
	 *   value returns the Gregorian date unchanged
	 * @return array Converted year, month, day
	 */
	private static function tsToYear( $ts, $cName ) {
		$gy = (int)substr( $ts, 0, 4 );
		$gm = (int)substr( $ts, 4, 2 );
		$gd = (int)substr( $ts, 6, 2 );

		if ( $cName === 'thai' ) {
			# Thai solar dates
			# Add 543 years to the Gregorian calendar
			# Months and days are identical
			$gy_offset = $gy + 543;
			# fix for dates between 1912 and 1941
			# https://en.wikipedia.org/?oldid=836596673#New_year
			if ( $gy >= 1912 && $gy <= 1940 ) {
				if ( $gm <= 3 ) {
					# January to March still belong to the previous Thai year
					$gy_offset--;
				}
				# Renumber the months so that April = 1 ... March = 12.
				# PHP's % keeps the sign of the dividend, so ( $gm - 3 ) % 12 would
				# give -2, -1 and 0 for January to March instead of 10, 11 and 12.
				$gm = ( $gm + 8 ) % 12 + 1;
			}
		} elseif ( $cName === 'minguo' || $cName === 'juche' ) {
			# Minguo dates
			# Deduct 1911 years from the Gregorian calendar
			# Months and days are identical
			$gy_offset = $gy - 1911;
		} else {
			$gy_offset = $gy;
		}

		return [ $gy_offset, $gm, $gd ];
	}

	/**
	 * Algorithm to convert Gregorian dates to Japanese gengo year.
*
	 * Link: https://en.wikipedia.org/wiki/Japanese_era_name
	 *
	 * @param string $ts 14-character timestamp
	 * @return string Era name followed by the year within the era, or
	 *   "西暦" followed by the Gregorian year for dates before 1873
	 */
	private static function tsToJapaneseGengo( $ts ) {
		# Nengō dates up to Meiji period.
		# Deduct years from the Gregorian calendar
		# depending on the nengo periods
		# The months and days are identical
		$gregorianYear = (int)substr( $ts, 0, 4 );
		$numericTs = (int)$ts;

		// [ first timestamp of the era, Gregorian year counted as year 1, era name ],
		// newest era first so that the first match wins.
		$eras = [
			# Reiwa period
			[ 20190501000000, 2019, '令和' ],
			# Heisei period
			[ 19890108000000, 1989, '平成' ],
			# Shōwa period
			[ 19261225000000, 1926, '昭和' ],
			# Taishō period
			[ 19120730000000, 1912, '大正' ],
			# Meiji period; start from meiji 6 (1873) it starts using gregorian year
			[ 18730101000000, 1868, '明治' ],
		];

		foreach ( $eras as [ $eraStart, $eraFirstYear, $eraName ] ) {
			if ( $numericTs >= $eraStart ) {
				return self::tsToJapaneseGengoCalculate( $gregorianYear, $eraFirstYear, $eraName );
			}
		}

		return "西暦$gregorianYear";
	}

	/**
	 * Calculate Gregorian year to Japanese gengo year.
	 *
	 * Link: https://en.wikipedia.org/wiki/Japanese_era_name
	 *
	 * @param int $gy 4-digit Gregorian year
	 * @param int $startYear 4-digit Gengo start year
	 * @param string $gengo Actual Gengo string
	 * @return string Converted year; the first year of an era is written as "元"
	 */
	private static function tsToJapaneseGengoCalculate( $gy, $startYear, $gengo ) {
		$eraYear = $gy - $startYear + 1;

		return $gengo . ( $eraYear == 1 ? '元' : $eraYear );
	}

	/**
	 * Gets directionality of the first strongly directional codepoint, for embedBidi()
	 *
	 * This is the rule the BIDI algorithm uses to determine the directionality of
	 * paragraphs ( https://www.unicode.org/reports/tr9/#The_Paragraph_Level ) and
	 * FSI isolates ( https://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
	 *
	 * TODO: Does not handle BIDI control characters inside the text.
* TODO: Does not handle unallocated characters.
	 *
	 * @param string $text Text to test
	 * @return null|string Directionality ('ltr' or 'rtl') or null
	 */
	private static function strongDirFromContent( $text = '' ) {
		if ( !preg_match( self::$strongDirRegex, $text, $matches ) ) {
			// No match at all
			return null;
		}
		// An empty first capture group is reported as right-to-left.
		// NOTE(review): presumably group 1 captures a strong LTR character and the
		// alternative matches a strong RTL one - confirm against $strongDirRegex.
		if ( $matches[1] === '' ) {
			return 'rtl';
		}
		return 'ltr';
	}

	/**
	 * Roman number formatting up to 10000
	 *
	 * @param int $num
	 *
	 * @return string Roman numeral, or the plain decimal string when $num is
	 *   not in the range 1 to 10000
	 */
	public static function romanNumeral( $num ) {
		// One row per decimal position (units, tens, hundreds, thousands),
		// indexed by the digit at that position.
		static $table = [
			[ '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ],
			[ '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ],
			[ '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ],
			[ '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM', 'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' ]
		];

		$num = intval( $num );
		if ( $num > 10000 || $num <= 0 ) {
			return (string)$num;
		}

		$s = '';
		for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
			if ( $num >= $pow10 ) {
				$s .= $table[$i][(int)floor( $num / $pow10 )];
			}
			$num %= $pow10;
		}
		return $s;
	}

	/**
	 * Hebrew Gematria number formatting up to 9999
	 *
	 * @param int $num
	 *
	 * @return string Hebrew numeral, or the plain decimal string when $num is
	 *   not in the range 1 to 9999
	 */
	public static function hebrewNumeral( $num ) {
		// One row per decimal position (units, tens, hundreds, thousands).
		// Hundreds are arrays because some of them need more than one letter.
		static $table = [
			[ '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ],
			[ '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ],
			[ '',
				[ 'ק' ],
				[ 'ר' ],
				[ 'ש' ],
				[ 'ת' ],
				[ 'ת', 'ק' ],
				[ 'ת', 'ר' ],
				[ 'ת', 'ש' ],
				[ 'ת', 'ת' ],
				[ 'ת', 'ת', 'ק' ],
				[ 'ת', 'ת', 'ר' ],
			],
			[ '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ]
		];

		$num = intval( $num );
		if ( $num > 9999 || $num <= 0 ) {
			return (string)$num;
		}

		// Round thousands have special notations
		if ( $num === 1000 ) {
			return "א' אלף";
		} elseif ( $num % 1000 === 0 ) {
			return $table[0][$num / 1000] . "' אלפים";
		}

		$letters = [];

		for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
			if ( $num >= $pow10 ) {
				if ( $num === 15 || $num === 16 ) {
					// 15 and 16 are written as 9+6 and 9+7
					$letters[] = $table[0][9];
					$letters[] = $table[0][$num - 9];
					$num = 0;
				} else {
					$letters = array_merge(
						$letters,
						(array)$table[$i][intval( $num / $pow10 )]
					);

					if ( $pow10 === 1000 ) {
						// Separate the thousands letter with a geresh
						$letters[] = "'";
					}
				}
			}

			$num %= $pow10;
		}

		$preTransformLength = count( $letters );
		if ( $preTransformLength === 1 ) {
			// Add geresh (single quote) to one-letter numbers
			$letters[] = "'";
		} else {
			// Use the final form for the last letter
			$lastIndex = $preTransformLength - 1;
			$letters[$lastIndex] = strtr(
				$letters[$lastIndex],
				[ 'כ' => 'ך', 'מ' => 'ם', 'נ' => 'ן', 'פ' => 'ף', 'צ' => 'ץ' ]
			);

			// Add gershayim (double quote) to multiple-letter numbers,
			// but exclude numbers with only one letter after the thousands
			// (1001-1009, 1020, 1030, 2001-2009, etc.)
			if ( $letters[1] === "'" && $preTransformLength === 3 ) {
				$letters[] = "'";
			} else {
				array_splice( $letters, -1, 0, '"' );
			}
		}

		return implode( $letters );
	}

	/**
	 * Used by date() and time() to adjust the time output.
	 *
	 * The correction is either a named time zone or a fixed offset in minutes,
	 * as decided by UserTimeCorrection. In both cases $ts is interpreted as UTC.
	 *
	 * @param string $ts The time in date('YmdHis') format
	 * @param string|false $tz Adjust the time by this amount (default false, mean we
	 *   get user timecorrection setting)
	 * @return string
	 */
	public function userAdjust( $ts, $tz = false ) {
		$localTZoffset = $this->config->get( MainConfigNames::LocalTZoffset );

		if ( $tz === false ) {
			$optionsLookup = MediaWikiServices::getInstance()->getUserOptionsLookup();
			$tz = $optionsLookup->getOption(
				RequestContext::getMain()->getUser(),
				'timecorrection'
			);
		}

		$timeCorrection = new UserTimeCorrection( (string)$tz, null, $localTZoffset );

		$tzObj = $timeCorrection->getTimeZone();
		if ( $tzObj ) {
			$date = new DateTime( $ts, new DateTimeZone( 'UTC' ) );
			$date->setTimezone( $tzObj );
			return self::makeMediaWikiTimestamp( $ts, $date );
		}

		$minDiff = $timeCorrection->getTimeOffset();

		# No difference? Return the time unchanged
		if ( $minDiff === 0 ) {
			return $ts;
		}

		// Apply the offset in UTC, like the time zone branch above. Parsing $ts in
		// the process default time zone would let DST transitions of that zone
		// distort what is meant to be a fixed offset.
		$date = new DateTime( $ts, new DateTimeZone( 'UTC' ) );
		$date->modify( "{$minDiff} minutes" );
		return self::makeMediaWikiTimestamp( $ts, $date );
	}

	/**
	 * Convenience function to convert a PHP DateTime object to a 14-character MediaWiki timestamp,
	 * falling back to the specified timestamp if the DateTime object holds a too large date (T32148, T277809).
	 * This is a private utility method as it is only really useful for {@link userAdjust}.
	 *
	 * @param string $fallback 14-character MW timestamp to fall back to if the DateTime object holds a too large date
	 * @param DateTime $date The DateTime object to convert
	 * @return string 14-character MW timestamp
	 */
	private static function makeMediaWikiTimestamp( $fallback, $date ) {
		$ts = $date->format( 'YmdHis' );
		// Anything that does not format to exactly 14 characters is rejected
		return strlen( $ts ) === 14 ? $ts : $fallback;
	}

	/**
	 * This is meant to be used by time(), date(), and timeanddate() to get
	 * the date preference they're supposed to use. It should be used in
	 * all children.
	 *
	 *     function timeanddate([...], $format = true) {
	 *       $datePreference = $this->dateFormat($format);
	 *       [...]
	 *     }
	 *
	 * @param int|string|bool $usePrefs If true, the user's preference is used
	 *   if false, the site/language default is used
	 *   if int/string, assumed to be a format.
	 * @return string The preference name; 'default' when it would be empty
	 */
	public function dateFormat( $usePrefs = true ) {
		if ( is_bool( $usePrefs ) ) {
			if ( $usePrefs ) {
				$datePreference = RequestContext::getMain()
					->getUser()
					->getDatePreference();
			} else {
				$userOptionsLookup = MediaWikiServices::getInstance()->getUserOptionsLookup();
				$datePreference = (string)$userOptionsLookup->getDefaultOption( 'date' );
			}
		} else {
			$datePreference = (string)$usePrefs;
		}

		// An empty preference means "use the default format"
		if ( $datePreference == '' ) {
			return 'default';
		}

		return $datePreference;
	}

	/**
	 * Get a format string for a given type and preference
	 * @param string $type One of 'date', 'time', 'both', or 'pretty'.
	 * @param string $pref The format name as it appears in Messages*.php under
	 *  $datePreferences.
*
	 * @since 1.22 New type 'pretty' that provides a more readable timestamp format
	 *
	 * @return string
	 */
	public function getDateFormatString( $type, $pref ) {
		$wasDefault = ( $pref == 'default' );
		if ( $wasDefault ) {
			$pref = $this->getDefaultDateFormat();
		}

		if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
			$df = $this->localisationCache
				->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );

			// 'pretty' falls back to the plain date format of the same preference
			if ( $type === 'pretty' && $df === null ) {
				$df = $this->getDateFormatString( 'date', $pref );
			}

			// Unknown preference: fall back to the default date format
			if ( !$wasDefault && $df === null ) {
				$fallbackPref = $this->getDefaultDateFormat();
				$df = $this->localisationCache
					->getSubitem( $this->mCode, 'dateFormats', "$fallbackPref $type" );
			}

			// Cache under the preference that was asked for, so that a repeated
			// request for the same (possibly unknown) preference hits the cache.
			$this->dateFormatStrings[$type][$pref] = $df;
		}

		return $this->dateFormatStrings[$type][$pref];
	}

	/**
	 * Shared implementation of date(), time() and timeanddate().
	 *
	 * @param string $type Format type passed to getDateFormatString(): 'date', 'time' or 'both'
	 * @param string $ts Timestamp in any format accepted by wfTimestamp()
	 * @param bool $adj Whether to apply userAdjust()
	 * @param mixed $format Passed to dateFormat()
	 * @param string|false $timecorrection Passed to userAdjust()
	 * @return string
	 */
	private function formatTimestampByType( $type, $ts, $adj, $format, $timecorrection ) {
		$ts = wfTimestamp( TS_MW, $ts );
		if ( $adj ) {
			$ts = $this->userAdjust( $ts, $timecorrection );
		}
		$df = $this->getDateFormatString( $type, $this->dateFormat( $format ) );
		return $this->sprintfDate( $df, $ts );
	}

	/**
	 * Format the date part of a timestamp.
	 *
	 * @param string $ts The time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param bool $adj Whether to adjust the time output according to the
	 *   user configured offset ($timecorrection)
	 * @param mixed $format True to use user's date format preference
	 * @param string|false $timecorrection The time offset as returned by
	 *   validateTimeZone() in Special:Preferences
	 * @return string
	 */
	public function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
		return $this->formatTimestampByType( 'date', $ts, $adj, $format, $timecorrection );
	}

	/**
	 * Format the time part of a timestamp.
	 *
	 * @param string $ts The time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param bool $adj Whether to adjust the time output according to the
	 *   user configured offset ($timecorrection)
	 * @param mixed $format True to use user's date format preference
	 * @param string|false $timecorrection The time offset as returned by
	 *   validateTimeZone() in Special:Preferences
	 * @return string
	 */
	public function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
		return $this->formatTimestampByType( 'time', $ts, $adj, $format, $timecorrection );
	}

	/**
	 * Format both the date and the time of a timestamp.
	 *
	 * @param string $ts The time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param bool $adj Whether to adjust the time output according to the
	 *   user configured offset ($timecorrection)
	 * @param mixed $format What date format to return the result in; if it's false output the
	 *   default one (default true)
	 * @param string|false $timecorrection The time offset as returned by
	 *   validateTimeZone() in Special:Preferences
	 * @return string
	 */
	public function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
		return $this->formatTimestampByType( 'both', $ts, $adj, $format, $timecorrection );
	}

	/**
	 * Takes a number of seconds and turns it into a text using values such as hours and minutes.
	 *
	 * @since 1.20
	 *
	 * @param int $seconds The number of seconds.
	 * @param array $chosenIntervals The intervals to enable.
	 *
	 * @return string
	 */
	public function formatDuration( $seconds, array $chosenIntervals = [] ) {
		$intervals = $this->getDurationIntervals( $seconds, $chosenIntervals );

		$segments = [];

		foreach ( $intervals as $intervalName => $intervalValue ) {
			// Messages: duration-seconds, duration-minutes, duration-hours, duration-days, duration-weeks,
			// duration-years, duration-decades, duration-centuries, duration-millennia
			$message = $this->msg( 'duration-' . $intervalName )->numParams( $intervalValue );
			// Each segment is HTML-escaped before the segments are joined into a list
			$segments[] = $message->escaped();
		}

		return $this->listToText( $segments );
	}

	/**
	 * Takes two timestamps and turns the difference between them into a text using values such as hours and minutes.
	 *
	 * @param int $timestamp1 The first timestamp.
* @param int $timestamp2 The second timestamp. * @param ?int $precision The number of intervals to show. * * @return string */
	public function formatDurationBetweenTimestamps(
		int $timestamp1,
		int $timestamp2,
		?int $precision = null
	): string {
		$precision ??= count( self::DURATION_INTERVALS );

		// Order the two timestamps so the interval is always measured forwards.
		$sortedTimestamps = [ $timestamp1, $timestamp2 ];
		sort( $sortedTimestamps );

		$date1 = ( new DateTimeImmutable() )->setTimestamp( $sortedTimestamps[0] );
		$date2 = ( new DateTimeImmutable() )->setTimestamp( $sortedTimestamps[1] );
		$interval = $date1->diff( $date2 );

		$format = [];

		// The interval's largest unit is years, so millennia, centuries and decades
		// are peeled off the year count here. intdiv() keeps the arithmetic in
		// integers (floor() would hand floats to numParams() and to the interval).
		$years = $interval->y;
		// Messages: duration-millennia, duration-centuries, duration-decades
		foreach ( [ 'millennia' => 1000, 'centuries' => 100, 'decades' => 10 ] as $unit => $size ) {
			if ( $years >= $size ) {
				$count = intdiv( $years, $size );
				$format[] = $this->msg( 'duration-' . $unit )->numParams( $count )->text();
				$years -= $count * $size;
			}
		}

		// Remaining units, largest first; units whose value is zero are left out.
		// Messages: duration-years, duration-months, duration-days, duration-hours,
		// duration-minutes, duration-seconds
		$remainingUnits = [
			'years' => $years,
			'months' => $interval->m,
			'days' => $interval->d,
			'hours' => $interval->h,
			'minutes' => $interval->i,
			'seconds' => $interval->s,
		];
		foreach ( $remainingUnits as $unit => $count ) {
			if ( $count !== 0 ) {
				$format[] = $this->msg( 'duration-' . $unit )->numParams( $count )->text();
			}
		}

		// slice the array to the provided precision
		$format = array_slice( $format, 0, $precision );

		// build the string from the array
		$text = $this->listToText( $format );

		// Nothing to show (identical timestamps, or everything sliced away): "0 seconds".
		return $text ?: $this->msg( 'duration-seconds' )->numParams( 0 )->text();
	}

	/**
	 * Takes a number of seconds and returns an array with a set of corresponding intervals.
	 * For example, 65 will be turned into [ minutes => 1, seconds => 5 ].
	 *
	 * Units are taken from self::DURATION_INTERVALS in that constant's own order;
	 * the last enabled one is treated as the smallest and is reported even when
	 * its value is zero, provided no other unit was emitted.
	 *
	 * @since 1.20
	 *
	 * @param int $seconds The number of seconds.
	 * @param array $chosenIntervals The intervals to enable.
	 *
	 * @return array<string,int|float> Interval name => whole-number count
	 *   (the counts come from floor(), so they are floats at runtime)
	 */
	public function getDurationIntervals( $seconds, array $chosenIntervals = [] ) {
		if ( !$chosenIntervals ) {
			// Default intervals. Do not include `months` as they were not part of the original default implementation
			$chosenIntervals = [
				'millennia',
				'centuries',
				'decades',
				'years',
				'days',
				'hours',
				'minutes',
				'seconds'
			];
		}

		// Keep only the enabled units, in the order the constant defines them.
		$intervals = array_intersect_key( self::DURATION_INTERVALS,
			array_fill_keys( $chosenIntervals, true ) );
		$sortedNames = array_keys( $intervals );
		$smallestInterval = array_pop( $sortedNames );

		$segments = [];

		foreach ( $intervals as $name => $length ) {
			$value = floor( $seconds / $length );

			// Emit non-zero units, plus the smallest unit if nothing else was emitted.
			if ( $value > 0 || ( $name == $smallestInterval && !$segments ) ) {
				$seconds -= $value * $length;
				$segments[$name] = $value;
			}
		}

		return $segments;
	}

	/**
	 * Internal helper function for userDate(), userTime() and userTimeAndDate()
	 *
	 * @param string $type Can be 'date', 'time' or 'both'
	 * @param string $ts The time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param UserIdentity $user User used to get preferences for timezone and format
	 * @param array $options Array, can contain the following keys:
	 *   - 'timecorrection': time correction, can have the following values:
	 *     - true: use user's preference
	 *     - false: don't use time correction
	 *     - int: value of time correction in minutes
	 *   - 'format': format to use, can have the following values:
	 *     - true: use user's preference
	 *     - false: use default preference
	 *     - string: format to use
	 * @since 1.19
	 * @return string
	 */
	private function internalUserTimeAndDate( $type, $ts, UserIdentity $user, array $options ) {
		$ts = wfTimestamp( TS_MW, $ts );
		// Missing keys default to "use the user's preference".
		$options += [ 'timecorrection' => true, 'format' => true ];
		if ( $options['timecorrection'] !== false ) {
			if ( $options['timecorrection'] === true ) {
				$offset = MediaWikiServices::getInstance()
					->getUserOptionsLookup()
					->getOption( $user, 'timecorrection' );
			} else {
				$offset = $options['timecorrection'];
			}
			$ts = $this->userAdjust( $ts, $offset );
		}
		if ( $options['format'] === true ) {
			$format = MediaWikiServices::getInstance()
				->getUserFactory()
				->newFromUserIdentity( $user )
				->getDatePreference();
		} else {
			$format = $options['format'];
		}
		$df = $this->getDateFormatString( $type, $this->dateFormat( $format ) );
		return $this->sprintfDate( $df, $ts );
	}

	/**
	 * Get the formatted date for the given timestamp and formatted for
	 * the given user.
	 *
	 * @param mixed $ts Mixed: the time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param UserIdentity $user User used to get preferences for timezone and format
	 * @param array $options Array, can contain the following keys:
	 *   - 'timecorrection': time correction, can have the following values:
	 *     - true: use user's preference
	 *     - false: don't use time correction
	 *     - int: value of time correction in minutes
	 *   - 'format': format to use, can have the following values:
	 *     - true: use user's preference
	 *     - false: use default preference
	 *     - string: format to use
	 * @since 1.19
	 * @return string
	 */
	public function userDate( $ts, UserIdentity $user, array $options = [] ) {
		return $this->internalUserTimeAndDate( 'date', $ts, $user, $options );
	}

	/**
	 * Get the formatted time for the given timestamp and formatted for
	 * the given user.
* * @param mixed $ts The time format which needs to be turned into a * date('YmdHis') format with wfTimestamp(TS_MW,$ts) * @param UserIdentity $user User used to get preferences for timezone and format * @param array $options Array, can contain the following keys: * - 'timecorrection': time correction, can have the following values: * - true: use user's preference * - false: don't use time correction * - int: value of time correction in minutes * - 'format': format to use, can have the following values: * - true: use user's preference * - false: use default preference * - string: format to use * @since 1.19 * @return string */
	public function userTime( $ts, UserIdentity $user, array $options = [] ) {
		// Same pipeline as userDate()/userTimeAndDate(); only the format type differs.
		$formatType = 'time';

		return $this->internalUserTimeAndDate( $formatType, $ts, $user, $options );
	}

	/**
	 * Format a timestamp as date plus time for a specific user.
	 *
	 * Thin wrapper that calls internalUserTimeAndDate() with the 'both' type.
	 *
	 * @param mixed $ts The time format which needs to be turned into a
	 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
	 * @param UserIdentity $user User used to get preferences for timezone and format
	 * @param array $options Array, can contain the following keys:
	 *   - 'timecorrection': time correction, can have the following values:
	 *     - true: use user's preference
	 *     - false: don't use time correction
	 *     - int: value of time correction in minutes
	 *   - 'format': format to use, can have the following values:
	 *     - true: use user's preference
	 *     - false: use default preference
	 *     - string: format to use
	 * @since 1.19
	 * @return string
	 */
	public function userTimeAndDate( $ts, UserIdentity $user, array $options = [] ) {
		$formatType = 'both';

		return $this->internalUserTimeAndDate( $formatType, $ts, $user, $options );
	}

	/**
	 * Get the timestamp in a human-friendly relative format, e.g., "3 days ago".
	 *
	 * Determine the difference between the timestamp and the current time, and
	 * generate a readable timestamp by returning "<N> <units> ago", where the
	 * largest possible unit is used.
* * @since 1.26 (Prior to 1.26, the method existed but was not meant to be used directly) * * @param MWTimestamp $time * @param MWTimestamp|null $relativeTo The base timestamp to compare to (defaults to now) * @param UserIdentity|null $user User the timestamp is being generated for * (or null to use main context's user) * @return string Formatted timestamp */
	public function getHumanTimestamp(
		MWTimestamp $time, ?MWTimestamp $relativeTo = null, ?UserIdentity $user = null
	) {
		$relativeTo ??= new MWTimestamp();
		if ( $user === null ) {
			$user = RequestContext::getMain()->getUser();
		} else {
			// For compatibility with the hook signature and self::getHumanTimestampInternal
			$user = MediaWikiServices::getInstance()
				->getUserFactory()
				->newFromUserIdentity( $user );
		}

		// Adjust for the user's timezone.
		$offsetThis = $time->offsetForUser( $user );
		$offsetRel = $relativeTo->offsetForUser( $user );

		$ts = '';
		try {
			// A hook handler may supply $ts itself and return false to skip the default.
			if ( $this->getHookRunner()->onGetHumanTimestamp( $ts, $time, $relativeTo, $user, $this ) ) {
				$ts = $this->getHumanTimestampInternal( $time, $relativeTo, $user );
			}
		} finally {
			// Reset the timezone on the objects. Done in finally so that the
			// caller's timestamp objects are not left shifted when a hook handler
			// or the formatter throws.
			$time->timestamp->sub( $offsetThis );
			$relativeTo->timestamp->sub( $offsetRel );
		}

		return $ts;
	}

	/**
	 * Convert an MWTimestamp into a pretty human-readable timestamp using
	 * the given user preferences and relative base time.
	 *
	 * The branches are tried in order: future timestamps, more than 5 days apart
	 * in different years, more than 5 days apart, 2-5 days, 1 day, more than 90
	 * minutes, about an hour, minutes, at least 30 seconds, and finally "just now".
	 *
	 * @see Language::getHumanTimestamp
	 * @param MWTimestamp $ts Timestamp to prettify
	 * @param MWTimestamp $relativeTo Base timestamp
	 * @param User $user User preferences to use
	 * @return string Human timestamp
	 * @since 1.26
	 */
	private function getHumanTimestampInternal(
		MWTimestamp $ts, MWTimestamp $relativeTo, User $user
	) {
		$diff = $ts->diff( $relativeTo );
		// True when the two timestamps fall on different days of the week; used
		// below to count "less than 24h apart but on different weekdays" as one day.
		$diffDay = (bool)( (int)$ts->timestamp->format( 'w' ) -
			(int)$relativeTo->timestamp->format( 'w' ) );
		$days = $diff->days ?: (int)$diffDay;
		if ( $diff->invert ) {
			// Future dates: Use full timestamp
			/**
			 * @todo FIXME: Add better handling of future timestamps.
			 */
			$format = $this->getDateFormatString( 'both', $user->getDatePreference() ?: 'default' );
			$ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
		} elseif ( $days > 5 && $ts->timestamp->format( 'Y' ) !== $relativeTo->timestamp->format( 'Y' ) ) {
			// Timestamps are in different years and more than 5 days apart: use full date
			$format = $this->getDateFormatString( 'date', $user->getDatePreference() ?: 'default' );
			$ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
		} elseif ( $days > 5 ) {
			// Timestamps are in same year and more than 5 days ago: show day and month only.
			$format = $this->getDateFormatString( 'pretty', $user->getDatePreference() ?: 'default' );
			$ts = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
		} elseif ( $days > 1 ) {
			// Timestamp within the past 5 days: show the day of the week and time
			$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
			$weekday = self::WEEKDAY_MESSAGES[(int)$ts->timestamp->format( 'w' )];
			// The following messages are used here:
			// * sunday-at, monday-at, tuesday-at, wednesday-at, thursday-at, friday-at, saturday-at
			$ts = $this->msg( "$weekday-at" )
				->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
				->text();
		} elseif ( $days == 1 ) {
			// Timestamp was yesterday: say 'yesterday' and the time.
			$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
			$ts = $this->msg( 'yesterday-at' )
				->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
				->text();
		} elseif ( $diff->h > 1 || ( $diff->h == 1 && $diff->i > 30 ) ) {
			// Timestamp was today, but more than 90 minutes ago: say 'today' and the time.
			$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
			$ts = $this->msg( 'today-at' )
				->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
				->text();

		// From here on in, the timestamp was soon enough ago so that we can simply say
		// XX units ago, e.g., "2 hours ago" or "5 minutes ago"
		} elseif ( $diff->h == 1 ) {
			// Less than 90 minutes, but more than an hour ago.
			$ts = $this->msg( 'hours-ago' )->numParams( 1 )->text();
		} elseif ( $diff->i >= 1 ) {
			// A few minutes ago.
			$ts = $this->msg( 'minutes-ago' )->numParams( $diff->i )->text();
		} elseif ( $diff->s >= 30 ) {
			// Less than a minute, but more than 30 sec ago.
			$ts = $this->msg( 'seconds-ago' )->numParams( $diff->s )->text();
		} else {
			// Less than 30 seconds ago.
			$ts = $this->msg( 'just-now' )->text();
		}

		return $ts;
	}

	/**
	 * Gets the localized friendly name for a group, if it exists. For example,
	 * "Administrators" or "Bureaucrats"
	 *
	 * Looks up the "group-<name>" message; when it is blank the internal group
	 * name is returned unchanged.
	 *
	 * @since 1.38
	 * @param string $group Internal group name
	 * @return string Localized friendly group name
	 */
	public function getGroupName( $group ) {
		$msg = $this->msg( "group-$group" );
		return $msg->isBlank() ? $group : $msg->text();
	}

	/**
	 * Gets the localized name for a member of a user group if it exists.
	 * For example, "administrator" or "bureaucrat"
	 *
	 * Looks up the "group-<name>-member" message with the member's name as its
	 * parameter; when it is blank the internal group name is returned unchanged.
	 *
	 * @since 1.38
	 * @param string $group Internal group name
	 * @param string|UserIdentity $member Username, or a user whose name is used
	 * @return string Localized name for group member
	 */
	public function getGroupMemberName( string $group, $member ) {
		if ( $member instanceof UserIdentity ) {
			$member = $member->getName();
		}
		$msg = $this->msg( "group-$group-member", $member );
		return $msg->isBlank() ? $group : $msg->text();
	}

	/**
	 * Fetch a single entry of the 'messages' item from the localisation cache
	 * for this language code.
	 *
	 * @deprecated since 1.41, use LocalisationCache or MessageCache as appropriate.
	 * @param string $key
	 * @return string|null
	 */
	public function getMessage( $key ) {
		return $this->localisationCache->getSubitem( $this->mCode, 'messages', $key );
	}

	/**
	 * @deprecated since 1.41, use LocalisationCache directly.
* @return string[] */
	public function getAllMessages() {
		$allMessages = $this->localisationCache->getItem( $this->mCode, 'messages' );

		return $allMessages;
	}

	/**
	 * Convert a string between character encodings with PHP's iconv(), dropping
	 * characters that cannot be represented ("//IGNORE") and silencing warnings.
	 *
	 * @param string $in Source encoding
	 * @param string $out Target encoding
	 * @param string $string Text to convert
	 * @return string
	 */
	public function iconv( $in, $out, $string ) {
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$converted = iconv( $in, $out . '//IGNORE', $string );
		AtEase::restoreWarnings();

		return $converted;
	}

	/**
	 * Uppercase the first character of a string.
	 *
	 * ASCII input is handled on a fast path keyed on the first byte; other input
	 * goes through the configured override table, falling back to title-casing
	 * the first character.
	 *
	 * @param string $str
	 * @return string The string with uppercase conversion applied to the first character
	 */
	public function ucfirst( $str ) {
		$firstByte = ord( $str );

		// See https://en.wikipedia.org/wiki/ASCII#Printable_characters
		if ( $firstByte < 96 ) {
			// Assume this is an uppercase/uncased ASCII character
			return (string)$str;
		}
		if ( $firstByte < 128 ) {
			// Assume this is a lowercase/uncased ASCII character
			return ucfirst( $str );
		}

		$first = mb_substr( $str, 0, 1 );
		if ( strlen( $first ) === 1 ) {
			// Broken UTF-8?
			return ucfirst( $str );
		}

		// Memoize the config table
		$overrides = $this->overrideUcfirstCharacters ??=
			$this->config->get( MainConfigNames::OverrideUcfirstCharacters );

		// Use the config table and fall back to MB_CASE_TITLE
		$ucFirst = $overrides[$first] ?? mb_convert_case( $first, MB_CASE_TITLE );

		// Only rebuild the string when the first character actually changes.
		return $ucFirst !== $first
			? $ucFirst . mb_substr( $str, 1 )
			: $str;
	}

	/**
	 * Uppercase a whole string, or only its first character.
	 *
	 * @param string $str
	 * @param bool $first Whether to uppercase only the first character
	 * @return string The string with uppercase conversion applied
	 */
	public function uc( $str, $first = false ) {
		if ( $first ) {
			return $this->ucfirst( $str );
		}

		if ( $this->isMultibyte( $str ) ) {
			return mb_strtoupper( $str );
		}

		return strtoupper( $str );
	}

	/**
	 * Lowercase the first character of a string.
	 *
	 * @param string $str
	 * @return string The string with lowercase conversion applied to the first character
	 */
	public function lcfirst( $str ) {
		$firstByte = ord( $str );

		// See https://en.wikipedia.org/wiki/ASCII#Printable_characters
		if ( $firstByte < 96 ) {
			// Assume this is an uppercase/uncased ASCII character
			return lcfirst( $str );
		}
		if ( $firstByte < 128 ) {
			// Assume this is a lowercase/uncased ASCII character
			return (string)$str;
		}

		if ( $this->isMultibyte( $str ) ) {
			// Assume this is a multibyte character and mb_internal_encoding() is appropriate
			return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
		}

		// Assume this is a non-multibyte character and LC_CASE is appropriate
		return lcfirst( $str );
	}

	/**
	 * Lowercase a whole string, or only its first character.
	 *
	 * @param string $str
	 * @param bool $first Whether to lowercase only the first character
	 * @return string The string with lowercase conversion applied
	 */
	public function lc( $str, $first = false ) {
		if ( $first ) {
			return $this->lcfirst( $str );
		}

		if ( $this->isMultibyte( $str ) ) {
			return mb_strtolower( $str );
		}

		return strtolower( $str );
	}

	/**
	 * Tell whether a string's byte length differs from its character count.
	 *
	 * @param string $str
	 * @return bool
	 */
	private function isMultibyte( $str ) {
		$byteLength = strlen( $str );
		$charLength = mb_strlen( $str );

		return $byteLength !== $charLength;
	}

	/**
	 * Lowercase a string and then uppercase the first letter of every
	 * space-separated word.
	 *
	 * @param string $str
	 * @return mixed|string
	 */
	public function ucwords( $str ) {
		if ( !$this->isMultibyte( $str ) ) {
			return ucwords( strtolower( $str ) );
		}

		$lowered = $this->lc( $str );

		// regexp to find the first letter in each word (i.e., after each space)
		$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

		// function to use to capitalize a single char
		return preg_replace_callback(
			$replaceRegexp,
			static fn ( $matches ) => mb_strtoupper( $matches[0] ),
			$lowered
		);
	}

	/**
	 * capitalize words at word breaks
	 *
	 * @param string $str
	 * @return mixed
	 */
	public function ucwordbreaks( $str ) {
		if ( !$this->isMultibyte( $str ) ) {
			// Single-byte input: uppercase the start of every \b-delimited word.
			return preg_replace_callback(
				'/\b([\w\x80-\xff]+)\b/',
				fn ( $matches ) => $this->ucfirst( $matches[1] ),
				$str
			);
		}

		$lowered = $this->lc( $str );

		// since \b doesn't work for UTF-8, we explicitly define word break chars
		$breaks = "[ \-\(\)\}\{\.,\?!]";

		// find the first letter after word break
		$replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|" .
			"$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

		return preg_replace_callback(
			$replaceRegexp,
			static fn ( $matches ) => mb_strtoupper( $matches[0] ),
			$lowered
		);
	}

	/**
	 * Return a case-folded representation of $s
	 *
	 * This is a representation such that caseFold($s1) == caseFold($s2) if $s1
	 * and $s2 are the same except for the case of their characters. It is not
	 * necessary for the value returned to make sense when displayed.
	 *
	 * Do *not* perform any other normalisation in this function. If a caller
	 * uses this function when it should be using a more general normalisation
	 * function, then fix the caller.
* * @param string $s * * @return string */
	public function caseFold( $s ) {
		// Folding is implemented as plain uppercasing.
		$folded = $this->uc( $s );

		return $folded;
	}

	/**
	 * Return the input unchanged when it is valid UTF-8; otherwise convert it to
	 * UTF-8 from this language's fallback 8-bit encoding.
	 *
	 * @param string $s
	 * @return string
	 */
	public function checkTitleEncoding( string $s ) {
		return StringUtils::isUtf8( $s )
			? $s
			: $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
	}

	/**
	 * Read the 'fallback8bitEncoding' item for this language code from the
	 * localisation cache.
	 *
	 * @return string
	 */
	public function fallback8bitEncoding() {
		$encoding = $this->localisationCache->getItem( $this->mCode, 'fallback8bitEncoding' );

		return $encoding;
	}

	/**
	 * Whether this language separates words with whitespace.
	 *
	 * Most writing systems do; languages such as Chinese conventionally do not
	 * and need special handling when text is split into words (e.g. for
	 * searching). The base implementation always answers true.
	 *
	 * @return bool
	 */
	public function hasWordBreaks() {
		return true;
	}

	/**
	 * Word segmentation hook for languages (such as Chinese) that need it.
	 * The base implementation returns the input untouched; derived classes
	 * override it.
	 *
	 * @param string $string
	 * @return string
	 */
	public function segmentByWord( $string ) {
		return $string;
	}

	/**
	 * The language variant to use for search indexing, or null for none
	 * (which is what the base implementation returns).
	 *
	 * @return string|null
	 */
	protected function getSearchIndexVariant() {
		return null;
	}

	/**
	 * Normalise text before it is used for searching.
	 *
	 * Double-width roman characters are passed through convertDoubleWidth(), and
	 * when a search index variant is defined the text is additionally converted
	 * to that variant.
	 *
	 * Some languages such as Chinese have many-to-one conversions,
	 * e.g., it should be better to use zh-hans for search, since conversion
	 * from zh-hant to zh-hans is less ambiguous than the other way around.
	 *
	 * @param string $text
	 * @return string
	 */
	public function normalizeForSearch( $text ) {
		$text = self::convertDoubleWidth( $text );

		if ( !$this->getSearchIndexVariant() ) {
			return $text;
		}

		return $this->getConverterInternal()->autoConvert( $text, $this->getSearchIndexVariant() );
	}

	/**
	 * Convert double-width roman characters to single-width.
* range: ff00-ff5f ~= 0020-007f * * @param string $string * @return string */
	protected static function convertDoubleWidth( $string ) {
		static $transTable = null;

		// Map each full-width digit and Latin letter (U+FF10-FF19, U+FF21-FF3A,
		// U+FF41-FF5A) to its ASCII counterpart. The keys must be the full-width
		// forms: with ASCII on both sides the table maps every character to
		// itself and the conversion silently does nothing.
		$transTable ??= array_combine(
			mb_str_split( '０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ' ),
			str_split( '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' )
		);

		return strtr( $string, $transTable );
	}

	/**
	 * Surround every match of $pattern with single spaces and collapse runs of
	 * spaces into one.
	 *
	 * @param string $string
	 * @param string $pattern Regular expression; the replacement refers to its
	 *   first capture group ($1)
	 * @return string
	 */
	protected static function insertSpace( $string, $pattern ) {
		$string = preg_replace( $pattern, " $1 ", $string );
		return preg_replace( '/ +/', ' ', $string );
	}

	/**
	 * Hook for adjusting search terms before results are displayed. The base
	 * implementation returns the terms unchanged.
	 *
	 * @param string[] $termsArray
	 * @return string[]
	 */
	public function convertForSearchResult( $termsArray ) {
		# some languages, e.g., Chinese, need to do a conversion
		# in order for search results to be displayed correctly
		return $termsArray;
	}

	/**
	 * Get the first character of a string.
	 *
	 * A Hangul syllable (U+AC00..U+D7A3) is reduced to a jamo chosen by the
	 * code-point range it falls in; any other character is returned as is.
	 *
	 * @param string $s
	 * @return string
	 */
	public function firstChar( $s ) {
		$firstChar = mb_substr( $s, 0, 1 );

		// Hangul syllables are three bytes long in UTF-8; anything else can be
		// returned straight away.
		if ( $firstChar === '' || strlen( $firstChar ) != 3 ) {
			return $firstChar;
		}

		// Break down Hangul syllables to grab the first jamo
		$code = mb_ord( $firstChar );
		if ( $code < 0xac00 || $code >= 0xd7a4 ) {
			return $firstChar;
		} elseif ( $code < 0xb098 ) {
			return "\u{3131}";
		} elseif ( $code < 0xb2e4 ) {
			return "\u{3134}";
		} elseif ( $code < 0xb77c ) {
			return "\u{3137}";
		} elseif ( $code < 0xb9c8 ) {
			return "\u{3139}";
		} elseif ( $code < 0xbc14 ) {
			return "\u{3141}";
		} elseif ( $code < 0xc0ac ) {
			return "\u{3142}";
		} elseif ( $code < 0xc544 ) {
			return "\u{3145}";
		} elseif ( $code < 0xc790 ) {
			return "\u{3147}";
		} elseif ( $code < 0xcc28 ) {
			return "\u{3148}";
		} elseif ( $code < 0xce74 ) {
			return "\u{314A}";
		} elseif ( $code < 0xd0c0 ) {
			return "\u{314B}";
		} elseif ( $code < 0xd30c ) {
			return "\u{314C}";
		} elseif ( $code < 0xd558 ) {
			return "\u{314D}";
		} else {
			return "\u{314E}";
		}
	}

	/**
	 * Convert a UTF-8 string to normal form C.
In Malayalam and Arabic, this * also cleans up certain backwards-compatible sequences, converting them * to the modern Unicode equivalent. * * @internal * @param string $s * @return string */
	public function normalize( $s ) {
		$applyAllFixes = $this->config->get( MainConfigNames::AllUnicodeFixes );

		$s = UtfNormalValidator::cleanUp( $s );

		// Optimization: This is disabled by default to avoid negative performance impact.
		if ( !$applyAllFixes ) {
			return $s;
		}

		foreach ( [ NormalizeAr::class, NormalizeMl::class ] as $pairsClass ) {
			$s = $this->transformUsingPairFile( $pairsClass, $s );
		}

		return $s;
	}

	/**
	 * Transform a string using the replacement pairs published by a data class.
	 *
	 * The class's PAIRS constant is wrapped in a ReplacementArray the first time
	 * the class is used; the wrapper is kept on this object and reused for
	 * later calls.
	 *
	 * @param string $dataClass Name of a normalized pairs' data class
	 * @param string $input
	 * @return string
	 */
	protected function transformUsingPairFile( string $dataClass, string $input ): string {
		$this->transformData[$dataClass] ??= new ReplacementArray( $dataClass::PAIRS );

		return $this->transformData[$dataClass]->replace( $input );
	}

	/**
	 * For right-to-left language support: read the 'rtl' item for this language
	 * code from the localisation cache.
	 *
	 * @return bool
	 */
	public function isRTL() {
		$rtl = $this->localisationCache->getItem( $this->mCode, 'rtl' );

		return $rtl;
	}

	/**
	 * Return the correct HTML 'dir' attribute value for this language.
	 *
	 * @return string 'rtl' or 'ltr'
	 */
	public function getDir() {
		if ( $this->isRTL() ) {
			return 'rtl';
		}

		return 'ltr';
	}

	/**
	 * Return 'left' or 'right' as appropriate alignment for line-start
	 * for this language's text direction.
	 *
	 * Should be equivalent to CSS3 'start' text-align value....
	 *
	 * @return string
	 */
	public function alignStart() {
		if ( $this->isRTL() ) {
			return 'right';
		}

		return 'left';
	}

	/**
	 * Return 'right' or 'left' as appropriate alignment for line-end
	 * for this language's text direction.
	 *
	 * Should be equivalent to CSS3 'end' text-align value....
* * @return string */
	public function alignEnd() {
		return $this->isRTL() ? 'left' : 'right';
	}

	/**
	 * A hidden direction mark (LRM or RLM), depending on the language direction.
	 * Unlike getDirMark(), this function returns the character as an HTML entity.
	 * This function should be used when the output is guaranteed to be HTML,
	 * because it makes the output HTML source code more readable. When
	 * the output is plain text or can be escaped, getDirMark() should be used.
	 *
	 * Use of hidden control characters when the output allows use of HTML markup
	 * is discouraged and the recommendation is to use bdi HTML tag which doesn't
	 * have the issue of hidden characters ending up in user clipboard in text
	 * copy paste, see T375975.
	 *
	 * @deprecated hard deprecated since 1.43, use bdi HTML tag in HTML context
	 *   where possible.
	 * @param bool $opposite Get the direction mark opposite to your language
	 * @return string Either '&lrm;' or '&rlm;'
	 * @since 1.20
	 */
	public function getDirMarkEntity( $opposite = false ) {
		wfDeprecated( __METHOD__, '1.43' );

		// Spell the marks as named entities, not as the raw invisible characters:
		// that is the documented contract of this method and the only thing that
		// distinguishes it from getDirMark().
		if ( $opposite ) {
			return $this->isRTL() ? '&lrm;' : '&rlm;';
		}
		return $this->isRTL() ? '&rlm;' : '&lrm;';
	}

	/**
	 * A hidden direction mark (LRM or RLM), depending on the language direction.
	 * This function produces them as invisible Unicode characters and
	 * the output may be hard to read and debug, so it should only be used
	 * when the output is plain text or can be escaped.
	 *
	 * Use of hidden control characters when the output allows use of HTML markup
	 * is discouraged and the recommendation is to use bdi HTML tag which doesn't
	 * have the issue of hidden characters ending up in user clipboard in text
	 * copy paste, see T375975.
	 *
	 * @deprecated since 1.43, use bdi HTML tag in HTML context where possible.
	 * @param bool $opposite Get the direction mark opposite to your language
	 * @return string self::LRM or self::RLM
	 */
	public function getDirMark( $opposite = false ) {
		if ( $opposite ) {
			return $this->isRTL() ? self::LRM : self::RLM;
		}
		return $this->isRTL() ? self::RLM : self::LRM;
	}

	/**
	 * An arrow, depending on the language direction.
	 *
	 * 'forwards' and 'backwards' are mirrored for right-to-left languages; the
	 * other directions are absolute. No arrow is defined for any other value:
	 * the switch has no default branch, so nothing is returned in that case.
	 *
	 * @param string $direction The direction of the arrow: forwards (default),
	 *   backwards, left, right, up, down.
	 * @return string
	 */
	public function getArrow( $direction = 'forwards' ) {
		switch ( $direction ) {
			case 'forwards':
				return $this->isRTL() ? '←' : '→';
			case 'backwards':
				return $this->isRTL() ? '→' : '←';
			case 'left':
				return '←';
			case 'right':
				return '→';
			case 'up':
				return '↑';
			case 'down':
				return '↓';
		}
	}

	/**
	 * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
	 *
	 * Reads the 'linkPrefixExtension' item for this language code from the
	 * localisation cache.
	 *
	 * @return bool
	 */
	public function linkPrefixExtension() {
		return $this->localisationCache->getItem( $this->mCode, 'linkPrefixExtension' );
	}

	/**
	 * Get all the magic words from the localisation cache.
	 *
	 * @return array<string,array> $magicWord => [ int $caseSensitive, string ...$alias ]
	 */
	public function getMagicWords() {
		return $this->localisationCache->getItem( $this->mCode, 'magicWords' );
	}

	/**
	 * Fill a MagicWord object with data from this instance
	 *
	 * The entry is taken from $this->mMagicExtensions when one exists for the
	 * word's ID and from the localisation cache otherwise. The first element
	 * becomes the case-sensitivity flag and the rest the synonyms. An entry that
	 * is not an array only triggers a warning and leaves $mw untouched.
	 *
	 * @param MagicWord $mw
	 */
	public function getMagic( $mw ) {
		$rawEntry = $this->mMagicExtensions[$mw->mId] ??
			$this->localisationCache->getSubitem( $this->mCode, 'magicWords', $mw->mId );

		if ( !is_array( $rawEntry ) ) {
			wfWarn( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
		} else {
			$mw->mCaseSensitive = $rawEntry[0];
			$mw->mSynonyms = array_slice( $rawEntry, 1 );
		}
	}

	/**
	 * Get special page names, as an associative array
	 *   canonical name => array of valid names, including aliases
	 *
	 * @return string[][]
	 */
	public function getSpecialPageAliases() {
		// Cache aliases because it may be slow to load them
		$this->mExtendedSpecialPageAliases ??=
			$this->localisationCache->getItem( $this->mCode, 'specialPageAliases' );

		return $this->mExtendedSpecialPageAliases;
	}

	/**
	 * Italic is unsuitable for some languages
	 *
	 * @param string $text The text to be emphasized.
* @return string */ public function emphasize( $text ) { return "<em>$text</em>"; } /** * Normally we output all numbers in plain en_US style, that is * 293,291.235 for two hundred ninety-three thousand two hundred ninety-one * point two hundred thirty-five. However, this is not suitable for all * languages, some such as Bengali (bn) want ২,৯৩,২৯১.২৩৫ and others such as * Icelandic just want to use commas instead of dots, and dots instead * of commas like "293.291,235". * * An example of this function being called: * <code> * wfMessage( 'message' )->numParams( $num )->text() * </code> * * See $separatorTransformTable on MessageIs.php for * the , => . and . => , implementation. * * @param string|int|float $number Expected to be a pre-formatted (e.g. leading zeros, number * of decimal places) numeric string. Any non-string will be cast to string. * @return string */ public function formatNum( $number ) { return $this->formatNumInternal( (string)$number, false, false ); } /** * Internal implementation function, shared between formatNum and formatNumNoSeparators. * * @param string $number The stringification of a valid PHP number * @param bool $noTranslate Whether to translate digits and separators * @param bool $noSeparators Whether to add separators * @return string */ private function formatNumInternal( string $number, bool $noTranslate, bool $noSeparators ): string { $translateNumerals = $this->config->get( MainConfigNames::TranslateNumerals ); if ( $number === '' ) { return $number; } if ( $number === (string)NAN ) { return $this->msg( 'formatnum-nan' )->text(); } if ( $number === (string)INF ) { return "∞"; } if ( $number === (string)-INF ) { return "\u{2212}∞"; } if ( !is_numeric( $number ) ) { # T267587: downgrade this to level:warn while we chase down the long # trail of callers. 
# wfDeprecated( 'Language::formatNum with a non-numeric string', '1.36' ); LoggerFactory::getInstance( 'formatnum' )->warning( 'Language::formatNum with non-numeric string', [ 'number' => $number ] ); $validNumberRe = '(-(?=[\d\.]))?(\d+|(?=\.\d))(\.\d*)?([Ee][-+]?\d+)?'; // For backwards-compat, apply formatNum piecewise on the valid // numbers in the string. Don't split on NAN/INF in this legacy // case as they are likely to be found embedded inside non-numeric // text. return preg_replace_callback( "/{$validNumberRe}/", function ( $m ) use ( $noTranslate, $noSeparators ) { return $this->formatNumInternal( $m[0], $noTranslate, $noSeparators ); }, $number ); } if ( !$noSeparators ) { $separatorTransformTable = $this->separatorTransformTable(); $fmt = $this->getNumberFormatter(); // minimumGroupingDigits can be used to suppress groupings below a certain value. // This is used for languages such as Polish, where one would only write the grouping // separator for values above 9999 - numbers with more than 4 digits. // NumberFormatter is yet to support minimumGroupingDigits, ICU has it as experimental feature. // The attribute value is used by adding it to the grouping separator value. If // the input number has fewer integer digits, the grouping separator is suppressed. $minimumGroupingDigits = $this->minimumGroupingDigits(); // Minimum length of a number to do digit grouping on. // http://unicode.org/reports/tr35/tr35-numbers.html#Examples_of_minimumGroupingDigits $minimumLength = $minimumGroupingDigits + $fmt->getAttribute( NumberFormatter::GROUPING_SIZE ); if ( $minimumGroupingDigits > 1 && !preg_match( '/^\-?\d{' . $minimumLength . '}/', $number ) ) { // This number does not need commas inserted (even if // NumberFormatter thinks it does) because it's not long // enough. We still need to do decimal separator // transformation, though. For example, 1234.56 becomes 1234,56 // in pl with $minimumGroupingDigits = 2. 
if ( !$noTranslate ) { $number = strtr( $number, $separatorTransformTable ?: [] ); } } elseif ( $number === '-0' ) { // Special case to ensure we don't lose the minus sign by // converting to an int. if ( !$noTranslate ) { $number = strtr( $number, $separatorTransformTable ?: [] ); } } else { // NumberFormatter supports separator transformation, // but it does not know all languages MW // supports. Example: arq. Also, languages like pl have // customisation. So manually set it. $fmt = clone $fmt; if ( $noTranslate ) { $fmt->setSymbol( NumberFormatter::DECIMAL_SEPARATOR_SYMBOL, '.' ); $fmt->setSymbol( NumberFormatter::GROUPING_SEPARATOR_SYMBOL, ',' ); } elseif ( $separatorTransformTable ) { $fmt->setSymbol( NumberFormatter::DECIMAL_SEPARATOR_SYMBOL, $separatorTransformTable[ '.' ] ?? '.' ); $fmt->setSymbol( NumberFormatter::GROUPING_SEPARATOR_SYMBOL, $separatorTransformTable[ ',' ] ?? ',' ); } // Maintain # of digits before and after the decimal point // (and presence of decimal point) if ( preg_match( '/^-?(\d*)(\.(\d*))?$/', $number, $m ) ) { $fmt->setAttribute( NumberFormatter::MIN_INTEGER_DIGITS, strlen( $m[1] ) ); if ( isset( $m[2] ) ) { $fmt->setAttribute( NumberFormatter::DECIMAL_ALWAYS_SHOWN, 1 ); } $fmt->setAttribute( NumberFormatter::FRACTION_DIGITS, strlen( $m[3] ?? '' ) ); } $number = $fmt->format( (float)$number ); } } if ( !$noTranslate ) { if ( $translateNumerals ) { // This is often unnecessary: PHP's NumberFormatter will often // do the digit transform itself (T267614) $s = $this->digitTransformTable(); if ( $s ) { $number = strtr( $number, $s ); } } # T10327: Make our formatted numbers prettier by using a # proper Unicode 'minus' character. $number = strtr( $number, [ '-' => "\u{2212}" ] ); } // Remove any LRM or RLM characters generated from NumberFormatter, // since directionality is handled outside of this context. 
// Similarly remove \u61C, the "Arabic Letter mark" (unicode 6.3.0)
		// https://en.wikipedia.org/wiki/Arabic_letter_mark
		// which is added starting PHP 7.3+
		return strtr( $number, [
			"\u{200E}" => '', // LRM
			"\u{200F}" => '', // RLM
			"\u{061C}" => '', // ALM
		] );
	}

	/**
	 * Format a number without inserting grouping separators.
	 *
	 * Thin wrapper that casts the input to string and hands it to
	 * formatNumInternal() with translation enabled and separators disabled.
	 *
	 * @param string|int|float $number The value to format; expected to be an
	 *   integer or floating point number.
	 * @since 1.21
	 * @return string
	 */
	public function formatNumNoSeparators( $number ) {
		$asString = (string)$number;

		return $this->formatNumInternal( $asString, false, true );
	}

	/**
	 * Turn a localized number back into its plain ASCII representation.
	 *
	 * The NaN message text and the infinity symbols are mapped to the string
	 * forms of NAN, INF and -INF. Otherwise the digit and separator transform
	 * tables are applied in reverse and any remaining commas are removed.
	 *
	 * @param string $number
	 * @return string
	 */
	public function parseFormattedNumber( $number ) {
		if ( $number === $this->msg( 'formatnum-nan' )->text() ) {
			return (string)NAN;
		}
		if ( $number === "∞" ) {
			return (string)INF;
		}

		// ::formatNum() emits the unicode minus; accept it as well as the
		// ASCII hyphen-minus.
		$number = strtr( $number, [ "\u{2212}" => '-' ] );
		if ( $number === "-∞" ) {
			return (string)-INF;
		}

		$digitMap = $this->digitTransformTable();
		if ( $digitMap ) {
			// Drop empty entries such as '' before inverting the table. (T66347)
			$reverseDigits = array_flip( array_filter( $digitMap ) );
			$number = strtr( $number, $reverseDigits );
		}

		$separatorMap = $this->separatorTransformTable();
		if ( $separatorMap ) {
			// Drop empty entries such as '' before inverting the table. (T66347)
			$reverseSeparators = array_flip( array_filter( $separatorMap ) );
			$number = strtr( $number, $reverseSeparators );
		}

		return strtr( $number, [ ',' => '' ] );
	}

	/**
	 * Read the 'digitGroupingPattern' item for this language from the
	 * localisation cache.
	 *
	 * @return string
	 */
	public function digitGroupingPattern() {
		$pattern = $this->localisationCache->getItem( $this->mCode, 'digitGroupingPattern' );

		return $pattern;
	}

	/**
	 * Read the 'digitTransformTable' item for this language from the
	 * localisation cache.
	 *
	 * @return string[]
	 */
	public function digitTransformTable() {
		$table = $this->localisationCache->getItem( $this->mCode, 'digitTransformTable' );

		return $table;
	}

	/**
	 * Read the 'separatorTransformTable' item for this language from the
	 * localisation cache.
	 *
	 * @return string[]
	 */
	public function separatorTransformTable() {
		$table = $this->localisationCache->getItem( $this->mCode, 'separatorTransformTable' );

		return $table;
	}

	/**
	 * The minimum number of digits a number must have, in addition to the grouping
	 * size, before grouping separators are added.
*
	 * For example, Polish has minimumGroupingDigits = 2, which with a grouping
	 * size of 3 causes 4-digit numbers to be written like 9999, but 5-digit
	 * numbers are written like "10 000".
	 *
	 * @return int
	 */
	public function minimumGroupingDigits(): int {
		$configured = $this->localisationCache->getItem( $this->mCode, 'minimumGroupingDigits' );

		// Fall back to 1 when the language does not define a value.
		return $configured ?? 1;
	}

	/**
	 * Join a list of strings into a locale-friendly enumeration.
	 *
	 * All but the last item are joined with the 'comma-separator' message; the
	 * last item is attached using the 'and' and 'word-separator' messages.
	 * An empty list yields an empty string and a single item is returned as is.
	 *
	 * @param string[] $list
	 * @param-taint $list tainted
	 * @return string
	 */
	public function listToText( array $list ) {
		$total = count( $list );
		if ( $total < 1 ) {
			return '';
		}

		$lastItem = array_pop( $list );
		if ( $total === 1 ) {
			// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive
			return $lastItem;
		}

		$and = $this->msg( 'and' )->escaped();
		$space = $this->msg( 'word-separator' )->escaped();
		// With exactly two items there is nothing to comma-separate.
		$comma = $total > 2 ? $this->msg( 'comma-separator' )->escaped() : '';

		return implode( $comma, $list ) . $and . $space . $lastItem;
	}

	/**
	 * Join a list of strings with the escaped 'comma-separator' message.
	 *
	 * @param string[] $list Array of strings to put in a comma list
	 * @param-taint $list tainted
	 * @return string
	 */
	public function commaList( array $list ) {
		$separator = $this->msg( 'comma-separator' )->escaped();

		return implode( $separator, $list );
	}

	/**
	 * Join a list of strings with the escaped 'semicolon-separator' message.
	 *
	 * @param string[] $list Array of strings to put in a semicolon list
	 * @param-taint $list tainted
	 * @return string
	 */
	public function semicolonList( array $list ) {
		$separator = $this->msg( 'semicolon-separator' )->escaped();

		return implode( $separator, $list );
	}

	/**
	 * Same as commaList, but separate it with the pipe instead.
* @param string[] $list Array of strings to put in a pipe list
	 * @param-taint $list tainted
	 * @return string
	 */
	public function pipeList( array $list ) {
		$separator = $this->msg( 'pipe-separator' )->escaped();

		return implode( $separator, $list );
	}

	/**
	 * Cut a string down to a maximum number of bytes, optionally appending an
	 * ellipsis string.
	 *
	 * Lengths are measured with strlen() and substrings are taken with
	 * mb_strcut(); both are handed to truncateInternal() as callables.
	 * If no ellipsis is needed, calling mb_strcut() directly is recommended.
	 *
	 * A negative $length truncates from the beginning of the string.
	 *
	 * @since 1.31
	 *
	 * @param string $string String to truncate
	 * @param int $length Maximum length in bytes
	 * @param string $ellipsis String to append to the end of truncated text
	 * @param bool $adjustLength Subtract length of ellipsis from $length
	 *
	 * @return string
	 */
	public function truncateForDatabase( $string, $length, $ellipsis = '...', $adjustLength = true ) {
		$byteLength = 'strlen';
		$byteSubstring = 'mb_strcut';

		return $this->truncateInternal(
			$string, $length, $ellipsis, $adjustLength, $byteLength, $byteSubstring
		);
	}

	/**
	 * Truncate a string to a specified number of characters, appending an optional
	 * string (e.g., for ellipsis).
	 *
	 * This provides the multibyte version of truncateForDatabase() method of this class,
	 * suitable for truncation based on number of characters, instead of number of bytes.
	 *
	 * The input should be a raw UTF-8 string, and *NOT* be HTML
	 * escaped. It is not safe to truncate HTML-escaped strings,
	 * because the entity can be truncated! Use ::truncateHtml() if you
	 * need a specific number of HTML-encoded bytes, or
	 * ::truncateForDatabase() if you need a specific number of PHP
	 * bytes.
	 *
	 * If $length is negative, the string will be truncated from the beginning.
*
	 * @since 1.31
	 *
	 * @param string $string String to truncate
	 * @param int $length Maximum number of characters
	 * @param string $ellipsis String to append to the end of truncated text
	 * @param bool $adjustLength Subtract length of ellipsis from $length
	 *
	 * @return string
	 */
	public function truncateForVisual( $string, $length, $ellipsis = '...', $adjustLength = true ) {
		// Passing encoding to mb_strlen and mb_substr is optional.
		// Encoding defaults to mb_internal_encoding(), which is set to UTF-8 in Setup.php, so
		// explicit specification of encoding is skipped.
		// Note: Both multibyte methods are callables invoked in truncateInternal.
		return $this->truncateInternal( $string, $length, $ellipsis, $adjustLength, 'mb_strlen', 'mb_substr' );
	}

	/**
	 * Internal method used for truncation. This method abstracts text truncation into
	 * one common method, allowing users to provide the length measurement function and
	 * function for finding substring.
	 *
	 * For usages, see truncateForDatabase and truncateForVisual.
	 *
	 * @param string $string String to truncate
	 * @param int $length Maximum length of the final text
	 * @param string $ellipsis String to append to the end of truncated text
	 * @param bool $adjustLength Subtract length of ellipsis from $length
	 * @param callable $measureLength Callable function used for determining the length of text
	 * @param callable $getSubstring Callable function used for getting the substrings
	 *
	 * @return string
	 */
	private function truncateInternal(
		$string, $length, $ellipsis, $adjustLength, callable $measureLength, callable $getSubstring
	) {
		# Check if there is no need to truncate
		if ( $measureLength( $string ) <= abs( $length ) ) {
			return $string; // no need to truncate
		}
		# Use the localized ellipsis character
		if ( $ellipsis == '...' ) {
			$ellipsis = $this->msg( 'ellipsis' )->text();
		}
		if ( $length == 0 ) {
			return $ellipsis; // convention
		}

		$stringOriginal = $string;
		# If ellipsis length is >= $length then we can't apply $adjustLength
		if ( $adjustLength && $measureLength( $ellipsis ) >= abs( $length ) ) {
			$string = $ellipsis; // this can be slightly unexpected
		# Otherwise, truncate and add ellipsis...
		} else {
			$ellipsisLength = $adjustLength ? $measureLength( $ellipsis ) : 0;
			if ( $length > 0 ) {
				$length -= $ellipsisLength;
				$string = $getSubstring( $string, 0, $length ); // xyz...
				$string = rtrim( $string ) . $ellipsis;
			} else {
				$length += $ellipsisLength;
				$string = $getSubstring( $string, $length ); // ...xyz
				$string = $ellipsis . ltrim( $string );
			}
		}

		# Do not truncate if the ellipsis makes the string longer/equal (T24181).
		# This check is *not* redundant if $adjustLength, due to the single case where
		# LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string.
		if ( $measureLength( $string ) < $measureLength( $stringOriginal ) ) {
			return $string;
		} else {
			return $stringOriginal;
		}
	}

	/**
	 * Remove bytes that represent an incomplete Unicode character
	 * at the end of string (e.g. bytes of the char are missing)
	 *
	 * @param string $string
	 * @return string
	 */
	protected function removeBadCharLast( $string ) {
		if ( $string != '' ) {
			$char = ord( $string[strlen( $string ) - 1] );
			$m = [];
			if ( $char >= 0xc0 ) {
				# We got the first byte only of a multibyte char; remove it.
				$string = substr( $string, 0, -1 );
			} elseif ( $char >= 0x80 &&
				// Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines
				preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
					'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m )
			) {
				# We chopped in the middle of a character; remove it
				$string = $m[1];
			}
		}
		return $string;
	}

	/**
	 * Truncate a string of valid HTML to a specified length in bytes,
	 * appending an optional string (e.g., for ellipses), and return valid HTML
	 *
	 * This is only intended for styled/linked text, such as HTML with
	 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
	 * Also, this will not detect things like "display:none" CSS.
	 *
	 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
	 *
	 * @param string $text HTML string to truncate
	 * @param int $length (zero/positive) Maximum HTML length (including ellipses)
	 * @param string $ellipsis String to append to the truncated text
	 * @return string
	 */
	public function truncateHtml( $text, $length, $ellipsis = '...' ) {
		# Use the localized ellipsis character
		if ( $ellipsis == '...' ) {
			$ellipsis = $this->msg( 'ellipsis' )->escaped();
		}
		# Check if there is clearly no need to truncate
		if ( $length <= 0 ) {
			return $ellipsis; // no text shown, nothing to format (convention)
		} elseif ( strlen( $text ) <= $length ) {
			return $text; // string short enough even *with* HTML (short-circuit)
		}

		$dispLen = 0; // innerHTML length so far
		$testingEllipsis = false; // check if ellipses will make the string longer/equal?
		$tagType = 0; // 0-open, 1-close
		$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
		$entityState = 0; // 0-not entity, 1-entity
		$tag = $ret = ''; // accumulated tag name, accumulated result string
		$openTags = []; // open tag stack
		$maybeState = null; // possible truncation state

		$textLen = strlen( $text );
		$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
		for ( $pos = 0; true; ++$pos ) {
			# Consider truncation once the display length has reached the maximum.
			# We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
			# Check that we're not in the middle of a bracket/entity...
			if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
				if ( !$testingEllipsis ) {
					$testingEllipsis = true;
					# Save where we are; we will truncate here unless there turn out to
					# be so few remaining characters that truncation is not necessary.
					if ( !$maybeState ) { // already saved? ($neLength = 0 case)
						$maybeState = [ $ret, $openTags ]; // save state
					}
				} elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
					# The string in fact does need truncation, the truncation point was OK.
					// @phan-suppress-next-line PhanTypeInvalidExpressionArrayDestructuring
					[ $ret, $openTags ] = $maybeState; // reload state
					$ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
					$ret .= $ellipsis; // add ellipsis
					break;
				}
			}
			if ( $pos >= $textLen ) {
				break; // extra iteration just for the checks above
			}

			# Read the next char...
			$ch = $text[$pos];
			$lastCh = $pos ? $text[$pos - 1] : '';
			$ret .= $ch; // add to result string
			if ( $ch == '<' ) {
				$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
				$entityState = 0; // for bad HTML
				$bracketState = 1; // tag started (checking for backslash)
			} elseif ( $ch == '>' ) {
				$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
				$entityState = 0; // for bad HTML
				$bracketState = 0; // out of brackets
			} elseif ( $bracketState == 1 ) {
				if ( $ch == '/' ) {
					$tagType = 1; // close tag (e.g. "</span>")
				} else {
					$tagType = 0; // open tag (e.g. "<span>")
					$tag .= $ch;
				}
				$bracketState = 2; // building tag name
			} elseif ( $bracketState == 2 ) {
				if ( $ch != ' ' ) {
					$tag .= $ch;
				} else {
					// Name found (e.g. "<a href=..."), add on tag attributes...
					$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
				}
			} elseif ( $bracketState == 0 ) {
				if ( $entityState ) {
					if ( $ch == ';' ) {
						$entityState = 0;
						$dispLen++; // entity is one displayed char
					}
				} else {
					if ( $neLength == 0 && !$maybeState ) {
						// Save the state without $ch. We want to *hit* the first
						// display char (to get tags) but not *use* it if truncating.
						$maybeState = [ substr( $ret, 0, -1 ), $openTags ];
					}
					if ( $ch == '&' ) {
						$entityState = 1; // entity found, (e.g. "&nbsp;")
					} else {
						$dispLen++; // this char is displayed
						// Add the next $max display text chars after this in one swoop...
						$max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
						$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
						$dispLen += $skipped;
						$pos += $skipped;
					}
				}
			}
		}
		// Close the last tag if left unclosed by bad HTML
		$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
		while ( count( $openTags ) > 0 ) {
			$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
		}
		return $ret;
	}

	/**
	 * truncateHtml() helper function
	 * like strcspn() but adds the skipped chars to $ret
	 *
	 * @param string &$ret Result string; the skipped bytes are appended to it
	 * @param string $text Text being scanned
	 * @param string $search Bytes that stop the scan (as for strcspn())
	 * @param int $start Offset in $text at which to start scanning
	 * @param null|int $len Maximum number of bytes to skip; null means no limit,
	 *   negative values are treated as 0
	 * @return int Number of bytes skipped (0 if $start is past the end of $text)
	 */
	private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
		if ( $len !== null && $len < 0 ) {
			$len = 0;
		}

		$skipCount = 0;
		if ( $start < strlen( $text ) ) {
			// A negative length is NOT "no limit" for strcspn(): it stops that
			// many bytes before the end of the string, so passing -1 would
			// never skip the final byte. Omit the argument for an unbounded scan.
			$skipCount = $len === null
				? strcspn( $text, $search, $start )
				: strcspn( $text, $search, $start, $len );
			$ret .= substr( $text, $start, $skipCount );
		}

		return $skipCount;
	}

	/**
	 * truncateHtml() helper function
	 * (a) push or pop $tag from $openTags as needed
	 * (b) clear $tag value
	 *
	 * @param string &$tag Current HTML tag name we are looking at
	 * @param int $tagType (0-open tag, 1-close tag)
	 * @param string $lastCh Character before the '>' that ended this tag
	 * @param array &$openTags Open tag stack (not accounting for $tag)
	 */
	private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
		$tag = ltrim( $tag );
		if ( $tag != '' ) {
			if ( $tagType == 0 && $lastCh != '/' ) {
				$openTags[] = $tag; // tag opened (didn't close itself)
			} elseif ( $tagType == 1 ) {
				if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
					array_pop( $openTags ); // tag closed
				}
			}
			$tag = '';
		}
	}

	/**
	 * Grammatical transformations, needed for inflected languages
	 * Invoked by putting {{grammar:case|word}} in a message
	 *
	 * A per-word override from the GrammarForms configuration wins; otherwise
	 * the first matching regex rule from getGrammarTransformations() is applied.
	 *
	 * @param string $word
	 * @param string $case
	 * @return string
	 */
	public function convertGrammar( $word, $case ) {
		$grammarForms = $this->config->get( MainConfigNames::GrammarForms );
		if ( isset( $grammarForms[$this->getCode()][$case][$word] ) ) {
			return $grammarForms[$this->getCode()][$case][$word];
		}

		$grammarTransformations = $this->getGrammarTransformations();

		if ( isset( $grammarTransformations[$case] ) ) {
			$forms = $grammarTransformations[$case];

			// Some names of grammar rules are aliases for other rules.
			// In such cases the value is a string rather than object,
			// so load the actual rules.
			if ( is_string( $forms ) ) {
				$forms = $grammarTransformations[$forms];
			}

			foreach ( $forms as $rule ) {
				$form = $rule[0];

				if ( $form === '@metadata' ) {
					continue;
				}

				$replacement = $rule[1];

				$regex = '/' . addcslashes( $form, '/' ) . '/u';
				$patternMatches = preg_match( $regex, $word );

				if ( $patternMatches === false ) {
					wfLogWarning(
						'An error occurred while processing grammar. ' .
						"Word: '$word'. Regex: /$form/."
					);
				} elseif ( $patternMatches === 1 ) {
					// Only the first matching rule is applied.
					$word = preg_replace( $regex, $replacement, $word );

					break;
				}
			}
		}

		return $word;
	}

	/**
	 * Get the grammar forms for the content language.
	 *
	 * @return array Array of grammar forms
	 * @since 1.20
	 */
	public function getGrammarForms() {
		$grammarForms = $this->config->get( MainConfigNames::GrammarForms );
		if ( isset( $grammarForms[$this->getCode()] )
			&& is_array( $grammarForms[$this->getCode()] )
		) {
			return $grammarForms[$this->getCode()];
		}

		return [];
	}

	/**
	 * Get the grammar transformations data for the language.
	 * Used like grammar forms, with {{GRAMMAR}} and cases,
	 * but uses pairs of regexes and replacements instead of code.
	 *
	 * @return array[] Array of grammar transformations.
* @since 1.28
	 */
	public function getGrammarTransformations() {
		global $IP;

		if ( $this->grammarTransformCache === null ) {
			$jsonPath = $IP . "/languages/data/grammarTransformations/{$this->getCode()}.json";

			// A missing or unreadable file simply means "no transformations".
			if ( is_readable( $jsonPath ) ) {
				$this->grammarTransformCache = FormatJson::decode( file_get_contents( $jsonPath ), true );
			} else {
				$this->grammarTransformCache = [];
			}

			// The decode result is still null here when the file could not be parsed.
			if ( $this->grammarTransformCache === null ) {
				throw new RuntimeException( "Invalid grammar data for \"{$this->getCode()}\"." );
			}
		}

		return $this->grammarTransformCache;
	}

	/**
	 * Pick the text form matching a gender.
	 * Usage {{gender:username|masculine|feminine|unknown}}.
	 * The username is optional; without it the gender of the current user is
	 * used, but only in (some) interface messages, otherwise the default
	 * gender is used.
	 *
	 * With no forms an empty string is returned, and a single form is
	 * returned unconditionally. These details are implied by the caller and
	 * cannot be overridden in subclasses.
	 *
	 * For any gender other than 'male' or 'female' the third (unknown) form
	 * is used when present, and the first (masculine) form otherwise.
	 * These details can be overridden in subclasses.
	 *
	 * @param string $gender
	 * @param array $forms
	 *
	 * @return string
	 */
	public function gender( $gender, $forms ) {
		if ( !count( $forms ) ) {
			return '';
		}

		// Guarantee at least a masculine and a feminine slot.
		$forms = $this->preConvertPlural( $forms, 2 );

		$slot = null;
		if ( $gender === 'male' ) {
			$slot = 0;
		} elseif ( $gender === 'female' ) {
			$slot = 1;
		}

		if ( $slot !== null ) {
			return $forms[$slot];
		}

		return $forms[2] ?? $forms[0];
	}

	/**
	 * Plural form transformations, needed for some languages.
	 * For example, there are 3 forms of plural in Russian and Polish,
	 * depending on "count mod 10". See [[w:Plural]]
	 * For English it is pretty simple.
*
	 * Invoked by putting {{plural:count|wordform1|wordform2}}
	 * or {{plural:count|wordform1|wordform2|wordform3}}
	 *
	 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
	 *
	 * @param int $count Non-localized number
	 * @param array $forms Different plural forms
	 * @return string Correct form of plural for $count in this language
	 */
	public function convertPlural( $count, $forms ) {
		// Handle explicit n=pluralform cases
		$forms = $this->handleExplicitPluralForms( $count, $forms );
		if ( is_string( $forms ) ) {
			return $forms;
		}
		if ( !count( $forms ) ) {
			return '';
		}

		$pluralForm = $this->getPluralRuleIndexNumber( $count );
		// Reuse the last supplied form when fewer forms than rules were given.
		$pluralForm = min( $pluralForm, count( $forms ) - 1 );
		return $forms[$pluralForm];
	}

	/**
	 * Handles explicit plural forms for Language::convertPlural()
	 *
	 * In {{PLURAL:$1|0=nothing|one|many}}, 0=nothing will be returned if $1 equals zero.
	 * If an explicitly defined plural form matches the $count, then the
	 * string value is returned. Otherwise the array is returned for further consideration
	 * by CLDR rules or overridden convertPlural().
	 *
	 * A form only counts as explicit when the text before its first "=" is
	 * number-like (optional minus sign, digits and dots, ending in a digit).
	 * Explicit forms that do not match $count are removed from the result;
	 * all other forms are kept, even if they contain "<digit>=" further on.
	 *
	 * @since 1.23
	 *
	 * @param int $count Non-localized number
	 * @param string[] $forms Different plural forms
	 *
	 * @return string[]|string
	 */
	protected function handleExplicitPluralForms( $count, array $forms ) {
		$wanted = (string)$count;

		foreach ( $forms as $index => $form ) {
			// Anchor the test to the prefix that is actually compared below.
			// An unanchored /\d+=/ also hit ordinary forms such as
			// 'see id1=x' and silently dropped them.
			$m = [];
			if ( !preg_match( '/^(-?[\d.]*\d)=(.*)$/s', $form, $m ) ) {
				continue;
			}

			if ( $m[1] === $wanted ) {
				return $m[2];
			}

			// Explicit form for a different number: never a CLDR candidate.
			unset( $forms[$index] );
		}

		return array_values( $forms );
	}

	/**
	 * Checks that convertPlural was given an array and pads it to requested
	 * number of forms by copying the last one.
*
	 * @param array $forms
	 * @param int $count Minimum number of forms
	 * @return array Padded array of forms
	 */
	protected function preConvertPlural( /* Array */ $forms, $count ) {
		// The last supplied form is the filler for any missing slots.
		$filler = end( $forms );

		return array_pad( $forms, $count, $filler );
	}

	/**
	 * Tell which of the inline formality variants this language uses.
	 *
	 * Some languages provide translations in different levels of formality (or
	 * manner of address), for example using the T-V distinction. Because most
	 * translations do not differ by formality, a magic word such as
	 * `{{#FORMAL:Du|Sie}}` lets translators write the message once and define
	 * the differing forms inline. This only works for languages that have a
	 * fallback relation, for example `es` and `es-formal`. The returned value
	 * is the zero-based index of the form to use; it is read from the
	 * 'formalityIndex' localisation cache item and defaults to 0.
	 *
	 * @since 1.43
	 */
	public function getFormalityIndex(): int {
		$index = $this->localisationCache->getItem( $this->mCode, 'formalityIndex' );

		return $index ?? 0;
	}

	/**
	 * Wraps argument with unicode control characters for directionality safety
	 *
	 * This solves the problem where directionality-neutral characters at the edge of
	 * the argument string get interpreted with the wrong directionality from the
	 * enclosing context, giving renderings that look corrupted like "(Ben_(WMF".
	 *
	 * The wrapping is LRE...PDF or RLE...PDF, depending on the detected
	 * directionality of the argument string, using the BIDI algorithm's own "First
	 * strong directional codepoint" rule. Essentially, this works round the fact that
	 * there is no embedding equivalent of U+2068 FSI (isolation with heuristic
	 * direction inference). The latter is cleaner but still not widely supported.
	 *
	 * Use of hidden control characters when the output allows use of HTML markup
	 * is discouraged and the recommendation is to use bdi HTML tag which doesn't
	 * have the issue of hidden characters ending up in user clipboard in text
	 * copy paste, see T375975.
	 *
	 * @deprecated since 1.43, use bdi HTML tag in HTML context where possible.
* @param string $text Text to wrap
	 * @return string Text, wrapped in LRE...PDF or RLE...PDF or nothing
	 */
	public function embedBidi( $text = '' ) {
		$dir = self::strongDirFromContent( $text );

		if ( $dir === 'ltr' ) {
			// LEFT-TO-RIGHT EMBEDDING
			$opener = self::LRE;
		} elseif ( $dir === 'rtl' ) {
			// RIGHT-TO-LEFT EMBEDDING
			$opener = self::RLE;
		} else {
			// No strong directionality: leave the text unwrapped
			return $text;
		}

		// Close the embedding with POP DIRECTIONAL FORMATTING
		return $opener . $text . self::PDF;
	}

	/**
	 * Build the list of suggested block durations from MediaWiki:Ipboptions.
	 *
	 * A message text of '-' yields an empty list. Otherwise the text is parsed
	 * by XmlSelect::parseOptionsMessage().
	 *
	 * @todo FIXME: This uses a rather odd syntax for the options, should it be converted
	 *     to the standard "**<duration>|<displayname>" format?
	 * @since 1.42
	 * @param bool $includeOther Whether to include the 'other' option in the list of
	 *     suggestions
	 * @return string[]
	 */
	public function getBlockDurations( $includeOther = true ): array {
		$optionsText = $this->msg( 'ipboptions' )->text();
		if ( $optionsText == '-' ) {
			return [];
		}

		$options = XmlSelect::parseOptionsMessage( $optionsText );

		if ( $options && $includeOther ) {
			// Put 'other' last rather than first (which is what happens by
			// default), and only when there are options at all.
			$otherLabel = $this->msg( 'ipbother' )->text();
			$options[ $otherLabel ] = 'other';
		}

		return $options;
	}

	/**
	 * @todo Maybe translate block durations. Note that this function is somewhat misnamed: it
	 * deals with translating the *duration* ("1 week", "4 days", etc.), not the expiry time
	 * (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
	 * on old expiry lengths recorded in log entries. You'd need to provide the start date to
	 * match up with it.
*
	 * @param string $str The validated block duration in English
	 * @param UserIdentity|null $user User to use timezone from or null for the context user
	 * @param int $now Current timestamp, for formatting relative block durations
	 * @return string Somehow translated block duration
	 * @see LanguageFi.php file for an implementation example
	 */
	public function translateBlockExpiry( $str, ?UserIdentity $user = null, $now = 0 ) {
		$durations = $this->getBlockDurations();

		// Exact match against one of the configured durations.
		$label = array_search( $str, $durations, true );
		if ( $label !== false ) {
			return trim( $label );
		}

		// Any spelling of "infinite" maps to the first configured infinite option.
		if ( wfIsInfinity( $str ) ) {
			foreach ( $durations as $candidateLabel => $candidateValue ) {
				if ( wfIsInfinity( $candidateValue ) ) {
					return trim( $candidateLabel );
				}
			}
		}

		// If all else fails, return a standard duration or timestamp description.
		$time = strtotime( $str, $now );
		if ( $time === false ) {
			// Unknown format. Return it as-is in case.
			return $str;
		}

		if ( $time !== strtotime( $str, $now + 1 ) ) {
			// The result moves with the reference time, so $str is relative
			// and the difference is a fixed duration length.
			return $this->formatDurationBetweenTimestamps( $time, $now );
		}

		// Otherwise it's an absolute timestamp.
		if ( $time === 0 ) {
			// wfTimestamp() handles 0 as current time instead of epoch.
			$time = '19700101000000';
		}

		return $user
			? $this->userTimeAndDate( $time, $user )
			: $this->timeanddate( $time );
	}

	/**
	 * Hook for languages (such as Chinese) that must be segmented for a diff
	 * to be of any use. This base implementation returns the text unchanged.
	 *
	 * @param string $text
	 * @return string
	 */
	public function segmentForDiff( $text ) {
		$segmented = $text;

		return $segmented;
	}

	/**
	 * Counterpart of segmentForDiff(), used to show the result. This base
	 * implementation returns the text unchanged.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unsegmentForDiff( $text ) {
		$unsegmented = $text;

		return $unsegmented;
	}

	/**
	 * A regular expression to match legal word-trailing characters
	 * which should be merged onto a link of the form [[foo]]bar.
*
	 * @return string
	 */
	public function linkTrail() {
		$trail = $this->localisationCache->getItem( $this->mCode, 'linkTrail' );

		return $trail;
	}

	/**
	 * A regular expression character set matching the legal word-prefixing
	 * characters that should be merged onto a link of the form foo[[bar]].
	 *
	 * @return string
	 */
	public function linkPrefixCharset() {
		$charset = $this->localisationCache->getItem( $this->mCode, 'linkPrefixCharset' );

		return $charset;
	}

	/**
	 * Tell whether another language object stands for the same language,
	 * i.e. is this very object or carries the same internal code.
	 *
	 * @since 1.28
	 * @param Language $lang
	 * @return bool
	 */
	public function equals( Language $lang ) {
		if ( $lang === $this ) {
			return true;
		}

		return $lang->getCode() === $this->mCode;
	}

	/**
	 * Return the internal language code of this language object.
	 *
	 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
	 * htmlspecialchars() or similar
	 *
	 * @return string
	 */
	public function getCode() {
		$code = $this->mCode;

		return $code;
	}

	/**
	 * Return the code in BCP 47 format, usable inside html lang="" tags.
	 * The value is computed once via LanguageCode::bcp47() and then reused.
	 *
	 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
	 * htmlspecialchars() or similar.
	 *
	 * @since 1.19
	 * @return string
	 */
	public function getHtmlCode() {
		return $this->mHtmlCode ??= LanguageCode::bcp47( $this->getCode() );
	}

	/**
	 * Bcp47Code interface implementation; an alias for ::getHtmlCode().
	 *
	 * @since 1.40
	 * @return string
	 */
	public function toBcp47Code(): string {
		$htmlCode = $this->getHtmlCode();

		return $htmlCode;
	}

	/**
	 * Compare this Language object to a Bcp47Code, as required by the
	 * Bcp47Code interface.
	 *
	 * @param Bcp47Code $other
	 * @return bool
	 * @since 1.41
	 */
	public function isSameCodeAs( Bcp47Code $other ): bool {
		if ( $other === $this ) {
			return true;
		}

		if ( !( $other instanceof Language ) ) {
			// Bcp-47 codes are case insensitive.
			// See Bcp47CodeValue::isSameCode()
			$ownCode = $this->toBcp47Code();
			$otherCode = $other->toBcp47Code();

			return strcasecmp( $ownCode, $otherCode ) === 0;
		}

		// Two Language objects: compare the mediawiki-internal code
		return $this->equals( $other );
	}

	/**
	 * Get the language code from a file name.
Inverse of getFileName()
	 *
	 * @param string $filename $prefix . $languageCode . $suffix
	 * @param string $prefix Prefix before the language code
	 * @param string $suffix Suffix after the language code
	 * @return string|false Language code, or false if $prefix or $suffix isn't found
	 */
	public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
		$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
			preg_quote( $suffix, '/' ) . '/';

		$matches = [];
		if ( !preg_match( $pattern, $filename, $matches ) ) {
			return false;
		}

		// File names use e.g. "Zh_hans"; codes are lower case with hyphens.
		$lowered = strtolower( $matches[1] );

		return str_replace( '_', '-', $lowered );
	}

	/**
	 * Expand the '$1' placeholder in a namespace name.
	 *
	 * '$1' is replaced by the MetaNamespace setting, {{grammar:case|word}}
	 * constructs are resolved through convertGrammar(), and spaces become
	 * underscores. Names without '$1' are returned untouched.
	 *
	 * @param string $talk
	 * @return string
	 */
	private function fixVariableInNamespace( $talk ) {
		$hasPlaceholder = strpos( $talk, '$1' ) !== false;
		if ( !$hasPlaceholder ) {
			return $talk;
		}

		$metaNamespace = $this->config->get( MainConfigNames::MetaNamespace );
		$talk = str_replace( '$1', $metaNamespace, $talk );

		# Allow grammar transformations
		# Allowing full message-style parsing would make simple requests
		# such as action=raw much more expensive than they need to be.
		# This will hopefully cover most cases.
		$talk = preg_replace_callback(
			'/{{grammar:(.*?)\|(.*?)}}/i',
			function ( $m ) {
				$case = trim( $m[1] );
				$word = trim( $m[2] );

				return $this->convertGrammar( $word, $case );
			},
			$talk
		);

		return str_replace( ' ', '_', $talk );
	}

	/**
	 * Decode an expiry (block, protection, etc.)
which has come from the DB
	 *
	 * @param string $expiry Database expiry String
	 * @param true|int $format True to process using language functions, or TS_ constant
	 *     to return the expiry in a given timestamp
	 * @param string $infinity If $format is not true, use this string for infinite expiry
	 * @param UserIdentity|null $user If $format is true, use this user for date format
	 * @return string
	 * @since 1.18
	 * @since 1.36 $user was added
	 */
	public function formatExpiry( $expiry, $format = true, $infinity = 'infinity', $user = null ) {
		// The database's infinity value is looked up once per request.
		static $dbInfinity;
		$dbInfinity ??= MediaWikiServices::getInstance()->getConnectionProvider()
			->getReplicaDatabase()
			->getInfinity();

		$neverExpires = $expiry == '' || $expiry === 'infinity' || $expiry == $dbInfinity;
		if ( $neverExpires ) {
			if ( $format === true ) {
				return $this->getMessageFromDB( 'infiniteblock' );
			}

			return $infinity;
		}

		if ( $format !== true ) {
			// Caller asked for a specific TS_* timestamp format.
			return wfTimestamp( $format, $expiry );
		}

		if ( $user ) {
			return $this->userTimeAndDate( $expiry, $user );
		}

		return $this->timeanddate( $expiry, /* User preference timezone */ true );
	}

	/**
	 * Formats a time given in seconds into a string representation of that time.
	 *
	 * @param int|float $seconds
	 * @param array $format An optional argument that formats the returned string in different ways:
	 *   If $format['avoid'] === 'avoidhours': don't show hours, just show days
	 *   If $format['avoid'] === 'avoidseconds': don't show seconds if $seconds >= 1 hour,
	 *   If $format['avoid'] === 'avoidminutes': don't show seconds/minutes if $seconds > 48 hours,
	 *   If $format['noabbrevs'] is true: use 'seconds' and friends instead of 'seconds-abbrev'
	 *     and friends.
	 * @note For backwards compatibility, $format may also be one of the strings 'avoidseconds'
	 *     or 'avoidminutes'.
* @return string
	 */
	public function formatTimePeriod( $seconds, $format = [] ) {
		if ( !is_array( $format ) ) {
			// For backwards compatibility
			$format = [ 'avoid' => $format ];
		}
		$format['avoid'] ??= false;
		$format['noabbrevs'] ??= false;

		$secondsMsg = $this->msg( $format['noabbrevs'] ? 'seconds' : 'seconds-abbrev' );
		$minutesMsg = $this->msg( $format['noabbrevs'] ? 'minutes' : 'minutes-abbrev' );
		$hoursMsg = $this->msg( $format['noabbrevs'] ? 'hours' : 'hours-abbrev' );
		$daysMsg = $this->msg( $format['noabbrevs'] ? 'days' : 'days-abbrev' );
		$space = $this->msg( 'word-separator' )->text();

		// Under ten seconds: one decimal place.
		if ( round( $seconds * 10 ) < 100 ) {
			$shown = $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) );
			return $secondsMsg->params( $shown )->text();
		}

		// Under a minute: whole seconds.
		if ( round( $seconds ) < 60 ) {
			$shown = $this->formatNum( round( $seconds ) );
			return $secondsMsg->params( $shown )->text();
		}

		// Under an hour: minutes and seconds.
		if ( round( $seconds ) < 3600 ) {
			$minutes = floor( $seconds / 60 );
			$secondsPart = round( fmod( $seconds, 60 ) );
			if ( $secondsPart == 60 ) {
				// Rounding carried over into the next minute.
				$secondsPart = 0;
				$minutes++;
			}
			return implode( $space, [
				$minutesMsg->params( $this->formatNum( $minutes ) )->text(),
				$secondsMsg->params( $this->formatNum( $secondsPart ) )->text(),
			] );
		}

		// Up to two days: hours, minutes and (unless avoided) seconds.
		if ( round( $seconds ) <= 2 * 86400 ) {
			$hours = floor( $seconds / 3600 );
			$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
			$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
			if ( $secondsPart == 60 ) {
				$secondsPart = 0;
				$minutes++;
			}
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			$parts = [
				$hoursMsg->params( $this->formatNum( $hours ) )->text(),
				$minutesMsg->params( $this->formatNum( $minutes ) )->text(),
			];
			if ( !in_array( $format['avoid'], [ 'avoidseconds', 'avoidminutes', 'avoidhours' ] ) ) {
				$parts[] = $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
			}
			return implode( $space, $parts );
		}

		// More than two days: the precision depends on $format['avoid'].
		$days = floor( $seconds / 86400 );
		$avoid = $format['avoid'];

		if ( $avoid === 'avoidhours' ) {
			$hours = round( ( $seconds - $days * 86400 ) / 3600 );
			if ( $hours == 24 ) {
				$days++;
			}
			return $daysMsg->params( $this->formatNum( $days ) )->text();
		}

		if ( $avoid === 'avoidminutes' ) {
			$hours = round( ( $seconds - $days * 86400 ) / 3600 );
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			return implode( $space, [
				$daysMsg->params( $this->formatNum( $days ) )->text(),
				$hoursMsg->params( $this->formatNum( $hours ) )->text(),
			] );
		}

		if ( $avoid === 'avoidseconds' ) {
			$hours = floor( ( $seconds - $days * 86400 ) / 3600 );
			$minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			return implode( $space, [
				$daysMsg->params( $this->formatNum( $days ) )->text(),
				$hoursMsg->params( $this->formatNum( $hours ) )->text(),
				$minutesMsg->params( $this->formatNum( $minutes ) )->text(),
			] );
		}

		// Nothing avoided: days, then format the remainder recursively.
		return implode( $space, [
			$daysMsg->params( $this->formatNum( $days ) )->text(),
			$this->formatTimePeriod( $seconds - $days * 86400, $format ),
		] );
	}

	/**
	 * Format a bitrate for output, using an appropriate
	 * unit (bps, kbps, Mbps, Gbps, Tbps, Pbps, Ebps, Zbps, Ybps, Rbps or Qbps) according to
	 * the magnitude in question.
	 *
	 * This use base 1000. For base 1024 use formatSize(), for another base
	 * see formatComputingNumbers().
*
	 * @param int $bps
	 * @return string
	 */
	public function formatBitrate( $bps ) {
		// messages used: bitrate-bits, bitrate-kilobits, bitrate-megabits, bitrate-gigabits, bitrate-terabits,
		// bitrate-petabits, bitrate-exabits, bitrate-zettabits, bitrate-yottabits, bitrate-ronnabits,
		// bitrate-quettabits
		return $this->formatComputingNumbers( $bps, 1000, "bitrate-$1bits" );
	}

	/**
	 * Scale a number down by repeated division and format it with the matching
	 * unit message.
	 *
	 * The '$1' placeholder in $messageKey is replaced by the unit prefix
	 * ('', 'kilo', 'mega', ...) to build the message key; the '$1' in the
	 * resulting message text is replaced by the formatted number.
	 *
	 * @param int $size Size of the unit
	 * @param int $boundary Size boundary (1000, or 1024 in most cases)
	 * @param string $messageKey Message key to be used
	 * @return string
	 */
	public function formatComputingNumbers( $size, $boundary, $messageKey ) {
		if ( $size <= 0 ) {
			// Zero and negative values always use the unprefixed unit
			$number = $this->formatNum( $size );
			$template = $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) );
			return str_replace( '$1', $number, $template );
		}

		$prefixes = [ '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta', 'ronna', 'quetta' ];
		$lastPrefix = count( $prefixes ) - 1;

		// Climb the prefix list until the value fits below the boundary
		// or the largest known prefix is reached
		for ( $index = 0; $size >= $boundary && $index < $lastPrefix; $index++ ) {
			$size /= $boundary;
		}

		// For small sizes no decimal places necessary;
		// for MB and larger units, two decimal places are smarter
		$precision = $index > 1 ? 2 : 0;

		$msg = str_replace( '$1', $prefixes[$index], $messageKey );
		$size = round( $size, $precision );
		$text = $this->getMessageFromDB( $msg );
		return str_replace( '$1', $this->formatNum( $size ), $text );
	}

	/**
	 * Format a size in bytes for output, using an appropriate
	 * unit (B, KB, MB, GB, TB, PB, EB, ZB, YB, RB or QB) according to the magnitude in question
	 *
	 * This method use base 1024.
For base 1000 use formatBitrate(), for
	 * another base see formatComputingNumbers()
	 *
	 * @param int $size Size to format
	 * @return string Plain text (not HTML)
	 */
	public function formatSize( $size ) {
		// messages used: size-bytes, size-kilobytes, size-megabytes, size-gigabytes, size-terabytes,
		// size-petabytes, size-exabytes, size-zettabytes, size-yottabytes, size-ronnabytes, size-quettabytes
		return $this->formatComputingNumbers( $size, 1024, "size-$1bytes" );
	}

	/**
	 * Make a list item, used by various special pages
	 *
	 * @param string $page Page link
	 * @param string $details HTML safe text between brackets
	 * @return string HTML escaped
	 */
	public function specialList( $page, $details ) {
		if ( !$details ) {
			// Nothing to put in brackets: the link stands alone
			return $page;
		}

		$isolatedPage = Html::rawElement( 'bdi', [ 'dir' => $this->getDir() ], $page );
		$separator = $this->msg( 'word-separator' )->escaped();
		$bracketed = $this->msg( 'parentheses' )->rawParams( $details )->escaped();

		return $isolatedPage . $separator . $bracketed;
	}

	/**
	 * Return the NumberFormatter for this language, creating and caching it
	 * in $this->numberFormatter on first use.
	 *
	 * @return NumberFormatter
	 * @throws RuntimeException If no formatter could be created for the
	 *   chosen locale or any fallback language
	 */
	private function getNumberFormatter(): NumberFormatter {
		if ( $this->numberFormatter !== null ) {
			return $this->numberFormatter;
		}

		$digitGroupingPattern = $this->digitGroupingPattern();
		$code = $this->getCode();
		if (
			!$this->config->get( MainConfigNames::TranslateNumerals ) ||
			!$this->langNameUtils->isValidCode( $code )
		) {
			$code = Locale::getDefault(); // POSIX system default locale
		}

		$formatter = $this->createNumberFormatter( $code, $digitGroupingPattern );
		if ( !$formatter ) {
			// Try each fallback language in turn until one works
			foreach ( $this->getFallbackLanguages() as $fallbackCode ) {
				$formatter = $this->createNumberFormatter( $fallbackCode, $digitGroupingPattern );
				if ( $formatter ) {
					break;
				}
			}
			if ( !$formatter ) {
				throw new RuntimeException(
					'Could not instance NumberFormatter for ' . $code . ' and all fallbacks'
				);
			}
		}

		$this->numberFormatter = $formatter;
		return $this->numberFormatter;
	}

	/**
	 * Try to build a decimal NumberFormatter for a locale.
	 *
	 * @param string $code Locale code
	 * @param string|null $digitGroupingPattern Custom pattern; when empty the
	 *   plain decimal style is used
	 * @return NumberFormatter|null Null when construction throws a ValueError
	 */
	private function createNumberFormatter( string $code, ?string $digitGroupingPattern ): ?NumberFormatter {
		try {
			if ( $digitGroupingPattern ) {
				return new NumberFormatter(
					$code, NumberFormatter::PATTERN_DECIMAL, $digitGroupingPattern
				);
			}
			// @suppress PhanParamTooFew Phan thinks this always requires 3 parameters, that's wrong
			return new NumberFormatter( $code, NumberFormatter::DECIMAL );
		} catch ( \ValueError $_ ) {
			// Value Errors are thrown since php8.4 for invalid locales
			return null;
		}
	}

	/**
	 * Get the compiled plural rules for the language
	 *
	 * Falls back through the fallback languages until one has a non-empty value.
	 *
	 * @since 1.20
	 * @return array<int,string> Associative array with plural form, and plural rule as key-value pairs
	 */
	public function getCompiledPluralRules() {
		$found = $this->localisationCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
		if ( $found ) {
			return $found;
		}
		foreach ( $this->getFallbackLanguages() as $fallbackCode ) {
			$found = $this->localisationCache
				->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
			if ( $found ) {
				return $found;
			}
		}
		// Nothing usable anywhere: hand back the last (empty) value seen
		return $found;
	}

	/**
	 * Get the plural rules for the language
	 *
	 * Falls back through the fallback languages until one has a non-empty value.
	 *
	 * @since 1.20
	 * @return array<int,string> Associative array with plural form number and plural rule as key-value pairs
	 */
	public function getPluralRules() {
		$found = $this->localisationCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
		if ( $found ) {
			return $found;
		}
		foreach ( $this->getFallbackLanguages() as $fallbackCode ) {
			$found = $this->localisationCache
				->getItem( strtolower( $fallbackCode ), 'pluralRules' );
			if ( $found ) {
				return $found;
			}
		}
		// Nothing usable anywhere: hand back the last (empty) value seen
		return $found;
	}

	/**
	 * Get the plural rule types for the language
	 *
	 * Falls back through the fallback languages until one has a non-empty value.
	 *
	 * @since 1.22
	 * @return array<int,string> Associative array with plural form number and plural rule type as key-value pairs
	 */
	public function getPluralRuleTypes() {
		$found = $this->localisationCache->getItem( strtolower( $this->mCode ), 'pluralRuleTypes' );
		if ( $found ) {
			return $found;
		}
		foreach ( $this->getFallbackLanguages() as $fallbackCode ) {
			$found = $this->localisationCache
				->getItem( strtolower( $fallbackCode ), 'pluralRuleTypes' );
			if ( $found ) {
				return $found;
			}
		}
		// Nothing usable anywhere: hand back the last (empty) value seen
		return $found;
	}

	/**
	 * Find the index number of the plural rule appropriate for the given number
	 *
	 * @param int $number
	 * @return int The index number of the plural rule
	 */
	public function getPluralRuleIndexNumber( $number ) {
		return Evaluator::evaluateCompiled( $number, $this->getCompiledPluralRules() );
	}

	/**
	 * Find the plural rule type appropriate for the given number.
	 * For example, if the language is set to Arabic, getPluralRuleType(5) should
	 * return 'few'.
	 *
	 * @since 1.22
	 * @param int $number
	 * @return string The name of the plural rule type, e.g., one, two, few, many;
	 *   'other' when no type is recorded for the matching rule index
	 */
	public function getPluralRuleType( $number ) {
		$ruleIndex = $this->getPluralRuleIndexNumber( $number );
		$types = $this->getPluralRuleTypes();
		return $types[$ruleIndex] ?? 'other';
	}

	/**
	 * Return the LanguageConverter for this language,
	 * convenience function for use in the language classes only
	 *
	 * @return ILanguageConverter
	 */
	protected function getConverterInternal() {
		return $this->converterFactory->getLanguageConverter( $this );
	}

	/**
	 * Get a HookContainer, for hook metadata and running extension hooks
	 *
	 * @since 1.35
	 * @return HookContainer
	 */
	protected function getHookContainer() {
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner, for running core hooks
	 *
	 * @internal This is for use by core only. Hook interfaces may be removed
	 *   without notice.
* @since 1.35
	 * @return HookRunner
	 */
	protected function getHookRunner() {
		return $this->hookRunner;
	}

	/**
	 * Collect the per-language data that is exported to JavaScript.
	 *
	 * Every entry is read from the accessor of the same name on this object,
	 * except 'bcp47Map', which comes from
	 * LanguageCode::getNonstandardLanguageCodeMapping().
	 *
	 * @internal Only for use by the 'mediawiki.language' ResourceLoader module and
	 *   generateJqueryMsgData.php
	 * @return array
	 */
	public function getJsData() {
		return [
			'digitTransformTable' => $this->digitTransformTable(),
			'separatorTransformTable' => $this->separatorTransformTable(),
			'minimumGroupingDigits' => $this->minimumGroupingDigits(),
			'formalityIndex' => $this->getFormalityIndex(),
			'grammarForms' => $this->getGrammarForms(),
			'grammarTransformations' => $this->getGrammarTransformations(),
			'pluralRules' => $this->getPluralRules(),
			'digitGroupingPattern' => $this->digitGroupingPattern(),
			'fallbackLanguages' => $this->getFallbackLanguages(),
			'bcp47Map' => LanguageCode::getNonstandardLanguageCodeMapping(),
		];
	}
}

/** @deprecated class alias since 1.43 */
class_alias( Language::class, 'Language' );
PK ! �BЌk k LanguageConverterSpecific.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\Title\Title;

/**
 * A class that extends LanguageConverter with specific behaviour.
*
 * @ingroup Language
 */
abstract class LanguageConverterSpecific extends LanguageConverter {
	/**
	 * A function wrapper:
	 *  - if there is no selected variant, leave the link
	 *    names as they were
	 *  - do not try to find variants for usernames
	 *
	 * @param string &$link
	 * @param Title &$nt
	 * @param bool $ignoreOtherCond
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		// check for user namespace: such links are never touched
		if (
			is_object( $nt ) &&
			in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ], true )
		) {
			return;
		}

		$originalLink = $link;
		parent::findVariantLink( $link, $nt, $ignoreOtherCond );

		// When the preferred variant is the main code, restore the link text
		// the parent call may have rewritten
		if ( $this->getPreferredVariant() == $this->getMainCode() ) {
			$link = $originalLink;
		}
	}
}
PK ! s�t�� � TrivialLanguageConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\Language\ILanguageConverter;
use MediaWiki\Language\Language;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\MediaWikiServices;
use MediaWiki\StubObject\StubUserLang;
use MediaWiki\Title\TitleFormatter;

/**
 * A trivial language converter.
 *
 * For Languages which do not implement variant
 * conversion, for example, German, TrivialLanguageConverter is provided rather than a
 * LanguageConverter when asked for their converter.
The TrivialLanguageConverter just
 * returns text unchanged, i.e., it doesn't do any conversion.
 *
 * See https://www.mediawiki.org/wiki/Writing_systems#LanguageConverter.
 *
 * @since 1.35
 * @ingroup Language
 */
class TrivialLanguageConverter implements ILanguageConverter {

	/**
	 * @var Language
	 */
	protected $language;

	/**
	 * @var TitleFormatter
	 */
	private $titleFormatter;

	/**
	 * Creates a converter for languages that don't have variants. This method is internal
	 * and should be called for LanguageConverterFactory only
	 *
	 * @param Language|StubUserLang $langobj
	 * @param TitleFormatter|null $titleFormatter Taken from MediaWikiServices when omitted
	 *
	 * @internal
	 */
	public function __construct(
		$langobj,
		?TitleFormatter $titleFormatter = null
	) {
		if ( $titleFormatter === null ) {
			$titleFormatter = MediaWikiServices::getInstance()->getTitleFormatter();
		}
		$this->language = $langobj;
		$this->titleFormatter = $titleFormatter;
	}

	/** No conversion: the text is returned as given. */
	public function autoConvert( $text, $variant = false ) {
		return $text;
	}

	/** The only "variant" is the language's own code, mapped to the unchanged text. */
	public function autoConvertToAllVariants( $text ) {
		return [ $this->language->getCode() => $text ];
	}

	/** No conversion: the text is returned as given. */
	public function convert( $t ) {
		return $t;
	}

	/** No conversion: the text is returned as given. */
	public function convertTo( $text, $variant, bool $clearState = true ) {
		return $text;
	}

	/**
	 * Split a title into namespace text, separator and main text, without
	 * converting anything.
	 *
	 * @return array Three entries: namespace text (underscores replaced by spaces),
	 *   ':', and the main text
	 */
	public function convertSplitTitle( $title ) {
		$mainText = $this->titleFormatter->getText( $title );
		$index = $title->getNamespace();

		try {
			$rawNsName = $this->titleFormatter->getNamespaceName( $index, $mainText );
		} catch ( InvalidArgumentException $e ) {
			// T165149: see MediaWikiTitleCodec::formatTitle()
			$rawNsName = $this->language->getNsText( NS_SPECIAL );
			$mainText = "Badtitle/NS$index:$mainText";
		}

		return [ str_replace( '_', ' ', $rawNsName ), ':', $mainText ];
	}

	/** The prefixed title text, unconverted. */
	public function convertTitle( $title ) {
		return $this->titleFormatter->getPrefixedText( $title );
	}

	/** The formatted namespace name from the language object; $variant is ignored. */
	public function convertNamespace( $index, $variant = null ) {
		return $this->language->getFormattedNsText( $index );
	}

	/** The language's own code is the only variant. */
	public function getVariants() {
		return [ $this->language->getCode() ];
	}

	/** Always the language's own code. */
	public function getVariantFallbacks( $variant ) {
		return $this->language->getCode();
	}

	/** Always the language's own code. */
	public function getPreferredVariant() {
		return $this->language->getCode();
	}

	/** Always the language's own code. */
	public function getDefaultVariant() {
		return $this->language->getCode();
	}

	/** Always the empty string. */
	public function getURLVariant() {
		return '';
	}

	/** Always false. */
	public function getConvRuleTitle() {
		return false;
	}

	/** Intentionally does nothing: neither argument is modified. */
	public function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) {
	}

	/** Always the empty string. */
	public function getExtraHashOptions() {
		return '';
	}

	/** Always false. */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/** No marking is applied: the text is returned as given. */
	public function markNoConversion( $text, $noParse = false ) {
		return $text;
	}

	/** No conversion: the key is returned as given. */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Accept a variant only when its lower-cased form equals the language's own code.
	 *
	 * @return string|null The lower-cased variant, or null when it is null or does not match
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant === null ) {
			return null;
		}

		$lowered = strtolower( $variant );
		if ( $lowered === $this->language->getCode() ) {
			return $lowered;
		}
		return null;
	}

	/** No translation: the text is returned as given. */
	public function translate( $text, $variant ) {
		return $text;
	}

	/** Intentionally does nothing. */
	public function updateConversionTable( LinkTarget $linkTarget ) {
	}

	/**
	 * Used by test suites which need to reset the converter state.
	 *
	 * Called by ParserTestRunner with the help of TestingAccessWrapper
	 */
	private function reloadTables() {
	}

	/** True only when getVariants() lists more than one variant. */
	public function hasVariants() {
		$variantCount = count( $this->getVariants() );
		return $variantCount > 1;
	}

	/** Whether $variant is non-empty and survives validateVariant() unchanged. */
	public function hasVariant( $variant ) {
		return $variant && ( $variant === $this->validateVariant( $variant ) );
	}

	/** HTML-escaped result of convert(). */
	public function convertHtml( $text ) {
		return htmlspecialchars( $this->convert( $text ) );
	}
}
PK ! �(p2B: B: ConverterRule.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
*
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
 */

namespace MediaWiki\Language;

use MediaWiki\Logger\LoggerFactory;
use StringUtils;

/**
 * The rules used for language conversion, this processes the rules
 * extracted by Parser from the `-{ }-` wikitext syntax.
 *
 * @ingroup Language
 */
class ConverterRule {
	/**
	 * @var string original text in -{text}-
	 */
	public $mText;
	/**
	 * @var LanguageConverter
	 */
	public $mConverter;
	/** @var string|false */
	public $mRuleDisplay = '';
	/** @var string|false */
	public $mRuleTitle = false;
	/**
	 * @var string the text of the rules
	 */
	public $mRules = '';
	/** @var string */
	public $mRulesAction = 'none';
	/** @var array */
	public $mFlags = [];
	/** @var array */
	public $mVariantFlags = [];
	/** @var array */
	public $mConvTable = [];
	/**
	 * @var array of the translation in each variant
	 */
	public $mBidtable = [];
	/**
	 * @var array of the translation in each variant
	 */
	public $mUnidtable = [];

	/**
	 * @param string $text The text between -{ and }-
	 * @param LanguageConverter $converter
	 */
	public function __construct( $text, LanguageConverter $converter ) {
		$this->mConverter = $converter;
		$this->mText = $text;
	}

	/**
	 * Return the bidirectional-table text of the first listed variant that has one.
	 *
	 * @param array|string $variants Variant language code
	 * @return string|false Translated text, or false when none of the variants
	 *   (or no variant at all) has an entry
	 */
	public function getTextInBidtable( $variants ) {
		// A single code is handled as a one-element list; an empty list
		// simply falls through to the "not found" result
		foreach ( (array)$variants as $candidate ) {
			if ( isset( $this->mBidtable[$candidate] ) ) {
				return $this->mBidtable[$candidate];
			}
		}
		return false;
	}

	/**
	 * Parse flags with syntax -{FLAG| ...
}-
	 *
	 * Splits $this->mText at the first '|', keeps the recognised flags from the
	 * left part and stores the results in $this->mFlags, $this->mVariantFlags
	 * and $this->mRules.
	 */
	private function parseFlags() {
		$text = $this->mText;
		$flags = [];
		$variantFlags = [];

		$pipeAt = strpos( $text, '|' );
		if ( $pipeAt !== false ) {
			$knownFlags = $this->mConverter->getFlags();
			foreach ( StringUtils::explode( ';', substr( $text, 0, $pipeAt ) ) as $rawFlag ) {
				$rawFlag = trim( $rawFlag );
				if ( isset( $knownFlags[$rawFlag] ) ) {
					$flags[$knownFlags[$rawFlag]] = true;
				}
			}
			$text = strval( substr( $text, $pipeAt + 1 ) );
		}

		// R, N and - each override every other flag; R wins over N, N over -
		$exclusive = null;
		foreach ( [ 'R', 'N', '-' ] as $candidate ) {
			if ( isset( $flags[$candidate] ) ) {
				$exclusive = $candidate;
				break;
			}
		}

		if ( !$flags ) {
			$flags['S'] = true;
		} elseif ( $exclusive !== null ) {
			// remove other flags
			$flags = [ $exclusive => true ];
		} elseif ( count( $flags ) === 1 && isset( $flags['T'] ) ) {
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T
			$kept = [ '+' => true, 'H' => true ];
			foreach ( [ 'T', 'D' ] as $optional ) {
				if ( isset( $flags[$optional] ) ) {
					$kept[$optional] = true;
				}
			}
			$flags = $kept;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->getVariants() );
			if ( $variantFlags ) {
				$variantFlags = array_fill_keys( $variantFlags, true );
				$flags = [];
			}
		}

		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Generate conversion table.
*/
	private function parseRules() {
		$rules = $this->mRules;
		$bidtable = [];
		$unidtable = [];
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		// Split text according to $varsep_pattern, but ignore semicolons from HTML entities
		// (the entity's trailing ';' is swapped for \x01 before the split and restored after)
		$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
		$choice = preg_split( $varsep_pattern, $rules );
		// @phan-suppress-next-line PhanTypeComparisonFromArray
		if ( $choice === false ) {
			// The split itself failed: log it and carry on with no rules
			$error = preg_last_error();
			$errorText = preg_last_error_msg();
			LoggerFactory::getInstance( 'parser' )->warning(
				'ConverterRule preg_split error: {code} {errorText}',
				[
					'code' => $error,
					'errorText' => $errorText
				]
			);
			$choice = [];
		}
		$choice = str_replace( "\x01", ';', $choice );

		foreach ( $choice as $c ) {
			// Each rule is "variant:text" or "from=>variant:text"
			$v = explode( ':', $c, 2 );
			if ( count( $v ) !== 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			$vv = $this->mConverter->validateVariant( $v );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( count( $u ) === 1 && $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			} elseif ( count( $u ) === 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				$vv = $this->mConverter->validateVariant( $v );
				// if $from is empty, strtr() could return a wrong result.
				if ( array_key_exists( $vv, $unidtable )
					&& !is_array( $unidtable[$vv] )
					&& $from !== ''
					&& $vv
				) {
					$unidtable[$vv] = [ $from => $to ];
				} elseif ( $from !== '' && $vv ) {
					$unidtable[$vv][$from] = $to;
				}
			}
			// syntax error, pass
			// (one unknown variant discards every rule collected so far)
			if ( !isset( $this->mConverter->getVariantNames()[$vv] ) ) {
				$bidtable = [];
				$unidtable = [];
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build a textual description of the parsed rules from the variant names
	 * and the converter's description separators.
	 *
	 * @return string
	 */
	private function getRulesDesc() {
		$codesep = $this->mConverter->getDescCodeSeparator();
		$varsep = $this->mConverter->getDescVarSeparator();
		$text = '';
		// Bidirectional rules: "<variant name><codesep><text><varsep>"
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->getVariantNames()[$k] . "$codesep$v$varsep";
		}
		// Unidirectional rules: "<from>⇒<variant name><codesep><to><varsep>"
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->getVariantNames()[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Parse rules conversion.
	 *
	 * Picks the text to display for $variant, trying in order: the bidirectional
	 * table, the variant's fallbacks, the unidirectional table, and finally the
	 * first available text when manual conversion is disabled for the variant.
	 *
	 * @param string $variant
	 *
	 * @return string
	 */
	private function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) === 0 ) {
			// No rules were parsed: show the rule text itself
			return $this->mRules;
		}

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );
		// or display current variant in fallbacks
		if ( $disp === false ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}
		// or display current variant in unidirectional array
		if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] )[0];
		}
		// or display first text under disable manual convert
		if ( $disp === false && $this->mConverter->getManualLevel()[$variant] === 'disable' ) {
			if ( count( $bidtable ) > 0 ) {
				$disp = array_values( $bidtable )[0];
			} else {
				$disp = array_values( array_values( $unidtable )[0] )[0];
			}
		}

		return $disp;
	}

	/**
	 * Similar to getRuleConvertedStr(), but this prefers to use the
	 * original page title if $variant === $this->mConverter->getMainCode(),
	 * and may return false in this case (so this title conversion rule
	 * will be ignored and the original title is shown).
	 *
	 * @since 1.22
	 * @param string $variant The variant code to display page title in
	 * @return string|false The converted title or false if just page name
	 */
	private function getRuleConvertedTitle( $variant ) {
		if ( $variant === $this->mConverter->getMainCode() ) {
			// If a string targeting exactly this variant is set,
			// use it. Otherwise, just return false, so the real
			// page name can be shown (and because variant === main,
			// there'll be no further automatic conversion).
			$disp = $this->getTextInBidtable( $variant );
			if ( $disp ) {
				return $disp;
			}
			if ( array_key_exists( $variant, $this->mUnidtable ) ) {
				$disp = array_values( $this->mUnidtable[$variant] )[0];
			}
			// Assigned above or still false.
			return $disp;
		}

		return $this->getRuleConvertedStr( $variant );
	}

	/**
	 * Generate conversion table for all text.
	 *
	 * Fills $this->mConvTable from the bidirectional and unidirectional tables,
	 * honouring each variant's manual conversion level.
	 */
	private function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = [];
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->getManualLevel();

		// Variants already seen that have a bidirectional entry
		$vmarked = [];
		foreach ( $this->mConverter->getVariants() as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] === 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] === 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
				&& isset( $unidtable[$v] )
			) {
				if ( isset( $this->mConvTable[$v] ) ) {
					// On a key clash the unidirectional entry wins (left operand of +)
					$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse rules and flags.
* @param string|null $variant Variant language code; the converter's
	 *   preferred variant is used when empty
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else {
				// if the current variant is not in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			$this->mFlags = $flags = [ 'R' => true ];
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode =&gt; HTML entities modified by Sanitizer::internalRemoveHtmlTags
			// The search string must be the entity-encoded arrow; searching for the
			// literal '=>' would replace it with itself and leave encoded
			// unidirectional rules undetected by parseRules().
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if the text in -{A/H/-|text}- is non-empty but without rules
				if ( $rules !== '' ) {
					foreach ( $this->mConverter->getVariants() as $v ) {
						$this->mBidtable[$v] = $rules;
					}
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				// No usable rules at all: fall back to raw output
				$this->mFlags = $flags = [ 'R' => true ];
			}
		}

		// false marks "no flag produced any display text"; turned into an error below
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					$this->mRuleDisplay = $this->mConverter->getVariantNames()[$ruleVar] ?? '';
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error-case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Checks if there are conversion rules.
	 * @return bool
	 */
	public function hasRules() {
		return $this->mRules !== '';
	}

	/**
	 * Get display text on markup -{...}-
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get converted title.
	 * @return string|false
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Return how to deal with conversion rules.
	 * @return string
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get conversion table. (bidirectional and unidirectional
	 * conversion table)
	 * @return array
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get conversion rules string.
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get conversion flags.
	 * @return array
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}

/** @deprecated class alias since 1.43 */
class_alias( ConverterRule::class, 'ConverterRule' );
PK !
�) LanguageConverterFactory.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ namespace MediaWiki\Languages; use BanConverter; use CrhConverter; use EnConverter; use GanConverter; use IuConverter; use KuConverter; use MediaWiki\Config\ServiceOptions; use MediaWiki\Language\ILanguageConverter; use MediaWiki\Language\Language; use MediaWiki\MainConfigNames; use MediaWiki\StubObject\StubUserLang; use MniConverter; use ShConverter; use ShiConverter; use SrConverter; use TgConverter; use TlyConverter; use TrivialLanguageConverter; use UzConverter; use Wikimedia\ObjectFactory\ObjectFactory; use WuuConverter; use ZghConverter; use ZhConverter; /** * An interface for creating language converters. 
*
 * @since 1.35
 * @ingroup Language
 */
class LanguageConverterFactory {
	/** @var ILanguageConverter[] Converters already created, keyed by language code */
	private $cache = [];
	/**
	 * Object specs for the languages that have a dedicated converter class,
	 * keyed by language code. The constructor adds an 'en' entry when the
	 * UsePigLatinVariant option is set.
	 *
	 * @var array
	 */
	private $converterList = [
		'ban' => [
			'class' => BanConverter::class,
		],
		'crh' => [
			'class' => CrhConverter::class,
		],
		'gan' => [
			'class' => GanConverter::class,
		],
		'iu' => [
			'class' => IuConverter::class,
		],
		'ku' => [
			'class' => KuConverter::class,
		],
		'mni' => [
			'class' => MniConverter::class,
		],
		'shi' => [
			'class' => ShiConverter::class,
		],
		'sh' => [
			'class' => ShConverter::class,
		],
		'sr' => [
			'class' => SrConverter::class,
		],
		'tg' => [
			'class' => TgConverter::class,
		],
		'tly' => [
			'class' => TlyConverter::class,
		],
		'uz' => [
			'class' => UzConverter::class,
		],
		'wuu' => [
			'class' => WuuConverter::class,
		],
		'zgh' => [
			'class' => ZghConverter::class,
		],
		'zh' => [
			'class' => ZhConverter::class,
		],
	];

	// Spec used for every language code that has no entry in $converterList
	private const DEFAULT_CONVERTER = [
		'class' => TrivialLanguageConverter::class,
		'services' => [
			'TitleFormatter',
		]
	];

	// Spec registered for 'en' when the UsePigLatinVariant option is enabled
	private const EN_CONVERTER = [
		'class' => EnConverter::class,
	];

	/**
	 * @internal For use by ServiceWiring
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::UsePigLatinVariant,
		MainConfigNames::DisableLangConversion,
		MainConfigNames::DisableTitleConversion,
	];

	private ServiceOptions $options;
	private ObjectFactory $objectFactory;

	/**
	 * @var callable callback of "() : Language"
	 */
	private $defaultLanguage;

	/**
	 * @param ServiceOptions $options
	 * @param ObjectFactory $objectFactory
	 * @param callable $defaultLanguage callback of "() : Language", should return
	 *  default language. Used in getLanguageConverter when $language is null.
	 *
	 * @internal Should be called from MediaWikiServices only.
*/ public function __construct( ServiceOptions $options, ObjectFactory $objectFactory, callable $defaultLanguage ) { $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); $this->options = $options; $this->objectFactory = $objectFactory; if ( $options->get( MainConfigNames::UsePigLatinVariant ) ) { $this->converterList['en'] = self::EN_CONVERTER; } $this->defaultLanguage = $defaultLanguage; } /** * Returns Converter instance for a given language object * * @param Language|StubUserLang $lang * @return ILanguageConverter */ private function instantiateConverter( $lang ): ILanguageConverter { $code = mb_strtolower( $lang->getCode() ); $spec = $this->converterList[$code] ?? self::DEFAULT_CONVERTER; // ObjectFactory::createObject accepts an array, not just a callable (phan bug) // @phan-suppress-next-line PhanTypeInvalidCallableArrayKey, PhanTypeInvalidCallableArraySize return $this->objectFactory->createObject( $spec, [ 'assertClass' => ILanguageConverter::class, 'extraArgs' => [ $lang ], ] ); } /** * Provide a LanguageConverter for given language * * @param Language|StubUserLang|null $language for which a LanguageConverter should be provided. * If it is null, then the LanguageConverter provided for current content language as returned * by the callback provided to the constructor. * * @return ILanguageConverter */ public function getLanguageConverter( $language = null ): ILanguageConverter { $lang = $language ?? ( $this->defaultLanguage )(); if ( isset( $this->cache[$lang->getCode()] ) ) { return $this->cache[$lang->getCode()]; } // @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive $converter = $this->instantiateConverter( $lang ); $this->cache[$lang->getCode()] = $converter; return $converter; } /** * Whether to disable language variant conversion. 
* * @return bool */ public function isConversionDisabled() { return $this->options->get( MainConfigNames::DisableLangConversion ); } /** * Whether to disable language variant conversion for links. * * @return bool */ public function isLinkConversionDisabled() { return $this->options->get( MainConfigNames::DisableLangConversion ) || // Note that this configuration option is misnamed. $this->options->get( MainConfigNames::DisableTitleConversion ); } } PK ! �s&c)� )� LanguageConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* http://www.gnu.org/copyleft/gpl.html * * @file * @author Zhengzhu Feng <zhengzhu@gmail.com> * @author fdcn <fdcn64@gmail.com> * @author shinjiman <shinjiman@gmail.com> * @author PhiLiP <philip.npc@gmail.com> */ namespace MediaWiki\Language; use InvalidArgumentException; use MediaWiki\Context\RequestContext; use MediaWiki\Debug\DeprecationHelper; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Html\Html; use MediaWiki\Linker\LinkTarget; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MainConfigNames; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; use MediaWiki\Parser\Parser; use MediaWiki\Parser\Sanitizer; use MediaWiki\Revision\RevisionRecord; use MediaWiki\Revision\SlotRecord; use MediaWiki\StubObject\StubUserLang; use MediaWiki\Title\Title; use MediaWiki\User\User; use RuntimeException; use StringUtils; use UnexpectedValueException; use Wikimedia\ObjectCache\BagOStuff; /** * Base class for multi-variant language conversion. * * @ingroup Language */ abstract class LanguageConverter implements ILanguageConverter { use DeprecationHelper; /** * languages supporting variants * @since 1.20 * @var string[] */ public static $languagesWithVariants = [ 'ban', 'en', 'crh', 'gan', 'iu', 'ku', 'mni', 'sh', 'shi', 'sr', 'tg', 'tly', 'uz', 'wuu', 'zgh', 'zh', ]; /** * static default variant of languages supporting variants * for use with DefaultOptionsLookup.php * @since 1.40 * @var array<string,string> */ public static $languagesWithStaticDefaultVariant = [ 'ban' => 'ban', 'en' => 'en', 'crh' => 'crh', 'gan' => 'gan', 'iu' => 'iu', 'ku' => 'ku', 'mni' => 'mni', 'sh' => 'sh-latn', 'shi' => 'shi', 'sr' => 'sr', 'tg' => 'tg', 'tly' => 'tly', 'uz' => 'uz', 'wuu' => 'wuu', 'zgh' => 'zgh', 'zh' => 'zh', ]; /** @var bool */ private $mTablesLoaded = false; /** @var ReplacementArray[] */ protected $mTables = []; /** @var Language|StubUserLang */ private $mLangObj; /** @var string|false */ private $mConvRuleTitle = false; /** @var string|null */ private 
$mURLVariant; /** @var string|null */ private $mUserVariant; /** @var string|null */ private $mHeaderVariant; /** @var int */ private $mMaxDepth = 10; /** @var string|null */ private $mVarSeparatorPattern; private const CACHE_VERSION_KEY = 'VERSION 7'; /** * @param Language|StubUserLang $langobj */ public function __construct( $langobj ) { $this->mLangObj = $langobj; } /** * Get the language code with converter (the "main" language code). * Page language code would be the same of the language code with converter. * Note that this code might not be included as one of the variant languages. * @since 1.36 * * @return string */ abstract public function getMainCode(): string; /** * Get static default variant. * For use of specify the default variant form when it different from the * default "unconverted/mixed-variant form". * @since 1.40 * * @return string */ protected function getStaticDefaultVariant(): string { $code = $this->getMainCode(); return self::$languagesWithStaticDefaultVariant[$code] ?? $code; } /** * Get supported variants of the language. * @since 1.36 * * @return array */ abstract protected function getLanguageVariants(): array; /** * Get language variants fallbacks. * @since 1.36 * * @return array */ abstract public function getVariantsFallbacks(): array; /** * Get the strings that map to the flags. 
* @since 1.36 * * @return array */ final public function getFlags(): array { $defaultflags = [ // 'S' show the converted text // '+' add rules for alltext // 'E' the flags have an error // these flags above are reserved for program 'A' => 'A', // add rule for convert code (all text converted) 'T' => 'T', // title convert 'R' => 'R', // raw content 'D' => 'D', // convert description (subclass implement) '-' => '-', // remove convert (not implement) 'H' => 'H', // add rule for convert code (but no display in placed code) 'N' => 'N', // current variant name ]; $flags = array_merge( $defaultflags, $this->getAdditionalFlags() ); foreach ( $this->getVariants() as $v ) { $flags[$v] = $v; } return $flags; } /** * Provides additional flags for converter. By default, it returns empty array and * typically should be overridden by implementation of converter. * * @return array */ protected function getAdditionalFlags(): array { return []; } /** * Get manual level limit for supported variants. * @since 1.36 * * @return array */ final public function getManualLevel() { $manualLevel = $this->getAdditionalManualLevel(); $result = []; foreach ( $this->getVariants() as $v ) { if ( array_key_exists( $v, $manualLevel ) ) { $result[$v] = $manualLevel[$v]; } else { $result[$v] = 'bidirectional'; } } return $result; } /** * Provides additional flags for converter. By default, this function returns an empty array and * typically should be overridden by the implementation of converter. * @since 1.36 * * @return array */ protected function getAdditionalManualLevel(): array { return []; } /** * Get desc code separator. By default returns ":", can be overridden by * implementation of converter. * @since 1.36 * * @return string */ public function getDescCodeSeparator(): string { return ':'; } /** * Get desc var separator. By default returns ";", can be overridden by * implementation of converter. 
* @since 1.36 * * @return string */ public function getDescVarSeparator(): string { return ';'; } /** * Get variant names. * * @return array */ public function getVariantNames(): array { return MediaWikiServices::getInstance() ->getLanguageNameUtils() ->getLanguageNames(); } final public function getVariants() { $disabledVariants = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::DisabledVariants ); return array_diff( $this->getLanguageVariants(), $disabledVariants ); } public function getVariantFallbacks( $variant ) { return $this->getVariantsFallbacks()[$variant] ?? $this->getStaticDefaultVariant(); } public function getConvRuleTitle() { return $this->mConvRuleTitle; } public function getPreferredVariant() { $req = $this->getURLVariant(); $services = MediaWikiServices::getInstance(); ( new HookRunner( $services->getHookContainer() ) )->onGetLangPreferredVariant( $req ); if ( !$req ) { $user = RequestContext::getMain()->getUser(); // NOTE: For some calls there may not be a context user or session that is safe // to use, see (T235360) // Use case: During user autocreation, UserNameUtils::isUsable is called which uses interface // messages for reserved usernames. if ( $user->isSafeToLoad() && $user->isRegistered() ) { $req = $this->getUserVariant( $user ); } else { $req = $this->getHeaderVariant(); } } $defaultLanguageVariant = $services->getMainConfig() ->get( MainConfigNames::DefaultLanguageVariant ); if ( !$req && $defaultLanguageVariant ) { $req = $this->validateVariant( $defaultLanguageVariant ); } $req = $this->validateVariant( $req ); // This function, unlike the other get*Variant functions, is // not memoized (i.e., there return value is not cached) since // new information might appear during processing after this // is first called. return $req ?? 
$this->getStaticDefaultVariant(); } public function getDefaultVariant() { $defaultLanguageVariant = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::DefaultLanguageVariant ); $req = $this->getURLVariant() ?? $this->getHeaderVariant(); if ( !$req && $defaultLanguageVariant ) { $req = $this->validateVariant( $defaultLanguageVariant ); } return $req ?? $this->getStaticDefaultVariant(); } public function validateVariant( $variant = null ) { if ( $variant === null ) { return null; } // Our internal variants are always lower-case; the variant we // are validating may have mixed cases. $variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) ); if ( in_array( $variant, $this->getVariants() ) ) { return $variant; } // Browsers are supposed to use BCP 47 standard in the // Accept-Language header, but not all of our internal // mediawiki variant codes are BCP 47. Map BCP 47 code // to our internal code. foreach ( $this->getVariants() as $v ) { // Case-insensitive match (BCP 47 is mixed-case) if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) { return $v; } } return null; } public function getURLVariant() { if ( $this->mURLVariant ) { return $this->mURLVariant; } $request = RequestContext::getMain()->getRequest(); // see if the preference is set in the request $ret = $request->getText( 'variant' ); if ( !$ret ) { $ret = $request->getVal( 'uselang' ); } $this->mURLVariant = $this->validateVariant( $ret ); return $this->mURLVariant; } /** * Determine if the user has a variant set. * * @param User $user * @return string|null Variant if one found, null otherwise */ protected function getUserVariant( User $user ) { // This should only be called within the class after the user is known to be // safe to load and logged in, but check just in case. 
if ( !$user->isSafeToLoad() ) { return null; } if ( !$this->mUserVariant ) { $services = MediaWikiServices::getInstance(); if ( $user->isRegistered() ) { // Get language variant preference from logged in users if ( $this->getMainCode() === $services->getContentLanguage()->getCode() ) { $optionName = 'variant'; } else { $optionName = 'variant-' . $this->getMainCode(); } } else { // figure out user lang without constructing wgLang to avoid // infinite recursion $optionName = 'language'; } $ret = $services->getUserOptionsLookup()->getOption( $user, $optionName ); $this->mUserVariant = $this->validateVariant( $ret ); } return $this->mUserVariant; } /** * Determine the language variant from the Accept-Language header. * * @return string|null Variant if one found, null otherwise */ protected function getHeaderVariant() { if ( $this->mHeaderVariant ) { return $this->mHeaderVariant; } $request = RequestContext::getMain()->getRequest(); // See if some supported language variant is set in the // HTTP header. $languages = array_keys( $request->getAcceptLang() ); if ( !$languages ) { return null; } $fallbackLanguages = []; foreach ( $languages as $language ) { $this->mHeaderVariant = $this->validateVariant( $language ); if ( $this->mHeaderVariant ) { break; } // To see if there are fallbacks of current language. // We record these fallback variants, and process // them later. 
$fallbacks = $this->getVariantFallbacks( $language ); if ( is_string( $fallbacks ) && $fallbacks !== $this->getStaticDefaultVariant() ) { $fallbackLanguages[] = $fallbacks; } elseif ( is_array( $fallbacks ) ) { $fallbackLanguages = array_merge( $fallbackLanguages, $fallbacks ); } } if ( !$this->mHeaderVariant ) { // process fallback languages now $fallback_languages = array_unique( $fallbackLanguages ); foreach ( $fallback_languages as $language ) { $this->mHeaderVariant = $this->validateVariant( $language ); if ( $this->mHeaderVariant ) { break; } } } return $this->mHeaderVariant; } public function autoConvert( $text, $toVariant = false ) { $this->loadTables(); if ( !$toVariant ) { $toVariant = $this->getPreferredVariant(); if ( !$toVariant ) { return $text; } } if ( $this->guessVariant( $text, $toVariant ) ) { return $text; } /** * We convert everything except: * 1. HTML markups (anything between < and >) * 2. HTML entities * 3. placeholders created by the parser * IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). * Minimize the use of backtracking where possible. */ static $reg; if ( $reg === null ) { $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; // Optimize for the common case where these tags have // few or no children. Thus try and possessively get as much as // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between <code> tags $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of <script> tags $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|'; // disable conversion of <pre> tags $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|'; // The "|.*+)" at the end, is in case we missed some part of html syntax, // we will fail securely (hopefully) by matching the rest of the string. 
$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|'; $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag . '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s'; } $startPos = 0; $sourceBlob = ''; $literalBlob = ''; // Guard against delimiter nulls in the input // (should never happen: see T159174) $text = str_replace( "\000", '', $text ); $text = str_replace( "\004", '', $text ); $markupMatches = null; $elementMatches = null; // We add a marker (\004) at the end of text, to ensure we always match the // entire text (Otherwise, pcre.backtrack_limit might cause silent failure) $textWithMarker = $text . "\004"; while ( $startPos < strlen( $text ) ) { if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) { $elementPos = $markupMatches[0][1]; $element = $markupMatches[0][0]; if ( $element === "\004" ) { // We hit the end. $elementPos = strlen( $text ); $element = ''; } elseif ( substr( $element, -1 ) === "\004" ) { // This can sometimes happen if we have // unclosed html tags. For example, // when converting a title attribute // during a recursive call that contains // a < e.g. <div title="<">. $element = substr( $element, 0, -1 ); } } else { // If we hit here, then Language Converter could be tricked // into doing an XSS, so we refuse to translate. // If expected input manages to reach this code path, // we should consider it a bug. $log = LoggerFactory::getInstance( 'languageconverter' ); $log->error( "Hit pcre.backtrack_limit in " . __METHOD__ . ". Disabling language conversion for this page.", [ "method" => __METHOD__, "variant" => $toVariant, "startOfText" => substr( $text, 0, 500 ) ] ); return $text; } // Queue the part before the markup for translation in a batch $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . 
"\000"; // Advance to the next position $startPos = $elementPos + strlen( $element ); // Translate any alt or title attributes inside the matched element if ( $element !== '' && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches ) ) { // FIXME, this decodes entities, so if you have something // like <div title="foo<bar"> the bar won't get // translated since after entity decoding it looks like // unclosed html and we call this method recursively // on attributes. $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] ); // Ensure self-closing tags stay self-closing. $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : ''; $changed = false; foreach ( [ 'title', 'alt' ] as $attrName ) { if ( !isset( $attrs[$attrName] ) ) { continue; } $attr = $attrs[$attrName]; // Don't convert URLs if ( !str_contains( $attr, '://' ) ) { $attr = $this->recursiveConvertTopLevel( $attr, $toVariant ); } if ( $attr !== $attrs[$attrName] ) { $attrs[$attrName] = $attr; $changed = true; } } if ( $changed ) { // @phan-suppress-next-line SecurityCheck-DoubleEscaped Explained above with decodeTagAttributes $element = $elementMatches[1] . Html::expandAttributes( $attrs ) . $close . $elementMatches[3]; } } $literalBlob .= $element . 
"\000"; } // Do the main translation batch $translatedBlob = $this->translate( $sourceBlob, $toVariant ); // Put the output back together $translatedIter = StringUtils::explode( "\000", $translatedBlob ); $literalIter = StringUtils::explode( "\000", $literalBlob ); $output = ''; while ( $translatedIter->valid() && $literalIter->valid() ) { $output .= $translatedIter->current(); $output .= $literalIter->current(); $translatedIter->next(); $literalIter->next(); } return $output; } public function translate( $text, $variant ) { // If $text is empty or only includes spaces, do nothing // Otherwise translate it if ( trim( $text ) ) { $this->loadTables(); $text = $this->mTables[$variant]->replace( $text ); } return $text; } /** * @param string $text Text to convert * @param string $variant Variant language code * @return string Translated text */ protected function translateWithoutRomanNumbers( $text, $variant ) { $breaks = '[^\w\x80-\xff]'; // regexp for roman numbers // Lookahead assertion ensures $roman doesn't match the empty string $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks . $roman . '$|' . $breaks . $roman . $breaks . '/'; $matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); $m = array_shift( $matches ); $this->loadTables(); if ( !isset( $this->mTables[$variant] ) ) { throw new RuntimeException( "Broken variant table: " . 
implode( ',', array_keys( $this->mTables ) ) ); } $ret = $this->mTables[$variant]->replace( $m[0] ); $mstart = (int)$m[1] + strlen( $m[0] ); foreach ( $matches as $m ) { $ret .= substr( $text, $mstart, (int)$m[1] - $mstart ); $ret .= $this->translate( $m[0], $variant ); $mstart = (int)$m[1] + strlen( $m[0] ); } return $ret; } public function autoConvertToAllVariants( $text ) { $this->loadTables(); $ret = []; foreach ( $this->getVariants() as $variant ) { $ret[$variant] = $this->translate( $text, $variant ); } return $ret; } /** * Apply manual conversion rules. * * @param ConverterRule $convRule */ protected function applyManualConv( ConverterRule $convRule ) { // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom // title conversion. // T26072: $mConvRuleTitle was overwritten by other manual // rule(s) not for title, this breaks the title conversion. $newConvRuleTitle = $convRule->getTitle(); if ( $newConvRuleTitle !== false ) { // So I add an empty check for getTitle() $this->mConvRuleTitle = $newConvRuleTitle; } // merge/remove manual conversion rules to/from global table $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); foreach ( $convTable as $variant => $pair ) { $v = $this->validateVariant( $variant ); if ( !$v ) { continue; } if ( $action == 'add' ) { // More efficient than array_merge(), about 2.5 times. foreach ( $pair as $from => $to ) { $this->mTables[$v]->setPair( $from, $to ); } } elseif ( $action == 'remove' ) { $this->mTables[$v]->removeArray( $pair ); } } } public function convertSplitTitle( $title ) { $variant = $this->getPreferredVariant(); $index = $title->getNamespace(); $nsText = $this->convertNamespace( $index, $variant ); $name = str_replace( '_', ' ', $title->getDBKey() ); $mainText = $this->translate( $name, $variant ); return [ $nsText, ':', $mainText ]; } public function convertTitle( $title ) { [ $nsText, $nsSeparator, $mainText ] = $this->convertSplitTitle( $title ); return $nsText !== '' ? $nsText . 
$nsSeparator . $mainText : $mainText; } public function convertNamespace( $index, $variant = null ) { if ( $index === NS_MAIN ) { return ''; } $variant ??= $this->getPreferredVariant(); $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); $key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant ); return $cache->getWithSetCallback( $key, BagOStuff::TTL_MINUTE, function () use ( $index, $variant ) { return $this->computeNsVariantText( $index, $variant ); } ); } /** * @param int $index * @param string|null $variant * @return string */ private function computeNsVariantText( int $index, ?string $variant ): string { $nsVariantText = false; // First check if a message gives a converted name in the target variant. $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant ); if ( $nsConvMsg->exists() ) { $nsVariantText = $nsConvMsg->plain(); } // Then check if a message gives a converted name in content language // which needs extra translation to the target variant. if ( $nsVariantText === false ) { $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage(); if ( $nsConvMsg->exists() ) { $nsVariantText = $this->translate( $nsConvMsg->plain(), $variant ); } } if ( $nsVariantText === false ) { // No message exists, retrieve it from the target variant's namespace names. $mLangObj = MediaWikiServices::getInstance() ->getLanguageFactory() ->getLanguage( $variant ); $nsVariantText = $mLangObj->getFormattedNsText( $index ); } return $nsVariantText; } public function convert( $text ) { $variant = $this->getPreferredVariant(); return $this->convertTo( $text, $variant ); } public function convertTo( $text, $variant, bool $clearState = true ) { $languageConverterFactory = MediaWikiServices::getInstance()->getLanguageConverterFactory(); if ( $languageConverterFactory->isConversionDisabled() ) { return $text; } // Reset converter state for a new converter run. 
if ( $clearState ) { $this->mConvRuleTitle = false; } return $this->recursiveConvertTopLevel( $text, $variant ); } /** * Recursively convert text on the outside. Allow to use nested * markups to custom rules. * * @param string $text Text to be converted * @param string $variant The target variant code * @param int $depth Depth of recursion * @return string Converted text */ protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) { $startPos = 0; $out = ''; $length = strlen( $text ); $shouldConvert = !$this->guessVariant( $text, $variant ); $continue = true; $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)'; $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)'; // phpcs:ignore Generic.Files.LineLength $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)'; while ( $startPos < $length && $continue ) { $continue = preg_match( // Only match "-{" outside the html. "/$noScript|$noStyle|$noHtml|-\{/", $text, $m, PREG_OFFSET_CAPTURE, $startPos ); if ( !$continue ) { // No more markup, append final segment $fragment = substr( $text, $startPos ); $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; return $out; } // Offset of the match of the regex pattern. $pos = $m[0][1]; // Append initial segment $fragment = substr( $text, $startPos, $pos - $startPos ); $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; // -{ marker found, not in attribute // Advance position up to -{ marker. $startPos = $pos; // Do recursive conversion // Note: This passes $startPos by reference, and advances it. $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); } return $out; } /** * Recursively convert text on the inside. 
* * @param string $text Text to be converted * @param string $variant The target variant code * @param int &$startPos * @param int $depth Depth of recursion * @return string Converted text */ protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) { // Quick check (no function calls) if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) { throw new InvalidArgumentException( __METHOD__ . ': invalid input string' ); } $startPos += 2; $inner = ''; $warningDone = false; $length = strlen( $text ); while ( $startPos < $length ) { $m = false; preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos ); if ( !$m ) { // Unclosed rule break; } $token = $m[0][0]; $pos = $m[0][1]; // Markup found // Append initial segment $inner .= substr( $text, $startPos, $pos - $startPos ); // Advance position $startPos = $pos; switch ( $token ) { case '-{': // Check max depth if ( $depth >= $this->mMaxDepth ) { $inner .= '-{'; if ( !$warningDone ) { $inner .= '<span class="error">' . wfMessage( 'language-converter-depth-warning' ) ->numParams( $this->mMaxDepth )->inContentLanguage()->text() . '</span>'; $warningDone = true; } $startPos += 2; break; } // Recursively parse another rule $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); break; case '}-': // Apply the rule $startPos += 2; $rule = new ConverterRule( $inner, $this ); $rule->parse( $variant ); $this->applyManualConv( $rule ); return $rule->getDisplay(); default: throw new UnexpectedValueException( __METHOD__ . ': invalid regex match' ); } } // Unclosed rule if ( $startPos < $length ) { $inner .= substr( $text, $startPos ); } $startPos = $length; return '-{' . $this->autoConvert( $inner, $variant ); } public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { # If the article has already existed, there is no need to # check it again. Otherwise it may cause a fault. 
if ( $nt instanceof LinkTarget ) { $nt = Title::castFromLinkTarget( $nt ); if ( $nt->exists() ) { return; } } if ( $nt instanceof PageIdentity && $nt->exists() ) { return; } $request = RequestContext::getMain()->getRequest(); $isredir = $request->getText( 'redirect', 'yes' ); $action = $request->getText( 'action' ); if ( $action == 'edit' && $request->getBool( 'redlink' ) ) { $action = 'view'; } $linkconvert = $request->getText( 'linkconvert', 'yes' ); $disableLinkConversion = MediaWikiServices::getInstance()->getLanguageConverterFactory() ->isLinkConversionDisabled(); $linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory(); $linkBatch = $linkBatchFactory->newLinkBatch(); $ns = NS_MAIN; if ( $disableLinkConversion || ( !$ignoreOtherCond && ( $isredir == 'no' || $action == 'edit' || $action == 'submit' || $linkconvert == 'no' ) ) ) { return; } if ( is_object( $nt ) ) { $ns = $nt->getNamespace(); } $variants = $this->autoConvertToAllVariants( $link ); if ( !$variants ) { // give up return; } $titles = []; foreach ( $variants as $v ) { if ( $v != $link ) { $varnt = Title::newFromText( $v, $ns ); if ( $varnt !== null ) { $linkBatch->addObj( $varnt ); $titles[] = $varnt; } } } // fetch all variants in single query $linkBatch->execute(); foreach ( $titles as $varnt ) { if ( $varnt->getArticleID() > 0 ) { $nt = $varnt; $link = $varnt->getText(); break; } } } public function getExtraHashOptions() { $variant = $this->getPreferredVariant(); return '!' . $variant; } public function guessVariant( $text, $variant ) { return false; } /** * Load default conversion tables. * * @return array */ abstract protected function loadDefaultTables(): array; /** * Load conversion tables either from the cache or the disk. * @private * @param bool $fromCache Whether to load from cache. Defaults to true. 
*/ protected function loadTables( $fromCache = true ) { $services = MediaWikiServices::getInstance(); $languageConverterCacheType = $services ->getMainConfig()->get( MainConfigNames::LanguageConverterCacheType ); if ( $this->mTablesLoaded ) { return; } $cache = $services->getObjectCacheFactory()->getInstance( $languageConverterCacheType ); $cacheKey = $cache->makeKey( 'conversiontables', $this->getMainCode(), md5( implode( ',', $this->getVariants() ) ), self::CACHE_VERSION_KEY ); if ( !$fromCache ) { $cache->delete( $cacheKey ); } $this->mTables = $cache->getWithSetCallback( $cacheKey, $cache::TTL_HOUR * 12, function () { // We will first load the default tables // then update them using things in MediaWiki:Conversiontable/* $tables = $this->loadDefaultTables(); foreach ( $this->getVariants() as $var ) { $cached = $this->parseCachedTable( $var ); $tables[$var]->mergeArray( $cached ); } $this->postLoadTables( $tables ); return $tables; } ); $this->mTablesLoaded = true; } /** * Hook for post-processing after conversion tables are loaded. * * @param ReplacementArray[] &$tables */ protected function postLoadTables( &$tables ) { } /** * Reload the conversion tables. * * Also used by test suites which need to reset the converter state. * * Called by ParserTestRunner with the help of TestingAccessWrapper */ private function reloadTables() { if ( $this->mTables ) { $this->mTables = []; } $this->mTablesLoaded = false; $this->loadTables( false ); } /** * Parse the conversion table stored in the cache. * * The tables should be in blocks of the following form: * -{ * word => word ; * word => word ; * ... * }- * * To make the tables more manageable, subpages are allowed * and will be parsed recursively if $recursive == true. * * @param string $code Language code * @param string $subpage Subpage name * @param bool $recursive Parse subpages recursively? Defaults to true. 
* * @return array */ private function parseCachedTable( $code, $subpage = '', $recursive = true ) { static $parsed = []; $key = 'Conversiontable/' . $code; if ( $subpage ) { $key .= '/' . $subpage; } if ( array_key_exists( $key, $parsed ) ) { return []; } $parsed[$key] = true; if ( $subpage === '' ) { $messageCache = MediaWikiServices::getInstance()->getMessageCache(); $txt = $messageCache->getMsgFromNamespace( $key, $code ); } else { $txt = false; $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key ); if ( $title && $title->exists() ) { $revision = MediaWikiServices::getInstance() ->getRevisionLookup() ->getRevisionByTitle( $title ); if ( $revision ) { $model = $revision->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )->getModel(); if ( $model == CONTENT_MODEL_WIKITEXT ) { // @phan-suppress-next-line PhanUndeclaredMethod $txt = $revision->getContent( SlotRecord::MAIN, RevisionRecord::RAW )->getText(); } // @todo in the future, use a specialized content model, perhaps based on json! } } } # Nothing to parse if there's no text if ( $txt === false || $txt === null || $txt === '' ) { return []; } // get all subpage links of the form // [[MediaWiki:Conversiontable/zh-xx/...|...]] $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) . 
':Conversiontable'; $subs = StringUtils::explode( '[[', $txt ); $sublinks = []; foreach ( $subs as $sub ) { $link = explode( ']]', $sub, 2 ); if ( count( $link ) != 2 ) { continue; } $b = explode( '|', $link[0], 2 ); $b = explode( '/', trim( $b[0] ), 3 ); if ( count( $b ) == 3 ) { $sublink = $b[2]; } else { $sublink = ''; } if ( $b[0] == $linkhead && $b[1] == $code ) { $sublinks[] = $sublink; } } // parse the mappings in this page $blocks = StringUtils::explode( '-{', $txt ); $ret = []; $first = true; foreach ( $blocks as $block ) { if ( $first ) { // Skip the part before the first -{ $first = false; continue; } $mappings = explode( '}-', $block, 2 )[0]; $stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings ); $table = StringUtils::explode( ';', $stripped ); foreach ( $table as $t ) { $m = explode( '=>', $t, 3 ); if ( count( $m ) != 2 ) { continue; } // trim any trailing comments starting with '//' $tt = explode( '//', $m[1], 2 ); $ret[trim( $m[0] )] = trim( $tt[0] ); } } // recursively parse the subpages if ( $recursive ) { foreach ( $sublinks as $link ) { $s = $this->parseCachedTable( $code, $link, $recursive ); $ret = $s + $ret; } } return $ret; } public function markNoConversion( $text, $noParse = false ) { # don't mark if already marked if ( str_contains( $text, '-{' ) || str_contains( $text, '}-' ) ) { return $text; } return "-{R|$text}-"; } public function convertCategoryKey( $key ) { return $key; } public function updateConversionTable( LinkTarget $linkTarget ) { if ( $linkTarget->getNamespace() === NS_MEDIAWIKI ) { $t = explode( '/', $linkTarget->getDBkey(), 3 ); $c = count( $t ); if ( $c > 1 && $t[0] == 'Conversiontable' && $this->validateVariant( $t[1] ) ) { $this->reloadTables(); } } } /** * Get the cached separator pattern for ConverterRule::parseRules() * @return string */ public function getVarSeparatorPattern() { if ( $this->mVarSeparatorPattern === null ) { // varsep_pattern for preg_split: // The text should be split by ";" only if a 
valid variant // name exists after the markup. // For example // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\ // <span style="font-size:120%;">yyy</span>;}- // we should split it as: // [ // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>' // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>' // [2] => '' // ] $expandedVariants = []; foreach ( $this->getVariants() as $variant ) { $expandedVariants[ $variant ] = 1; // Accept standard BCP 47 names for variants as well. $expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1; } // Accept old deprecated names for variants foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) { if ( isset( $expandedVariants[ $new ] ) ) { $expandedVariants[ $old ] = 1; } } $expandedVariants = implode( '|', array_keys( $expandedVariants ) ); $pat = '/;\s*(?='; // zh-hans:xxx;zh-hant:yyy $pat .= '(?:' . $expandedVariants . ')\s*:'; // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz $pat .= '|[^;]*?=>\s*(?:' . $expandedVariants . ')\s*:'; $pat .= '|\s*$)/'; $this->mVarSeparatorPattern = $pat; } return $this->mVarSeparatorPattern; } public function hasVariants() { return count( $this->getVariants() ) > 1; } public function hasVariant( $variant ) { return $variant && ( $variant === $this->validateVariant( $variant ) ); } public function convertHtml( $text ) { // @phan-suppress-next-line SecurityCheck-DoubleEscaped convert() is documented to return html return htmlspecialchars( $this->convert( $text ) ); } } /** @deprecated class alias since 1.43 */ class_alias( LanguageConverter::class, 'LanguageConverter' ); PK ! ��yI�� �� LocalisationCache.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ use CLDRPluralRuleParser\Error as CLDRPluralRuleError; use CLDRPluralRuleParser\Evaluator; use MediaWiki\Config\ConfigException; use MediaWiki\Config\ServiceOptions; use MediaWiki\HookContainer\HookContainer; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Json\FormatJson; use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\MainConfigNames; use Psr\Log\LoggerInterface; /** * Caching for the contents of localisation files. * * Including for i18n JSON files under `/languages/messages`, `Messages*.php`, * and `*.i18n.php`. * * An instance of this class is available using MediaWikiServices. * * The values retrieved from here are merged, containing items from extension * files, core messages files and the language fallback sequence (e.g. zh-cn -> * zh-hans -> en ). Some common errors are corrected, for example namespace * names with spaces instead of underscores, but heavyweight processing, such * as grammatical transformation, is done by the caller. * * @ingroup Language */ class LocalisationCache { public const VERSION = 5; /** @var ServiceOptions */ private $options; /** * True if re-caching should only be done on an explicit call to recache(). * Setting this reduces the overhead of cache freshness checking, which * requires doing a stat() for every extension i18n file. * * @var bool */ private $manualRecache; /** * The cache data. 2/3-d array, where the first key is the language code, * the second key is the item key e.g. 
'messages', and the optional third key is * an item specific subkey index. Some items are not arrays, and so for those * items, there are no subkeys. * * @var array<string,array> */ protected $data = []; /** * The source language of cached data items. Only supports messages for now. * * @var array<string,array<string,array<string,string>>> */ protected $sourceLanguage = []; /** @var LCStore */ private $store; /** @var LoggerInterface */ private $logger; /** @var HookRunner */ private $hookRunner; /** @var callable[] See comment for parameter in constructor */ private $clearStoreCallbacks; /** @var LanguageNameUtils */ private $langNameUtils; /** * A 2-d associative array, code/key, where presence indicates that the item * is loaded. Value arbitrary. * * For split items, if set, this indicates that all the subitems have been * loaded. * * @var array<string,array<string,true>> */ private $loadedItems = []; /** * A 3-d associative array, code/key/subkey, where presence indicates that * the subitem is loaded. Only used for the split items, i.e. ,messages. * * @var array<string,array<string,array<string,true>>> */ private $loadedSubitems = []; /** * An array where the presence of a key indicates that that language has been * initialised. Initialisation includes checking for cache expiry and doing * any necessary updates. * * @var array<string,true> */ private $initialisedLangs = []; /** * An array mapping non-existent pseudo-languages to fallback languages. This * is filled by initShallowFallback() when data is requested from a language * that lacks a Messages*.php file. * * @var array<string,string> */ private $shallowFallbacks = []; /** * An array where the keys are codes that have been re-cached by this instance. * * @var array<string,true> */ private $recachedLangs = []; /** * An array indicating whether core data for a language has been loaded. 
* If the entry for a language code $code is true, * then {@link self::$data} is guaranteed to contain an array for $code, * with at least an entry (possibly null) for each of the {@link self::CORE_ONLY_KEYS}, * and all the core-only keys will be marked as loaded in {@link self::$loadedItems} too. * Additionally, there will be a 'deps' entry for $code with the dependencies tracked so far. * * @var array<string,bool> */ private $coreDataLoaded = []; /** * All item keys */ public const ALL_KEYS = [ 'fallback', 'namespaceNames', 'bookstoreList', 'magicWords', 'messages', 'rtl', 'digitTransformTable', 'separatorTransformTable', 'minimumGroupingDigits', 'fallback8bitEncoding', 'linkPrefixExtension', 'linkTrail', 'linkPrefixCharset', 'namespaceAliases', 'dateFormats', 'datePreferences', 'datePreferenceMigrationMap', 'defaultDateFormat', 'specialPageAliases', 'imageFiles', 'preloadedMessages', 'namespaceGenderAliases', 'digitGroupingPattern', 'pluralRules', 'pluralRuleTypes', 'compiledPluralRules', 'formalityIndex', ]; /** * Keys for items that can only be set in the core message files, * not in extensions. Assignments to these keys in extension messages files * are silently ignored. * * @since 1.41 */ private const CORE_ONLY_KEYS = [ 'fallback', 'rtl', 'digitTransformTable', 'separatorTransformTable', 'minimumGroupingDigits', 'fallback8bitEncoding', 'linkPrefixExtension', 'linkTrail', 'linkPrefixCharset', 'datePreferences', 'datePreferenceMigrationMap', 'defaultDateFormat', 'digitGroupingPattern', 'formalityIndex', ]; /** * ALL_KEYS - CORE_ONLY_KEYS. All of these can technically be set * both in core and in extension messages files, * though this is not necessarily useful for all these keys. * Some of these keys are mergeable too. 
*
 * @since 1.41
 */
private const ALL_EXCEPT_CORE_ONLY_KEYS = [
    'namespaceNames', 'bookstoreList', 'magicWords', 'messages',
    'namespaceAliases', 'dateFormats', 'specialPageAliases',
    'imageFiles', 'preloadedMessages', 'namespaceGenderAliases',
    'pluralRules', 'pluralRuleTypes', 'compiledPluralRules',
];

/** Keys for items which can be localized. */
public const ALL_ALIAS_KEYS = [ 'specialPageAliases' ];

/**
 * Keys for items which consist of associative arrays, which may be merged
 * by a fallback sequence.
 */
private const MERGEABLE_MAP_KEYS = [ 'messages', 'namespaceNames',
    'namespaceAliases', 'dateFormats', 'imageFiles', 'preloadedMessages'
];

/**
 * Keys for items which contain an array of arrays of equivalent aliases
 * for each subitem. The aliases may be merged by a fallback sequence.
 */
private const MERGEABLE_ALIAS_LIST_KEYS = [ 'specialPageAliases' ];

/**
 * Keys for items which contain an associative array, and may be merged if
 * the primary value contains the special array key "inherit". That array
 * key is removed after the first merge.
 */
private const OPTIONAL_MERGE_KEYS = [ 'bookstoreList' ];

/**
 * Keys for items that are formatted like $magicWords
 */
private const MAGIC_WORD_KEYS = [ 'magicWords' ];

/**
 * Keys for items where the subitems are stored in the backend separately.
 */
private const SPLIT_KEYS = [ 'messages' ];

/**
 * Keys for items that will be prefixed with its source language code,
 * which should be stripped out when loading from cache.
 */
private const SOURCE_PREFIX_KEYS = [ 'messages' ];

/**
 * Separator for the source language prefix.
 */
private const SOURCEPREFIX_SEPARATOR = ':';

/**
 * Keys which are loaded automatically by initLanguage()
 */
private const PRELOADED_KEYS = [ 'dateFormats', 'namespaceNames' ];

private const PLURAL_FILES = [
    // Load CLDR plural rules
    MW_INSTALL_PATH . '/languages/data/plurals.xml',
    // Override or extend with MW-specific rules
    MW_INSTALL_PATH . '/languages/data/plurals-mediawiki.xml',
];

/**
 * Associative array of cached plural rules. The key is the language code,
 * the value is an array of plural rules for that language.
 *
 * @var array<string,array<int,string>>|null
 */
private static $pluralRules = null;

/**
 * Associative array of cached plural rule types. The key is the language
 * code, the value is an array of plural rule types for that language. For
 * example, $pluralRuleTypes['ar'] = ['zero', 'one', 'two', 'few', 'many'].
 * The index for each rule type matches the index for the rule in
 * $pluralRules, thus allowing correlation between the two. The reason we
 * don't just use the type names as the keys in $pluralRules is because
 * Language::convertPlural applies the rules based on numeric order (or
 * explicit numeric parameter), not based on the name of the rule type. For
 * example, {{plural:count|wordform1|wordform2|wordform3}}, rather than
 * {{plural:count|one=wordform1|two=wordform2|many=wordform3}}.
 *
 * @var array<string,array<int,string>>|null
 */
private static $pluralRuleTypes = null;

/**
 * Return a suitable LCStore as specified by the given configuration.
 *
 * An explicit 'storeClass' wins. Otherwise 'store' selects the backend:
 * 'files'/'file' use LCStoreCDB, 'db' uses LCStoreDB, 'array' uses
 * LCStoreStaticArray, and 'detect' uses LCStoreCDB when a directory is
 * available and LCStoreDB when it is not.
 *
 * @since 1.34
 * @param array $conf In the format of $wgLocalisationCacheConf
 * @param string|false|null $fallbackCacheDir In case 'storeDirectory' isn't specified
 * @return LCStore
 * @throws ConfigException If neither 'storeClass' nor a recognised 'store' value is given
 */
public static function getStoreFromConf( array $conf, $fallbackCacheDir ): LCStore {
    // 'store' and 'storeDirectory' are read with ?? so that a partial
    // configuration array does not raise "undefined array key" warnings,
    // matching how 'storeClass' and 'storeServer' are already handled.
    $store = $conf['store'] ?? null;
    $storeArg = [];
    $storeArg['directory'] =
        ( $conf['storeDirectory'] ?? null ) ?: $fallbackCacheDir;

    if ( !empty( $conf['storeClass'] ) ) {
        $storeClass = $conf['storeClass'];
    } elseif (
        $store === 'files' || $store === 'file' ||
        ( $store === 'detect' && $storeArg['directory'] )
    ) {
        $storeClass = LCStoreCDB::class;
    } elseif ( $store === 'db' || $store === 'detect' ) {
        $storeClass = LCStoreDB::class;
        $storeArg['server'] = $conf['storeServer'] ?? [];
    } elseif ( $store === 'array' ) {
        $storeClass = LCStoreStaticArray::class;
    } else {
        throw new ConfigException(
            'Please set $wgLocalisationCacheConf[\'store\'] to something sensible.'
        );
    }

    return new $storeClass( $storeArg );
}

/**
 * @internal For use by ServiceWiring
 */
public const CONSTRUCTOR_OPTIONS = [
    // True to treat all files as expired until they are regenerated by this object.
    'forceRecache',
    'manualRecache',
    MainConfigNames::ExtensionMessagesFiles,
    MainConfigNames::MessagesDirs,
    MainConfigNames::TranslationAliasesDirs,
];

/**
 * For constructor parameters, @ref \MediaWiki\MainConfigSchema::LocalisationCacheConf.
 *
 * @internal Do not construct directly, use MediaWikiServices instead.
 * @param ServiceOptions $options
 * @param LCStore $store What backend to use for storage
 * @param LoggerInterface $logger
 * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be
 *   used to clear other caches that depend on this one, such as ResourceLoader's
 *   MessageBlobStore.
 * @param LanguageNameUtils $langNameUtils
 * @param HookContainer $hookContainer
 */
public function __construct(
    ServiceOptions $options,
    LCStore $store,
    LoggerInterface $logger,
    array $clearStoreCallbacks,
    LanguageNameUtils $langNameUtils,
    HookContainer $hookContainer
) {
    $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

    $this->options = $options;
    $this->store = $store;
    $this->logger = $logger;
    $this->clearStoreCallbacks = $clearStoreCallbacks;
    $this->langNameUtils = $langNameUtils;
    $this->hookRunner = new HookRunner( $hookContainer );

    // Keep this separate from $this->options so that it can be mutable
    $this->manualRecache = $options->get( 'manualRecache' );
}

/**
 * Returns true if the given key is mergeable, that is, if it is an associative
 * array which can be merged through a fallback sequence.
* @param string $key
 * @return bool
 */
private static function isMergeableKey( string $key ): bool {
    // Lookup table built on first use and kept for the rest of the request.
    static $mergeableKeys;
    if ( $mergeableKeys === null ) {
        $allMergeable = array_merge(
            self::MERGEABLE_MAP_KEYS,
            self::MERGEABLE_ALIAS_LIST_KEYS,
            self::OPTIONAL_MERGE_KEYS,
            self::MAGIC_WORD_KEYS
        );
        $mergeableKeys = array_fill_keys( $allMergeable, true );
    }

    return isset( $mergeableKeys[$key] );
}

/**
 * Fetch a whole cache item for a language, loading it on first access.
 *
 * Warning: this may be slow for split items (messages), since it will
 * need to fetch all the subitems from the cache individually.
 *
 * For a language set up as a shallow fallback, the 'fallback' item is the
 * code of the language it falls back to.
 *
 * @param string $code
 * @param string $key
 * @return mixed
 */
public function getItem( $code, $key ) {
    $alreadyLoaded = isset( $this->loadedItems[$code][$key] );
    if ( !$alreadyLoaded ) {
        $this->loadItem( $code, $key );
    }

    $viaShallowFallback = $key === 'fallback' && isset( $this->shallowFallbacks[$code] );

    // @phan-suppress-next-line PhanTypeArraySuspiciousNullable
    return $viaShallowFallback ? $this->shallowFallbacks[$code] : $this->data[$code][$key];
}

/**
 * Fetch one subitem, for instance a single message for a given language.
 *
 * The subitem is loaded unless either it or its whole parent item is
 * already marked as loaded.
 *
 * @param string $code
 * @param string $key
 * @param string $subkey
 * @return mixed|null
 */
public function getSubitem( $code, $key, $subkey ) {
    $alreadyLoaded = isset( $this->loadedSubitems[$code][$key][$subkey] )
        || isset( $this->loadedItems[$code][$key] );
    if ( !$alreadyLoaded ) {
        $this->loadSubitem( $code, $key, $subkey );
    }

    return $this->data[$code][$key][$subkey] ?? null;
}

/**
 * Fetch a subitem together with its source language. Only supports messages for now.
 *
 * @since 1.41
 * @param string $code
 * @param string $key
 * @param string $subkey
 * @return string[]|null Return [ subitem, sourceLanguage ] if the subitem is defined.
 */
public function getSubitemWithSource( $code, $key, $subkey ) {
    $value = $this->getSubitem( $code, $key, $subkey );
    if ( $value === null ) {
        // Undefined in the backend.
        return null;
    }

    // The source language should have been set, but to avoid a Phan error and to be double sure.
    $source = $this->sourceLanguage[$code][$key][$subkey] ?? $code;

    return [ $value, $source ];
}

/**
 * Get the list of subitem keys for a given item.
*
 * This is faster than array_keys($lc->getItem(...)) for the items listed in
 * self::SPLIT_KEYS.
 *
 * Will return null if the item is not found, or false if the item is not an
 * array.
 *
 * @param string $code
 * @param string $key
 * @return bool|null|string|string[]
 */
public function getSubitemList( $code, $key ) {
    // Split items keep their subkey list as a subitem of the 'list' item.
    if ( in_array( $key, self::SPLIT_KEYS, true ) ) {
        return $this->getSubitem( $code, 'list', $key );
    }

    $item = $this->getItem( $code, $key );

    return is_array( $item ) ? array_keys( $item ) : false;
}

/**
 * Load an item into the cache.
 *
 * Core-only keys of a valid built-in language code are served by
 * loadCoreData(). Everything else goes through initLanguage(), then the
 * shallow fallback (if one is registered for $code), then the store.
 *
 * @param string $code
 * @param string $key
 */
private function loadItem( $code, $key ) {
    if ( isset( $this->loadedItems[$code][$key] ) ) {
        return;
    }

    if (
        in_array( $key, self::CORE_ONLY_KEYS, true ) ||
        // "synthetic" keys added by loadCoreData based on "fallback"
        $key === 'fallbackSequence' ||
        $key === 'originalFallbackSequence'
    ) {
        if ( $this->langNameUtils->isValidBuiltInCode( $code ) ) {
            $this->loadCoreData( $code );
            return;
        }
    }

    if ( !isset( $this->initialisedLangs[$code] ) ) {
        $this->initLanguage( $code );

        // Check to see if initLanguage() loaded it for us
        if ( isset( $this->loadedItems[$code][$key] ) ) {
            return;
        }
    }

    if ( isset( $this->shallowFallbacks[$code] ) ) {
        $this->loadItem( $this->shallowFallbacks[$code], $key );

        return;
    }

    if ( in_array( $key, self::SPLIT_KEYS, true ) ) {
        // getSubitem() returns null when no subkey list is stored for this
        // item; treat that as an empty list instead of iterating over null.
        $subkeyList = $this->getSubitem( $code, 'list', $key ) ?? [];
        foreach ( $subkeyList as $subkey ) {
            if ( isset( $this->data[$code][$key][$subkey] ) ) {
                continue;
            }
            $this->loadSubitem( $code, $key, $subkey );
        }
    } else {
        $this->data[$code][$key] = $this->store->get( $code, $key );
    }

    $this->loadedItems[$code][$key] = true;
}

/**
 * Load a subitem into the cache.
*
 * @param string $code
 * @param string $key
 * @param string $subkey
 */
private function loadSubitem( $code, $key, $subkey ) {
    // Only split items have individually stored subitems; anything else is
    // loaded as a whole item.
    if ( !in_array( $key, self::SPLIT_KEYS ) ) {
        $this->loadItem( $code, $key );

        return;
    }

    if ( !isset( $this->initialisedLangs[$code] ) ) {
        $this->initLanguage( $code );
    }

    // Check to see if initLanguage() loaded it for us
    if ( isset( $this->loadedItems[$code][$key] ) ||
        isset( $this->loadedSubitems[$code][$key][$subkey] )
    ) {
        return;
    }

    if ( isset( $this->shallowFallbacks[$code] ) ) {
        $this->loadSubitem( $this->shallowFallbacks[$code], $key, $subkey );

        return;
    }

    $value = $this->store->get( $code, "$key:$subkey" );
    if ( $value !== null && in_array( $key, self::SOURCE_PREFIX_KEYS ) ) {
        // The stored string is "<source language><separator><value>". The
        // limit of 2 keeps separator characters inside the value intact.
        [
            $this->sourceLanguage[$code][$key][$subkey],
            $this->data[$code][$key][$subkey]
        ] = explode( self::SOURCEPREFIX_SEPARATOR, $value, 2 );
    } else {
        $this->data[$code][$key][$subkey] = $value;
    }

    $this->loadedSubitems[$code][$key][$subkey] = true;
}

/**
 * Returns true if the cache identified by $code is missing or expired.
 *
 * The cache counts as expired when 'forceRecache' is set and this instance
 * has not re-cached $code yet, when any of the 'deps', 'list' or 'preload'
 * entries is absent from the store, or when any stored dependency is not a
 * CacheDependency or reports itself expired.
 *
 * @param string $code
 *
 * @return bool
 */
public function isExpired( $code ) {
    if ( $this->options->get( 'forceRecache' ) && !isset( $this->recachedLangs[$code] ) ) {
        $this->logger->debug( __METHOD__ . "($code): forced reload" );

        return true;
    }

    $deps = $this->store->get( $code, 'deps' );
    $keys = $this->store->get( $code, 'list' );
    $preload = $this->store->get( $code, 'preload' );
    // Different keys may expire separately for some stores
    if ( $deps === null || $keys === null || $preload === null ) {
        $this->logger->debug( __METHOD__ . "($code): cache missing, need to make one" );

        return true;
    }

    foreach ( $deps as $dep ) {
        // Because we're unserializing stuff from cache, we
        // could receive objects of classes that don't exist
        // anymore (e.g., uninstalled extensions)
        // When this happens, always expire the cache
        if ( !$dep instanceof CacheDependency || $dep->isExpired() ) {
            // get_debug_type() rather than get_class(): an entry that is not
            // an object at all would make get_class() throw while the log
            // message is being built.
            $this->logger->debug( __METHOD__ . "($code): cache for $code expired due to " .
                get_debug_type( $dep ) );

            return true;
        }
    }

    return false;
}

/**
 * Initialise a language in this object. Rebuild the cache if necessary.
 *
 * @param string $code
 * @throws RuntimeException If English data is unavailable, or if the
 *   'preload' entry is missing while automatic re-caching is enabled
 */
private function initLanguage( $code ) {
    if ( isset( $this->initialisedLangs[$code] ) ) {
        return;
    }

    // Mark as initialised before doing any work, so that the nested
    // getItem() call below does not start initialisation again.
    $this->initialisedLangs[$code] = true;

    # If the code is of the wrong form for a Messages*.php file, do a shallow fallback
    if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) {
        $this->initShallowFallback( $code, 'en' );

        return;
    }

    # Re-cache the data if necessary
    if ( !$this->manualRecache && $this->isExpired( $code ) ) {
        if ( $this->langNameUtils->isSupportedLanguage( $code ) ) {
            $this->recache( $code );
        } elseif ( $code === 'en' ) {
            throw new RuntimeException( 'MessagesEn.php is missing.' );
        } else {
            $this->initShallowFallback( $code, 'en' );
        }

        return;
    }

    # Preload some stuff
    $preload = $this->getItem( $code, 'preload' );
    if ( $preload === null ) {
        if ( $this->manualRecache ) {
            // No Messages*.php file. Do shallow fallback to en.
            if ( $code === 'en' ) {
                throw new RuntimeException( 'No localisation cache found for English. ' .
                    'Please run maintenance/rebuildLocalisationCache.php.' );
            }
            $this->initShallowFallback( $code, 'en' );

            return;
        } else {
            throw new RuntimeException( 'Invalid or missing localisation cache.' );
        }
    }

    // Strip the source-language prefix from preloaded values, remembering
    // the source language of each subitem separately.
    foreach ( self::SOURCE_PREFIX_KEYS as $key ) {
        if ( !isset( $preload[$key] ) ) {
            continue;
        }
        foreach ( $preload[$key] as $subkey => $value ) {
            if ( $value !== null ) {
                [
                    $this->sourceLanguage[$code][$key][$subkey],
                    $preload[$key][$subkey]
                ] = explode( self::SOURCEPREFIX_SEPARATOR, $value, 2 );
            } else {
                $preload[$key][$subkey] = null;
            }
        }
    }

    // Data already present for this language takes the primary role in the
    // merge; the preloaded values act as the fallback side.
    if ( isset( $this->data[$code] ) ) {
        foreach ( $preload as $key => $value ) {
            // @phan-suppress-next-line PhanTypeArraySuspiciousNullable -- see isset() above
            $this->mergeItem( $key, $this->data[$code][$key], $value );
        }
    } else {
        $this->data[$code] = $preload;
    }

    // Split items are only partially preloaded, so they are marked per
    // subitem; every other preloaded item is marked as loaded as a whole.
    foreach ( $preload as $key => $item ) {
        if ( in_array( $key, self::SPLIT_KEYS ) ) {
            foreach ( $item as $subkey => $subitem ) {
                $this->loadedSubitems[$code][$key][$subkey] = true;
            }
        } else {
            $this->loadedItems[$code][$key] = true;
        }
    }
}

/**
 * Create a fallback from one language to another, without creating a
 * complete persistent cache.
 *
 * The primary code's in-process entries are bound by reference to those of
 * the fallback code, so anything loaded for one is visible through the other.
 *
 * @param string $primaryCode
 * @param string $fallbackCode
 */
private function initShallowFallback( $primaryCode, $fallbackCode ) {
    $this->data[$primaryCode] =& $this->data[$fallbackCode];
    $this->loadedItems[$primaryCode] =& $this->loadedItems[$fallbackCode];
    $this->loadedSubitems[$primaryCode] =& $this->loadedSubitems[$fallbackCode];
    $this->shallowFallbacks[$primaryCode] = $fallbackCode;
    $this->coreDataLoaded[$primaryCode] =& $this->coreDataLoaded[$fallbackCode];
}

/**
 * Read a PHP file containing localisation data.
*
 * @param string $_fileName
 * @param string $_fileType
 * @return array
 */
protected function readPHPFile( $_fileName, $_fileType ) {
    // The included file runs in this method's scope: every variable it
    // defines becomes a local here and is picked up by name ($$key) below.
    // NOTE(review): the underscore-prefixed parameter names presumably exist
    // to avoid clashing with variables set by the included file - confirm.
    include $_fileName;

    $data = [];
    if ( $_fileType == 'core' ) {
        foreach ( self::ALL_KEYS as $key ) {
            // Not all keys are set in language files, so
            // check they exist first
            if ( isset( $$key ) ) {
                $data[$key] = $$key;
            }
        }
    } elseif ( $_fileType == 'extension' ) {
        // Extension files may not set core-only keys; those are never read here.
        foreach ( self::ALL_EXCEPT_CORE_ONLY_KEYS as $key ) {
            if ( isset( $$key ) ) {
                $data[$key] = $$key;
            }
        }
    } elseif ( $_fileType == 'aliases' ) {
        // @phan-suppress-next-line PhanImpossibleCondition May be set in the included file
        if ( isset( $aliases ) ) {
            $data['aliases'] = $aliases;
        }
    } else {
        throw new InvalidArgumentException( __METHOD__ . ": Invalid file type: $_fileType" );
    }

    return $data;
}

/**
 * Read a JSON file containing localisation messages.
 *
 * @param string $fileName Name of file to read
 * @return array The decoded top-level entries of the file, minus those whose
 *   key is empty or starts with '@'. Empty array if the file is not
 *   readable.
 * @throws RuntimeException If the file content does not decode to an array
 */
private function readJSONFile( $fileName ) {
    if ( !is_readable( $fileName ) ) {
        return [];
    }

    $json = file_get_contents( $fileName );
    if ( $json === false ) {
        return [];
    }

    $data = FormatJson::decode( $json, true );
    // Reject anything that is not an array. A scalar JSON document decodes
    // to a non-null value but cannot be iterated or returned as an array.
    if ( !is_array( $data ) ) {
        throw new RuntimeException( __METHOD__ . ": Invalid JSON file: $fileName" );
    }

    // Remove keys starting with '@'; they are reserved for metadata and non-message data
    foreach ( $data as $key => $unused ) {
        // Integer-like keys come back as ints after decoding; compare on the
        // string form so that indexing the first character is always valid.
        $name = (string)$key;
        if ( $name === '' || $name[0] === '@' ) {
            unset( $data[$key] );
        }
    }

    return $data;
}

/**
 * Get the compiled plural rules for a given language from the XML files.
*
 * @since 1.20
 * @param string $code
 * @return array<int,string>|null
 */
private function getCompiledPluralRules( $code ) {
    $sourceRules = $this->getPluralRules( $code );
    if ( $sourceRules === null ) {
        // No rules known for this language.
        return null;
    }

    try {
        return Evaluator::compile( $sourceRules );
    } catch ( CLDRPluralRuleError $e ) {
        // A rule that fails to compile is logged; the language then gets an
        // empty rule list.
        $this->logger->debug( $e->getMessage() );

        return [];
    }
}

/**
 * Look up the plural rules of a language, as read from the XML files.
 *
 * The XML files are read on first use and the result is kept in a static
 * property.
 *
 * @since 1.20
 * @param string $code
 * @return array<int,string>|null
 */
private function getPluralRules( $code ) {
    $needsLoad = self::$pluralRules === null;
    if ( $needsLoad ) {
        self::loadPluralFiles();
    }

    return self::$pluralRules[$code] ?? null;
}

/**
 * Look up the plural rule types of a language, as read from the XML files.
 *
 * The XML files are read on first use and the result is kept in a static
 * property.
 *
 * @since 1.22
 * @param string $code
 * @return array<int,string>|null
 */
private function getPluralRuleTypes( $code ) {
    $needsLoad = self::$pluralRuleTypes === null;
    if ( $needsLoad ) {
        self::loadPluralFiles();
    }

    return self::$pluralRuleTypes[$code] ?? null;
}

/**
 * Read every file listed in self::PLURAL_FILES, in order.
 */
private static function loadPluralFiles() {
    foreach ( self::PLURAL_FILES as $pluralFile ) {
        self::loadPluralFile( $pluralFile );
    }
}

/**
 * Load a plural XML file with the given filename, compile the relevant
 * rules, and save the compiled rules in a process-local cache.
*
 * @param string $fileName
 */
private static function loadPluralFile( $fileName ) {
    // Use file_get_contents instead of DOMDocument::load (T58439)
    $xml = file_get_contents( $fileName );
    if ( !$xml ) {
        throw new RuntimeException( "Unable to read plurals file $fileName" );
    }

    $doc = new DOMDocument;
    // loadXML() returns false for a document it cannot parse. Fail loudly
    // in that case, as for an unreadable file, instead of continuing with an
    // empty document and silently recording no rules at all.
    if ( $doc->loadXML( $xml ) === false ) {
        throw new RuntimeException( "Unable to parse plurals file $fileName" );
    }

    $rulesets = $doc->getElementsByTagName( "pluralRules" );
    foreach ( $rulesets as $ruleset ) {
        $codes = $ruleset->getAttribute( 'locales' );
        $rules = [];
        $ruleTypes = [];
        $ruleElements = $ruleset->getElementsByTagName( "pluralRule" );
        foreach ( $ruleElements as $elt ) {
            $ruleType = $elt->getAttribute( 'count' );
            if ( $ruleType === 'other' ) {
                // Don't record "other" rules, which have an empty condition
                continue;
            }
            // The two lists are appended in lockstep, so a rule and its type
            // always share the same index.
            $rules[] = $elt->nodeValue;
            $ruleTypes[] = $ruleType;
        }
        // One ruleset applies to every locale named in its space-separated
        // 'locales' attribute; a later file overwrites earlier entries.
        foreach ( explode( ' ', $codes ) as $code ) {
            self::$pluralRules[$code] = $rules;
            self::$pluralRuleTypes[$code] = $ruleTypes;
        }
    }
}

/**
 * Read the data from the source files for a given language, and register
 * the relevant dependencies in the $deps array.
 *
 * @param string $code
 * @param array &$deps Receives a FileDependency for the messages file, if it exists
 * @return array Data read from the messages file, or an empty array if there is none
 */
private function readSourceFilesAndRegisterDeps( $code, &$deps ) {
    // This reads in the PHP i18n file with non-messages l10n data
    $fileName = $this->langNameUtils->getMessagesFileName( $code );
    if ( !is_file( $fileName ) ) {
        return [];
    }

    $deps[] = new FileDependency( $fileName );

    return $this->readPHPFile( $fileName, 'core' );
}

/**
 * Read and compile the plural data for a given language,
 * and register the relevant dependencies in the $deps array.
* * @param string $code * @param array &$deps * @return array */ private function readPluralFilesAndRegisterDeps( $code, &$deps ) { $data = [ // Load CLDR plural rules for JavaScript 'pluralRules' => $this->getPluralRules( $code ), // And for PHP 'compiledPluralRules' => $this->getCompiledPluralRules( $code ), // Load plural rule types 'pluralRuleTypes' => $this->getPluralRuleTypes( $code ), ]; foreach ( self::PLURAL_FILES as $fileName ) { $deps[] = new FileDependency( $fileName ); } return $data; } /** * Merge two localisation values, a primary and a fallback, overwriting the * primary value in place. * * @param string $key * @param mixed &$value * @param mixed $fallbackValue */ private function mergeItem( $key, &$value, $fallbackValue ) { if ( $value !== null ) { if ( $fallbackValue !== null ) { if ( in_array( $key, self::MERGEABLE_MAP_KEYS ) ) { $value += $fallbackValue; } elseif ( in_array( $key, self::MERGEABLE_ALIAS_LIST_KEYS ) ) { $value = array_merge_recursive( $value, $fallbackValue ); } elseif ( in_array( $key, self::OPTIONAL_MERGE_KEYS ) ) { if ( !empty( $value['inherit'] ) ) { $value = array_merge( $fallbackValue, $value ); } unset( $value['inherit'] ); } elseif ( in_array( $key, self::MAGIC_WORD_KEYS ) ) { $this->mergeMagicWords( $value, $fallbackValue ); } } } else { $value = $fallbackValue; } } /** * @param array &$value * @param array $fallbackValue */ private function mergeMagicWords( array &$value, array $fallbackValue ): void { foreach ( $fallbackValue as $magicName => $fallbackInfo ) { if ( !isset( $value[$magicName] ) ) { $value[$magicName] = $fallbackInfo; } else { $value[$magicName] = [ $fallbackInfo[0], ...array_unique( [ // First value is 1 if the magic word is case-sensitive, 0 if not ...array_slice( $value[$magicName], 1 ), ...array_slice( $fallbackInfo, 1 ), ] ) ]; } } } /** * Gets the combined list of messages dirs from * core and extensions * * @since 1.25 * @return array */ public function getMessagesDirs() { global $IP; return [ 
'core' => "$IP/languages/i18n", 'codex' => "$IP/languages/i18n/codex", 'exif' => "$IP/languages/i18n/exif", 'preferences' => "$IP/languages/i18n/preferences", 'api' => "$IP/includes/api/i18n", 'rest' => "$IP/includes/Rest/i18n", 'oojs-ui' => "$IP/resources/lib/ooui/i18n", 'paramvalidator' => "$IP/includes/libs/ParamValidator/i18n", ] + $this->options->get( MainConfigNames::MessagesDirs ); } /** * Load the core localisation data for a given language code, * without extensions, using only the process cache. * See {@link self::$coreDataLoaded} for what this guarantees. * * In addition to the core-only keys, * {@link self::$data} may contain additional entries for $code, * but those must not be used outside of {@link self::recache()} * (and accordingly, they are not marked as loaded yet). */ private function loadCoreData( string $code ) { if ( !$code ) { throw new InvalidArgumentException( "Invalid language code requested" ); } if ( $this->coreDataLoaded[$code] ?? false ) { return; } $coreData = array_fill_keys( self::CORE_ONLY_KEYS, null ); $deps = []; # Load the primary localisation from the source file $data = $this->readSourceFilesAndRegisterDeps( $code, $deps ); $this->logger->debug( __METHOD__ . 
": got localisation for $code from source" ); # Merge primary localisation foreach ( $data as $key => $value ) { $this->mergeItem( $key, $coreData[ $key ], $value ); } # Fill in the fallback if it's not there already // @phan-suppress-next-line PhanSuspiciousValueComparison if ( ( $coreData['fallback'] === null || $coreData['fallback'] === false ) && $code === 'en' ) { $coreData['fallback'] = false; $coreData['originalFallbackSequence'] = $coreData['fallbackSequence'] = []; } else { if ( $coreData['fallback'] !== null ) { $coreData['fallbackSequence'] = array_map( 'trim', explode( ',', $coreData['fallback'] ) ); } else { $coreData['fallbackSequence'] = []; } $len = count( $coreData['fallbackSequence'] ); # Before we add the 'en' fallback for messages, keep a copy of # the original fallback sequence $coreData['originalFallbackSequence'] = $coreData['fallbackSequence']; # Ensure that the sequence ends at 'en' for messages if ( !$len || $coreData['fallbackSequence'][$len - 1] !== 'en' ) { $coreData['fallbackSequence'][] = 'en'; } } foreach ( $coreData['fallbackSequence'] as $fbCode ) { // load core fallback data $fbData = $this->readSourceFilesAndRegisterDeps( $fbCode, $deps ); foreach ( self::CORE_ONLY_KEYS as $key ) { // core-only keys are not mergeable, only set if not present in core data yet if ( isset( $fbData[$key] ) && !isset( $coreData[$key] ) ) { $coreData[$key] = $fbData[$key]; } } } $coreData['deps'] = $deps; foreach ( $coreData as $key => $item ) { $this->data[$code][$key] ??= null; // @phan-suppress-next-line PhanTypeArraySuspiciousNullable -- we just set a default null $this->mergeItem( $key, $this->data[$code][$key], $item ); if ( in_array( $key, self::CORE_ONLY_KEYS, true ) || // "synthetic" keys based on "fallback" (see above) $key === 'fallbackSequence' || $key === 'originalFallbackSequence' ) { // only mark core-only keys as loaded; // we may have loaded additional ones from the source file, // but they are not fully loaded yet, since recache() // 
may have to merge in additional values from fallback languages $this->loadedItems[$code][$key] = true; } } $this->coreDataLoaded[$code] = true; }

	/**
	 * Load localisation data for a given language for both core and extensions
	 * and save it to the persistent cache store and the process cache.
	 *
	 * Outline:
	 *  - reset the process cache for $code and load the core data via loadCoreData()
	 *  - collect extension data from PHP message files (skipped for extensions that
	 *    have an entry in the message dirs or the translation alias dirs)
	 *  - for $code and then every code in its fallback sequence: read the JSON
	 *    messages and translation aliases, merge them with the extension and
	 *    core/source data, and fold the result into the final array
	 *  - post-process the final array, call onLocalisationCacheRecache(), then save
	 *    the result to the process cache and to the persistent store
	 *
	 * @param string $code
	 * @throws InvalidArgumentException If $code is empty
	 * @throws UnexpectedValueException If a translation aliases file contains an unknown key
	 * @throws RuntimeException If the merged data has no 'rtl' item
	 */
	public function recache( $code ) {
		if ( !$code ) {
			throw new InvalidArgumentException( "Invalid language code requested" );
		}
		$this->recachedLangs[ $code ] = true;

		# Initial values
		$initialData = array_fill_keys( self::ALL_KEYS, null );
		$this->data[$code] = [];
		$this->loadedItems[$code] = [];
		$this->loadedSubitems[$code] = [];
		$this->coreDataLoaded[$code] = false;
		$this->loadCoreData( $code );
		$coreData = $this->data[$code];
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable -- guaranteed by loadCoreData()
		$deps = $coreData['deps'];
		$coreData += $this->readPluralFilesAndRegisterDeps( $code, $deps );

		# The requested language comes first, followed by its fallbacks
		$codeSequence = array_merge( [ $code ], $coreData['fallbackSequence'] );
		$messageDirs = $this->getMessagesDirs();
		$translationAliasesDirs = $this->options->get( MainConfigNames::TranslationAliasesDirs );

		# Load non-JSON localisation data for extensions
		$extensionData = array_fill_keys( $codeSequence, $initialData );
		foreach ( $this->options->get( MainConfigNames::ExtensionMessagesFiles ) as $extension => $fileName ) {
			if ( isset( $messageDirs[$extension] ) || isset( $translationAliasesDirs[$extension] ) ) {
				# This extension has JSON message data; skip the PHP shim
				continue;
			}

			$data = $this->readPHPFile( $fileName, 'extension' );
			$used = false;

			foreach ( $data as $key => $item ) {
				foreach ( $codeSequence as $csCode ) {
					if ( isset( $item[$csCode] ) ) {
						// Keep the behaviour the same as for json messages.
						// TODO: Consider deprecating using a PHP file for messages.
						if ( in_array( $key, self::SOURCE_PREFIX_KEYS ) ) {
							foreach ( $item[$csCode] as $subkey => $_ ) {
								// ??= keeps the first language in the sequence that defines the subkey
								$this->sourceLanguage[$code][$key][$subkey] ??= $csCode;
							}
						}
						$this->mergeItem( $key, $extensionData[$csCode][$key], $item[$csCode] );
						$used = true;
					}
				}
			}

			# Only files that contributed data become cache dependencies
			if ( $used ) {
				$deps[] = new FileDependency( $fileName );
			}
		}

		# Load the localisation data for each fallback, then merge it into the full array
		$allData = $initialData;
		foreach ( $codeSequence as $csCode ) {
			$csData = $initialData;

			# Load core messages and the extension localisations.
			foreach ( $messageDirs as $dirs ) {
				foreach ( (array)$dirs as $dir ) {
					$fileName = "$dir/$csCode.json";
					$messages = $this->readJSONFile( $fileName );

					foreach ( $messages as $subkey => $_ ) {
						$this->sourceLanguage[$code]['messages'][$subkey] ??= $csCode;
					}
					$this->mergeItem( 'messages', $csData['messages'], $messages );

					$deps[] = new FileDependency( $fileName );
				}
			}

			foreach ( $translationAliasesDirs as $dirs ) {
				foreach ( (array)$dirs as $dir ) {
					$fileName = "$dir/$csCode.json";
					$data = $this->readJSONFile( $fileName );

					foreach ( $data as $key => $item ) {
						// We allow the key in the JSON to be specified in PascalCase similar to key definitions in
						// extension.json, but eventually they are stored in camelCase
						$normalizedKey = lcfirst( $key );

						if ( $normalizedKey === '@metadata' ) {
							// Don't store @metadata information in extension data.
							continue;
						}

						if ( !in_array( $normalizedKey, self::ALL_ALIAS_KEYS ) ) {
							throw new UnexpectedValueException(
								"Invalid key: \"$key\" for " . MainConfigNames::TranslationAliasesDirs . ". " .
								'Valid keys: ' . implode( ', ', self::ALL_ALIAS_KEYS )
							);
						}

						$this->mergeItem( $normalizedKey, $extensionData[$csCode][$normalizedKey], $item );
					}

					$deps[] = new FileDependency( $fileName );
				}
			}

			# Merge the extension data collected for this code: the PHP message file
			# data gathered before this loop plus the JSON translation aliases above
			if ( isset( $extensionData[$csCode] ) ) {
				foreach ( $extensionData[$csCode] as $key => $item ) {
					$this->mergeItem( $key, $csData[$key], $item );
				}
			}

			if ( $csCode === $code ) {
				# Merge core data into extension data
				foreach ( $coreData as $key => $item ) {
					$this->mergeItem( $key, $csData[$key], $item );
				}
			} else {
				# Load the secondary localisation from the source file to
				# avoid infinite cycles on cyclic fallbacks
				$fbData = $this->readSourceFilesAndRegisterDeps( $csCode, $deps );
				$fbData += $this->readPluralFilesAndRegisterDeps( $csCode, $deps );
				# Only merge the keys that make sense to merge
				foreach ( self::ALL_KEYS as $key ) {
					if ( !isset( $fbData[ $key ] ) ) {
						continue;
					}

					if ( !isset( $coreData[ $key ] ) || self::isMergeableKey( $key ) ) {
						$this->mergeItem( $key, $csData[ $key ], $fbData[ $key ] );
					}
				}
			}

			# Allow extensions an opportunity to adjust the data for this fallback
			$this->hookRunner->onLocalisationCacheRecacheFallback( $this, $csCode, $csData );

			# Merge the data for this fallback into the final array
			if ( $csCode === $code ) {
				$allData = $csData;
			} else {
				foreach ( self::ALL_KEYS as $key ) {
					if ( !isset( $csData[$key] ) ) {
						continue;
					}

					// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
					if ( $allData[$key] === null || self::isMergeableKey( $key ) ) {
						$this->mergeItem( $key, $allData[$key], $csData[$key] );
					}
				}
			}
		}

		if ( !isset( $allData['rtl'] ) ) {
			throw new RuntimeException( __METHOD__ . ': Localisation data failed validation check! ' .
				'Check that your languages/messages/MessagesEn.php file is intact.' );
		}

		// Add cache dependencies for any referenced configs
		// We use the keys prefixed with 'wg' for historical reasons.
		$deps['wgExtensionMessagesFiles'] =
			new MainConfigDependency( MainConfigNames::ExtensionMessagesFiles );
		$deps['wgMessagesDirs'] =
			new MainConfigDependency( MainConfigNames::MessagesDirs );
		$deps['version'] = new ConstantDependency( self::class . '::VERSION' );

		# Add dependencies to the cache entry
		$allData['deps'] = $deps;

		# Replace spaces with underscores in namespace names
		$allData['namespaceNames'] = str_replace( ' ', '_', $allData['namespaceNames'] );

		# And do the same for special page aliases. $page is an array.
		foreach ( $allData['specialPageAliases'] as &$page ) {
			$page = str_replace( ' ', '_', $page );
		}
		# Decouple the reference to prevent accidental damage
		unset( $page );

		# If there were no plural rules, return an empty array
		$allData['pluralRules'] ??= [];
		$allData['compiledPluralRules'] ??= [];
		# If there were no plural rule types, return an empty array
		$allData['pluralRuleTypes'] ??= [];

		# Set the list keys
		$allData['list'] = [];
		foreach ( self::SPLIT_KEYS as $key ) {
			$allData['list'][$key] = array_keys( $allData[$key] );
		}
		# Run hooks
		$unused = true; // Used to be $purgeBlobs, removed in 1.34
		$this->hookRunner->onLocalisationCacheRecache( $this, $code, $allData, $unused );

		# Save to the process cache and register the items loaded.
		# This copy is taken before the source-language prefix is added below.
		$this->data[$code] = $allData;
		$this->loadedItems[$code] = [];
		$this->loadedSubitems[$code] = [];
		foreach ( $allData as $key => $item ) {
			$this->loadedItems[$code][$key] = true;
		}

		# Prefix each item with its source language code before save
		foreach ( self::SOURCE_PREFIX_KEYS as $key ) {
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
			foreach ( $allData[$key] as $subKey => $value ) {
				// The source language should have been set, but to avoid Phan error and be double sure.
				$allData[$key][$subKey] = ( $this->sourceLanguage[$code][$key][$subKey] ?? $code ) .
					self::SOURCEPREFIX_SEPARATOR . $value;
			}
		}

		# Set the preload key
		$allData['preload'] = $this->buildPreload( $allData );

		# Save to the persistent cache; SPLIT_KEYS items are stored one subitem per
		# "$key:$subkey" entry, all other items are stored whole
		$this->store->startWrite( $code );
		foreach ( $allData as $key => $value ) {
			if ( in_array( $key, self::SPLIT_KEYS ) ) {
				foreach ( $value as $subkey => $subvalue ) {
					$this->store->set( "$key:$subkey", $subvalue );
				}
			} else {
				$this->store->set( $key, $value );
			}
		}
		$this->store->finishWrite();

		# Clear out the MessageBlobStore
		# HACK: If using a null (i.e., disabled) storage backend, we
		# can't write to the MessageBlobStore either
		if ( !$this->store instanceof LCStoreNull ) {
			foreach ( $this->clearStoreCallbacks as $callback ) {
				$callback();
			}
		}
	}

	/**
	 * Build the preload item from the given pre-cache data.
	 *
	 * The preload item will be loaded automatically, improving performance
	 * for the commonly requested items it contains.
	 *
	 * It holds a copy of every item named in self::PRELOADED_KEYS plus the
	 * messages listed in $data['preloadedMessages']; a listed message that is
	 * missing from $data['messages'] is stored as null.
	 *
	 * @param array $data
	 * @return array
	 */
	private function buildPreload( $data ) {
		$preload = [ 'messages' => [] ];
		foreach ( self::PRELOADED_KEYS as $key ) {
			$preload[$key] = $data[$key];
		}

		foreach ( $data['preloadedMessages'] as $subkey ) {
			$subitem = $data['messages'][$subkey] ?? null;
			$preload['messages'][$subkey] = $subitem;
		}

		return $preload;
	}

	/** * Unload the data for a given language from the object cache. * * Reduces memory usage. 
* * @param string $code */
	public function unload( $code ) {
		// Drop every per-language entry kept for this code in a single statement.
		unset(
			$this->data[$code],
			$this->loadedItems[$code],
			$this->loadedSubitems[$code],
			$this->initialisedLangs[$code],
			$this->shallowFallbacks[$code],
			$this->sourceLanguage[$code],
			$this->coreDataLoaded[$code]
		);

		// Entries of $this->shallowFallbacks that point at $code are unloaded too.
		foreach ( $this->shallowFallbacks as $aliasCode => $targetCode ) {
			if ( $targetCode !== $code ) {
				continue;
			}
			$this->unload( $aliasCode );
		}
	}

	/**
	 * Unload the data of every language listed in $this->initialisedLangs.
	 */
	public function unloadAll() {
		foreach ( array_keys( $this->initialisedLangs ) as $lang ) {
			$this->unload( $lang );
		}
	}

	/**
	 * Disable the storage backend: the store is replaced by an LCStoreNull
	 * and $this->manualRecache is reset to false.
	 */
	public function disableBackend() {
		$this->manualRecache = false;
		$this->store = new LCStoreNull();
	}
}
PK ! �X[�# �# LanguageFactory.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* http://www.gnu.org/copyleft/gpl.html * * @file */ namespace MediaWiki\Languages; use InvalidArgumentException; use LocalisationCache; use LogicException; use MapCacheLRU; use MediaWiki\Config\Config; use MediaWiki\Config\ServiceOptions; use MediaWiki\HookContainer\HookContainer; use MediaWiki\Language\Language; use MediaWiki\Language\LanguageCode; use MediaWiki\Language\LanguageConverter; use MediaWiki\MainConfigNames; use MediaWiki\Title\NamespaceInfo; use Wikimedia\Bcp47Code\Bcp47Code; /** * Internationalisation code * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information. * * @ingroup Language * @since 1.35 */ class LanguageFactory { /** @var ServiceOptions */ private $options; /** @var NamespaceInfo */ private $namespaceInfo; /** @var LocalisationCache */ private $localisationCache; /** @var LanguageNameUtils */ private $langNameUtils; /** @var LanguageFallback */ private $langFallback; /** @var LanguageConverterFactory */ private $langConverterFactory; /** @var HookContainer */ private $hookContainer; /** @var MapCacheLRU */ private $langObjCache; /** @var Config */ private $config; /** @var array */ private $parentLangCache = []; /** * @internal For use by ServiceWiring */ public const CONSTRUCTOR_OPTIONS = [ MainConfigNames::DummyLanguageCodes, ]; /** How many distinct Language objects to retain at most in memory (T40439). 
*/
	private const LANG_CACHE_SIZE = 10;

	/**
	 * Store the injected services and create the LRU cache for Language objects.
	 *
	 * @param ServiceOptions $options Must provide self::CONSTRUCTOR_OPTIONS
	 * @param NamespaceInfo $namespaceInfo
	 * @param LocalisationCache $localisationCache
	 * @param LanguageNameUtils $langNameUtils
	 * @param LanguageFallback $langFallback
	 * @param LanguageConverterFactory $langConverterFactory
	 * @param HookContainer $hookContainer
	 * @param Config $config
	 */
	public function __construct(
		ServiceOptions $options,
		NamespaceInfo $namespaceInfo,
		LocalisationCache $localisationCache,
		LanguageNameUtils $langNameUtils,
		LanguageFallback $langFallback,
		LanguageConverterFactory $langConverterFactory,
		HookContainer $hookContainer,
		Config $config
	) {
		// We have both ServiceOptions and a Config object because
		// the Language class hasn't (yet) been updated to use ServiceOptions
		// and for now gets a full Config
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->options = $options;
		$this->config = $config;

		// Collaborators that are handed on to every Language object
		$this->hookContainer = $hookContainer;
		$this->langConverterFactory = $langConverterFactory;
		$this->langFallback = $langFallback;
		$this->langNameUtils = $langNameUtils;
		$this->localisationCache = $localisationCache;
		$this->namespaceInfo = $namespaceInfo;

		// Bounded cache of Language objects
		$this->langObjCache = new MapCacheLRU( self::LANG_CACHE_SIZE );
	}

	/** * Get a cached or new language object for a given language code * with normalization of the language code. * * If the language code comes from user input, check * LanguageNameUtils::isValidCode() before calling this method. * * The language code is presumed to be a MediaWiki-internal code, * unless you pass a Bcp47Code opaque object, in which case it is * presumed to be a standard BCP-47 code. (There are, regrettably, * some ambiguous codes where this makes a difference.) * * As the Language class itself implements Bcp47Code, this method is an efficient * and safe downcast if you pass in a Language object. 
* * @param string|Bcp47Code $code * @return Language */
	public function getLanguage( $code ): Language {
		if ( $code instanceof Language ) {
			return $code;
		}

		// A Bcp47Code object is converted by ::bcp47ToInternal, which is also where any
		// compatibility remapping of valid BCP-47 codes would be done. A plain internal
		// code instead goes through the deprecated and compatibility mappings.
		$internalCode = $code instanceof Bcp47Code
			? LanguageCode::bcp47ToInternal( $code )
			: ( $this->options->get( MainConfigNames::DummyLanguageCodes )[$code] ?? $code );

		return $this->getRawLanguage( $internalCode );
	}

	/**
	 * Get a LanguageCode object for an internal language code, after applying
	 * the DummyLanguageCodes mapping.
	 *
	 * @param string $code
	 * @return LanguageCode
	 * @throws InvalidArgumentException If the mapped code fails LanguageNameUtils::isValidCode()
	 */
	public function getLanguageCode( string $code ): LanguageCode {
		$dummyCodes = $this->options->get( MainConfigNames::DummyLanguageCodes );
		$code = $dummyCodes[$code] ?? $code;

		if ( $this->langNameUtils->isValidCode( $code ) ) {
			return new LanguageCode( $code );
		}

		throw new InvalidArgumentException( "Invalid language code \"$code\"" );
	}

	/**
	 * Get a cached or new language object for a given language code
	 * without normalization of the language code.
	 *
	 * The object comes from the LRU cache; on a miss it is built by newFromCode().
	 *
	 * If the language code comes from user input, check LanguageNameUtils::isValidCode()
	 * before calling this method.
	 *
	 * @param string $code
	 * @return Language
	 * @since 1.39
	 */
	public function getRawLanguage( $code ): Language {
		return $this->langObjCache->getWithSetCallback(
			$code,
			fn () => $this->newFromCode( $code )
		);
	}

	/** * Create a language object for a given language code. 
* * @param string $code * @param bool $fallback Whether we're going through the language fallback chain * @return Language */
	private function newFromCode( $code, $fallback = false ): Language {
		if ( !$this->langNameUtils->isValidCode( $code ) ) {
			throw new InvalidArgumentException( "Invalid language code \"$code\"" );
		}

		// Every Language (sub)class is constructed with the same argument list.
		$constructorArgs = [
			$code,
			$this->namespaceInfo,
			$this->localisationCache,
			$this->langNameUtils,
			$this->langFallback,
			$this->langConverterFactory,
			$this->hookContainer,
			$this->config
		];

		if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) {
			// It's not possible to customise this code with class files, so
			// just return a Language object. This is to support uselang= hacks.
			return new Language( ...$constructorArgs );
		}

		// Check if there is a language class for the code
		$class = $this->classFromCode( $code, $fallback );
		// LanguageCode does not inherit Language, so an existing class is only used
		// when it really is a Language. The check uses the imported class name
		// instead of relying on the legacy global 'Language' alias being declared.
		if ( class_exists( $class ) && is_a( $class, Language::class, true ) ) {
			return new $class( ...$constructorArgs );
		}

		// Keep trying the fallback list until we find an existing Language class.
		// The same guard as above is applied so that an unrelated class which merely
		// matches the naming scheme is skipped rather than instantiated.
		foreach ( $this->langFallback->getAll( $code ) as $fallbackCode ) {
			$class = $this->classFromCode( $fallbackCode );
			if ( class_exists( $class ) && is_a( $class, Language::class, true ) ) {
				// TODO allow additional dependencies to be injected for subclasses somehow
				return new $class( ...$constructorArgs );
			}
		}

		throw new LogicException( "Invalid fallback sequence for language '$code'" );
	}

	/**
	 * Build the name of the language class for a code: 'Language' followed by the
	 * code with its first character upper-cased and '-' replaced by '_'. When
	 * $fallback is true, 'en' maps to the base 'Language' class instead.
	 *
	 * @param string $code
	 * @param bool $fallback Whether we're going through the language fallback chain
	 * @return string Name of the language class
	 */
	private function classFromCode( $code, $fallback = true ) {
		if ( $fallback && $code === 'en' ) {
			return 'Language';
		}

		return 'Language' . str_replace( '-', '_', ucfirst( $code ) );
	}

	/** * Get the "parent" language which has a converter to convert a "compatible" language * (in another variant) to this language (eg., zh for zh-cn, but not en for en-gb). 
* * @note This method does not contain the deprecated and compatibility * mappings of Language::getLanguage(string). * * @param string|Bcp47Code $code The language to convert to; can be an * internal MediaWiki language code or a Bcp47Code object (which includes * Language, which implements Bcp47Code). * @return Language|null A base language which has a converter to the given * language, or null if none exists. * @since 1.22 */
	public function getParentLanguage( $code ) {
		if ( $code instanceof Language ) {
			$code = $code->getCode();
		} elseif ( $code instanceof Bcp47Code ) {
			$code = LanguageCode::bcp47ToInternal( $code );
		}
		// $code is now a mediawiki internal code string.

		// We deliberately use array_key_exists() instead of isset() because we cache null.
		if ( array_key_exists( $code, $this->parentLangCache ) ) {
			return $this->parentLangCache[$code];
		}

		$parent = null;
		if ( $this->langNameUtils->isValidBuiltInCode( $code ) ) {
			// The first language with variants whose converter knows $code wins.
			foreach ( LanguageConverter::$languagesWithVariants as $mainCode ) {
				$candidate = $this->getLanguage( $mainCode );
				$converter = $this->langConverterFactory->getLanguageConverter( $candidate );
				if ( $converter->hasVariant( $code ) ) {
					$parent = $candidate;
					break;
				}
			}
		}

		// Remember the outcome, including "no parent" (null).
		$this->parentLangCache[$code] = $parent;
		return $parent;
	}
}
PK ! S�@�\ \ LCStoreNull.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */

/**
 * Null store backend, used to avoid DB errors during MediaWiki installation.
 *
 * Every write method is an empty no-op and get() always returns null, so
 * nothing is ever stored or retrieved.
 *
 * @ingroup Language
 */
class LCStoreNull implements LCStore {
	/**
	 * Always reports a miss.
	 *
	 * @param string $code Ignored
	 * @param string $key Ignored
	 * @return null
	 */
	public function get( $code, $key ) {
		return null;
	}

	/**
	 * No-op.
	 *
	 * @param string $code Ignored
	 */
	public function startWrite( $code ) {
	}

	/**
	 * No-op.
	 */
	public function finishWrite() {
	}

	/**
	 * No-op; the value is discarded.
	 *
	 * @param string $key Ignored
	 * @param mixed $value Ignored
	 */
	public function set( $key, $value ) {
	}
}
PK ! ���_`� `� MessageCache.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* http://www.gnu.org/copyleft/gpl.html * * @file */ use MediaWiki\Config\ServiceOptions; use MediaWiki\Content\Content; use MediaWiki\Context\RequestContext; use MediaWiki\Deferred\DeferredUpdates; use MediaWiki\Deferred\MessageCacheUpdate; use MediaWiki\HookContainer\HookContainer; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Language\ILanguageConverter; use MediaWiki\Language\Language; use MediaWiki\Languages\LanguageConverterFactory; use MediaWiki\Languages\LanguageFactory; use MediaWiki\Languages\LanguageFallback; use MediaWiki\Languages\LanguageNameUtils; use MediaWiki\Linker\LinkTarget; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MainConfigNames; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageReference; use MediaWiki\Page\PageReferenceValue; use MediaWiki\Parser\Parser; use MediaWiki\Parser\ParserFactory; use MediaWiki\Parser\ParserOptions; use MediaWiki\Parser\ParserOutput; use MediaWiki\Revision\SlotRecord; use MediaWiki\StubObject\StubObject; use MediaWiki\StubObject\StubUserLang; use MediaWiki\Title\Title; use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Wikimedia\LightweightObjectStore\ExpirationAwareness; use Wikimedia\ObjectCache\BagOStuff; use Wikimedia\ObjectCache\EmptyBagOStuff; use Wikimedia\ObjectCache\WANObjectCache; use Wikimedia\Rdbms\Database; use Wikimedia\Rdbms\IDBAccessObject; use Wikimedia\Rdbms\IExpression; use Wikimedia\Rdbms\IResultWrapper; use Wikimedia\Rdbms\LikeValue; use Wikimedia\RequestTimeout\TimeoutException; use Wikimedia\ScopedCallback; /** * MediaWiki message cache structure version. * Bump this whenever the message cache format has changed. */ define( 'MSG_CACHE_VERSION', 2 ); /** * Cache messages that are defined by MediaWiki-namespace pages or by hooks. 
* * @ingroup Language */ class MessageCache implements LoggerAwareInterface { /** * Options to be included in the ServiceOptions */ public const CONSTRUCTOR_OPTIONS = [ MainConfigNames::UseDatabaseMessages, MainConfigNames::MaxMsgCacheEntrySize, MainConfigNames::AdaptiveMessageCache, MainConfigNames::UseXssLanguage, MainConfigNames::RawHtmlMessages, ]; /** * The size of the MapCacheLRU which stores message data. The maximum * number of languages which can be efficiently loaded in a given request. */ public const MAX_REQUEST_LANGUAGES = 10; private const FOR_UPDATE = 1; // force message reload /** How long to wait for memcached locks */ private const WAIT_SEC = 15; /** How long memcached locks last */ private const LOCK_TTL = 30; /** * Lifetime for cache, for keys stored in $wanCache, in seconds. */ private const WAN_TTL = ExpirationAwareness::TTL_DAY; /** @var LoggerInterface */ private $logger; /** * Process cache of loaded messages that are defined in MediaWiki namespace * * @var MapCacheLRU Map of (language code => key => " <MESSAGE>" or "!TOO BIG" or "!ERROR") */ private $cache; /** * Map of (lowercase message key => unused) for all software-defined messages * * @var array */ private $systemMessageNames; /** * @var bool[] Map of (language code => boolean) */ private $cacheVolatile = []; /** * Should mean that database cannot be used, but check * @var bool */ private $disable; /** @var int Maximum entry size in bytes */ private $maxEntrySize; /** @var bool */ private $adaptive; /** @var bool */ private $useXssLanguage; /** @var string[] */ private $rawHtmlMessages; /** * Message cache has its own parser which it uses to transform messages * @var ParserOptions */ private $parserOptions; /** @var ?Parser Lazy-created via self::getParser() */ private $parser = null; /** * @var bool */ private $inParser = false; /** @var WANObjectCache */ private $wanCache; /** @var BagOStuff */ private $clusterCache; /** @var BagOStuff */ private $srvCache; /** @var Language */ 
private $contLang;
	/** @var string */
	private $contLangCode;
	/** @var ILanguageConverter */
	private $contLangConverter;
	/** @var LanguageFactory */
	private $langFactory;
	/** @var LocalisationCache */
	private $localisationCache;
	/** @var LanguageNameUtils */
	private $languageNameUtils;
	/** @var LanguageFallback */
	private $languageFallback;
	/** @var HookRunner */
	private $hookRunner;
	/** @var ParserFactory */
	private $parserFactory;

	/** @var (string|callable)[]|null */
	private $messageKeyOverrides;

	/**
	 * Normalize message key input: spaces become underscores and the first
	 * character is lower-cased.
	 *
	 * @param string $key Input message key to be normalized
	 * @return string Normalized message key
	 */
	public static function normalizeKey( $key ) {
		$normalized = strtr( $key, ' ', '_' );
		if ( $normalized === '' ) {
			// T300792
			return $normalized;
		}

		// A first byte of 128 or above is not ASCII; leave lower-casing to the content language.
		if ( ord( $normalized ) >= 128 ) {
			return MediaWikiServices::getInstance()->getContentLanguage()->lcfirst( $normalized );
		}

		$normalized[0] = strtolower( $normalized[0] );
		return $normalized;
	}

	/**
	 * @internal For use by ServiceWiring
	 * @param WANObjectCache $wanCache
	 * @param BagOStuff $clusterCache
	 * @param BagOStuff $serverCache
	 * @param Language $contLang Content language of site
	 * @param LanguageConverterFactory $langConverterFactory
	 * @param LoggerInterface $logger
	 * @param ServiceOptions $options Must provide self::CONSTRUCTOR_OPTIONS
	 * @param LanguageFactory $langFactory
	 * @param LocalisationCache $localisationCache
	 * @param LanguageNameUtils $languageNameUtils
	 * @param LanguageFallback $languageFallback
	 * @param HookContainer $hookContainer
	 * @param ParserFactory $parserFactory
	 */
	public function __construct(
		WANObjectCache $wanCache,
		BagOStuff $clusterCache,
		BagOStuff $serverCache,
		Language $contLang,
		LanguageConverterFactory $langConverterFactory,
		LoggerInterface $logger,
		ServiceOptions $options,
		LanguageFactory $langFactory,
		LocalisationCache $localisationCache,
		LanguageNameUtils $languageNameUtils,
		LanguageFallback $languageFallback,
		HookContainer $hookContainer,
		ParserFactory $parserFactory
	) {
		// Cache tiers
		$this->srvCache = $serverCache;
		$this->clusterCache = $clusterCache;
		$this->wanCache = $wanCache;

		// Content language plus the values derived from it
		$this->contLang = $contLang;
		$this->contLangConverter = $langConverterFactory->getLanguageConverter( $contLang );
		$this->contLangCode = $contLang->getCode();

		// Remaining collaborators
		$this->logger = $logger;
		$this->langFactory = $langFactory;
		$this->localisationCache = $localisationCache;
		$this->languageNameUtils = $languageNameUtils;
		$this->languageFallback = $languageFallback;
		$this->hookRunner = new HookRunner( $hookContainer );
		$this->parserFactory = $parserFactory;

		// limit size
		$this->cache = new MapCacheLRU( self::MAX_REQUEST_LANGUAGES );

		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->rawHtmlMessages = $options->get( MainConfigNames::RawHtmlMessages );
		$this->useXssLanguage = $options->get( MainConfigNames::UseXssLanguage );
		$this->adaptive = $options->get( MainConfigNames::AdaptiveMessageCache );
		$this->maxEntrySize = $options->get( MainConfigNames::MaxMsgCacheEntrySize );
		$this->disable = !$options->get( MainConfigNames::UseDatabaseMessages );
	}

	/**
	 * Replace the logger used by this object.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * ParserOptions is lazily initialised.
	 *
	 * While the context user is not safe to load, a throw-away anonymous
	 * ParserOptions is returned and nothing is cached.
	 *
	 * @return ParserOptions
	 */
	private function getParserOptions() {
		if ( $this->parserOptions ) {
			return $this->parserOptions;
		}

		$context = RequestContext::getMain();
		$user = $context->getUser();
		if ( !$user->isSafeToLoad() ) {
			// It isn't safe to use the context user yet, so don't try to get a
			// ParserOptions for it. And don't cache this ParserOptions
			// either.
			$anonOptions = ParserOptions::newFromAnon();
			$anonOptions->setAllowUnsafeRawHtml( false );
			return $anonOptions;
		}

		$this->parserOptions = ParserOptions::newFromContext( $context );
		// Messages may take parameters that could come
		// from malicious sources. As a precaution, disable
		// the <html> parser tag when parsing messages.
		$this->parserOptions->setAllowUnsafeRawHtml( false );

		return $this->parserOptions;
	}

	/** * Try to load the cache from APC. * * @param string $code Optional language code, see documentation of load(). 
* @return array|false The cache array, or false if not in cache. */
	private function getLocalCache( $code ) {
		return $this->srvCache->get( $this->srvCache->makeKey( self::class, $code ) );
	}

	/**
	 * Save the cache array for a language to the server-local cache.
	 *
	 * @param string $code
	 * @param array $cache The cache array
	 */
	private function saveToLocalCache( $code, $cache ) {
		$this->srvCache->set( $this->srvCache->makeKey( self::class, $code ), $cache );
	}

	/**
	 * Make sure the messages for a language are in the process cache.
	 *
	 * The actual work is done by loadUnguarded(), which tries the server-local
	 * cache, then the cluster cache, then the database.
	 *
	 * Nothing is loaded while $this->disable is true. That flag is also set here
	 * when loadUnguarded() throws, so that no further load is attempted while the
	 * exception is being handled.
	 *
	 * Returns true if the language is already loaded (and $mode is not FOR_UPDATE)
	 * or if loading is disabled; otherwise returns the result of loadUnguarded().
	 *
	 * @param string $code Which language to load messages for
	 * @param int|null $mode Use MessageCache::FOR_UPDATE to skip process cache [optional]
	 * @return bool
	 */
	private function load( string $code, $mode = null ) {
		// Remembers whether the "disabled" notice has already been logged
		static $shownDisabled = false;

		// Don't do double loading...
		if ( $this->isLanguageLoaded( $code ) && $mode !== self::FOR_UPDATE ) {
			return true;
		}

		if ( $this->disable ) {
			// Show a log message (once) if loading is disabled
			if ( !$shownDisabled ) {
				$this->logger->debug( __METHOD__ . ': disabled' );
				$shownDisabled = true;
			}

			return true;
		}

		try {
			$loaded = $this->loadUnguarded( $code, $mode );
		} catch ( Throwable $e ) {
			// Don't try to load again during the exception handler
			$this->disable = true;
			throw $e;
		}

		return $loaded;
	}

	/** * Load messages from the cache or database, without exception guarding. 
* * @param string $code Which language to load messages for * @param int|null $mode Use MessageCache::FOR_UPDATE to skip process cache [optional] * @return bool */
	private function loadUnguarded( $code, $mode ) {
		$success = false; // Keep track of success
		$staleCache = false; // a cache array with expired data, or false if none has been loaded
		$where = []; // Debug info, delayed to avoid spamming debug log too much

		// A hash of the expected content is stored in a WAN cache key, providing a way
		// to invalidate the local cache on every server whenever a message page changes.
		[ $hash, $hashVolatile ] = $this->getValidationHash( $code );
		$this->cacheVolatile[$code] = $hashVolatile;
		// True when the local cache is stale only because the hash is volatile
		$volatilityOnlyStaleness = false;

		// Try the local cache and check against the cluster hash key...
		$cache = $this->getLocalCache( $code );
		if ( !$cache ) {
			$where[] = 'local cache is empty';
		} elseif ( !isset( $cache['HASH'] ) || $cache['HASH'] !== $hash ) {
			$where[] = 'local cache has the wrong hash';
			$staleCache = $cache;
		} elseif ( $this->isCacheExpired( $cache ) ) {
			$where[] = 'local cache is expired';
			$staleCache = $cache;
		} elseif ( $hashVolatile ) {
			// Some recent message page changes might not show due to DB lag
			$where[] = 'local cache validation key is expired/volatile';
			$staleCache = $cache;
			$volatilityOnlyStaleness = true;
		} else {
			$where[] = 'got from local cache';
			$this->cache->set( $code, $cache );
			$success = true;
		}

		if ( !$success ) {
			// Try the cluster cache, using a lock for regeneration...
			$cacheKey = $this->clusterCache->makeKey( 'messages', $code );
			// At most two passes: the second one only happens after waiting for
			// another thread in the 'cantacquire' branch below.
			for ( $failedAttempts = 0; $failedAttempts <= 1; $failedAttempts++ ) {
				if ( $volatilityOnlyStaleness && $staleCache ) {
					// While the cluster cache *might* be more up-to-date, we do not want
					// the I/O strain of every application server fetching the key here during
					// the volatility period. Either this thread wins the lock and regenerates
					// the cache or the stale local cache value gets reused.
					$where[] = 'global cache is presumed expired';
				} else {
					$cache = $this->clusterCache->get( $cacheKey );
					if ( !$cache ) {
						$where[] = 'global cache is empty';
					} elseif ( $this->isCacheExpired( $cache ) ) {
						$where[] = 'global cache is expired';
						$staleCache = $cache;
					} elseif ( $hashVolatile ) {
						// Some recent message page changes might not show due to DB lag
						$where[] = 'global cache is expired/volatile';
						$staleCache = $cache;
					} else {
						$where[] = 'got from global cache';
						$this->cache->set( $code, $cache );
						$this->saveToCaches( $cache, 'local-only', $code );
						$success = true;
						break;
					}
				}

				// We need to call loadFromDB(). Limit the concurrency to one thread.
				// This prevents the site from going down when the cache expires.
				// Note that the DB slam protection lock here is non-blocking.
				$loadStatus = $this->loadFromDBWithMainLock( $code, $where, $mode );
				if ( $loadStatus === true ) {
					$success = true;
					break;
				} elseif ( $staleCache ) {
					// Use the stale cache while some other thread constructs the new one
					$where[] = 'using stale cache';
					$this->cache->set( $code, $staleCache );
					$success = true;
					break;
				} elseif ( $failedAttempts > 0 ) {
					$where[] = 'failed to find cache after waiting';
					// Already blocked once, so avoid another lock/unlock cycle.
					// This case will typically be hit if memcached is down, or if
					// loadFromDB() takes longer than the lock wait timeout
					// (presumably self::WAIT_SEC; this class defines no LOCK_WAIT constant).
					break;
				} elseif ( $loadStatus === 'cantacquire' ) {
					// Wait for the other thread to finish, then retry. Normally,
					// the memcached get() will then yield the other thread's result.
					$where[] = 'waiting for other thread to complete';
					[ , $ioError ] = $this->getReentrantScopedLock( $code );
					if ( $ioError ) {
						$where[] = 'failed waiting';
						// Call loadFromDB() with concurrency limited to one thread per server.
						// It should be rare for all servers to lack even a stale local cache.
						$success = $this->loadFromDBWithLocalLock( $code, $where, $mode );
						break;
					}
					// No I/O error: fall through to the second pass of the loop
				} else {
					// Disable cache; $loadStatus is 'disabled'
					break;
				}
			}
		}

		if ( !$success ) {
			$where[] = 'loading FAILED - cache is disabled';
			$this->disable = true;
			// An empty array is stored so that the language still counts as loaded
			$this->cache->set( $code, [] );
			$this->logger->error( __METHOD__ . ": Failed to load $code" );
			// This used to throw an exception, but that led to nasty side effects like
			// the whole wiki being instantly down if the memcached server died
		}

		if ( !$this->isLanguageLoaded( $code ) ) {
			throw new LogicException( "Process cache for '$code' should be set by now." );
		}

		$info = implode( ', ', $where );
		$this->logger->debug( __METHOD__ . ": Loading $code... $info" );

		return $success;
	}

	/**
	 * Regenerate the message cache from the database while holding the main
	 * (non-blocking) lock, then save the result to all cache levels.
	 *
	 * @param string $code
	 * @param string[] &$where List of debug comments
	 * @param int|null $mode Use MessageCache::FOR_UPDATE to use DB_PRIMARY
	 * @return true|string One of (true, "cantacquire", "disabled")
	 */
	private function loadFromDBWithMainLock( $code, array &$where, $mode = null ) {
		// If cache updates on all levels fail, give up on message overrides.
		// This is to avoid easy site outages; see $saveSuccess comments below.
		$statusKey = $this->clusterCache->makeKey( 'messages', $code, 'status' );
		$status = $this->clusterCache->get( $statusKey );
		if ( $status === 'error' ) {
			$where[] = "could not load; method is still globally disabled";
			return 'disabled';
		}

		// Now let's regenerate
		$where[] = 'loading from DB';

		// Lock the cache to prevent conflicting writes.
		// This lock is non-blocking so stale cache can quickly be used.
		// Note that load() will call a blocking getReentrantScopedLock()
		// after this if it really needs to wait for any current thread.
		[ $scopedLock ] = $this->getReentrantScopedLock( $code, 0 );
		if ( !$scopedLock ) {
			$where[] = 'could not acquire main lock';
			return 'cantacquire';
		}

		$cache = $this->loadFromDB( $code, $mode );
		$this->cache->set( $code, $cache );
		$saveSuccess = $this->saveToCaches( $cache, 'all', $code );

		if ( !$saveSuccess ) {
			/**
			 * Cache save has failed.
			 *
			 * There are two main scenarios where this could be a problem:
			 * - The cache is more than the maximum size (typically 1MB compressed).
			 * - Memcached has no space remaining in the relevant slab class. This is
			 *   unlikely with recent versions of memcached.
			 *
			 * Either way, if there is a local cache, nothing bad will happen. If there
			 * is no local cache, disabling the message cache for all requests avoids
			 * incurring a loadFromDB() overhead on every request, and thus saves the
			 * wiki from complete downtime under moderate traffic conditions.
			 */
			if ( $this->srvCache instanceof EmptyBagOStuff ) {
				// The 'error' status is what makes the check at the top return 'disabled'
				$this->clusterCache->set( $statusKey, 'error', 60 * 5 );
				$where[] = 'could not save cache, disabled globally for 5 minutes';
			} else {
				$where[] = "could not save global cache";
			}
		}

		// The process cache was populated above even if saving failed
		return true;
	}

	/**
	 * Regenerate the message cache from the database while holding a lock in the
	 * server-local cache, and save the result to the local cache only.
	 *
	 * @param string $code
	 * @param string[] &$where List of debug comments
	 * @param int|null $mode Use MessageCache::FOR_UPDATE to use DB_PRIMARY
	 * @return bool Success; false if the local lock could not be obtained
	 */
	private function loadFromDBWithLocalLock( $code, array &$where, $mode = null ) {
		$success = false;
		$where[] = 'loading from DB using local lock';

		$scopedLock = $this->srvCache->getScopedLock(
			$this->srvCache->makeKey( 'messages', $code ),
			self::WAIT_SEC,
			self::LOCK_TTL,
			__METHOD__
		);
		if ( $scopedLock ) {
			$cache = $this->loadFromDB( $code, $mode );
			$this->cache->set( $code, $cache );
			$this->saveToCaches( $cache, 'local-only', $code );
			$success = true;
		}

		return $success;
	}

	/** * Loads cacheable messages from the database. Messages bigger than * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded * on-demand from the database later.
*
	 * @param string $code Language code
	 * @param int|null $mode Use MessageCache::FOR_UPDATE to skip process cache
	 * @return array Loaded messages for storing in caches
	 */
	private function loadFromDB( $code, $mode = null ) {
		$icp = MediaWikiServices::getInstance()->getConnectionProvider();

		// FOR_UPDATE reads from the primary database; everything else uses a replica
		$dbr = ( $mode === self::FOR_UPDATE ) ? $icp->getPrimaryDatabase() : $icp->getReplicaDatabase();

		$cache = [];

		$mostused = []; // list of "<cased message key>/<code>"
		if ( $this->adaptive && $code !== $this->contLangCode ) {
			if ( !$this->cache->has( $this->contLangCode ) ) {
				$this->load( $this->contLangCode );
			}
			$mostused = array_keys( $this->cache->get( $this->contLangCode ) );
			foreach ( $mostused as $key => $value ) {
				$mostused[$key] = "$value/$code";
			}
		}

		// Common conditions
		$conds = [
			// Treat redirects as not existing (T376398)
			'page_is_redirect' => 0,
			'page_namespace' => NS_MEDIAWIKI,
		];
		if ( count( $mostused ) ) {
			$conds['page_title'] = $mostused;
		} elseif ( $code !== $this->contLangCode ) {
			$conds[] = $dbr->expr(
				'page_title',
				IExpression::LIKE,
				new LikeValue( $dbr->anyString(), '/', $code )
			);
		} else {
			// Effectively disallows use of '/' character in NS_MEDIAWIKI for uses
			// other than language code.
			$conds[] = $dbr->expr(
				'page_title',
				IExpression::NOT_LIKE,
				new LikeValue( $dbr->anyString(), '/', $dbr->anyString() )
			);
		}

		// Set the stubs for oversized software-defined messages in the main cache map
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'page_title', 'page_latest' ] )
			->from( 'page' )
			->where( $conds )
			->andWhere( $dbr->expr( 'page_len', '>', intval( $this->maxEntrySize ) ) )
			->caller( __METHOD__ . "($code)-big" )->fetchResultSet();
		foreach ( $res as $row ) {
			// Include entries/stubs for all keys in $mostused in adaptive mode
			if ( $this->adaptive || $this->isMainCacheable( $row->page_title ) ) {
				$cache[$row->page_title] = '!TOO BIG';
			}
			// At least include revision ID so page changes are reflected in the hash
			$cache['EXCESSIVE'][$row->page_title] = $row->page_latest;
		}

		// RevisionStore cannot be injected as it would break the installer since
		// it instantiates MessageCache before the DB.
		$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
		// Set the text for small software-defined messages in the main cache map
		$revQuery = $revisionStore->getQueryInfo( [ 'page' ] );

		// T231196: MySQL/MariaDB (10.1.37) can sometimes irrationally decide that querying `actor` then
		// `revision` then `page` is somehow better than starting with `page`. Tell it not to reorder the
		// query (and also reorder it ourselves because as generated by RevisionStore it'll have
		// `revision` first rather than `page`).
		$revQuery['joins']['revision'] = $revQuery['joins']['page'];
		unset( $revQuery['joins']['page'] );
		// It isn't actually necessary to reorder $revQuery['tables'] as Database does the right thing
		// when join conditions are given for all joins, but Gergő is wary of relying on that so pull
		// `page` to the start.
		$revQuery['tables'] = array_merge(
			[ 'page' ],
			array_diff( $revQuery['tables'], [ 'page' ] )
		);

		$res = $dbr->newSelectQueryBuilder()
			->queryInfo( $revQuery )
			->where( $conds )
			->andWhere( [
				$dbr->expr( 'page_len', '<=', intval( $this->maxEntrySize ) ),
				'page_latest = rev_id' // get the latest revision only
			] )
			->caller( __METHOD__ . "($code)-small" )
			->straightJoinOption()
			->fetchResultSet();

		// Don't load content from uncacheable rows (T313004)
		[ $cacheableRows, $uncacheableRows ] = $this->separateCacheableRows( $res );
		$result = $revisionStore->newRevisionsFromBatch( $cacheableRows, [
			'slots' => [ SlotRecord::MAIN ],
			'content' => true
		] );
		$revisions = $result->isOK() ? $result->getValue() : [];

		foreach ( $cacheableRows as $row ) {
			try {
				$rev = $revisions[$row->rev_id] ?? null;
				$content = $rev ? $rev->getContent( SlotRecord::MAIN ) : null;
				$text = $this->getMessageTextFromContent( $content );
			} catch ( TimeoutException $e ) {
				// Timeouts must propagate; only other failures degrade to '!ERROR'
				throw $e;
			} catch ( Exception $ex ) {
				$text = false;
			}

			if ( !is_string( $text ) ) {
				$entry = '!ERROR';
				$this->logger->error(
					__METHOD__
					. ": failed to load message page text for {$row->page_title} ($code)"
				);
			} else {
				// A leading space marks an entry that carries real message text
				$entry = ' ' . $text;
			}
			$cache[$row->page_title] = $entry;
		}

		foreach ( $uncacheableRows as $row ) {
			// T193271: The cache object gets too big and slow to generate.
			// At least include revision ID, so that page changes are reflected in the hash.
			$cache['EXCESSIVE'][$row->page_title] = $row->page_latest;
		}

		$cache['VERSION'] = MSG_CACHE_VERSION;
		ksort( $cache );

		// Hash for validating local cache (APC). No need to take into account
		// messages larger than $wgMaxMsgCacheEntrySize, since those are only
		// stored and fetched from memcache.
		$cache['HASH'] = md5( serialize( $cache ) );
		$cache['EXPIRY'] = wfTimestamp( TS_MW, time() + self::WAN_TTL );
		unset( $cache['EXCESSIVE'] ); // only needed for hash

		return $cache;
	}

	/**
	 * Whether the language was loaded and its data is still in the process cache.
	 *
	 * @param string $lang
	 * @return bool
	 */
	private function isLanguageLoaded( $lang ) {
		// It is important that this only returns true if the cache was fully
		// populated by load(), so that callers can assume all cache keys exist.
		// It is possible for $this->cache to be only partially populated through
		// methods like MessageCache::replace(), which must not make this method
		// return true (T208897). And this method must cease to return true
		// if the language was evicted by MapCacheLRU (T230690).
		return $this->cache->hasField( $lang, 'VERSION' );
	}

	/**
	 * Can the given DB key be added to the main cache blob? To reduce the
	 * abuse impact of the MediaWiki namespace by {{int:}} and CentralNotice,
	 * this is only true if the page overrides a predefined message.
	 *
	 * @param string $name Message name (possibly with /code suffix)
	 * @param string|null $code The language code. If this is null, message
	 *  presence will be bulk loaded for the content language. Otherwise,
	 *  presence will be detected by loading the specified message.
	 * @return bool
	 */
	private function isMainCacheable( $name, $code = null ) {
		// Convert the first letter to lowercase
		$name = $this->contLang->lcfirst( $name );

		// Include common conversion table pages. This also avoids problems with
		// Installer::parse() bailing out due to disallowed DB queries (T207979).
		if ( str_starts_with( $name, 'conversiontable/' ) ) {
			return true;
		}

		// Strip the /code suffix
		$msg = preg_replace( '/\/[a-z0-9-]{2,}$/', '', $name );

		if ( $code === null ) {
			// Bulk load
			if ( $this->systemMessageNames === null ) {
				$this->systemMessageNames = array_fill_keys(
					$this->localisationCache->getSubitemList( $this->contLangCode, 'messages' ),
					true
				);
			}

			return isset( $this->systemMessageNames[$msg] );
		}

		// Use individual subitem
		return $this->localisationCache->getSubitem( $code, 'messages', $msg ) !== null;
	}

	/**
	 * Separate cacheable from uncacheable rows in a page/revision query result.
	 *
	 * @param IResultWrapper $res
	 * @return array{0:IResultWrapper|stdClass[],1:stdClass[]} An array with the cacheable
	 *    rows in the first element and the uncacheable rows in the second.
*/
	private function separateCacheableRows( $res ) {
		if ( $this->adaptive ) {
			// Include entries/stubs for all keys in $mostused in adaptive mode
			return [ $res, [] ];
		}

		$cacheableRows = [];
		$uncacheableRows = [];
		foreach ( $res as $row ) {
			if ( $this->isMainCacheable( $row->page_title ) ) {
				$cacheableRows[] = $row;
			} else {
				$uncacheableRows[] = $row;
			}
		}

		return [ $cacheableRows, $uncacheableRows ];
	}

	/**
	 * Updates cache as necessary when message page is changed
	 *
	 * @param string $title Message cache key with the initial uppercase letter
	 * @param string|false $text New contents of the page (false if deleted)
	 */
	public function replace( $title, $text ) {
		if ( $this->disable ) {
			return;
		}

		[ $msg, $code ] = $this->figureMessage( $title );
		if ( str_contains( $title, '/' ) && $code === $this->contLangCode ) {
			// Content language overrides do not use the /<code> suffix
			return;
		}

		// (a) Update the process cache with the new message text
		if ( $text === false ) {
			// Page deleted
			$this->cache->setField( $code, $title, '!NONEXISTENT' );
		} else {
			// Ignore $wgMaxMsgCacheEntrySize so the process cache is up-to-date
			$this->cache->setField( $code, $title, ' ' . $text );
		}

		// (b) Update the shared caches in a deferred update with a fresh DB snapshot
		DeferredUpdates::addUpdate(
			new MessageCacheUpdate( $code, $title, $msg ),
			DeferredUpdates::PRESEND
		);
	}

	/**
	 * Re-read the given message pages from the database and write the refreshed
	 * entries to the process cache, the shared caches and the message blob store.
	 *
	 * @param string $code
	 * @param array[] $replacements List of (title, message key) pairs
	 */
	public function refreshAndReplaceInternal( string $code, array $replacements ) {
		// Allow one caller at a time to avoid race conditions
		[ $scopedLock ] = $this->getReentrantScopedLock( $code );
		if ( !$scopedLock ) {
			foreach ( $replacements as [ $title ] ) {
				$this->logger->error(
					__METHOD__ . ': could not acquire lock to update {title} ({code})',
					[ 'title' => $title, 'code' => $code ]
				);
			}

			return;
		}

		// Load the existing cache to update it in the local DC cache.
		// The other DCs will see a hash mismatch.
		if ( $this->load( $code, self::FOR_UPDATE ) ) {
			$cache = $this->cache->get( $code );
		} else {
			// Err? Fall back to loading from the database.
			$cache = $this->loadFromDB( $code, self::FOR_UPDATE );
		}

		// Check if individual cache keys should exist and update cache accordingly
		$newTextByTitle = []; // map of (title => content)
		$newBigTitles = []; // map of (title => latest revision ID), like EXCESSIVE in loadFromDB()
		// Can not inject the WikiPageFactory as it would break the installer since
		// it instantiates MessageCache before the DB.
		$wikiPageFactory = MediaWikiServices::getInstance()->getWikiPageFactory();
		foreach ( $replacements as [ $title ] ) {
			$page = $wikiPageFactory->newFromTitle( Title::makeTitle( NS_MEDIAWIKI, $title ) );
			$page->loadPageData( IDBAccessObject::READ_LATEST );
			$text = $this->getMessageTextFromContent( $page->getContent() );
			// Remember the text for the blob store update later on
			$newTextByTitle[$title] = $text ?? '';

			// Note that if $text is not a string, then $cache gets a !NONEXISTENT entry
			if ( !is_string( $text ) ) {
				$cache[$title] = '!NONEXISTENT';
			} elseif ( strlen( $text ) > $this->maxEntrySize ) {
				$cache[$title] = '!TOO BIG';
				$newBigTitles[$title] = $page->getLatest();
			} else {
				$cache[$title] = ' ' . $text;
			}
		}

		// Update HASH for the new key. Incorporates various administrative keys,
		// including the old HASH (and thereby the EXCESSIVE value from loadFromDB()
		// and previous replace() calls), but that doesn't really matter since we
		// only ever compare it for equality with a copy saved by saveToCaches().
		$cache['HASH'] = md5( serialize( $cache + [ 'EXCESSIVE' => $newBigTitles ] ) );
		// Update the too-big WAN cache entries now that we have the new HASH
		foreach ( $newBigTitles as $title => $id ) {
			// Match logic of loadCachedMessagePageEntry()
			$this->wanCache->set(
				$this->bigMessageCacheKey( $cache['HASH'], $title ),
				' ' . $newTextByTitle[$title],
				self::WAN_TTL
			);
		}
		// Mark this cache as definitely being "latest" (non-volatile) so
		// load() calls do not try to refresh the cache with replica DB data
		$cache['LATEST'] = time();
		// Update the process cache
		$this->cache->set( $code, $cache );
		// Pre-emptively update the local datacenter cache so things like edit filter and
		// prevented changes are reflected immediately; these often use MediaWiki: pages.
		// The datacenter handling replace() calls should be the same one handling edits
		// as they require HTTP POST.
		$this->saveToCaches( $cache, 'all', $code );
		// Release the lock now that the cache is saved
		ScopedCallback::consume( $scopedLock );

		// Relay the purge. Touching this check key expires cache contents
		// and local cache (APC) validation hash across all datacenters.
		$this->wanCache->touchCheckKey( $this->getCheckKey( $code ) );

		// Purge the messages in the message blob store and fire any hook handlers
		$blobStore = MediaWikiServices::getInstance()->getResourceLoader()->getMessageBlobStore();
		foreach ( $replacements as [ $title, $msg ] ) {
			$blobStore->updateMessage( $this->contLang->lcfirst( $msg ) );
			$this->hookRunner->onMessageCacheReplace( $title, $newTextByTitle[$title] );
		}
	}

	/**
	 * Is the given cache array expired due-to-time passing or a version change?
	 *
	 * @param array $cache
	 * @return bool
	 */
	private function isCacheExpired( $cache ) {
		return !isset( $cache['VERSION'] ) ||
			!isset( $cache['EXPIRY'] ) ||
			$cache['VERSION'] !== MSG_CACHE_VERSION ||
			$cache['EXPIRY'] <= wfTimestampNow();
	}

	/**
	 * Shortcut to update caches.
	 *
	 * @param array $cache Cached messages with a version.
	 * @param string $dest Either "local-only" to save to local caches only
	 *   or "all" to save to all caches.
* @param string|false $code Language code (default: false)
	 * @return bool
	 */
	private function saveToCaches( array $cache, $dest, $code = false ) {
		// Local-only saves cannot fail at the cluster level
		$stored = true;

		if ( $dest === 'all' ) {
			$clusterKey = $this->clusterCache->makeKey( 'messages', $code );
			$stored = $this->clusterCache->set( $clusterKey, $cache );
			$this->setValidationHash( $code, $cache );
		}

		// The local cache is written for both destinations
		$this->saveToLocalCache( $code, $cache );

		return $stored;
	}

	/**
	 * Get the md5 used to validate the local server cache
	 *
	 * @param string $code
	 * @return array (hash or false, bool expiry/volatility status)
	 */
	private function getValidationHash( $code ) {
		$curTTL = null;
		$hashKey = $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' );
		$checkKeys = [ $this->getCheckKey( $code ) ];
		$stored = $this->wanCache->get( $hashKey, $curTTL, $checkKeys );

		if ( !$stored ) {
			// No hash found at all; cache must regenerate to be safe
			return [ false, true ];
		}

		$hash = $stored['hash'];
		$secondsSinceLatest = time() - $stored['latest'];
		if ( $secondsSinceLatest < WANObjectCache::TTL_MINUTE ) {
			// Cache was recently updated via replace() and should be up-to-date.
			// That method is only called in the primary datacenter and uses FOR_UPDATE.
			return [ $hash, false ];
		}

		// See if the "check" key was bumped after the hash was generated
		return [ $hash, ( $curTTL < 0 ) ];
	}

	/**
	 * Set the md5 used to validate the local server cache
	 *
	 * If $cache has a 'LATEST' UNIX timestamp key, then the hash will not
	 * be treated as "volatile" by getValidationHash() for the next few seconds.
	 * This is triggered when $cache is generated using FOR_UPDATE mode.
	 *
	 * @param string $code
	 * @param array $cache Cached messages with a version
	 */
	private function setValidationHash( $code, array $cache ) {
		$hashKey = $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' );
		$payload = [
			'hash' => $cache['HASH'],
			'latest' => $cache['LATEST'] ?? 0
		];

		$this->wanCache->set( $hashKey, $payload, WANObjectCache::TTL_INDEFINITE );
	}

	/**
	 * @param string $code Which language to load messages for
	 * @param int $timeout Wait timeout in seconds
	 * @return array (ScopedCallback or null, whether locking failed due to an I/O error)
	 * @phan-return array{0:ScopedCallback|null,1:bool}
	 */
	private function getReentrantScopedLock( $code, $timeout = self::WAIT_SEC ) {
		$lockKey = $this->clusterCache->makeKey( 'messages', $code );

		// Start watching for cache errors before attempting the lock
		$errorWatchPoint = $this->clusterCache->watchErrors();
		$lock = $this->clusterCache->getScopedLock( $lockKey, $timeout, self::LOCK_TTL, __METHOD__ );

		if ( $lock ) {
			return [ $lock, false ];
		}

		// Locking failed; report whether an error was recorded since the watch point
		return [ $lock, (bool)$this->clusterCache->getLastError( $errorWatchPoint ) ];
	}

	/**
	 * Get a message from either the content language or the user language.
	 *
	 * First, assemble a list of languages to attempt getting the message from. This
	 * chain begins with the requested language and its fallbacks and then continues with
	 * the content language and its fallbacks. For each language in the chain, the following
	 * process will occur (in this order):
	 *  1. If a language-specific override, i.e., [[MW:msg/lang]], is available, use that.
	 *     Note: for the content language, there is no /lang subpage.
	 *  2. Fetch from the static CDB cache.
	 *  3. If available, check the database for fallback language overrides.
	 *
	 * This process provides a number of guarantees. When changing this code, make sure all
	 * of these guarantees are preserved.
	 *  * If the requested language is *not* the content language, then the CDB cache for that
	 *    specific language will take precedence over the root database page ([[MW:msg]]).
	 *  * Fallbacks will be just that: fallbacks. A fallback language will never be reached if
	 *    the message is available *anywhere* in the language for which it is a fallback.
	 *
	 * @param string $key The message key
	 * @param bool $useDB If true, look for the message in the DB, false
	 *   to use only the compiled l10n cache.
* @param bool|string|Language|null $language Code of the language to get the message for.
	 *   - If string and a valid code, will create a standard language object
	 *   - If string but not a valid code, will create a basic language object
	 *   - If false, create object from the current users language
	 *   - If true or null, create object from the wikis content language
	 *   - If language object, use it as given
	 *   - If this parameter omitted the object from the wikis content language is used
	 *   - Other values than a Language object or null are deprecated.
	 * @param string &$usedKey @phan-output-reference If given, will be set to the message key
	 *   that the message was fetched from (the requested key may be overridden by hooks).
	 *
	 * @return string|false False if the message doesn't exist, otherwise the
	 *   message (which can be empty)
	 */
	public function get( $key, $useDB = true, $language = null, &$usedKey = '' ) {
		if ( is_int( $key ) ) {
			// Fix numerical strings that somehow become ints on their way here
			$key = (string)$key;
		} elseif ( !is_string( $key ) ) {
			throw new TypeError( 'Message key must be a string' );
		} elseif ( $key === '' ) {
			// Shortcut: the empty key is always missing
			return false;
		}

		$language ??= $this->contLang;
		$language = $this->getLanguageObject( $language );

		// Normalise title-case input (with some inlining)
		$lckey = self::normalizeKey( $key );

		// Initialize the overrides here to prevent calling the hook too early.
		if ( $this->messageKeyOverrides === null ) {
			$this->messageKeyOverrides = [];
			$this->hookRunner->onMessageCacheFetchOverrides( $this->messageKeyOverrides );
		}

		if ( isset( $this->messageKeyOverrides[$lckey] ) ) {
			$override = $this->messageKeyOverrides[$lckey];

			// Strings are deliberately interpreted as message keys,
			// to prevent ambiguity between message keys and functions.
			if ( is_string( $override ) ) {
				$lckey = $override;
			} else {
				$lckey = $override( $lckey, $this, $language, $useDB );
			}
		}

		$this->hookRunner->onMessageCache__get( $lckey );

		$usedKey = $lckey;

		// Loop through each language in the fallback list until we find something useful
		$message = $this->getMessageFromFallbackChain(
			$language,
			$lckey,
			!$this->disable && $useDB
		);

		// If we still have no message, maybe the key was in fact a full key so try that
		if ( $message === false ) {
			$parts = explode( '/', $lckey );
			// We may get calls for things that are http-urls from sidebar
			// Let's not load nonexistent languages for those
			// They usually have more than one slash.
			if ( count( $parts ) === 2 && $parts[1] !== '' ) {
				$message = $this->localisationCache->getSubitem( $parts[1], 'messages', $parts[0] ) ?? false;
			}
		}

		// Post-processing if the message exists
		if ( $message !== false ) {
			// Fix whitespace. The search strings must be the literal HTML entity
			// spellings: message authors write these entities to protect characters
			// that editors and browsers would otherwise strip or normalise.
			$message = str_replace(
				[
					// Fix for trailing whitespace, removed by textarea
					'&#32;',
					// Fix for NBSP, converted to space by firefox
					'&nbsp;',
					'&#160;',
					'&shy;'
				],
				[
					' ',
					"\u{00A0}",
					"\u{00A0}",
					"\u{00AD}"
				],
				$message
			);
		}

		return $message;
	}

	/**
	 * Return a Language object from $langcode
	 *
	 * @param Language|string|bool $langcode Either:
	 *                  - a Language object
	 *                  - code of the language to get the message for, if it is
	 *                    a valid code create a language for that language, if
	 *                    it is a string but not a valid code then make a basic
	 *                    language object
	 *                  - a boolean: if it's false then use the global object for
	 *                    the current user's language (as a fallback for the old parameter
	 *                    functionality), or if it is true then use global object
	 *                    for the wiki's content language.
	 * @return Language|StubUserLang
	 */
	private function getLanguageObject( $langcode ) {
		# Identify which language to get or create a language object for.
		# Using is_object here due to Stub objects.
		if ( is_object( $langcode ) ) {
			# Great, we already have the object (hopefully)!
			return $langcode;
		}

		wfDeprecated( __METHOD__ . ' with not a Language object in $langcode', '1.43' );
		if ( $langcode === true || $langcode === $this->contLangCode ) {
			# $langcode is the language code of the wikis content language object.
			# or it is a boolean and value is true
			return $this->contLang;
		}

		global $wgLang;
		if ( $langcode === false || $langcode === $wgLang->getCode() ) {
			# $langcode is the language code of user language object.
			# or it was a boolean and value is false
			return $wgLang;
		}

		$validCodes = array_keys( $this->languageNameUtils->getLanguageNames() );
		if ( in_array( $langcode, $validCodes, true ) ) {
			# $langcode corresponds to a valid language.
			return $this->langFactory->getLanguage( $langcode );
		}

		# $langcode is a string, but not a valid language code; use content language.
		$this->logger->debug( 'Invalid language code passed to ' . __METHOD__ . ', falling back to content language.' );
		return $this->contLang;
	}

	/**
	 * Given a language, try and fetch messages from that language.
	 *
	 * Will also consider fallbacks of that language, the site language, and fallbacks for
	 * the site language.
	 *
	 * @see MessageCache::get
	 * @param Language|StubObject $lang Preferred language
	 * @param string $lckey Lowercase key for the message (as for localisation cache)
	 * @param bool $useDB Whether to include messages from the wiki database
	 * @return string|false The message, or false if not found
	 */
	private function getMessageFromFallbackChain( $lang, $lckey, $useDB ) {
		// Shared across both lookups so no language page is queried twice
		$alreadyTried = [];

		// First try the requested language.
		$message = $this->getMessageForLang( $lang, $lckey, $useDB, $alreadyTried );
		if ( $message !== false ) {
			return $message;
		}

		// Now try checking the site language.
		$message = $this->getMessageForLang( $this->contLang, $lckey, $useDB, $alreadyTried );
		return $message;
	}

	/**
	 * Given a language, try and fetch messages from that language and its fallbacks.
*
	 * @see MessageCache::get
	 * @param Language|StubObject $lang Preferred language
	 * @param string $lckey Lowercase key for the message (as for localisation cache)
	 * @param bool $useDB Whether to include messages from the wiki database
	 * @param bool[] &$alreadyTried Contains true for each language that has been tried already
	 * @return string|false The message, or false if not found
	 */
	private function getMessageForLang( $lang, $lckey, $useDB, &$alreadyTried ) {
		$langcode = $lang->getCode();

		// Try checking the database for the requested language
		if ( $useDB ) {
			$uckey = $this->contLang->ucfirst( $lckey );

			if ( !isset( $alreadyTried[$langcode] ) ) {
				$message = $this->getMsgFromNamespace(
					$this->getMessagePageName( $langcode, $uckey ),
					$langcode
				);
				if ( $message !== false ) {
					return $message;
				}
				$alreadyTried[$langcode] = true;
			}
		} else {
			$uckey = null;
		}

		// Return a special value handled in Message::format() to display the message key
		// (and fallback keys) and the parameters passed to the message.
		// TODO: Move to a better place.
		if ( $langcode === 'qqx' ) {
			return '($*)';
		} elseif (
			$langcode === 'x-xss' &&
			$this->useXssLanguage &&
			!in_array( $lckey, $this->rawHtmlMessages, true )
		) {
			$xssViaInnerHtml = "<script>alert('$lckey')</script>";
			$xssViaAttribute = '">' . $xssViaInnerHtml . '<x y="';
			return $xssViaInnerHtml . $xssViaAttribute . '($*)';
		}

		// Check the localisation cache
		[ $defaultMessage, $messageSource ] =
			$this->localisationCache->getSubitemWithSource( $langcode, 'messages', $lckey );
		if ( $messageSource === $langcode ) {
			return $defaultMessage;
		}

		// Try checking the database for all of the fallback languages
		if ( $useDB ) {
			$fallbackChain = $this->languageFallback->getAll( $langcode );

			foreach ( $fallbackChain as $code ) {
				if ( isset( $alreadyTried[$code] ) ) {
					continue;
				}

				$message = $this->getMsgFromNamespace(
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullable uckey is set when used
					$this->getMessagePageName( $code, $uckey ),
					$code
				);

				if ( $message !== false ) {
					return $message;
				}
				$alreadyTried[$code] = true;

				// Reached the source language of the default message. Don't look for DB overrides
				// further back in the fallback chain. (T229992)
				if ( $code === $messageSource ) {
					return $defaultMessage;
				}
			}
		}

		return $defaultMessage ?? false;
	}

	/**
	 * Get the message page name for a given language
	 *
	 * @param string $langcode
	 * @param string $uckey Uppercase key for the message
	 * @return string The page name
	 */
	private function getMessagePageName( $langcode, $uckey ) {
		if ( $langcode === $this->contLangCode ) {
			// Messages created in the content language will not have the /lang extension
			return $uckey;
		} else {
			return "$uckey/$langcode";
		}
	}

	/**
	 * Get a message from the MediaWiki namespace, with caching. The key must
	 * first be converted to two-part lang/msg form if necessary.
	 *
	 * Unlike self::get(), this function doesn't resolve fallback chains, and
	 * some callers require this behavior. LanguageConverter::parseCachedTable()
	 * and self::get() are some examples in core.
	 *
	 * @param string $title Message cache key with the initial uppercase letter
	 * @param string $code Code denoting the language to try
	 * @return string|false The message, or false if it does not exist or on error
	 */
	public function getMsgFromNamespace( $title, $code ) {
		// Load all MediaWiki page definitions into cache. Note that individual keys
		// already loaded into the cache during this request remain in the cache, which
		// includes the value of hook-defined messages.
		$this->load( $code );

		$entry = $this->cache->getField( $code, $title );

		if ( $entry !== null ) {
			// Message page exists as an override of a software messages
			if ( substr( $entry, 0, 1 ) === ' ' ) {
				// The message exists and is not '!TOO BIG' or '!ERROR'
				return (string)substr( $entry, 1 );
			} elseif ( $entry === '!NONEXISTENT' ) {
				// The text might be '-' or missing due to some data loss
				return false;
			}
			// Load the message page, utilizing the individual message cache.
			// If the page does not exist, there will be no hook handler fallbacks.
			$entry = $this->loadCachedMessagePageEntry(
				$title,
				$code,
				$this->cache->getField( $code, 'HASH' )
			);
		} else {
			// Message page either does not exist or does not override a software message
			if ( !$this->isMainCacheable( $title, $code ) ) {
				// Message page does not override any software-defined message. A custom
				// message might be defined to have content or settings specific to the wiki.
				// Load the message page, utilizing the individual message cache as needed.
				$entry = $this->loadCachedMessagePageEntry(
					$title,
					$code,
					$this->cache->getField( $code, 'HASH' )
				);
			}
			if ( $entry === null || substr( $entry, 0, 1 ) !== ' ' ) {
				// Message does not have a MediaWiki page definition; try hook handlers
				$message = false;
				// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
				$this->hookRunner->onMessagesPreLoad( $title, $message, $code );
				if ( $message !== false ) {
					$this->cache->setField( $code, $title, ' ' . $message );
				} else {
					$this->cache->setField( $code, $title, '!NONEXISTENT' );
				}

				return $message;
			}
		}

		if ( $entry !== false && substr( $entry, 0, 1 ) === ' ' ) {
			if ( $this->cacheVolatile[$code] ) {
				// Make sure that individual keys respect the WAN cache holdoff period too
				$this->logger->debug(
					__METHOD__ . ': loading volatile key \'{titleKey}\'',
					[ 'titleKey' => $title, 'code' => $code ]
				);
			} else {
				$this->cache->setField( $code, $title, $entry );
			}
			// The message exists, so make sure a string is returned
			return (string)substr( $entry, 1 );
		}

		$this->cache->setField( $code, $title, '!NONEXISTENT' );

		return false;
	}

	/**
	 * Fetch the cache entry for a single message page, reading through the
	 * server-local cache, then the WAN cache, then the database.
	 *
	 * @param string $dbKey
	 * @param string $code
	 * @param string $hash
	 * @return string Either " <MESSAGE>" or "!NONEXISTENT"
	 */
	private function loadCachedMessagePageEntry( $dbKey, $code, $hash ) {
		// Captured so log lines inside the closures name this method
		$fname = __METHOD__;
		return $this->srvCache->getWithSetCallback(
			$this->srvCache->makeKey( 'messages-big', $hash, $dbKey ),
			BagOStuff::TTL_HOUR,
			function () use ( $code, $dbKey, $hash, $fname ) {
				return $this->wanCache->getWithSetCallback(
					$this->bigMessageCacheKey( $hash, $dbKey ),
					self::WAN_TTL,
					function ( $oldValue, &$ttl, &$setOpts ) use ( $dbKey, $code, $fname ) {
						// Try loading the message from the database
						$setOpts += Database::getCacheSetOptions(
							MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase()
						);
						// Use newKnownCurrent() to avoid querying revision/user tables
						$title = Title::makeTitle( NS_MEDIAWIKI, $dbKey );
						// Injecting RevisionStore breaks installer since it
						// instantiates MessageCache before DB.
						$revision = MediaWikiServices::getInstance()
							->getRevisionLookup()
							->getKnownCurrentRevision( $title );
						if ( !$revision ) {
							// The wiki doesn't have a local override page. Cache absence with normal TTL.
							// When overrides are created, self::replace() takes care of the cache.
							return '!NONEXISTENT';
						}
						$content = $revision->getContent( SlotRecord::MAIN );
						if ( $content ) {
							$message = $this->getMessageTextFromContent( $content );
						} else {
							$this->logger->warning(
								$fname . ': failed to load page text for \'{titleKey}\'',
								[ 'titleKey' => $dbKey, 'code' => $code ]
							);
							$message = null;
						}

						if ( !is_string( $message ) ) {
							// Revision failed to load Content, or Content is incompatible with wikitext.
							// Possibly a temporary loading failure.
							$ttl = 5;

							return '!NONEXISTENT';
						}

						return ' ' . $message;
					}
				);
			}
		);
	}

	/**
	 * Expand wikitext templates in a message using the message parser.
	 *
	 * The text is returned unchanged when it contains no '{{' or when this
	 * method is re-entered while the parser is already running.
	 *
	 * @param string $message
	 * @param bool $interface
	 * @param Language|null $language
	 * @param PageReference|null $page
	 * @return string
	 */
	public function transform( $message, $interface = false, $language = null, ?PageReference $page = null ) {
		// Avoid creating parser if nothing to transform
		if ( $this->inParser || !str_contains( $message, '{{' ) ) {
			return $message;
		}

		$parser = $this->getParser();
		$popts = $this->getParserOptions();
		$popts->setInterfaceMessage( $interface );
		$popts->setTargetLanguage( $language );

		$userlang = $popts->setUserLang( $language );
		$this->inParser = true;
		try {
			$message = $parser->transformMsg( $message, $popts, $page );
		} finally {
			// Always reset the re-entrancy guard and restore the previous user
			// language, even if the parser throws; otherwise every later
			// message would skip transformation for the rest of the request.
			$this->inParser = false;
			$popts->setUserLang( $userlang );
		}

		return $message;
	}

	/**
	 * Get the parser used for messages, creating it on first use.
	 *
	 * @return Parser
	 */
	public function getParser() {
		if ( !$this->parser ) {
			$this->parser = $this->parserFactory->create();
		}

		return $this->parser;
	}

	/**
	 * Parse message wikitext with the message parser.
	 *
	 * When re-entered while the parser is already running, the text is
	 * returned HTML-escaped instead of being parsed.
	 *
	 * @param string $text
	 * @param PageReference|null $page
	 * @param bool $linestart Whether this is at the start of a line
	 * @param bool $interface Whether this is an interface message
	 * @param Language|StubUserLang|string|null $language Language code
	 * @return ParserOutput|string
	 */
	public function parse( $text, ?PageReference $page = null, $linestart = true,
		$interface = false, $language = null
	) {
		// phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgTitle
		global $wgTitle;

		if ( $this->inParser ) {
			return htmlspecialchars( $text );
		}

		$parser = $this->getParser();
		$popts = $this->getParserOptions();
		$popts->setInterfaceMessage( $interface );

		if ( is_string( $language ) ) {
			$language = $this->langFactory->getLanguage( $language );
		}
		$popts->setTargetLanguage( $language );

		if ( !$page ) {
			$logger = LoggerFactory::getInstance( 'GlobalTitleFail' );
			$logger->info(
				__METHOD__ . ' called with no title set.',
				[ 'exception' => new RuntimeException ]
			);
			$page = $wgTitle;
		}
		// Sometimes $wgTitle isn't set either...
		if ( !$page ) {
			// It's not uncommon having a null $wgTitle in scripts. See r80898
			// Create a ghost title in such case
			$page = PageReferenceValue::localReference(
				NS_SPECIAL,
				'Badtitle/title not set in ' . __METHOD__
			);
		}

		$this->inParser = true;
		try {
			$res = $parser->parse( $text, $page, $popts, $linestart );
		} finally {
			// Reset the re-entrancy guard even when parsing throws, so later
			// calls are parsed instead of being returned as escaped text.
			$this->inParser = false;
		}

		return $res;
	}

	/**
	 * Mark DB/cache usage as disabled.
	 */
	public function disable() {
		$this->disable = true;
	}

	/**
	 * Mark DB/cache usage as enabled.
	 */
	public function enable() {
		$this->disable = false;
	}

	/**
	 * Whether DB/cache usage is disabled for determining messages
	 *
	 * If so, this typically indicates either:
	 *   - a) load() failed to find a cached copy nor query the DB
	 *   - b) we are in a special context or error mode that cannot use the DB
	 * If the DB is ignored, any derived HTML output or cached objects may be wrong.
	 * To avoid long-term cache pollution, TTLs can be adjusted accordingly.
	 *
	 * @return bool
	 * @since 1.27
	 */
	public function isDisabled() {
		return $this->disable;
	}

	/**
	 * Clear all stored messages in global and local cache
	 *
	 * Mainly used after a mass rebuild
	 */
	public function clear() {
		$langs = $this->languageNameUtils->getLanguageNames();
		foreach ( $langs as $code => $_ ) {
			$this->wanCache->touchCheckKey( $this->getCheckKey( $code ) );
		}
		$this->cache->clear();
	}

	/**
	 * Split a message key into its message name and language code.
	 *
	 * The part after the last '/' is treated as a language code only if it is a
	 * defined language name; otherwise the whole key is returned together with
	 * the content language code.
	 *
	 * @param string $key
	 * @return array
	 */
	public function figureMessage( $key ) {
		$pieces = explode( '/', $key );
		if ( count( $pieces ) < 2 ) {
			return [ $key, $this->contLangCode ];
		}

		$lang = array_pop( $pieces );
		if ( !$this->languageNameUtils->getLanguageName(
			$lang,
			LanguageNameUtils::AUTONYMS,
			LanguageNameUtils::DEFINED
		) ) {
			return [ $key, $this->contLangCode ];
		}

		$message = implode( '/', $pieces );

		return [ $message, $lang ];
	}

	/**
	 * Get all message keys stored in the message cache for a given language.
	 * If $code is the content language code, this will return all message keys
	 * for which MediaWiki:msgkey exists. If $code is another language code, this
	 * will ONLY return message keys for which MediaWiki:msgkey/$code exists.
* * @param string $code Language code * @return string[]|null Array of message keys */ public function getAllMessageKeys( $code ) { $this->load( $code ); if ( !$this->cache->has( $code ) ) { // Apparently load() failed return null; } // Remove administrative keys $cache = $this->cache->get( $code ); unset( $cache['VERSION'] ); unset( $cache['EXPIRY'] ); unset( $cache['EXCESSIVE'] ); // Remove any !NONEXISTENT keys $cache = array_diff( $cache, [ '!NONEXISTENT' ] ); // Keys may appear with a capital first letter. lcfirst them. return array_map( [ $this->contLang, 'lcfirst' ], array_keys( $cache ) ); } /** * Purge message caches when a MediaWiki: page is created, updated, or deleted * * @param LinkTarget $linkTarget Message page title * @param Content|null $content New content for edit/create, null on deletion * @since 1.29 */ public function updateMessageOverride( LinkTarget $linkTarget, ?Content $content = null ) { // treat null as not existing $msgText = $this->getMessageTextFromContent( $content ) ?? false; $this->replace( $linkTarget->getDBkey(), $msgText ); if ( $this->contLangConverter->hasVariants() ) { $this->contLangConverter->updateConversionTable( $linkTarget ); } } /** * @param string $code Language code * @return string WAN cache key usable as a "check key" against language page edits */ public function getCheckKey( $code ) { return $this->wanCache->makeKey( 'messages', $code ); } /** * @param Content|null $content Content or null if the message page does not exist * @return string|false|null Returns false if $content is null and null on error */ private function getMessageTextFromContent( ?Content $content = null ) { // @TODO: could skip pseudo-messages like js/css here, based on content model if ( $content && $content->isRedirect() ) { // Treat redirects as not existing (T376398) $msgText = false; } elseif ( $content ) { // Message page exists... // XXX: Is this the right way to turn a Content object into a message? 
// NOTE: $content is typically either WikitextContent, JavaScriptContent or // CssContent. $msgText = $content->getWikitextForTransclusion(); if ( $msgText === false || $msgText === null ) { // This might be due to some kind of misconfiguration... $msgText = null; $this->logger->warning( __METHOD__ . ": message content doesn't provide wikitext " . "(content model: " . $content->getModel() . ")" ); } } else { // Message page does not exist... $msgText = false; } return $msgText; } /** * @param string $hash Hash for this version of the entire key/value overrides map * @param string $title Message cache key with the initial uppercase letter * @return string */ private function bigMessageCacheKey( $hash, $title ) { return $this->wanCache->makeKey( 'messages-big', $hash, $title ); } } PK ! �O*rO O FormatterFactory.phpnu �Iw�� <?php namespace MediaWiki\Language; use MediaWiki\Block\BlockErrorFormatter; use MediaWiki\HookContainer\HookContainer; use MediaWiki\Languages\LanguageFactory; use MediaWiki\Status\StatusFormatter; use MediaWiki\Title\TitleFormatter; use MediaWiki\User\UserIdentityUtils; use MessageCache; use MessageLocalizer; use Psr\Log\LoggerInterface; /** * Factory for formatters of common complex objects * * @since 1.42 */ class FormatterFactory { private MessageCache $messageCache; private TitleFormatter $titleFormatter; private HookContainer $hookContainer; private UserIdentityUtils $userIdentityUtils; private LanguageFactory $languageFactory; private LoggerInterface $logger; public function __construct( MessageCache $messageCache, TitleFormatter $titleFormatter, HookContainer $hookContainer, UserIdentityUtils $userIdentityUtils, LanguageFactory $languageFactory, LoggerInterface $logger ) { $this->messageCache = $messageCache; $this->titleFormatter = $titleFormatter; $this->hookContainer = $hookContainer; $this->userIdentityUtils = $userIdentityUtils; $this->languageFactory = $languageFactory; $this->logger = $logger; } public function getStatusFormatter( 
MessageLocalizer $messageLocalizer ): StatusFormatter { return new StatusFormatter( $messageLocalizer, $this->messageCache, $this->logger ); } public function getBlockErrorFormatter( LocalizationContext $context ): BlockErrorFormatter { return new BlockErrorFormatter( $this->titleFormatter, $this->hookContainer, $this->userIdentityUtils, $this->languageFactory, $context ); } } PK ! ��`:�! �! ILanguageConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ namespace MediaWiki\Language; use MediaWiki\Linker\LinkTarget; use MediaWiki\Page\PageReference; use MediaWiki\Title\Title; /** * The shared interface for all language converters. * * @ingroup Language * @internal */ interface ILanguageConverter { /** * Get all valid variants. * * @return string[] Contains all valid variants */ public function getVariants(); /** * In case some variant is not defined in the markup, we need * to have some fallback. For example, in zh, normally people * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk. * * When zh-sg is preferred but not defined, we will pick zh-hans * in this case. Right now this is only used by zh. 
* * @param string $variant The language code of the variant * @return string|array The code of the fallback language or the * main code if there is no fallback */ public function getVariantFallbacks( $variant ); /** * Get the title produced by the conversion rule. * * @return string|false The converted title text */ public function getConvRuleTitle(); /** * Get preferred language variant. * * @return string The preferred language code */ public function getPreferredVariant(); /** * This function would not be affected by user's settings * * @return string The default variant code */ public function getDefaultVariant(); /** * Validate the variant and return an appropriate strict internal * variant code if one exists. Compare to Language::hasVariant() * which does a strict test. * * @param string|null $variant The variant to validate * @return string|null Returns an equivalent valid variant code if possible, * null otherwise */ public function validateVariant( $variant = null ); /** * Get the variant specified in the URL * * @return string|null Variant if one found, null otherwise */ public function getURLVariant(); /** * Dictionary-based conversion. * This function would not parse the conversion rules. * If you want to parse rules, try to use convert() or * convertTo(). * * @param string $text The text to be converted * @param string|false $toVariant The target language code * @return string The converted text */ public function autoConvert( $text, $toVariant = false ); /** * Translate a string to a variant. * Doesn't parse rules or do any of that other stuff, for that use * convert() or convertTo(). * * @param string $text Text to convert * @param string $variant Variant language code * @return string Translated text */ public function translate( $text, $variant ); /** * Call translate() to convert text to all valid variants. 
* * @param string $text The text to be converted * @return array Variant => converted text */ public function autoConvertToAllVariants( $text ); /** * Automatically converts a LinkTarget or PageReference to a readable string in the * preferred variant, separating the namespace and the main part of the title. * * @since 1.39 * @param LinkTarget|PageReference $title * @return string[] Three elements: converted namespace text, converted namespace separator, * and the converted main part of the title */ public function convertSplitTitle( $title ); /** * Automatically convert a LinkTarget or PageReference to a readable string in the * preferred variant. * * @param LinkTarget|PageReference $title * @return string Converted title text */ public function convertTitle( $title ); /** * Get the namespace display name in the preferred variant. * * @param int $index Namespace id * @param string|null $variant Variant code or null for preferred variant * @return string Namespace name for display */ public function convertNamespace( $index, $variant = null ); /** * Convert text to different variants of a language. The automatic * conversion is done in autoConvert(). Here we parse the text * marked with -{}-, which specifies special conversions of the * text that cannot be accomplished in autoConvert(). * * Syntax of the markup: * -{code1:text1;code2:text2;...}- or * -{flags|code1:text1;code2:text2;...}- or * -{text}- in which case no conversion should take place for text * * @warning Glossary state is maintained between calls. Never feed this * method input that hasn't properly been escaped as it may result in * an XSS in subsequent calls, even if those subsequent calls properly * escape things. * @param string $text Text to be converted; already html escaped. * @return string Converted text (html) */ public function convert( $text ); /** * Same as convert() except a extra parameter to custom variant. 
* * @param string $text Text to be converted; already html escaped * @param-taint $text exec_html * @param string $variant The target variant code * @param bool $clearState Whether to clear the converter title before * conversion (defaults to true) * @return string Converted text * @return-taint escaped */ public function convertTo( $text, $variant, bool $clearState = true ); /** * If a language supports multiple variants, it is possible that * non-existing link in one variant actually exists in another variant. * This function tries to find it. See e.g., LanguageZh.php * The input parameters may be modified upon return * * @param string &$link The name of the link * @param Title &$nt The title object of the link * @param bool $ignoreOtherCond To disable other conditions when * we need to transclude a template or update a category's link */ public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ); /** * Returns language specific hash options. * * @return string */ public function getExtraHashOptions(); /** * Guess if a text is written in a variant. This should be implemented in subclasses. * * @param string $text The text to be checked * @param string $variant Language code of the variant to be checked for * @return bool True if $text appears to be written in $variant, false if not * * @author Nikola Smolenski <smolensk@eunet.rs> * @since 1.19 */ public function guessVariant( $text, $variant ); /** * Enclose a string with the "no conversion" tag. This is used by * various functions in the Parser. * * @param string $text Text to be tagged for no conversion * @param bool $noParse Unused * @return string The tagged text */ public function markNoConversion( $text, $noParse = false ); /** * Convert the sorting key for category links. This should make different * keys that are variants of each other map to the same key. 
* * @param string $key * * @return string */ public function convertCategoryKey( $key ); /** * Refresh the cache of conversion tables when * MediaWiki:Conversiontable* is updated. * * @param LinkTarget $linkTarget The LinkTarget of the page being updated */ public function updateConversionTable( LinkTarget $linkTarget ); /** * Check if this is a language with variants * * @since 1.35 * * @return bool */ public function hasVariants(); /** * Strict check if the language has the specific variant. * * Compare to LanguageConverter::validateVariant() which does a more * lenient check and attempts to coerce the given code to a valid one. * * @since 1.35 * @param string $variant * @return bool */ public function hasVariant( $variant ); /** * Perform output conversion on a string, and encode for safe HTML output. * * @since 1.35 * * @param string $text Text to be converted * @return string string converted to be safely used in HTML */ public function convertHtml( $text ); } /** @deprecated class alias since 1.43 */ class_alias( ILanguageConverter::class, 'ILanguageConverter' ); PK ! �4~<B B LCStoreStaticArray.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ use Wikimedia\StaticArrayWriter; /** * Localisation cache storage based on PHP files and static arrays. 
* * @since 1.26 * @ingroup Language */
class LCStoreStaticArray implements LCStore {
	/** @var string|null Current language code. */
	private $currentLang = null;

	/** @var array Localisation data. */
	private $data = [];

	/** @var string|null File name. */
	private $fname = null;

	/** @var string Directory for cache files. */
	private $directory;

	/**
	 * @param array $conf Store configuration; its 'directory' entry is used as the
	 *  directory that holds the generated "<code>.l10n.php" files.
	 */
	public function __construct( $conf = [] ) {
		$this->directory = $conf['directory'];
	}

	/**
	 * Begin a write session for one language.
	 *
	 * Creates the cache directory when it is missing and preloads the existing
	 * file for $code (if any), so that subsequent set() calls update that data.
	 *
	 * @param string $code Language code
	 * @throws RuntimeException If the cache directory cannot be created
	 */
	public function startWrite( $code ) {
		if ( !is_dir( $this->directory ) && !wfMkdirParents( $this->directory, null, __METHOD__ ) ) {
			throw new RuntimeException( "Unable to create the localisation store " .
				"directory \"{$this->directory}\"" );
		}

		$this->currentLang = $code;
		$this->fname = $this->directory . '/' . $code . '.l10n.php';
		$this->data[$code] = [];
		if ( is_file( $this->fname ) ) {
			$this->data[$code] = require $this->fname;
		}
	}

	/**
	 * Store one value, in encoded form, for the language of the current write session.
	 *
	 * @param string $key
	 * @param mixed $value
	 */
	public function set( $key, $value ) {
		$this->data[$this->currentLang][$key] = self::encode( $value );
	}

	/**
	 * Determine whether this array contains only scalar values.
	 *
	 * Null and nested arrays that themselves satisfy this check are accepted too.
	 *
	 * @param array $arr
	 * @return bool
	 */
	private static function isValueArray( array $arr ) {
		foreach ( $arr as $value ) {
			if ( is_scalar( $value )
				|| $value === null
				|| ( is_array( $value ) && self::isValueArray( $value ) )
			) {
				continue;
			}
			return false;
		}
		return true;
	}

	/**
	 * Encodes a value into an array format
	 *
	 * @param mixed $value
	 * @return array|mixed
	 * @throws RuntimeException
	 */
	public static function encode( $value ) {
		if ( is_array( $value ) && self::isValueArray( $value ) ) {
			// Type: scalar [v]alue.
			// Optimization: Write large arrays as one value to avoid recursive decoding cost.
			return [ 'v', $value ];
		}
		if ( is_array( $value ) || is_object( $value ) ) {
			// Type: [s]erialized.
			// Optimization: Avoid recursive decoding cost. Write arrays containing objects
			// as one serialised value.
			return [ 's', serialize( $value ) ];
		}
		if ( is_scalar( $value ) || $value === null ) {
			// Optimization: Reduce file size by not wrapping scalar values.
			return $value;
		}
		throw new RuntimeException( 'Cannot encode ' . var_export( $value, true ) );
	}

	/**
	 * Decode something that was encoded with 'encode'
	 *
	 * @param mixed $encoded
	 * @return array|mixed
	 * @throws RuntimeException
	 */
	public static function decode( $encoded ) {
		if ( !is_array( $encoded ) ) {
			// Unwrapped scalar value
			return $encoded;
		}

		[ $type, $data ] = $encoded;

		switch ( $type ) {
			case 'v':
				// Value array (1.35+) or unwrapped scalar value (1.32 and earlier)
				return $data;
			case 's':
				// NOTE(review): unserialize() is applied to whatever the cache file
				// contains; this is only safe while those files are written by this class.
				return unserialize( $data );
			case 'a':
				// Support: MediaWiki 1.34 and earlier (older file format)
				return array_map( [ __CLASS__, 'decode' ], $data );
			default:
				throw new RuntimeException(
					'Unable to decode ' . var_export( $encoded, true ) );
		}
	}

	/**
	 * Write the data collected since startWrite() to the language's cache file
	 * and end the write session.
	 *
	 * @throws RuntimeException If the temporary file cannot be written or cannot
	 *  be moved over the cache file
	 */
	public function finishWrite() {
		$writer = new StaticArrayWriter();
		$out = $writer->create(
			$this->data[$this->currentLang],
			'Generated by LCStoreStaticArray.php -- do not edit!'
		);

		// Don't just write to the file, since concurrent requests may see a partial file (T304515).
		// Write to a file in the same filesystem so that it can be atomically moved.
		$tmpFileName = "{$this->fname}.tmp." . getmypid() . '.' . mt_rand();
		if ( file_put_contents( $tmpFileName, $out ) === false ) {
			// Do not leave a partially written temporary file behind
			if ( is_file( $tmpFileName ) ) {
				unlink( $tmpFileName );
			}
			throw new RuntimeException(
				"Unable to write the localisation store file \"{$tmpFileName}\"" );
		}
		if ( !rename( $tmpFileName, $this->fname ) ) {
			// The cache file was not replaced; remove the orphaned temporary file
			if ( is_file( $tmpFileName ) ) {
				unlink( $tmpFileName );
			}
			throw new RuntimeException(
				"Unable to move \"{$tmpFileName}\" to \"{$this->fname}\"" );
		}

		// Release the data to manage the memory in rebuildLocalisationCache
		unset( $this->data[$this->currentLang] );
		$this->currentLang = null;
		$this->fname = null;
	}

	/**
	 * Read one decoded value for a language, loading the language's cache file
	 * on first access.
	 *
	 * @param string $code Language code
	 * @param string $key
	 * @return mixed|null Null when the cache file or the key does not exist
	 */
	public function get( $code, $key ) {
		if ( !array_key_exists( $code, $this->data ) ) {
			$fname = $this->directory . '/' . $code . '.l10n.php';
			if ( !is_file( $fname ) ) {
				return null;
			}
			$this->data[$code] = require $fname;
		}
		$data = $this->data[$code];
		if ( array_key_exists( $key, $data ) ) {
			return self::decode( $data[$key] );
		}
		return null;
	}
}
PK ! 
��< < converters/ZhConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Language converter for Chinese and its script/regional variants.
 *
 * @ingroup Languages
 */
class ZhConverter extends LanguageConverter {

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'zh';
	}

	/** @return string[] Variant codes supported by this converter */
	public function getLanguageVariants(): array {
		return [
			'zh',
			'zh-hans',
			'zh-hant',
			'zh-cn',
			'zh-hk',
			'zh-mo',
			'zh-my',
			'zh-sg',
			'zh-tw'
		];
	}

	/** @return array Map of variant code to its ordered list of fallback variant codes */
	public function getVariantsFallbacks(): array {
		return [
			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
			'zh-sg' => [ 'zh-my', 'zh-hans', 'zh-cn' ],
			'zh-my' => [ 'zh-sg', 'zh-hans', 'zh-cn' ],
			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
			'zh-hk' => [ 'zh-mo', 'zh-hant', 'zh-tw' ],
			'zh-mo' => [ 'zh-hk', 'zh-hant', 'zh-tw' ],
		];
	}

	/** @return array Map of variant code to its manual conversion level */
	public function getAdditionalManualLevel(): array {
		return [
			'zh' => 'disable',
			'zh-hans' => 'unidirectional',
			'zh-hant' => 'unidirectional',
		];
	}

	/** @return string */
	public function getDescCodeSeparator(): string {
		return ':';
	}

	/** @return string */
	public function getDescVarSeparator(): string {
		return ';';
	}

	/**
	 * @return array The parent's variant names merged with the Chinese names below
	 */
	public function getVariantNames(): array {
		return array_merge( parent::getVariantNames(), [
			'zh' => '原文',
			'zh-hans' => '简体',
			'zh-hant' => '繁體',
			'zh-cn' => '大陆',
			'zh-tw' => '臺灣',
			'zh-hk' => '香港',
			'zh-mo' => '澳門',
			'zh-sg' => '新加坡',
			'zh-my' => '大马',
		] );
	}

	/**
	 * Build one ReplacementArray per variant from the ZhConversion tables.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		// zh-mo starts from the same table as zh-hk; zh-my and zh-sg start from
		// the same table as zh-cn.
		$sourceTables = [
			'zh-hans' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_HANS,
			'zh-hant' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_HANT,
			'zh-cn' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_CN,
			'zh-hk' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_HK,
			'zh-mo' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_HK,
			'zh-my' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_CN,
			'zh-sg' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_CN,
			'zh-tw' => MediaWiki\Languages\Data\ZhConversion::ZH_TO_TW,
		];

		$tables = [];
		foreach ( $sourceTables as $variantCode => $pairs ) {
			$tables[$variantCode] = new ReplacementArray( $pairs );
		}
		// The base code gets an empty table
		$tables['zh'] = new ReplacementArray();

		return $tables;
	}

	/**
	 * Complete each regional table with the entries of its script-level table.
	 *
	 * The array union operator keeps the left operand's entry on a key collision,
	 * so an entry already present in the regional table is not overwritten.
	 *
	 * @param array &$tables Map of variant code to ReplacementArray, modified in place
	 */
	protected function postLoadTables( &$tables ) {
		$scriptTableFor = [
			'zh-cn' => 'zh-hans',
			'zh-hk' => 'zh-hant',
			'zh-mo' => 'zh-hant',
			'zh-my' => 'zh-hans',
			'zh-sg' => 'zh-hans',
			'zh-tw' => 'zh-hant',
		];

		foreach ( $scriptTableFor as $regional => $script ) {
			$merged = $tables[$regional]->getArray() + $tables[$script]->getArray();
			$tables[$regional]->setArray( $merged );
		}
	}

	/**
	 * @param string $key
	 * @return string The key passed through autoConvert() for the 'zh' variant
	 */
	public function convertCategoryKey( $key ) {
		return $this->autoConvert( $key, 'zh' );
	}
}
PK ! Rp�Q� � converters/WuuConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */

use MediaWiki\Languages\Data\ZhConversion;

/**
 * Language converter for Wu and its script variants.
 *
 * @ingroup Languages
 */
class WuuConverter extends LanguageConverter {

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'wuu';
	}

	/** @return string[] Variant codes supported by this converter */
	public function getLanguageVariants(): array {
		return [
			'wuu',
			'wuu-hans',
			'wuu-hant'
		];
	}

	/** @return array Map of variant code to its list of fallback variant codes */
	public function getVariantsFallbacks(): array {
		return [
			'wuu' => [ 'wuu-hans', 'wuu-hant' ],
			'wuu-hans' => [ 'wuu' ],
			'wuu-hant' => [ 'wuu' ],
		];
	}

	/** @return array Map of variant code to its manual conversion level */
	protected function getAdditionalManualLevel(): array {
		return [
			'wuu' => 'disable'
		];
	}

	/** @return string */
	public function getDescCodeSeparator(): string {
		return ':';
	}

	/** @return string */
	public function getDescVarSeparator(): string {
		return ';';
	}

	/**
	 * @return array The parent's variant names merged with the Wu names below
	 */
	public function getVariantNames(): array {
		return array_merge( parent::getVariantNames(), [
			'wuu' => '原文',
			'wuu-hans' => '简体',
			'wuu-hant' => '正體',
		] );
	}

	/**
	 * Build one ReplacementArray per variant; the script variants reuse the
	 * ZhConversion tables and the base code gets an empty table.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		$tables = [];
		$tables['wuu-hans'] = new ReplacementArray( ZhConversion::ZH_TO_HANS );
		$tables['wuu-hant'] = new ReplacementArray( ZhConversion::ZH_TO_HANT );
		$tables['wuu'] = new ReplacementArray();

		return $tables;
	}
}
PK ! ��G., , converters/SrConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */

/**
 * Language converter for Serbian (Српски / Srpski).
 *
 * Serbian has two independent conversion axes: script (Cyrillic <-> Latin)
 * and variant (ekavian <-> iyekavian). The tables in this class cover the
 * script axis only.
 *
 * @ingroup Languages
 */
class SrConverter extends LanguageConverterSpecific {

	/** Cyrillic => Latin replacement pairs, used for the 'sr-el' variant */
	private const TO_LATIN = [
		'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
		'ђ' => 'đ', 'е' => 'e', 'ж' => 'ž', 'з' => 'z', 'и' => 'i',
		'ј' => 'j', 'к' => 'k', 'л' => 'l', 'љ' => 'lj', 'м' => 'm',
		'н' => 'n', 'њ' => 'nj', 'о' => 'o', 'п' => 'p', 'р' => 'r',
		'с' => 's', 'т' => 't', 'ћ' => 'ć', 'у' => 'u', 'ф' => 'f',
		'х' => 'h', 'ц' => 'c', 'ч' => 'č', 'џ' => 'dž', 'ш' => 'š',

		'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D',
		'Ђ' => 'Đ', 'Е' => 'E', 'Ж' => 'Ž', 'З' => 'Z', 'И' => 'I',
		'Ј' => 'J', 'К' => 'K', 'Л' => 'L', 'Љ' => 'Lj', 'М' => 'M',
		'Н' => 'N', 'Њ' => 'Nj', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
		'С' => 'S', 'Т' => 'T', 'Ћ' => 'Ć', 'У' => 'U', 'Ф' => 'F',
		'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Č', 'Џ' => 'Dž', 'Ш' => 'Š',
	];

	/** Latin => Cyrillic replacement pairs, used for the 'sr-ec' variant */
	private const TO_CYRILLICS = [
		'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ',
		'd' => 'д', 'dž' => 'џ', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф',
		'g' => 'г', 'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к',
		'l' => 'л', 'lj' => 'љ', 'm' => 'м', 'n' => 'н', 'nj' => 'њ',
		'o' => 'о', 'p' => 'п', 'r' => 'р', 's' => 'с', 'š' => 'ш',
		't' => 'т', 'u' => 'у', 'v' => 'в', 'z' => 'з', 'ž' => 'ж',

		'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ',
		'D' => 'Д', 'Dž' => 'Џ', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф',
		'G' => 'Г', 'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К',
		'L' => 'Л', 'LJ' => 'Љ', 'M' => 'М', 'N' => 'Н', 'NJ' => 'Њ',
		'O' => 'О', 'P' => 'П', 'R' => 'Р', 'S' => 'С', 'Š' => 'Ш',
		'T' => 'Т', 'U' => 'У', 'V' => 'В', 'Z' => 'З', 'Ž' => 'Ж',

		// Digraph forms, plus "!"-separated forms that yield two separate letters
		'DŽ' => 'Џ', 'd!ž' => 'дж', 'D!ž' => 'Дж', 'D!Ž' => 'ДЖ',
		'Lj' => 'Љ', 'l!j' => 'лј', 'L!j' => 'Лј', 'L!J' => 'ЛЈ',
		'Nj' => 'Њ', 'n!j' => 'нј', 'N!j' => 'Нј', 'N!J' => 'НЈ'
	];

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'sr';
	}

	/** @return string[] Variant codes supported by this converter */
	public function getLanguageVariants(): array {
		return [
			'sr',
			'sr-ec',
			'sr-el'
		];
	}

	/** @return array Map of variant code to its fallback variant code */
	public function getVariantsFallbacks(): array {
		return [
			'sr' => 'sr-ec',
			'sr-ec' => 'sr',
			'sr-el' => 'sr',
		];
	}

	/**
	 * @return array Map of flag spelling (single letter, Serbian word in either
	 *  script) to the flag letter it stands for
	 */
	protected function getAdditionalFlags(): array {
		return [
			'S' => 'S',
			'писмо' => 'S',
			'pismo' => 'S',
			'W' => 'W',
			'реч' => 'W',
			'reč' => 'W',
			'ријеч' => 'W',
			'riječ' => 'W'
		];
	}

	/**
	 * Build one ReplacementArray per variant; the base code gets an empty table.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		$tables = [];
		$tables['sr-ec'] = new ReplacementArray( self::TO_CYRILLICS );
		$tables['sr-el'] = new ReplacementArray( self::TO_LATIN );
		$tables['sr'] = new ReplacementArray();

		return $tables;
	}

	/**
	 * Omits roman numbers
	 *
	 * @inheritDoc
	 */
	public function translate( $text, $variant ) {
		return $this->translateWithoutRomanNumbers( $text, $variant );
	}

	/**
	 * Guess the script of a text by counting letters that exist in only one
	 * of the two alphabets.
	 *
	 * @param string $text
	 * @param string $variant Variant code to test for
	 * @return bool True when the letters characteristic of $variant's script
	 *  strictly outnumber those of the other script; false for any code other
	 *  than 'sr-ec' and 'sr-el'
	 */
	public function guessVariant( $text, $variant ) {
		$cyrillicHits = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text, $unused );
		$latinHits = preg_match_all( "/[šđč枊ĐČĆŽ]/u", $text, $unused );

		if ( $variant == 'sr-ec' ) {
			return $cyrillicHits > $latinHits;
		}
		if ( $variant == 'sr-el' ) {
			return $latinHits > $cyrillicHits;
		}

		return false;
	}
}
PK ! 
4DƐ� � converters/ShConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Converts Serbo-Croatian from Latin script to Cyrillic script
 *
 * @ingroup Languages
 */
class ShConverter extends LanguageConverter {

	/**
	 * Latin => Cyrillic replacement pairs, used for the 'sh-cyrl' variant.
	 *
	 * Every key must be written with Latin letters only: a key containing a
	 * look-alike Cyrillic letter can never match Latin-script input.
	 */
	private const TO_CYRILLIC = [
		// Digraphs that correspond to a single Cyrillic letter
		'dž' => 'џ',
		'lj' => 'љ',
		'nj' => 'њ',
		'Dž' => 'Џ',
		'DŽ' => 'Џ',
		'Lj' => 'Љ',
		'LJ' => 'Љ',
		'Nj' => 'Њ',
		// This key previously ended in Cyrillic "Ј" (U+0408) instead of Latin "J",
		// so the all-Latin upper-case digraph was never matched.
		'NJ' => 'Њ',

		// Single lower-case letters
		'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ',
		'd' => 'д', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф', 'g' => 'г',
		'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к', 'l' => 'л',
		'm' => 'м', 'n' => 'н', 'o' => 'о', 'p' => 'п', 'r' => 'р',
		's' => 'с', 'š' => 'ш', 't' => 'т', 'u' => 'у', 'v' => 'в',
		'z' => 'з', 'ž' => 'ж',

		// Single upper-case letters
		'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ',
		'D' => 'Д', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф', 'G' => 'Г',
		'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К', 'L' => 'Л',
		'M' => 'М', 'N' => 'Н', 'O' => 'О', 'P' => 'П', 'R' => 'Р',
		'S' => 'С', 'Š' => 'Ш', 'T' => 'Т', 'U' => 'У', 'V' => 'В',
		'Z' => 'З', 'Ž' => 'Ж',
	];

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'sh';
	}

	/** @return string[] Variant codes supported by this converter */
	public function getLanguageVariants(): array {
		return [ 'sh-latn', 'sh-cyrl' ];
	}

	/** @return array Map of variant code to its fallback variant code */
	public function getVariantsFallbacks(): array {
		return [
			'sh-cyrl' => 'sh-latn',
		];
	}

	/**
	 * Build one ReplacementArray per variant; 'sh-latn' gets an empty table.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		return [
			'sh-cyrl' => new ReplacementArray( self::TO_CYRILLIC ),
			'sh-latn' => new ReplacementArray(),
		];
	}

	/**
	 * Omits roman numbers
	 *
	 * @inheritDoc
	 */
	public function translate( $text, $variant ) {
		return $this->translateWithoutRomanNumbers( $text, $variant );
	}
}
PK ! I6��a �a converters/BanConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ /** * Balinese specific code. 
* * @ingroup Languages */ class BanConverter extends LanguageConverterIcu { public function getMainCode(): string { return 'ban'; } public function getLanguageVariants(): array { return [ 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku' ]; } public function getVariantsFallbacks(): array { return [ 'ban-bali' => 'ban', 'ban-x-dharma' => 'ban', 'ban-x-palmleaf' => 'ban', 'ban-x-pku' => 'ban', ]; } public function getVariantNames(): array { $names = [ 'ban' => 'Basa Bali', 'ban-bali' => 'ᬩᬲᬩᬮᬶ', 'ban-x-dharma' => 'Basa Bali (alih aksara DHARMA)', 'ban-x-palmleaf' => 'Basa Bali (alih aksara Palmleaf.org)', 'ban-x-pku' => 'Basa Bali (alih aksara Puri Kauhan Ubud)', ]; return array_merge( parent::getVariantNames(), $names ); } protected function getIcuRules() { $rules = []; # transliteration rules developed for Palmleaf.org $rules['ban-x-palmleaf'] = <<<'EOF' ::NFC; ᬒᬁ → \uE050; # OM ᬁ → \uE001; # SIGN ULU CANDRA ᬂ → \uE002; # SIGN CECEK ᬄ → \uE003; # SIGN BISAH ᬅ → \uE005; # LETTER AKARA ᬆ → \uE006; # LETTER AKARA TEDUNG ᬇ → \uE007; # LETTER IKARA ᬈ → \uE008; # LETTER IKARA TEDUNG ᬉ → \uE009; # LETTER UKARA ᬊ → \uE00A; # LETTER UKARA TEDUNG ᬋ → \uE00B; # LETTER RA REPA ᬌ → \uE060; # LETTER RA REPA TEDUNG ᬍ → \uE00C; # LETTER LA LENGA ᬎ → \uE061; # LETTER LA LENGA TEDUNG ᬏ → \uE00F; # LETTER EKARA ᬐ → \uE010; # LETTER AIKARA ᬑ → \uE013; # LETTER OKARA ᬒ → \uE014; # LETTER OKARA TEDUNG ᬓ → \uE015; # LETTER KA ᬔ → \uE016; # LETTER KA MAHAPRANA ᬕ → \uE017; # LETTER GA ᬖ → \uE018; # LETTER GA GORA ᬗ → \uE019; # LETTER NGA ᬘ → \uE01A; # LETTER CA ᬙ → \uE01B; # LETTER CA LACA ᬚ → \uE01C; # LETTER JA ᬛ → \uE01D; # LETTER JA JERA ᬜ → \uE01E; # LETTER NYA ᬝ → \uE01F; # LETTER TA LATIK ᬞ → \uE020; # LETTER TA MURDA MAHAPRANA ᬟ → \uE021; # LETTER DA MURDA ALPAPRANA ᬠ → \uE022; # LETTER DA MURDA MAHAPRANA ᬡ → \uE023; # LETTER NA RAMBAT ᬢ → \uE024; # LETTER TA ᬣ → \uE025; # LETTER TA TAWA ᬤ → \uE026; # LETTER DA ᬥ → \uE027; # LETTER DA MADU ᬦ → \uE028; # LETTER NA ᬧ → 
\uE02A; # LETTER PA ᬨ → \uE02B; # LETTER PA KAPAL ᬩ → \uE02C; # LETTER BA ᬪ → \uE02D; # LETTER BA KEMBANG ᬫ → \uE02E; # LETTER MA ᬬ → \uE02F; # LETTER YA ᬭ → \uE030; # LETTER RA ᬮ → \uE032; # LETTER LA ᬯ → \uE035; # LETTER WA ᬰ → \uE036; # LETTER SA SAGA ᬱ → \uE037; # LETTER SA SAPA ᬲ → \uE038; # LETTER SA ᬳ → \uE039; # LETTER HA ᬴ → \uE03C; # SIGN REREKAN ᬵ → \uE03E; # VOWEL SIGN TEDUNG ᬶ → \uE03F; # VOWEL SIGN ULU ᬷ → \uE040; # VOWEL SIGN ULU SARI ᬸ → \uE041; # VOWEL SIGN SUKU ᬹ → \uE042; # VOWEL SIGN SUKU ILUT ᬺ → \uE043; # VOWEL SIGN RA REPA ᬻ → \uE044; # VOWEL SIGN RA REPA TEDUNG ᬼ→ \uE062; # VOWEL SIGN LA LENGA ᬽ → \uE063; # VOWEL SIGN LA LENGA TEDUNG ᬾ → \uE047; # VOWEL SIGN TALING ᬿ → \uE048; # VOWEL SIGN TALING REPA ᭀ → \uE04B; # VOWEL SIGN TALING TEDUNG ᭁ → \uE04C; # VOWEL SIGN TALING REPA TEDUNG ᭂ → \uE045; # VOWEL SIGN PEPET ᭃ → \uE049; # VOWEL SIGN PEPET TEDUNG ᭄ → \uE04D; # ADEG ADEG ᭅ → \uE058; # LETTER KAF SASAK ᭆ → \uE059; # LETTER KHOT SASAK ᭇ → \uE024\uE03C; # LETTER TZIR SASAK ᭈ → \uE05E; # LETTER EF SASAK ᭉ → \uE081; # LETTER VE SASAK ᭊ → \uE05B; # LETTER ZAL SASAK ᭋ → \uE038\uE03C; # LETTER ASYURA SASAK ᭐ → \uE066; # DIGIT ZERO ᭑ → \uE067; # DIGIT ONE ᭒ → \uE068; # DIGIT TWO ᭓ → \uE069; # DIGIT THREE ᭔ → \uE06A; # DIGIT FO ᭕ → \uE06B; # DIGIT FIVE ᭖ → \uE06C; # DIGIT SIX ᭗ → \uE06D; # DIGIT SEVEN ᭘ → \uE06E; # DIGIT EIGHT ᭙ → \uE06F; # DIGIT NINE ᭚ → '//'; # PANTI ᭛ → '///'; # PAMADA ᭜ → •; # WINDU ᭟᭜᭟ → '\\•\\'; ᭟ ' ' ᭜ ' ' ᭟ → '\\ • \\'; ᭝ → \:; # CARIK PAMUNGKAH ᭞ → \uE064; # CARIK SIKI ᭟ → \uE065; # CARIK PAREREN ᭠ → ‐; # PAMENENG #consonants $chandrabindu=\uE001; $ardhachandra=\u1B00; $anusvara=\uE002; $visarga=\uE003; # w←vowel→ represents the stand-alone form $wa=\uE005; $waa=\uE006; $wi=\uE007; $wii=\uE008; $wu=\uE009; $wuu=\uE00A; $wr=\uE00B; $wl=\uE00C; $wce=\uE00D; # LETTER CANDRA E $wse=\uE00E; # LETTER SHORT E $we=\uE00F; # ए LETTER E $wai=\uE010; $wco=\uE011; # LETTER CANDRA O $wso=\uE012; # LETTER SHORT O $wo=\uE013; # ओ LETTER 
O $wau=\uE014; $ka=\uE015; $kha=\uE016; $ga=\uE017; $gha=\uE018; $nga=\uE019; $ca=\uE01A; $cha=\uE01B; $ja=\uE01C; $jha=\uE01D; $nya=\uE01E; $tta=\uE01F; $ttha=\uE020; $dda=\uE021; $ddha=\uE022; $nna=\uE023; $ta=\uE024; $tha=\uE025; $da=\uE026; $dha=\uE027; $na=\uE028; $ena=\uE029; #compatibility $pa=\uE02A; $pha=\uE02B; $ba=\uE02C; $bha=\uE02D; $ma=\uE02E; $ya=\uE02F; $ra=\uE030; $vva=\uE081; $rra=\uE031; $la=\uE032; $lla=\uE033; $ela=\uE034; #compatibility $va=\uE035; $sha=\uE036; $ssa=\uE037; $sa=\uE038; $ha=\uE039; $nukta=\uE03C; $avagraha=\uE03D; # SIGN AVAGRAHA # ←vowel→ represents the dependent form $aa=\uE03E; $i=\uE03F; $ii=\uE040; $u=\uE041; $uu=\uE042; $rh=\uE043; $rrh=\uE044; $ce=\uE045; #VOWEL SIGN CANDRA E $se=\uE046; #VOWEL SIGN SHORT E $e=\uE047; $ai=\uE048; $co=\uE049; # VOWEL SIGN CANDRA O $so=\uE04A; # VOWEL SIGN SHORT O $o=\uE04B; # ो $au=\uE04C; $virama=\uE04D; $om=\uE050; # OM \uE051→; # UNMAPPED STRESS SIGN UDATTA \uE052→; # UNMAPPED STRESS SIGN ANUDATTA \uE053→; # UNMAPPED GRAVE ACCENT \uE054→; # UNMAPPED ACUTE ACCENT $lm = \uE055;# Telugu Length Mark $ailm=\uE056;# AI Length Mark $aulm=\uE057;# AU Length Mark #urdu compatibity forms $uka=\uE058; $ukha=\uE059; $ugha=\uE05A; $ujha=\uE05B; $uddha=\uE05C; $udha=\uE05D; $ufa=\uE05E; $uya=\uE05F; $wrr=\uE060; $wll=\uE061; $lh=\uE062; $llh=\uE063; $danda=\uE064; $doubleDanda=\uE065; $zero=\uE066; # DIGIT ZERO $one=\uE067; # DIGIT ONE $two=\uE068; # DIGIT TWO $three=\uE069; # DIGIT THREE $four=\uE06A; # DIGIT FOUR $five=\uE06B; # DIGIT FIVE $six=\uE06C; # DIGIT SIX $seven=\uE06D; # DIGIT SEVEN $eight=\uE06E; # DIGIT EIGHT $nine=\uE06F; # DIGIT NINE # Glottal stop $dgs=\uE082; #Khanda-ta $kta=\uE083; $depVowelAbove=[\uE03E-\uE040\uE045-\uE04C]; $depVowelBelow=[\uE041-\uE044]; # $x was originally called '§'; $z was '%' $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; $z=[bcdfghjklmnpqrstvwxyz]; $vowels=[aeiour̥̄̆]; $forceIndependentMatra = [^[[:L:][̀-͌]]]; $strike=\u0336; 
###################################################################### # normalize input ###################################################################### # delete zwnj \u200C→; # reprocess from beginning ::Null; ###################################################################### # convert from Native letters to Latin letters ###################################################################### #glottal stop $wa$virama → k''; #anusvara $anusvara → ng; #surang ᬃ → r̀; # Urdu compatibility $ya$nukta}$x → y; $ya$nukta$virama → y; $ya$nukta → ya; $la$nukta }$x → l; $la$nukta$virama → l; $la$nukta → la; $na$nukta }$x → n; $na$nukta$virama → n; $na$nukta → na; $ena }$x → n; $ena$virama → n; $ena → na; $uka → qa; $ka$nukta }$x → q; $ka$nukta$virama → q; $ka$nukta → qa; $kha$nukta }$x → kh; $kha$nukta$virama → kh; $kha$nukta → kha; $ukha$virama → kh; $ukha → kha; $ugha → gha; $ga$nukta }$x → gh; $ga$nukta$virama → gh; $ga$nukta → gha; $ujha → za; $ja$nukta }$x → z; $ja$nukta$virama → z; $ja$nukta → za; $ddha$nukta}$x → r; $ddha$nukta$virama → r; $ddha$nukta → ra; $uddha}$x → r; $uddha$virama → r; $uddha → ra; $udha → ra; $dda$nukta}$x → r; $dda$nukta$virama → r; $dda$nukta → ra; $pha$nukta }$x → f; $pha$nukta$virama → f; $pha$nukta → fa; $ufa }$x → f; $ufa$virama → f; $ufa → fa; $ra$nukta}$x → r; $ra$nukta$virama → r; $ra$nukta → ra; $lla$nukta}$x → l; $lla$nukta$virama → l; $lla$nukta → la; $ela}$x → l; $ela$virama → l; $ela → la; $uya}$x → y; $uya$virama → y; $uya → ya; # normal consonants $ka$virama}$ha→k''; $ka}$x→k; $ka$virama→k; $ka→ka; $kha$i$u→k $strike h $strike; $kha}$x→kh; $kha$virama→kh; $kha→kha; $ga$virama}$ha→g''; $ga}$x→g; $ga$virama→g; $ga→ga; $gha$i$u→g $strike h $strike; $gha}$x→gh; $gha$virama→gh; $gha→gha; $nga$i$u→n $strike g $strike; $nga}$x→ng; $nga$virama→ng; $nga→nga; $ca$virama}$ha→c''; $ca}$x→c; $ca$virama→c; $ca→ca; $cha$i$u→c $strike h $strike; $cha}$x→ch; $cha$virama→ch; $cha→cha; $ja$virama}$ha→j''; $ja}$x→j; $ja$virama→j; $ja→ja; 
$jha$i$u→j $strike h $strike; $jha}$x→jh; $jha$virama→jh; $jha→jha; $nya }$x→ñ; $nya$virama→ñ; $nya → ña; $tta$virama}$ha→ṭ''; $tta}$x→ṭ; $tta$virama→ṭ; $tta→ṭa; $ttha$i$u→ṭ $strike h $strike; $ttha}$x→ṭh; $ttha$virama→ṭh; $ttha→ṭha; $dda}$x$ha→ḍ''; $dda}$x→ḍ; $dda$virama→ḍ; $dda→ḍa; $ddha$i$u→ḍ $strike h $strike; $ddha}$x→ḍh; $ddha$virama→ḍh; $ddha→ḍha; $nna}$x→ṇ; $nna$virama→ṇ; $nna→ṇa; $ta$virama}$ha→t''; $ta}$x→t; $ta$virama→t; $ta→ta; $tha$i$u→t $strike h $strike; $tha}$x→th; $tha$virama→th; $tha→tha; $da$virama}$ha→d''; $da}$x→d; $da$virama→d; $da→da; $dha$i$u→d $strike h $strike; $dha}$x→dh; $dha$virama→dh; $dha→dha; $na$virama}$ga→n''; $na}$x→n; $na$virama→n; $na→na; $pa$virama}$ha→p''; $pa}$x→p; $pa$virama→p; $pa→pa; $pha$i$u→p $strike h $strike; $pha}$x→ph; $pha$virama→ph; $pha→pha; $ba$virama}$ha→b''; $ba}$x→b; $ba$virama→b; $ba→ba; $bha$i$u→b $strike h $strike; $bha}$x→bh; $bha$virama→bh; $bha→bha; $ma}$x→m; $ma$virama→m; $ma→ma; $ya}$x→y; $ya$virama→y; $ya→ya; $ra}$x→r; $ra$virama→r; $ra→ra; $vva}$x→v; $vva$virama→v; $vva→va; $rra}$x→r; $rra$virama→r; $rra→ra; $la}$x→l; $la$virama→l; $la→la; $lla}$x→l; $lla$virama→l; $lla→la; $va}$x→w; $va$virama→w; $va→wa; $sa}$x→s; $sa$virama→s; #for gurmukhi $sa$nukta}$x→sy; $sa$nukta$virama→sy; $sa$nukta→sya; $sa→sa; $sha}$x→ś; $sha$virama→ś; $sha→śa; $ssa}$x→sy; $ssa$virama→ṣ; $ssa→ṣa; $ha}$x→h; $ha$virama→h; $ha→ha; # dependent vowels (should never occur except following consonants) $forceIndependentMatra{$aa → ̔ā; $forceIndependentMatra{$ai → ̔ai; $forceIndependentMatra{$au → ̔au; $forceIndependentMatra{$ii → ̔ī; $forceIndependentMatra{$i → ̔i; $forceIndependentMatra{$uu → ̔ū; $forceIndependentMatra{$u → ̔u; $forceIndependentMatra{$rrh → ̔r̥ö; $forceIndependentMatra{$rh → ̔r̥ĕ; $forceIndependentMatra{$llh → ̔l̥ö; $forceIndependentMatra{$lh → ̔l̥ĕ; $forceIndependentMatra{$e → ̔e; $forceIndependentMatra{$o → ̔o; #extra vowels $forceIndependentMatra{$ce → ̔ĕ; $forceIndependentMatra{$co → ̔ö; 
$forceIndependentMatra{$se → ̔ĕ; $forceIndependentMatra{$so → ̔o; $forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character $forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character $i$u → $strike; $aa → ā; $ai → ai; $au → au; $ii → ī; $i → i; $uu → ū; $u → u; $rrh → r̥ö; $rh → r̥ĕ; $llh → l̥ö; $lh → l̥ĕ; $e → e; $o → o; #extra vowels $ce → ĕ; $co → ö; $se → ĕ; $so → o; #dependent vowels when following independent vowels. Generally Illegal only for roundtripping $waa} $x → ā; $wai} $x → ai; $wau} $x → au; $wii} $x → ī; $wi } $x → i; $wuu} $x → ū; $wu } $x → u; $wrr} $x → r̥ö; $wr } $x → r̥ĕ; $wll} $x → l̥ö; $wl } $x → l̥ĕ; $we } $x → e; $wo } $x → o; $wa } $x → a; #extra vowels $wce} $x → ĕ; $wco} $x → ö; $wse} $x → ĕ; $wso} $x → o; $om} $x → oṁ; # independent vowels when preceeded by vowels $vowels{$waa → ''ā; $vowels{$wai → ''ai; $vowels{$wau → ''au; $vowels{$wii → ''ī; $vowels{$wi → ''i; $vowels{$wuu → ''ū; $vowels{$wu → ''u; $vowels{$we → ''e; $vowels{$wo → ''o; $vowels{$wa → ''a; #extra vowels $vowels{$wce → ''ĕ; $vowels{$wco → ''ö; $vowels{$wse → ''ĕ; $vowels{$wso → ''o; $vowels{$om → ''oṁ; # independent vowels (otherwise) $waa → ā; $wai → ai; $wau → au; $wii → ī; $wi → i; $wuu → ū; $wu → u; $wrr → r̥ö; $wr → r̥ĕ; $wll → l̥ö; $wl → l̥ĕ; $we → e; $wo → o; $wa → a; #extra vowels $wce → ĕ; $wco → ö; $wse → ĕ; $wso → o; $om → oṁ; # stress marks $avagraha → ; $chandrabindu → ṅġ; $ardhachandra → ṃ; $visarga → ḥ; # numbers $zero → 0; $one → 1; $two → 2; $three → 3; $four → 4; $five → 5; $six → 6; $seven → 7; $eight → 8; $nine → 9; $lm →; $ailm →; $aulm →; $dgs→''; $kta→t; # Balinese numbers are surrounded by dandas which can be removed $danda } [$zero$one$two$three$four$five$six$seven$eight$nine] → ' '; [0123456789] { $danda → ' '; $danda→', '; $doubleDanda→'. 
'; \uE070→; # ABBREVIATION SIGN # LETTER RA WITH MIDDLE DIAGONAL \uE071}$x→ra; \uE071$virama→r; \uE071→ra; # LETTER RA WITH LOWER DIAGONAL \uE072}$x→ra; \uE072$virama→r; \uE072→ra; \uE073→; # RUPEE MARK \uE074→; # RUPEE SIGN \uE075→; # CURRENCY NUMERATOR ONE \uE076→; # CURRENCY NUMERATOR TWO \uE077→; # CURRENCY NUMERATOR THREE \uE078→; # CURRENCY NUMERATOR FOUR \uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR \uE07A→; # CURRENCY DENOMINATOR SIXTEEN \uE07B→; # ISSHAR \uE07C→; # TIPPI \uE07D→; # ADDAK \uE07E→; # IRI \uE07F→; # URA \uE080→; # EK ONKAR \uE004→; # DEVANAGARI VOWEL SIGN SHORT A ::NFC; EOF; # transliteration rules following DHARMA project "strict transliteration" # mostly follows ISO-15919, with modifications for precision and broader coverage # https://hal.inria.fr/halshs-02272407/ $rules['ban-x-dharma'] = <<<'EOF' ::NFC; $dv_no_rerekan = [\u1B35-\u1B44]; $dv = [\u1B34$dv_no_rerekan]; $c = [\u1B13-\u1B33 \u1B45-\u1B4C]; # disambiguation from aspirates [kgcjṭḍtdpb] { ᭄ } ᬳ → \:; # various signs ᬀ → ṁ\*; # ulu ricem / ardhacandra ᬁ → m̐; # ulu candra / candrabindu ᬂ → ṁ; # cecek / anusvara ᬃ → r\=; # surang / repha (note, "Indonesian mode" not "Indian mode") ᬄ → ḥ; # bisah / visarga # akara used as glottal ᬅ } $dv_no_rerekan → q; # independent vowels ᬅ → A; # LETTER AKARA ᬆ → A\:; # LETTER AKARA TEDUNG ᬇ → I; # LETTER IKARA ᬈ → I\:; # LETTER IKARA TEDUNG ᬉ → U; # LETTER UKARA ᬊ → U\:; # LETTER UKARA TEDUNG ᬋ → R̥; # LETTER RA REPA ᬌ → R̥\:; # LETTER RA REPA TEDUNG ᬍ → L̥; # LETTER LA LENGA ᬎ → L̥̄; # LETTER LA LENGA TEDUNG ᬏ → E; # LETTER EKARA ᬐ → Ai; # LETTER AIKARA ᬑ → O; # LETTER OKARA ᬒ → O\:; # LETTER OKARA TEDUNG # consonants ᬓ } $dv → k; ᬓ → ka; # LETTER KA ᬔ } $dv → kh; ᬔ → kha; # LETTER KA MAHAPRANA ᬕ } $dv → g; ᬕ → ga; # LETTER GA ᬖ } $dv → gh; ᬖ → gha; # LETTER GA GORA ᬗ } $dv → ṅ; ᬗ → ṅa; # LETTER NGA ᬘ } $dv → c; ᬘ → ca; # LETTER CA ᬙ } $dv → ch; ᬙ → cha; # LETTER CA LACA ᬚ } $dv → j; ᬚ → ja; # LETTER JA ᬛ } $dv → jh; ᬛ → jha; # 
LETTER JA JERA ᬜ } $dv → ñ; ᬜ → ña; # LETTER NYA ᬝ } $dv → ṭ; ᬝ → ṭa; # LETTER TA LATIK ᬞ } $dv → ṭh; ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA ᬟ } $dv → ḍ; ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA ᬠ } $dv → ḍh; ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA ᬡ } $dv → ṇ; ᬡ → ṇa; # LETTER NA RAMBAT ᬢ } $dv → t; ᬢ → ta; # LETTER TA ᬣ } $dv → th; ᬣ → tha; # LETTER TA TAWA ᬤ } $dv → d; ᬤ → da; # LETTER DA ᬥ } $dv → dh; ᬥ → dha; # LETTER DA MADU ᬦ } $dv → n; ᬦ → na; # LETTER NA ᬧ } $dv → p; ᬧ → pa; # LETTER PA ᬨ } $dv → ph; ᬨ → pha; # LETTER PA KAPAL ᬩ } $dv → b; ᬩ → ba; # LETTER BA ᬪ } $dv → bh; ᬪ → bha; # LETTER BA KEMBANG ᬫ } $dv → m; ᬫ → ma; # LETTER MA ᬬ } $dv → y; ᬬ → ya; # LETTER YA ᬭ } $dv → r; ᬭ → ra; # LETTER RA ᬮ } $dv → l; ᬮ → la; # LETTER LA ᬯ } $dv → v; ᬯ → va; # LETTER WA ᬰ } $dv → ś; ᬰ → śa; # LETTER SA SAGA ᬱ } $dv → ṣ; ᬱ → ṣa; # LETTER SA SAPA ᬲ } $dv → s; ᬲ → sa; # LETTER SA ᬳ } $dv → h; ᬳ → ha; # LETTER HA \u1B4C } $dv → j\=ñ; \u1B4C → j\=ña; # LETTER ARCHAIC JNYA # rerekan (not present in DHARMA, "*" used as impromptu mark) ᬴ } $dv_no_rerekan → \*; ᬴ → \* a; # SIGN REREKAN # dependent vowels ᬵ → ā; # VOWEL SIGN TEDUNG ᬶ → i; # VOWEL SIGN ULU ᬷ → ī; # VOWEL SIGN ULU SARI ᬸ → u; # VOWEL SIGN SUKU ᬹ → ū; # VOWEL SIGN SUKU ILUT ᬺ → r̥; # VOWEL SIGN RA REPA ᬻ → r̥\:; # VOWEL SIGN RA REPA TEDUNG ᬼ→ l̥; # VOWEL SIGN LA LENGA ᬽ → l̥\:; # VOWEL SIGN LA LENGA TEDUNG ᬾ → e; # VOWEL SIGN TALING ᬿ → ai; # VOWEL SIGN TALING REPA ᭀ → o; # VOWEL SIGN TALING TEDUNG ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG ᭂ → ə; # VOWEL SIGN PEPET ᭃ → ə\:; # VOWEL SIGN PEPET TEDUNG # adeg-adeg ᭄\u200C → ·; # explicit ADEG ADEG ᭄ } $c → ; # ADEG ADEG ᭄ → ·; # ADEG ADEG # Sasak consonants (not present in DHARMA, "'" used as impromptu mark) ᭅ } $dv → k\'; ᭅ → k\'a; # LETTER KAF SASAK ᭆ } $dv → kh\'; ᭆ → kh\'a; # LETTER KHOT SASAK ᭇ } $dv → t\'; ᭇ → t\'a; # LETTER TZIR SASAK ᭈ } $dv → p\'; ᭈ → p\'a; # LETTER EF SASAK ᭉ } $dv → v\'; ᭉ → v\'a; # LETTER VE SASAK ᭊ } $dv → j\'; ᭊ → j\'a; # LETTER ZAL SASAK ᭋ } 
$dv → s\'; ᭋ → s\'a; # LETTER ASYURA SASAK # digits ᭐ → 0; # DIGIT ZERO ᭑ → 1; # DIGIT ONE ᭒ → 2; # DIGIT TWO ᭓ → 3; # DIGIT THREE ᭔ → 4; # DIGIT FOUR ᭕ → 5; # DIGIT FIVE ᭖ → 6; # DIGIT SIX ᭗ → 7; # DIGIT SEVEN ᭘ → 8; # DIGIT EIGHT ᭙ → 9; # DIGIT NINE # punctuation ᭚ → '<g type="panti"/>'; # PANTI ᭛ → '<g type="pamada"/>'; # PAMADA ᭜ → \@; # WINDU ᭝ → '<g type="pamungkah"/>'; # CARIK PAMUNGKAH ᭞ → \,; # CARIK SIKI ᭟ → \,\,; # CARIK PAREREN ᭠ → '<g type="pameneng"/>'; # PAMENENG \u1B7D → '<g type="pantiLantang"/>'; \u1B7E → '<g type="pamadaLantang"/>'; EOF; # transliteration rules developed at Puri Kauhan Ubud and widely used in Bali # default Balinese to Latin transliteration variant $rules['ban-x-pku'] = <<<'EOF' ::NFC; $dv_no_rerekan = [\u1B35-\u1B44]; $dv = [\u1B34$dv_no_rerekan]; $c = [\u1B13-\u1B33 \u1B45-\u1B4C]; $base = [\u1B05-\u1B33 \u1B45-\u1B60]; # ulu suku deletion mark $base ᬶᬸ → ∅; # disambiguation from aspirates [kgcjṭḍtdpb] { ᭄ } ᬳ → \:; # various signs ᬀ → ṃ; # ulu ricem / ardhacandra ᬁ → m̐; # ulu candra / candrabindu ᬂ → ŋ; # cecek / anusvara ᬃ → ŕ; # surang / repha (note, "Indonesian mode" not "Indian mode") ᬄ → ḥ; # bisah / visarga # akara used as glottal ᬅ } $dv_no_rerekan → \*; # independent vowels ᬅ → ᵒa; # LETTER AKARA ᬆ → ᵒā; # LETTER AKARA TEDUNG ᬇ → ᵒi; # LETTER IKARA ᬈ → ᵒī; # LETTER IKARA TEDUNG ᬉ → ᵒu; # LETTER UKARA ᬊ → ᵒū; # LETTER UKARA TEDUNG ᬋ → r̥; # LETTER RA REPA ᬌ → r̥̄; # LETTER RA REPA TEDUNG ᬍ → l̥; # LETTER LA LENGA ᬎ → l̥̄; # LETTER LA LENGA TEDUNG ᬏ → ᵒe; # LETTER EKARA ᬐ → ᵒai; # LETTER AIKARA ᬑ → ᵒo; # LETTER OKARA ᬒ → ᵒau; # LETTER OKARA TEDUNG # consonants ᬓ } $dv → k; ᬓ → ka; # LETTER KA ᬔ } $dv → kh; ᬔ → kha; # LETTER KA MAHAPRANA ᬕ } $dv → g; ᬕ → ga; # LETTER GA ᬖ } $dv → gh; ᬖ → gha; # LETTER GA GORA ᬗ } $dv → ṅ; ᬗ → ṅa; # LETTER NGA ᬘ } $dv → c; ᬘ → ca; # LETTER CA ᬙ } $dv → ch; ᬙ → cha; # LETTER CA LACA ᬚ } $dv → j; ᬚ → ja; # LETTER JA ᬛ } $dv → jh; ᬛ → jha; # LETTER JA JERA ᬜ } $dv → ñ; ᬜ → ña; # LETTER NYA ᬝ 
} $dv → ṭ; ᬝ → ṭa; # LETTER TA LATIK ᬞ } $dv → ṭh; ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA ᬟ } $dv → ḍ; ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA ᬠ } $dv → ḍh; ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA ᬡ } $dv → ṇ; ᬡ → ṇa; # LETTER NA RAMBAT ᬢ } $dv → t; ᬢ → ta; # LETTER TA ᬣ } $dv → th; ᬣ → tha; # LETTER TA TAWA ᬤ } $dv → d; ᬤ → da; # LETTER DA ᬥ } $dv → dh; ᬥ → dha; # LETTER DA MADU ᬦ } $dv → n; ᬦ → na; # LETTER NA ᬧ } $dv → p; ᬧ → pa; # LETTER PA ᬨ } $dv → ph; ᬨ → pha; # LETTER PA KAPAL ᬩ } $dv → b; ᬩ → ba; # LETTER BA ᬪ } $dv → bh; ᬪ → bha; # LETTER BA KEMBANG ᬫ } $dv → m; ᬫ → ma; # LETTER MA ᬬ } $dv → y; ᬬ → ya; # LETTER YA ᬭ } $dv → r; ᬭ → ra; # LETTER RA ᬮ } $dv → l; ᬮ → la; # LETTER LA ᬯ } $dv → w; ᬯ → wa; # LETTER WA ᬰ } $dv → ś; ᬰ → śa; # LETTER SA SAGA ᬱ } $dv → ṣ; ᬱ → ṣa; # LETTER SA SAPA ᬲ } $dv → s; ᬲ → sa; # LETTER SA ᬳ } $dv → h; ᬳ → ha; # LETTER HA \u1B4C } $dv → j\=ñ; \u1B4C → j\=ña; # LETTER ARCHAIC JNYA # rerekan (not present in DHARMA, "*" used as impromptu mark) ᬴ } $dv_no_rerekan → \*; ᬴ → \* a; # SIGN REREKAN # dependent vowels ᬵ → ā; # VOWEL SIGN TEDUNG ᬶ → i; # VOWEL SIGN ULU ᬷ → ī; # VOWEL SIGN ULU SARI ᬸ → u; # VOWEL SIGN SUKU ᬹ → ū; # VOWEL SIGN SUKU ILUT ᬺᭂ → r̥ĕ; ᬺ → r̥ĕ; # VOWEL SIGN RA REPA ᬻ → r̥ö; # VOWEL SIGN RA REPA TEDUNG ᬼ→ lĕ; # VOWEL SIGN LA LENGA ᬽ → lö; # VOWEL SIGN LA LENGA TEDUNG ᬾ → e; # VOWEL SIGN TALING ᬿ → ai; # VOWEL SIGN TALING REPA ᭀ → o; # VOWEL SIGN TALING TEDUNG ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG ᭂ → ĕ; # VOWEL SIGN PEPET ᭃ → ö; # VOWEL SIGN PEPET TEDUNG # adeg-adeg ᭄\u200C → ·; # explicit ADEG ADEG ᭄ } $c → ; # ADEG ADEG ᭄ → ·; # ADEG ADEG # Sasak consonants (not present in DHARMA, "'" used as impromptu mark) ᭅ } $dv → k\'; ᭅ → k\'a; # LETTER KAF SASAK ᭆ } $dv → kh\'; ᭆ → kh\'a; # LETTER KHOT SASAK ᭇ } $dv → t\'; ᭇ → t\'a; # LETTER TZIR SASAK ᭈ } $dv → p\'; ᭈ → p\'a; # LETTER EF SASAK ᭉ } $dv → w\'; ᭉ → w\'a; # LETTER VE SASAK ᭊ } $dv → j\'; ᭊ → j\'a; # LETTER ZAL SASAK ᭋ } $dv → s\'; ᭋ → s\'a; # LETTER ASYURA SASAK # 
digits ᭐ → 0; # DIGIT ZERO ᭑ → 1; # DIGIT ONE ᭒ → 2; # DIGIT TWO ᭓ → 3; # DIGIT THREE ᭔ → 4; # DIGIT FOUR ᭕ → 5; # DIGIT FIVE ᭖ → 6; # DIGIT SIX ᭗ → 7; # DIGIT SEVEN ᭘ → 8; # DIGIT EIGHT ᭙ → 9; # DIGIT NINE # punctuation ᭚ → '||'; # PANTI ᭛ → '//'; # PAMADA ᭜ → 0; # WINDU ᭝ → \=; # CARIK PAMUNGKAH ᭞ → \,; # CARIK SIKI ᭟ → \.; # CARIK PAREREN ᭠ → \-; # PAMENENG \u1B7D → '|||'; \u1B7E → '///'; EOF; return $rules; } protected function getTransliteratorAliases() { return [ 'ban' => 'ban-x-pku', 'ban-bali' => 'ban-x-pku', ]; } /** * Guess if a text is written in Balinese or Latin. * Overrides LanguageConverter::guessVariant() * * @param string $text The text to be checked * @param string $variant Language code of the variant to be checked for * @return bool True if $text appears to be written in $variant */ public function guessVariant( $text, $variant ) { $hasBalinese = preg_match( "/[\x{1B00}-\x{1B7F}]/u", $text ); return ( $variant == 'ban-bali' ) == $hasBalinese; } } PK ! ���h� � converters/ShiConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file */ /** * Shilha specific code. * * Conversion script between Latin and Tifinagh for Tachelhit. 
* - Tifinagh -> lowercase Latin * - lowercase/uppercase Latin -> Tifinagh * * * Based on: * - https://en.wikipedia.org/wiki/Shilha_language * - LanguageSr.php * * @ingroup Languages */ class ShiConverter extends LanguageConverterSpecific { /** * The Tifinagh alphabet sequence is based on * "Dictionnaire Général de la Langue Amazighe Informatisé" * by IRCAM (https://tal.ircam.ma/dglai/lexieam.php, DGLAi), * with the labio-velarization mark in the end */ private const TO_LATIN = [ 'ⴰ' => 'a', 'ⴱ' => 'b', 'ⴳ' => 'g', 'ⴷ' => 'd', 'ⴹ' => 'ḍ', 'ⴻ' => 'e', 'ⴼ' => 'f', 'ⴽ' => 'k', 'ⵀ' => 'h', 'ⵃ' => 'ḥ', 'ⵄ' => 'ɛ', 'ⵅ' => 'x', 'ⵇ' => 'q', 'ⵉ' => 'i', 'ⵊ' => 'j', 'ⵍ' => 'l', 'ⵎ' => 'm', 'ⵏ' => 'n', 'ⵓ' => 'u', 'ⵔ' => 'r', 'ⵕ' => 'ṛ', 'ⵖ' => 'ɣ', 'ⵙ' => 's', 'ⵚ' => 'ṣ', 'ⵛ' => 'c', 'ⵜ' => 't', 'ⵟ' => 'ṭ', 'ⵡ' => 'w', 'ⵢ' => 'y', 'ⵣ' => 'z', 'ⵥ' => 'ẓ', 'ⵯ' => 'ʷ', ]; /** The sequence is based on DGLAi, with the non-standard letters in the end */ private const UPPER_TO_LOWER_CASE_LATIN = [ 'A' => 'a', 'B' => 'b', 'G' => 'g', 'D' => 'd', 'Ḍ' => 'ḍ', 'E' => 'e', 'F' => 'f', 'K' => 'k', 'H' => 'h', 'Ḥ' => 'ḥ', 'Ɛ' => 'ɛ', 'X' => 'x', 'Q' => 'q', 'I' => 'i', 'J' => 'j', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'U' => 'u', 'R' => 'r', 'Ṛ' => 'ṛ', 'Ɣ' => 'ɣ', 'S' => 's', 'Ṣ' => 'ṣ', 'C' => 'c', 'T' => 't', 'Ṭ' => 'ṭ', 'W' => 'w', 'Y' => 'y', 'Z' => 'z', 'Ẓ' => 'ẓ', 'O' => 'o', 'P' => 'p', 'V' => 'v', ]; /** * The sequence is based on DGLAi, with the labio-velarization mark and * the non-standard letters in the end */ private const TO_TIFINAGH = [ 'a' => 'ⴰ', 'b' => 'ⴱ', 'g' => 'ⴳ', 'd' => 'ⴷ', 'ḍ' => 'ⴹ', 'e' => 'ⴻ', 'f' => 'ⴼ', 'k' => 'ⴽ', 'h' => 'ⵀ', 'ḥ' => 'ⵃ', 'ɛ' => 'ⵄ', 'x' => 'ⵅ', 'q' => 'ⵇ', 'i' => 'ⵉ', 'j' => 'ⵊ', 'l' => 'ⵍ', 'm' => 'ⵎ', 'n' => 'ⵏ', 'u' => 'ⵓ', 'r' => 'ⵔ', 'ṛ' => 'ⵕ', 'ɣ' => 'ⵖ', 's' => 'ⵙ', 'ṣ' => 'ⵚ', 'c' => 'ⵛ', 't' => 'ⵜ', 'ṭ' => 'ⵟ', 'w' => 'ⵡ', 'y' => 'ⵢ', 'z' => 'ⵣ', 'ẓ' => 'ⵥ', 'ʷ' => 'ⵯ', 'o' => 'ⵓ', 'p' => 'ⴱ', 'v' => 'ⴼ', ]; public function 
getMainCode(): string { return 'shi'; } public function getLanguageVariants(): array { return [ 'shi', 'shi-tfng', 'shi-latn' ]; } public function getVariantsFallbacks(): array { return [ 'shi' => [ 'shi-latn', 'shi-tfng' ], 'shi-tfng' => 'shi', 'shi-latn' => 'shi', ]; } protected function loadDefaultTables(): array { return [ 'lowercase' => new ReplacementArray( self::UPPER_TO_LOWER_CASE_LATIN ), 'shi-tfng' => new ReplacementArray( self::TO_TIFINAGH ), 'shi-latn' => new ReplacementArray( self::TO_LATIN ), 'shi' => new ReplacementArray() ]; } public function translate( $text, $toVariant ) { // If $text is empty or only includes spaces, do nothing // Otherwise translate it if ( trim( $text ) ) { $this->loadTables(); // For Tifinagh, first translate uppercase to lowercase Latin if ( $toVariant == 'shi-tfng' ) { $text = $this->mTables['lowercase']->replace( $text ); } $text = $this->mTables[$toVariant]->replace( $text ); } return $text; } } PK ! Z�� � converters/MniConverter.phpnu �Iw�� <?php /** * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * * @file MniConverter.php * @author Nokib Sarkar * @author Haoreima */ /** * Meitei specific converter routines. 
* * @ingroup Languages */ class MniConverter extends LanguageConverterSpecific { private const O = 'ꯑ'; private const OO = 'ꯑꯣ'; private const U = 'ꯎ'; private const EE = 'ꯑꯤ'; private const YA = 'ꯌ'; private const Y_ = 'য'; private const WA = 'ꯋ'; private const BA = 'ꯕ'; private const NA_ = 'ꯟ'; private const NA = 'ꯅ'; private const DIACRITIC_AA = 'ꯥ'; private const HALANTA = '꯭'; private const SKIP = ''; private const PERIOD = '꯫'; private const PA_ = 'ꯞ'; private const DIACRITICS_WITH_O = [ 'ꯣ' => 'ো', 'ꯤ' => 'ী', 'ꯥ' => 'া', 'ꯦ' => 'ে', 'ꯧ' => 'ৌ', 'ꯩ' => 'ৈ', 'ꯪ' => 'ং', ]; private const CONJUGATE_WITH_O = [ 'ꯑꯣ' => 'ও', 'ꯑꯤ' => 'ঈ', 'ꯑꯥ' => 'আ', 'ꯑꯦ' => 'এ', 'ꯑꯧ' => 'ঔ', 'ꯑꯩ' => 'ঐ', 'ꯑꯪ' => 'অং', ]; private const NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ]; private const NUMERALS = [ '꯰' => '০', '꯱' => '১', '꯲' => '২', '꯳' => '৩', '꯴' => '৪', '꯵' => '৫', '꯶' => '৬', '꯷' => '৭', '꯸' => '৮', '꯹' => '৯', ]; private const HALANTA_CONSONANTS = [ 'ꯟ' => 'ন্', 'ꯛ' => 'ক্', 'ꯝ' => 'ম্', 'ꯡ' => 'ং', 'ꯜ' => 'ল্', 'ꯠ' => 'ৎ', 'ꯞ' => 'প্', ]; private const HALANTA_CONSONANTS_TO_NORMAL = [ 'ꯟ' => 'ন', 'ꯛ' => 'ক', 'ꯝ' => 'ম', 'ꯡ' => 'ং', 'ꯜ' => 'ল', 'ꯠ' => 'ৎ', 'ꯞ' => 'প', ]; private const NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u"; private const CONSONANTS = self::HALANTA_CONSONANTS + [ 'ꯀ' => 'ক', 'ꯈ' => 'খ', 'ꯒ' => 'গ', 'ꯘ' => 'ঘ', 'ꯉ' => 'ঙ', 'ꯆ' => 'চ', 'ꯖ' => 'জ', 'ꯓ' => 'ঝ', 'ꯇ' => 'ত', 'ꯊ' => 'থ', 'ꯗ' => 'দ', 'ꯙ' => 'ধ', 'ꯅ' => 'ন', 'ꯄ' => 'প', 'ꯐ' => 'ফ', 'ꯕ' => 'ব', 'ꯚ' => 'ভ', 'ꯃ' => 'ম', 'ꯌ' => 'য়', 'ꯔ' => 'র', 'ꯂ' => 'ল', 'ꯋ' => 'ৱ', 'ꫩ' => 'শ', 'ꫪ' => 'ষ', 'ꯁ' => 'স', 'ꯍ' => 'হ', ]; private const VOWELS = [ 'ꯑ' => 'অ', 'ꯏ' => 'ই', 'ꯎ' => 'উ', 'ꯢ' => 'ই', 'ꯨ' => 'ু', ]; private const MTEI_TO_BENG_MAP_EXTRA = [ '꯫' => '।', '꯭' => '্', ]; private const MTEI_TO_BENG_MAP = self::VOWELS + self::DIACRITICS_WITH_O + self::CONJUGATE_WITH_O + self::CONSONANTS + self::NUMERALS + self::MTEI_TO_BENG_MAP_EXTRA; private function 
isBeginning( $position, $text ) {
		$at_first = $position === 0;
		return $at_first || preg_match( self::NON_WORD_CHARACTER_PATTERN, $text[$position - 1] );
	}

	/**
	 * Tell whether a character terminates a word.
	 *
	 * @param string $char A single character
	 * @return bool True for the PERIOD constant or for anything matching
	 *  NON_WORD_CHARACTER_PATTERN
	 */
	private function isEndOfWord( $char ) {
		if ( $char === self::PERIOD ) {
			return true;
		}
		return preg_match( self::NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Walk the text one character at a time and yield the replacement for
	 * each position, applying the context-dependent rules below before
	 * falling back to the plain MTEI_TO_BENG_MAP lookup.
	 *
	 * @param string $text
	 * @return \Generator Yields one string per consumed input position
	 */
	private function mteiToBengali( $text ) {
		$chars = mb_str_split( $text );
		$l = count( $chars );
		$i = 0;
		while ( $i < $l ) {
			$char = $chars[$i];
			if (
				$char === self::O &&
				$i + 1 < $l &&
				array_key_exists( $chars[ $i + 1 ], self::DIACRITICS_WITH_O )
			) {
				/**
				 * We have only 3 true vowels,
				 * ꯑ(a), ꯏ(i), ꯎ (u)
				 * Others are just extension from "a" by mixing with diacritics
				 */
				yield self::CONJUGATE_WITH_O[$char . $chars[ $i + 1 ]];
				// The diacritic was consumed together with the vowel.
				$i += 1;
			} elseif (
				$char === self::HALANTA &&
				$i > 0 &&
				array_key_exists( $chars[ $i - 1 ], self::HALANTA_CONSONANTS )
			) {
				// Remove halanta if the consonant has halanta already
				yield self::SKIP;
			} elseif (
				array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				( $i === $l - 1 || ( $i + 1 < $l && $this->isEndOfWord( $chars[ $i + 1 ] ) ) )
			) {
				// Remove halanta if this is the last character of the word
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === self::YA && $i > 0 && $chars[ $i - 1 ] === self::HALANTA ) {
				// য + ্ = য়
				yield self::Y_;
			} elseif (
				$char === self::WA &&
				$i - 2 >= 0 &&
				$chars[ $i - 1 ] === self::HALANTA &&
				array_key_exists( $chars[ $i - 2 ], self::CONSONANTS )
			) {
				// ব + ্ + র = ব্র
				yield self::CONSONANTS[self::BA];
			} elseif ( $char === self::PA_ && $i + 1 < $l && $chars[ $i + 1 ] === 'ꯀ' ) {
				// do not conjugate with halanta if it's followed by "ক"
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === self::NA_ &&
				$i + 1 < $l &&
				!in_array( $chars[ $i + 1 ], self::NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $chars[ $i + 1 ], self::CONSONANTS )
			) {
				/**
				 * ন্ / ণ্ + any consonant
				 * (except, ট, ঠ, ড, ঢ, , ত, থ, দ, ধ, ন, ব, য, য়) = weird
				 * Any consonant + ্ + ন = maybe ok
				 */
				yield self::MTEI_TO_BENG_MAP[self::NA];
				$i += 1;
				continue;
			} elseif ( $char === self::U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও
				//
				// $i indexes $chars (whole characters). isBeginning() looks at
				// element [$position - 1] of its second argument, so it must be
				// given the character array: indexing the raw string would read
				// a single byte at an unrelated offset for multi-byte text.
				yield self::MTEI_TO_BENG_MAP[self::OO];
			} elseif (
				$char === self::O &&
				$i + 2 < $l &&
				$chars[$i + 1] === self::EE[0] &&
				$chars[ $i + 2 ] === self::EE[1]
			) {
				/**
				 * Instead of হাঈবা, people love to use হায়বা.
				 * But this is only in the case when ee or ya is
				 * in the middle of the words,
				 * never to do it if it's in the beginning.
				 */
				// NOTE(review): EE is defined outside this chunk; if it is a string
				// rather than an array, [0]/[1] are bytes, not characters - confirm.
				yield self::MTEI_TO_BENG_MAP[self::YA];
			} elseif (
				!array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				array_key_exists( $char, self::CONSONANTS ) &&
				( $i === $l - 1 || ( $i + 1 < $l && $this->isEndOfWord( $chars[ $i + 1 ] ) ) )
			) {
				// Consonants without halantas should end with diacritics of aa sound everytime.
				yield self::MTEI_TO_BENG_MAP[$char] . self::MTEI_TO_BENG_MAP[self::DIACRITIC_AA];
			} else {
				// No special rule: map the character if known, else pass it through.
				yield ( array_key_exists( $char, self::MTEI_TO_BENG_MAP ) ?
					self::MTEI_TO_BENG_MAP[$char] : $char );
			}
			$i += 1;
		}
	}

	/**
	 * Concatenate everything mteiToBengali() yields for the given text.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'mni';
	}

	/** @return string[] Variant codes offered by this converter */
	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	/** @return array Map of variant code to its fallback */
	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	/**
	 * Both tables start empty; the conversion itself is done in
	 * transliterate(), not through replacement tables.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Convert text to the requested variant.
	 *
	 * Only 'mni-beng' triggers transliteration; for any other variant the
	 * text is returned unchanged.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}
PK ! ���F converters/IuConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Inuktitut specific code.
 *
 * Conversion script between Latin and Syllabics for Inuktitut.
* - Syllabics -> lowercase Latin
 * - lowercase/uppercase Latin -> Syllabics
 *
 *
 * Based on:
 * - https://commons.wikimedia.org/wiki/Image:Inuktitut.png
 * - LanguageSr.php
 *
 * @ingroup Languages
 */
class IuConverter extends LanguageConverterSpecific {

	/** Syllabics => lowercase Latin; the inverse of TO_SYLLABICS. */
	private const TO_LATIN = [
		'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa',
		'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa',
		'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa',
		'ᒃ' => 'k', 'ᑭ' => 'ki', 'ᑮ' => 'kii', 'ᑯ' => 'ku', 'ᑰ' => 'kuu', 'ᑲ' => 'ka', 'ᑳ' => 'kaa',
		// Was 'ᖅqquu' for 'ᖅᑰ', which leaked a syllabic into the Latin output.
		'ᖅᒃ' => 'qq', 'ᖅᑭ' => 'qqi', 'ᖅᑮ' => 'qqii', 'ᖅᑯ' => 'qqu', 'ᖅᑰ' => 'qquu', 'ᖅᑲ' => 'qqa', 'ᖅᑳ' => 'qqaa',
		'ᒡ' => 'g', 'ᒋ' => 'gi', 'ᒌ' => 'gii', 'ᒍ' => 'gu', 'ᒎ' => 'guu', 'ᒐ' => 'ga', 'ᒑ' => 'gaa',
		'ᒻ' => 'm', 'ᒥ' => 'mi', 'ᒦ' => 'mii', 'ᒧ' => 'mu', 'ᒨ' => 'muu', 'ᒪ' => 'ma', 'ᒫ' => 'maa',
		'ᓐ' => 'n', 'ᓂ' => 'ni', 'ᓃ' => 'nii', 'ᓄ' => 'nu', 'ᓅ' => 'nuu', 'ᓇ' => 'na', 'ᓈ' => 'naa',
		'ᔅ' => 's', 'ᓯ' => 'si', 'ᓰ' => 'sii', 'ᓱ' => 'su', 'ᓲ' => 'suu', 'ᓴ' => 'sa', 'ᓵ' => 'saa',
		'ᓪ' => 'l', 'ᓕ' => 'li', 'ᓖ' => 'lii', 'ᓗ' => 'lu', 'ᓘ' => 'luu', 'ᓚ' => 'la', 'ᓛ' => 'laa',
		'ᔾ' => 'j', 'ᔨ' => 'ji', 'ᔩ' => 'jii', 'ᔪ' => 'ju', 'ᔫ' => 'juu', 'ᔭ' => 'ja', 'ᔮ' => 'jaa',
		'ᕝ' => 'v', 'ᕕ' => 'vi', 'ᕖ' => 'vii', 'ᕗ' => 'vu', 'ᕘ' => 'vuu', 'ᕙ' => 'va', 'ᕚ' => 'vaa',
		'ᕐ' => 'r', 'ᕆ' => 'ri', 'ᕇ' => 'rii', 'ᕈ' => 'ru', 'ᕉ' => 'ruu', 'ᕋ' => 'ra', 'ᕌ' => 'raa',
		'ᖅ' => 'q', 'ᕿ' => 'qi', 'ᖀ' => 'qii', 'ᖁ' => 'qu', 'ᖂ' => 'quu', 'ᖃ' => 'qa', 'ᖄ' => 'qaa',
		'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu', 'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa',
		'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii', 'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa',
		'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi', 'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa',
	];

	/** ASCII uppercase => lowercase, applied before converting Latin to syllabics. */
	private const UPPER_TO_LOWER_CASE_LATIN = [
		'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
		'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
		'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
		'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
		'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
		'Z' => 'z',
	];

	/** Lowercase Latin => syllabics. */
	private const TO_SYLLABICS = [
		'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ',
		'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ',
		't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ',
		'k' => 'ᒃ', 'ki' => 'ᑭ', 'kii' => 'ᑮ', 'ku' => 'ᑯ', 'kuu' => 'ᑰ', 'ka' => 'ᑲ', 'kaa' => 'ᑳ',
		'g' => 'ᒡ', 'gi' => 'ᒋ', 'gii' => 'ᒌ', 'gu' => 'ᒍ', 'guu' => 'ᒎ', 'ga' => 'ᒐ', 'gaa' => 'ᒑ',
		'm' => 'ᒻ', 'mi' => 'ᒥ', 'mii' => 'ᒦ', 'mu' => 'ᒧ', 'muu' => 'ᒨ', 'ma' => 'ᒪ', 'maa' => 'ᒫ',
		'n' => 'ᓐ', 'ni' => 'ᓂ', 'nii' => 'ᓃ', 'nu' => 'ᓄ', 'nuu' => 'ᓅ', 'na' => 'ᓇ', 'naa' => 'ᓈ',
		's' => 'ᔅ', 'si' => 'ᓯ', 'sii' => 'ᓰ', 'su' => 'ᓱ', 'suu' => 'ᓲ', 'sa' => 'ᓴ', 'saa' => 'ᓵ',
		'l' => 'ᓪ', 'li' => 'ᓕ', 'lii' => 'ᓖ', 'lu' => 'ᓗ', 'luu' => 'ᓘ', 'la' => 'ᓚ', 'laa' => 'ᓛ',
		'j' => 'ᔾ', 'ji' => 'ᔨ', 'jii' => 'ᔩ', 'ju' => 'ᔪ', 'juu' => 'ᔫ', 'ja' => 'ᔭ', 'jaa' => 'ᔮ',
		'v' => 'ᕝ', 'vi' => 'ᕕ', 'vii' => 'ᕖ', 'vu' => 'ᕗ', 'vuu' => 'ᕘ', 'va' => 'ᕙ', 'vaa' => 'ᕚ',
		'r' => 'ᕐ', 'ri' => 'ᕆ', 'rii' => 'ᕇ', 'ru' => 'ᕈ', 'ruu' => 'ᕉ', 'ra' => 'ᕋ', 'raa' => 'ᕌ',
		'qq' => 'ᖅᒃ', 'qqi' => 'ᖅᑭ', 'qqii' => 'ᖅᑮ', 'qqu' => 'ᖅᑯ', 'qquu' => 'ᖅᑰ', 'qqa' => 'ᖅᑲ', 'qqaa' => 'ᖅᑳ',
		'q' => 'ᖅ', 'qi' => 'ᕿ', 'qii' => 'ᖀ', 'qu' => 'ᖁ', 'quu' => 'ᖂ', 'qa' => 'ᖃ', 'qaa' => 'ᖄ',
		'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ', 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ',
		'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ', 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ',
		'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ',
	];

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'iu';
	}

	/** @return string[] Variant codes offered by this converter */
	public function getLanguageVariants(): array {
		return [ 'iu', 'ike-cans', 'ike-latn' ];
	}

	/** @return array Map of variant code to its fallback */
	public function getVariantsFallbacks(): array {
		return [
			'iu' => 'ike-cans',
			'ike-cans' => 'iu',
			'ike-latn' => 'iu',
		];
	}

	/**
	 * Build the replacement tables.
	 *
	 * 'lowercase' is not a variant: it is a helper table that translate()
	 * applies before the 'ike-cans' table.
	 *
	 * @return array Map of table name to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		return [
			'lowercase' => new ReplacementArray( self::UPPER_TO_LOWER_CASE_LATIN ),
			'ike-cans' => new ReplacementArray( self::TO_SYLLABICS ),
			'ike-latn' => new ReplacementArray( self::TO_LATIN ),
			'iu' => new ReplacementArray()
		];
	}

	/**
	 * Convert text to the given variant using the replacement tables.
	 *
	 * @param string $text
	 * @param string $toVariant Name of the table to apply
	 * @return string The converted text, or $text untouched when it trims to
	 *  something falsy (empty or whitespace only)
	 */
	public function translate( $text, $toVariant ) {
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			// To syllabics, first translate uppercase to lowercase Latin,
			// because TO_SYLLABICS only has lowercase keys
			if ( $toVariant === 'ike-cans' ) {
				$text = $this->mTables['lowercase']->replace( $text );
			}
			$text = $this->mTables[$toVariant]->replace( $text );
		}
		return $text;
	}
}
PK ! ��� converters/GanConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Gan Chinese specific code.
*
 * @ingroup Languages
 */
class GanConverter extends LanguageConverter {

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'gan';
	}

	/** @return string[] Variant codes offered by this converter */
	public function getLanguageVariants(): array {
		return [ 'gan', 'gan-hans', 'gan-hant' ];
	}

	/** @return array Map of variant code to its list of fallback variants */
	public function getVariantsFallbacks(): array {
		// Both script variants fall back to the unconverted base code.
		$baseOnly = [ 'gan' ];

		return [
			'gan' => [ 'gan-hans', 'gan-hant' ],
			'gan-hans' => $baseOnly,
			'gan-hant' => $baseOnly,
		];
	}

	/**
	 * Get manual level limit for supported variants.
	 * @since 1.36
	 *
	 * @return array
	 */
	protected function getAdditionalManualLevel(): array {
		return [ 'gan' => 'disable' ];
	}

	/** @return string Separator returned for variant descriptions */
	public function getDescVarSeparator(): string {
		return '; ';
	}

	/**
	 * Variant display names: the parent's list with the three Gan entries
	 * merged on top of it.
	 *
	 * @return array
	 */
	public function getVariantNames(): array {
		return array_merge(
			parent::getVariantNames(),
			[
				'gan' => '原文',
				'gan-hans' => '简体',
				'gan-hant' => '繁體',
			]
		);
	}

	/**
	 * Build the replacement tables from the ZhConversion constants; the
	 * base 'gan' table is empty.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		$toSimplified = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::ZH_TO_HANS );
		$toTraditional = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::ZH_TO_HANT );

		return [
			'gan-hans' => $toSimplified,
			'gan-hant' => $toTraditional,
			'gan' => new ReplacementArray()
		];
	}

	/**
	 * Pass a category key through autoConvert() with the 'gan' variant.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'gan' );
		return $converted;
	}
}
PK ! �ni< < converters/ZghConverter.phpnu �Iw�� <?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Standard Moroccan Amazigh specific code.
 *
 * Conversion script for Tifinagh to lowercase Latin for Standard Moroccan Amazigh.
 *
 *
 * Based on:
 * - LanguageShi.php
 * - https://fr.wikipedia.org/wiki/Tifinagh
 *
 * @ingroup Languages
 */
class ZghConverter extends LanguageConverterSpecific {
	/**
	 * The Tifinagh alphabet sequence is based on
	 * "Dictionnaire Général de la Langue Amazighe Informatisé"
	 * by IRCAM (https://tal.ircam.ma/dglai/lexieam.php, DGLAi),
	 * with the labio-velarization mark in the end.
	 */
	private const TO_LATIN = [
		'ⴰ' => 'a', 'ⴱ' => 'b', 'ⴳ' => 'g', 'ⴷ' => 'd',
		'ⴹ' => 'ḍ', 'ⴻ' => 'e', 'ⴼ' => 'f', 'ⴽ' => 'k',
		'ⵀ' => 'h', 'ⵃ' => 'ḥ', 'ⵄ' => 'ɛ', 'ⵅ' => 'x',
		'ⵇ' => 'q', 'ⵉ' => 'i', 'ⵊ' => 'j', 'ⵍ' => 'l',
		'ⵎ' => 'm', 'ⵏ' => 'n', 'ⵓ' => 'u', 'ⵔ' => 'r',
		'ⵕ' => 'ṛ', 'ⵖ' => 'ɣ', 'ⵙ' => 's', 'ⵚ' => 'ṣ',
		'ⵛ' => 'c', 'ⵜ' => 't', 'ⵟ' => 'ṭ', 'ⵡ' => 'w',
		'ⵢ' => 'y', 'ⵣ' => 'z', 'ⵥ' => 'ẓ', 'ⵯ' => 'ʷ',
	];

	/** @return string Main language code handled by this converter */
	public function getMainCode(): string {
		return 'zgh';
	}

	/** @return string[] Variant codes offered by this converter */
	public function getLanguageVariants(): array {
		return [ 'zgh', 'zgh-latn' ];
	}

	/** @return array Empty: no variant fallbacks are declared */
	public function getVariantsFallbacks(): array {
		return [];
	}

	/**
	 * Build the replacement tables: Tifinagh-to-Latin for 'zgh-latn' and
	 * an empty table for 'zgh'.
	 *
	 * @return array Map of variant code to ReplacementArray
	 */
	protected function loadDefaultTables(): array {
		$toLatin = new ReplacementArray( self::TO_LATIN );
		$identity = new ReplacementArray();

		return [
			'zgh-latn' => $toLatin,
			'zgh' => $identity
		];
	}

	/**
	 * Convert text to the given variant.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string $text unchanged when the target is 'zgh' or the text
	 *  trims to something falsy; otherwise the table-converted text
	 */
	public function translate( $text, $toVariant ) {
		// Conversion is one-way: zgh (zgh-Tfng) to zgh-Latn only, and
		// there is nothing to do when there is no text.
		$needsConversion = $toVariant !== 'zgh' && trim( $text );

		if ( $needsConversion ) {
			$this->loadTables();
			return $this->mTables[$toVariant]->replace( $text );
		}

		return $text;
	}
}
PK ! �*�87&