File: //usr/local/wp/vendor/gettext/languages/bin/import-cldr-data
#!/usr/bin/env php
<?php
// Turn every PHP error (warnings, notices, deprecations, ...) into a RuntimeException,
// so that any problem stops the script instead of being silently ignored.
set_error_handler(function ($errno, $errstr, $errfile = null, $errline = null) {
    // Fall back to the numeric error level when PHP gives us no (truthy) message.
    $parts = array($errstr ?: "Error {$errno}");
    if ($errfile) {
        $parts[] = "File: {$errfile}";
        // The line number is only reported together with the file name.
        if ($errline) {
            $parts[] = "Line: {$errline}";
        }
    }
    throw new RuntimeException(implode("\n", $parts));
});
// Report every kind of error, so that the handler above sees all of them.
error_reporting(-1);
/**
 * Entry point: build the options from the command line, process the CLDR documents
 * (languages, scripts, territories, plural rules) and write the resulting files.
 *
 * @param string[] $argv the command line arguments (program name first)
 *
 * @throws RuntimeException if the output directory can't be created, or if any processing step fails
 *
 * @return void
 */
function main(array $argv)
{
    $options = new Options($argv);
    $outputDir = $options->outputDir;
    if (!is_dir($outputDir)) {
        if (!mkdir($outputDir, 0777, true)) {
            throw new RuntimeException("Cannot create output directory: {$outputDir}\n");
        }
    }
    // From now on work with the absolute path, always using forward slashes.
    $options->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', realpath($outputDir));
    $documentStorage = new DocumentStorage($options);

    print 'Processing languages... ';
    $languages = new Languages($options, $documentStorage);
    print "done.\n";

    print 'Processing scripts... ';
    $scripts = new Scripts($options, $documentStorage);
    print "done.\n";

    print 'Processing territories... ';
    $territories = new Territories($options, $documentStorage);
    print "done.\n";

    // The plural rules are validated against the language identifiers collected above.
    print 'Processing plural rules... ';
    $plurals = new Plurals($options, $documentStorage, $languages);
    print "done.\n";

    // Nothing is written until every document has been processed successfully.
    print 'Saving... ';
    foreach (array($languages, $scripts, $territories, $plurals) as $processor) {
        $processor->save();
    }
    print "done.\n";
}
/**
 * The options of this script, as parsed from the command line arguments.
 */
class Options
{
    /**
     * The CLDR version identifier (examples: "47", "46.1", "47-beta2").
     *
     * @var string
     */
    public $cldrVersion;

    /**
     * The directory where the data will be written to (with forward slashes as directory separator).
     *
     * @var string
     */
    public $outputDir;

    /**
     * Parse the command line arguments.
     *
     * If -h or --help is specified, or if the number of arguments is wrong,
     * the syntax is printed and the script is terminated.
     *
     * @param string[] $argv the program name, followed by the CLDR version and (optionally) the output directory
     *
     * @throws RuntimeException if the CLDR version identifier is malformed
     */
    public function __construct(array $argv)
    {
        // Don't rely on $argv[0] being there: an empty list must show the syntax, not raise an undefined-offset error.
        $programName = isset($argv[0]) ? $argv[0] : 'import-cldr-data';
        if (array_intersect($argv, array('-h', '--help'))) {
            $this->showSyntax($programName, 0);
        }
        $this->outputDir = $this->getDefaultOutputDir();
        switch (count($argv)) {
            case 3:
                $this->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $argv[2]);
                // no break
            case 2:
                $this->cldrVersion = $argv[1];
                // The D modifier makes "$" match only at the very end of the string:
                // without it "47\n" would be accepted and the line feed would end up in the download URL.
                if (!preg_match('/^\d+(\.\d+)?(-(alpha|beta)\d+)?$/D', $this->cldrVersion)) {
                    throw new RuntimeException("{$this->cldrVersion} is not a valid CLDR version identifier");
                }
                break;
            default:
                $this->showSyntax($programName, 1);
        }
    }

    /**
     * Print the syntax of this script and terminate the execution.
     *
     * @param string $programName
     * @param int $exitCode
     *
     * @return never
     */
    private function showSyntax($programName, $exitCode)
    {
        // Display the paths using the directory separator of the current operating system.
        $programName = str_replace('/', DIRECTORY_SEPARATOR, $programName);
        $defaultOutputDir = str_replace('/', DIRECTORY_SEPARATOR, $this->getDefaultOutputDir());
        echo <<<EOT
Syntax: {$programName} <cldr-version> [output-dir]
Arguments:
cldr-version: the version of the CLDR data.
Examples:
47
47-beta2
47-alpha1
46.1
46.1-beta1
output-dir: the directory where the data will be written to
Default: {$defaultOutputDir}
EOT;
        exit($exitCode);
    }

    /**
     * Get the output directory to be used when none is specified in the command line:
     * the src/cldr-data directory under the parent of the directory containing this script.
     *
     * @return string
     */
    private function getDefaultOutputDir()
    {
        return str_replace(DIRECTORY_SEPARATOR, '/', dirname(__DIR__)) . '/src/cldr-data';
    }
}
/**
 * Downloads the XML documents of a specific CLDR release, and keeps the parsed documents in memory
 * so that the same document is downloaded and parsed only once.
 */
class DocumentStorage
{
    /**
     * The URL prefix shared by all the documents of the requested CLDR release.
     *
     * @var string
     */
    private $baseUrl;

    /**
     * The stream context used to download the documents.
     *
     * @var resource
     */
    private $context;

    /**
     * The documents already parsed (array keys are the paths they have been requested with).
     *
     * @var DOMDocument[]
     */
    private $cache;

    public function __construct(Options $options)
    {
        // The URL is built from a "release-" prefix plus the version with dots replaced by dashes (example: 46.1 => release-46-1).
        $this->baseUrl = 'https://raw.githubusercontent.com/unicode-org/cldr/refs/tags/release-' . str_replace('.', '-', $options->cldrVersion);
        $this->context = stream_context_create(array(
            'http' => array(
                'follow_location' => 1,
                // Make file_get_contents() fail when the server answers with an HTTP error status.
                'ignore_errors' => false,
            ),
        ));
        $this->cache = array();
    }

    /**
     * Get a parsed CLDR document, downloading it if it has not been requested yet.
     *
     * @param string $path the path of the document, relative to the root of the CLDR repository
     *
     * @throws RuntimeException if the document can't be downloaded or parsed
     *
     * @return DOMDocument
     */
    public function get($path)
    {
        if (!isset($this->cache[$path])) {
            $xml = $this->fetch($path);
            $doc = $this->loadXml($xml);
            $this->cache[$path] = $doc;
        }

        return $this->cache[$path];
    }

    /**
     * Download the raw contents of a CLDR document.
     *
     * @param string $path
     *
     * @throws RuntimeException if the download fails
     *
     * @return string
     */
    private function fetch($path)
    {
        $url = $this->baseUrl . '/' . ltrim($path, '/');
        // Temporarily silence the warnings: a failure is detected by looking at the return value,
        // so that we can throw an exception with a more useful message.
        set_error_handler(function () {}, -1);
        $content = file_get_contents($url, false, $this->context);
        restore_error_handler();
        if ($content === false) {
            $details = '';
            // $http_response_header is defined by PHP in this scope when the HTTP wrapper received a response.
            /** @var array $http_response_header */
            if (!empty($http_response_header)) {
                // The first header is the HTTP status line
                $details = " - {$http_response_header[0]}";
            }
            throw new RuntimeException("Failed to download from {$url}{$details}");
        }

        return $content;
    }

    /**
     * Parse an XML string.
     *
     * @param string $xml
     *
     * @throws RuntimeException if $xml is empty, is not well-formed, or if libxml reports any problem
     *
     * @return DOMDocument
     */
    private function loadXml($xml)
    {
        // DOMDocument::loadXML() refuses empty strings with a ValueError (PHP 8+) or with a warning (older versions):
        // check it ourselves so that callers always get the documented RuntimeException,
        // and so that the libxml error handling state is never left altered.
        if ((string) $xml === '') {
            throw new RuntimeException("Failed to parse XML:\nthe document is empty");
        }
        $doc = new DOMDocument();
        $restore = libxml_use_internal_errors(true);
        libxml_clear_errors();
        $loaded = $doc->loadXML($xml);
        $errors = libxml_get_errors();
        // Don't leave our errors in the libxml buffer once we collected them.
        libxml_clear_errors();
        libxml_use_internal_errors($restore);
        $lines = array();
        foreach ($errors as $error) {
            $lines[] = "{$error->message} at line {$error->line}";
        }
        // Be strict: even recoverable problems are considered failures.
        if (!$loaded || $errors !== array()) {
            throw new RuntimeException("Failed to parse XML:\n" . implode("\n", $lines));
        }

        return $doc;
    }
}
/**
 * Base class of the processors: each processor parses one CLDR XML document
 * as soon as it is created, and is able to save the extracted data to a JSON file.
 */
abstract class Processor
{
    /**
     * @var Options
     */
    protected $options;

    /**
     * The data extracted from the CLDR document (that is, the result of the parse() method).
     *
     * @var array
     */
    protected $data;

    /**
     * @var DocumentStorage
     */
    private $documentStorage;

    /**
     * The path of the CLDR document (without leading slashes).
     *
     * @var string
     */
    private $path;

    /**
     * Fetch the CLDR document and parse it right away.
     *
     * @param string $path the path of the CLDR document to be processed
     *
     * @throws RuntimeException if the document can't be retrieved or parsed
     */
    protected function __construct(Options $options, DocumentStorage $documentStorage, $path)
    {
        $this->options = $options;
        $this->documentStorage = $documentStorage;
        $this->path = ltrim($path, '/');
        $doc = $this->documentStorage->get($this->path);
        $this->data = $this->parse($doc);
    }

    /**
     * Save the extracted data to the output file, in JSON format.
     *
     * @throws RuntimeException if the output directory can't be created, if the data can't be serialized, or if the file can't be written
     *
     * @return void
     */
    public function save()
    {
        $file = $this->getOutputFile();
        $dir = dirname($file);
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            throw new RuntimeException("Cannot create directory: {$dir}");
        }
        // Some flags are not available in old PHP versions: use them only if they are defined.
        $flags = 0;
        if (defined('JSON_UNESCAPED_SLASHES')) {
            $flags |= JSON_UNESCAPED_SLASHES;
        }
        if (defined('JSON_UNESCAPED_UNICODE')) {
            $flags |= JSON_UNESCAPED_UNICODE;
        }
        if (defined('JSON_PRETTY_PRINT')) {
            $flags |= JSON_PRETTY_PRINT;
        }
        if (defined('JSON_THROW_ON_ERROR')) {
            $flags |= JSON_THROW_ON_ERROR;
        }
        try {
            $json = json_encode($this->data, $flags);
        } catch (JsonException $x) {
            // JsonException is not a RuntimeException: convert it, since this script only deals with RuntimeException.
            throw new RuntimeException("Failed to serialize the data for {$file}: {$x->getMessage()}", 0, $x);
        }
        if ($json === false) {
            // Where JSON_THROW_ON_ERROR is not available, failures are reported with a false return value.
            throw new RuntimeException("Failed to serialize the data for {$file} (JSON error code: " . json_last_error() . ')');
        }
        // file_put_contents() returns the number of written bytes: anything else means the file is missing or incomplete.
        if (file_put_contents($file, $json) !== strlen($json)) {
            throw new RuntimeException("Failed to write to file: {$file}");
        }
    }

    /**
     * Extract the data to be saved from the CLDR document.
     *
     * @return array
     */
    abstract protected function parse(DOMDocument $doc);

    /**
     * Sort an array by its keys (case-insensitively), making sure that a key
     * always comes before its "-alt-" variants (for example "az" before "az-alt-short").
     *
     * @param array $data the array to be sorted in place
     *
     * @return void
     */
    protected function sortByKeyWithPossiblyAlt(array &$data)
    {
        uksort($data, function ($a, $b) {
            // PHP converts numeric array keys to integers: normalize them before using string functions.
            $a = (string) $a;
            $b = (string) $b;
            $aAlt = strpos($a, '-alt-') !== false;
            $bAlt = strpos($b, '-alt-') !== false;
            if ($aAlt !== $bAlt) {
                // $b is an alternative form of $a: $a goes first
                if ($bAlt && strpos($b, "{$a}-alt-") === 0) {
                    return -1;
                }
                // $a is an alternative form of $b: $b goes first
                if ($aAlt && strpos($a, "{$b}-alt-") === 0) {
                    return 1;
                }
            }

            return strcasecmp($a, $b);
        });
    }

    /**
     * Get the name of the output file, relative to the output directory.
     *
     * @return string
     */
    abstract protected function getOutputRelativeFileName();

    /**
     * Get the full path of the output file.
     *
     * @return string
     */
    private function getOutputFile()
    {
        return $this->options->outputDir . '/' . ltrim($this->getOutputRelativeFileName(), '/');
    }
}
/**
 * Extracts the cardinal plural rules from the CLDR supplemental data.
 */
class Plurals extends Processor
{
    /**
     * The processor providing the list of the defined language identifiers.
     *
     * @var Languages
     */
    private $languages;

    public function __construct(Options $options, DocumentStorage $documentStorage, Languages $languages)
    {
        // This must be set before calling the parent constructor, since it calls parse() right away.
        $this->languages = $languages;
        parent::__construct($options, $documentStorage, 'common/supplemental/plurals.xml');
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $finder = new DOMXPath($doc);
        $ruleSets = $finder->query('/supplementalData/plurals[@type="cardinal"]/pluralRules');
        $knownIDs = $this->languages->getDefinedLanguageIDs();
        // Locales accepted even if they are not in the list of the defined languages
        $acceptedAnyway = array(
            'guw', // Gun
            'lld', // Dolomitic Ladin
            'hnj', // Hmong Njua
            'nah', // Nahuatl
            'smi', // Sami
        );
        // Locales to be renamed (or to be skipped, when mapped to an empty string)
        $aliases = array(
            'in' => 'id', // Former Indonesian
            'iw' => 'he', // Former Hebrew
            'jw' => 'jv', // Former Javanese
            'ji' => 'yi', // Former Yiddish
            'mo' => 'ro-MD', // former Moldavian
            'bh' => '', // Former Bihari: dismissed because it can be 'bho', 'mai' or 'mag'
            // Just a CLDR placeholder
            'root' => '',
        );
        $rulesByLocale = array();
        $unknownCodes = array();
        foreach ($ruleSets as $ruleSet) {
            $codes = preg_split('/\s+/', (string) $ruleSet->getAttribute('locales'), -1, PREG_SPLIT_NO_EMPTY);
            if ($codes === array()) {
                throw new RuntimeException('No locales found in pluralRules element');
            }
            $rules = $this->extractRules($ruleSet);
            foreach ($codes as $code) {
                $code = str_replace('_', '-', $code);
                // A locale may already be there only if it's the target of an alias
                if (isset($rulesByLocale[$code]) && !in_array($code, $aliases, true)) {
                    throw new RuntimeException("Duplicate locale: {$code}");
                }
                if (in_array($code, $knownIDs, true) || in_array($code, $acceptedAnyway, true)) {
                    // The rules of a recognized locale always win (even over the ones coming from an alias)
                    $rulesByLocale[$code] = $rules;
                    continue;
                }
                if (!isset($aliases[$code])) {
                    // Collect all the unrecognized locales, so that we can report them all at once
                    $unknownCodes[] = $code;
                    continue;
                }
                $target = $aliases[$code];
                // The rules of an alias are used only if its target has no rules yet
                if ($target !== '' && !isset($rulesByLocale[$target])) {
                    $rulesByLocale[$target] = $rules;
                }
            }
        }
        if ($unknownCodes !== array()) {
            throw new RuntimeException("The following locales are not defined:\n- " . implode("\n- ", $unknownCodes));
        }
        if ($rulesByLocale === array()) {
            throw new RuntimeException('No plural rules found');
        }
        $this->sortByKeyWithPossiblyAlt($rulesByLocale);
        $version = array(
            '_cldrVersion' => $this->options->cldrVersion,
        );

        return array(
            'supplemental' => array(
                'version' => $version,
                'plurals-type-cardinal' => $rulesByLocale,
            ),
        );
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'supplemental/plurals.json';
    }

    /**
     * Read the rules defined by the pluralRule children of a pluralRules element.
     *
     * @param DOMElement $xPluralRules
     *
     * @throws RuntimeException if a child element is unexpected or invalid, or if no rule is defined
     *
     * @return array the rules, indexed by "pluralRule-count-<count>" (always in the order zero, one, two, few, many, other)
     */
    private function extractRules($xPluralRules)
    {
        // The slots fix both the accepted counts and the order of the resulting rules
        $slots = array_fill_keys(
            array(
                'pluralRule-count-zero',
                'pluralRule-count-one',
                'pluralRule-count-two',
                'pluralRule-count-few',
                'pluralRule-count-many',
                'pluralRule-count-other',
            ),
            null
        );
        foreach ($xPluralRules->childNodes as $child) {
            // Skip text nodes, comments, ...
            if ($child instanceof DOMElement) {
                if ($child->tagName !== 'pluralRule') {
                    throw new RuntimeException("Unexpected element: {$child->tagName}");
                }
                $count = (string) $child->getAttribute('count');
                if ($count === '') {
                    throw new RuntimeException('Missing count attribute');
                }
                $slot = "pluralRule-count-{$count}";
                if (!array_key_exists($slot, $slots)) {
                    throw new RuntimeException("Unknown count: {$count}");
                }
                if ($slots[$slot] !== null) {
                    throw new RuntimeException("Duplicate count: {$count}");
                }
                $slots[$slot] = $child->textContent;
            }
        }
        // Keep only the slots actually filled
        $rules = array();
        foreach ($slots as $slot => $rule) {
            if ($rule !== null) {
                $rules[$slot] = $rule;
            }
        }
        if ($rules === array()) {
            throw new RuntimeException('No plural rules found');
        }

        return $rules;
    }
}
/**
 * Base class of the processors that extract a list of English display names
 * (the names are read from the CLDR "en" locale document).
 */
abstract class LocaleDisplayName extends Processor
{
    public function __construct(Options $options, DocumentStorage $documentStorage)
    {
        parent::__construct($options, $documentStorage, 'common/main/en.xml');
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $names = array();
        $finder = new DOMXPath($doc);
        foreach ($finder->query($this->getXPathSelector()) as $node) {
            $type = (string) $node->getAttribute('type');
            if ($type === '') {
                throw new RuntimeException('Missing type attribute');
            }
            // Alternative names are stored with the "-alt-<name of the alternative>" suffix
            $alt = (string) $node->getAttribute('alt');
            $key = str_replace('_', '-', $type) . ($alt === '' ? '' : "-alt-{$alt}");
            if (isset($names[$key])) {
                throw new RuntimeException("Duplicate key: {$key}");
            }
            $names[$key] = (string) $node->textContent;
        }
        if ($names === array()) {
            throw new RuntimeException('No elements found');
        }
        $this->sortByKeyWithPossiblyAlt($names);
        $identity = array(
            'version' => array(
                '_cldrVersion' => $this->options->cldrVersion,
            ),
            'language' => 'en',
            'territory' => 'US',
        );
        $localeDisplayNames = array(
            $this->getExportedNodeName() => $names,
        );

        return array(
            'main' => array(
                'en-US' => array(
                    'identity' => $identity,
                    'localeDisplayNames' => $localeDisplayNames,
                ),
            ),
        );
    }

    /**
     * Get the XPath expression that selects the elements containing the names.
     *
     * @return string
     */
    abstract protected function getXPathSelector();

    /**
     * Get the key under which the names are stored in the "localeDisplayNames" node of the exported data.
     *
     * @return string
     */
    abstract protected function getExportedNodeName();
}
/**
 * Extracts the English names of the languages.
 */
class Languages extends LocaleDisplayName
{
    /**
     * Get the identifiers of the languages found in the CLDR data (alternative names are not included).
     *
     * @return string[]
     */
    public function getDefinedLanguageIDs()
    {
        $names = $this->data['main']['en-US']['localeDisplayNames'][$this->getExportedNodeName()];
        $ids = array();
        foreach (array_keys($names) as $key) {
            // Keys containing "-alt-" are alternative names of languages, not further languages
            if (strpos((string) $key, '-alt-') === false) {
                $ids[] = $key;
            }
        }

        return $ids;
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/languages.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'languages';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/languages/language';
    }
}
/**
 * Extracts the English names of the scripts.
 */
class Scripts extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/scripts.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'scripts';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/scripts/script';
    }
}
/**
 * Extracts the English names of the territories.
 */
class Territories extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/territories.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'territories';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/territories/territory';
    }
}
// Run the script: failures are reported on the standard error with a non-zero exit code.
try {
    main($argv);
} catch (RuntimeException $failure) {
    fwrite(STDERR, "{$failure->getMessage()}\n");
    exit(1);
}