<?php
/*
Copyright (C) 2007 Richard Ishida ishida@w3.org
Derived from PHP code and CSS styling by Thomas Gruner icspace.org tom.gruner@gmail.com
Script displays or allows you to search the language codes from the iana, readable for people.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
http://www.gnu.org/licenses/gpl.html
*/
// When the "source" query parameter is present, show this script's own source
// code instead of rendering the page.
if (isset($_GET['source'])) {
    // The response below is an HTML document wrapping the escaped source, so it
    // is declared as HTML. ("Content Type" with a space is not a valid header
    // name, and text/plain would contradict the markup that is sent.)
    header('Content-Type: text/html; charset=utf-8');
    $sourcetext = file_get_contents('index.php');
    if ($sourcetext === false) { $sourcetext = ''; } // unreadable file: show an empty listing
    echo "<html><head><title>PHP Source subtags/index.php</title></head><body><pre>" . htmlspecialchars($sourcetext) . "</pre></body></html>";
    die;
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<?php
// Search modes. 1-3 are the interactive actions; the values from "language"
// upwards select a plain listing of one registry category. The category values
// are also compared numerically later in this file to validate subtag order.
define('none', 0);
define('check', 1);
define('find', 2);
define('lookup', 3);
define('language', 4);
define('extlang', 5);
define('script', 6);
define('region', 7);
define('variant', 8);
define('grand', 9);
define('redundant', 10);

// Page-level state filled in while the request is processed.
$searchtype = none;
$searchtext = '';
$searchtags = '';
$errormsg = '';
$warnings = '';
$info = '';

// Work out which action was requested from the button that submitted the form.
// Parameters that are not plain strings (eg. "find[]=x") are ignored.
if (isset($_GET['submit']) && is_string($_GET['submit'])) {
    $submitaction = strtolower($_GET['submit']);
    switch ($submitaction) {
        case 'list':
            // Only the numeric codes of the listing modes are accepted; anything
            // else leaves $searchtype at none and is reported below.
            if (isset($_GET['list']) && is_string($_GET['list']) && preg_match('/^[0-9]{1,2}$/', $_GET['list'])
                    && (int) $_GET['list'] >= language && (int) $_GET['list'] <= redundant) {
                $searchtype = (int) $_GET['list'];
            }
            break;
        case 'find':
            if (isset($_GET['find']) && is_string($_GET['find'])) { $searchtype = find; $searchtext = trim(strtolower($_GET['find'])); }
            break;
        case 'look up':
            if (isset($_GET['lookup']) && is_string($_GET['lookup'])) { $searchtype = lookup; $searchtext = trim(strtolower($_GET['lookup'])); }
            break;
        case 'check':
            if (isset($_GET['check']) && is_string($_GET['check'])) { $searchtype = check; $searchtext = trim(strtolower($_GET['check'])); }
            break;
        default:
            // Unrecognised button value: echoed back, escaped because it comes
            // straight from the URL.
            echo htmlspecialchars($submitaction, ENT_QUOTES);
    }
}

// A query field was supplied but no action could be determined from it.
if ($searchtype == none && (isset($_GET['find']) || isset($_GET['lookup']) || isset($_GET['check']) || isset($_GET['list']) )) {
    $errormsg .= addtoerrmsg("There was a problem figuring out what you want to do. This may arise because you hit return after typing in a field, rather than clicking on a button. Instead, you should use a mouse to click on a button or hit tab then return on the keyboard. It may also occur if you supplied incorrect parameters in the URL.");
}
#echo "<br />$searchtype..";
#echo "<br />$searchtext...";
// Normalise lists of tags (not applied to description searches): whitespace
// runs are collapsed to one space, then '; ', ', ', ';', ',' and ' ' are each
// replaced by a hyphen, in that order.
if ($searchtype != find) {
    $searchtext = str_replace(
        array('; ', ', ', ';', ',', ' '),
        '-',
        preg_replace("/\s+/", " ", $searchtext)
    );
}

// Keep a copy of any private-use sequence ("x-..." to the end of the text) in
// $userdefined[0], without surrounding hyphens, then cut it out of $searchtext.
preg_match("/(-x|^x)-.+/", $searchtext, $userdefined);
if (isset($userdefined[0])) {
    $userdefined[0] = trim($userdefined[0], '-');
}
$searchtext = preg_replace("/(-|\b)x-.+/", "", $searchtext);
// include additional data files
include('silcodes.php');
include('scriptcodes.php');
include('languages.php');
include('extlang.php');
include('scripts.php');
include('regions.php');
include('variant.php');
include('grandfathered.php');
include('redundant.php');
include('macrolanguages.php');
?>
<?php
#################### FUNCTIONS #######################################
/**
 * Renders one registry record as an HTML "registryItem" block.
 *
 * The block contains a heading (subtag, description, deprecation marker and,
 * for language and script records, an external lookup link), the registry
 * comment if any, advisory notes (in every mode except "check"), and a list
 * of the record's remaining fields.
 *
 * In "check" mode the record is also passed to checkforwarnings() and
 * checkforinfo(), which append to the page-level message strings.
 *
 * Reads the globals $searchtype, $sil, $scriptcodes and $macrolanguages.
 *
 * @param array $item Registry record. 'Subtag', 'Description' and 'Type' are
 *                    read unconditionally; 'Deprecated', 'Preferred-Value',
 *                    'Comments', 'Scope' and 'Prefix' are used when present.
 * @return string HTML fragment.
 */
function makeListItem($item) {
    GLOBAL $searchtype, $sil, $scriptcodes, $macrolanguages;

    if ($searchtype == check) {
        checkforwarnings($item);
        checkforinfo($item);
    }

    $subtag = $item['Subtag'];
    $description = $item['Description'];
    if (isset($item['Deprecated'])) {
        $description .= ' <span style="font-size: 70%;"><img src="images/deprecated.png" alt=" " /> deprecated, ';
        if (isset($item['Preferred-Value'])) { $description .= 'use</span> '.$item['Preferred-Value']; }
        else { $description .= 'don\'t use.</span>'; }
    }

    $div = "<div class=\"registryItem\">\n<h3>";

    // add the link to the Ethnologue, using the code from $sil when one is listed
    if ($item['Type'] == 'language') {
        $siltag = isset($sil[$subtag]) ? $sil[$subtag] : $subtag;
        $div .= "<span class='sil' style='float:right; width: 20px; text-align:right;'><a href='http://www.ethnologue.com/show_language.asp?code=$siltag'><img src='images/ethn.png' title='Look up in the SIL Ethnologue.' alt='Look up in the SIL Ethnologue.' /></a></span>";
    }

    // add the link to UniView for script subtags that have an entry in $scriptcodes
    if ($item['Type'] == 'script' && isset($scriptcodes[$subtag])) {
        $univiewtag = $scriptcodes[$subtag];
        $div .= "<span class='sil' style='float:right; width: 20px; text-align:right;'><a href='/rishida/scripts/uniview/?block=$univiewtag'><img src='images/univ.png' title='Look up in UniView.' alt='Look up in UniView.' /></a></span>";
    }

    $div .= "<span class=\"st\">{$subtag}</span>\n{$description}\n</h3>";

    if (isset($item['Comments'])) {
        $div .= "<div class='comments' style='font-size:80%;'><img src='images/comments.png' /> Registry comment: ".$item['Comments']."</div>";
    }

    // Advisory notes are shown for searches and listings; in "check" mode the
    // equivalent guidance is produced by checkforwarnings()/checkforinfo().
    if ($searchtype != check) {
        $noteOpen = '<div class="comments" style="font-size:80%;"><img src="images/warnings.png" alt="Note." /> ';
        $commentOpen = '<div class="comments" style="font-size:80%;"><img src="images/comments.png" alt="Comment." /> ';
        $name = '<span class="stname">'.$subtag.'</span>';
        $scope = isset($item['Scope']) ? $item['Scope'] : '';

        if ($scope == 'macrolanguage') {
            $div .= $noteOpen.$name.' is a macrolanguage. You should consider whether you can find a more specific language for your purposes.';
            // Only list the encompassed languages when the mapping has an entry
            // for this subtag; otherwise the sentence is left out.
            if (isset($macrolanguages[$subtag]) && is_array($macrolanguages[$subtag]) && count($macrolanguages[$subtag]) > 0) {
                $list = implode(' ', $macrolanguages[$subtag]);
                $div .= ' This macrolanguage encompasses <a href="/rishida/utils/subtags/index.php?lookup='.urlencode($list).'&amp;submit=Look+up">'.$list.'</a>.';
            }
            $div .= '</div>';
        }
        if ($scope == 'collection') {
            $div .= $commentOpen.$name.' represents a collection of languages. Although a collection subtag can be used in the absence of a more specific tag, you should check whether there is a more specific language subtag. Unfortunately, the registry does not offer any suggestions to assist with this.</div>';
        }
        if ($subtag == 'mul') {
            $div .= $noteOpen.$name.' should not be used when a list of languages or individual tags for each content element can be used instead.</div>';
        }
        if ($subtag == 'und') {
            $div .= $commentOpen.$name.' identifies linguistic content whose language is not determined.</div>';
            $div .= $noteOpen.$name.' should not be used unless a language tag is required and language information is not available or cannot be determined. Omitting the language tag (where permitted) is preferred. This subtag may also be useful when matching language tags in certain situations.</div>';
        }
        if ($subtag == 'zxx') {
            $div .= $commentOpen.$name.' identifies non-linguistic content for which a language classification is inappropriate or does not apply, such as instrumental or electronic music, sound recordings consisting of non-verbal sounds, audiovisuals with no narration or dialog, or printed titles, or subtitles, machine-readable data files consisting of machine languages or character codes, programming source code, etc.</div>';
        }
        if ($subtag == 'mis') {
            $div .= $noteOpen.$name.' identifies content whose language is known but which does not currently have a corresponding subtag. This subtag should not be used, since future developments may render it invalid. It is always preferable to use <span class="stname">und</span> or (with prior agreement) private use subtags.</div>';
        }
        if ($subtag == 'i-default') {
            $div .= $noteOpen.$name.' identifies the condition or content used where the language preferences of the user cannot be established. It should not be used except as a means of labelling the default content for applications or protocols that require default language content to be labeled with that specific tag. It may also be used by an application or protocol to identify when the default language content is being returned.</div>';
        }

        if ($item['Type'] == 'variant') {
            if (isset($item['Prefix'])) {
                // show "zh-Latn" as "zh+Latn" and separate alternatives with "or"
                $preflist = str_replace('-', '+', $item['Prefix']);
                $preflist = str_replace(',', '</span> or <span class="stname">', $preflist);
                $div .= $noteOpen.'Should only be used in a language tag that already contains <span class="stname">'.$preflist.'</span>.</div>';
            }
            else {
                $div .= $noteOpen.'Has no prefix requirements, and so should be used after any other variant subtags that do have prefix requirements.</div>';
            }
        }
        if ($item['Type'] == 'extlang' && isset($item['Prefix'])) {
            $div .= $noteOpen.'Use as <span class="stname">'.$item['Prefix'].'-'.$subtag.'</span>';
            // The recommendation is only made when the record names a preferred value.
            if (isset($item['Preferred-Value'])) {
                $div .= ', however it is usually preferable to use the <span class="stname">'.$item['Preferred-Value'].'</span> language subtag';
            }
            $div .= '.</div>';
        }
    }

    // output the additional registry fields (everything not already shown in the heading)
    unset($item['Description'], $item['Subtag'], $item['Tag']);
    $addDetails = '';
    foreach ($item as $key => $value) {
        $addDetails .= "<li>$key: $value</li>";
    }
    $div .= "<ul>\n".$addDetails."\n</ul>\n</div>";
    return $div;
}
//<a href="http://www.ethnologue.com/show_language.asp?code=$siltag"><img src="ethnologue.png" class="sil" alt="Look up in the SIL Ethnologue." title="Look up in the SIL Ethnologue." /></a>
/**
 * Wraps a message in the paragraph markup used for error reports.
 *
 * @param string $msg Message HTML. It is inserted as-is, so callers must
 *                    escape any untrusted text before passing it in.
 * @return string The message inside a "registryItem" paragraph, preceded by
 *                the error icon.
 */
function addtoerrmsg ($msg) {
    // The image is self-closed, as the page is served with an XHTML doctype.
    return '<p class="registryItem"><img src="images/error.png" alt="Error:" /> '.$msg.'</p>';
}
/**
 * Appends "check"-mode warnings about one registry record to the global
 * $warnings string.
 *
 * Covers deprecation, a script subtag that the first collected record lists as
 * its Suppress-Script, extlang/prefix agreement, and general advice about
 * script and region subtags.
 *
 * Reads the global $itemarray: its first entry is treated as the language
 * record of the tag being checked (it may be absent).
 *
 * @param array $item Registry record; 'Subtag' and 'Type' are read, plus
 *                    'Deprecated', 'Preferred-Value', 'Comments' and 'Prefix'
 *                    when present.
 * @return void
 */
function checkforwarnings ($item) {
    GLOBAL $warnings, $itemarray;

    $open = '<p class="registryItem"><img src="images/warning.png" alt="Warning:" /> ';
    $type = isset($item['Type']) ? $item['Type'] : '';
    $first = isset($itemarray[0]) ? $itemarray[0] : array();
    $firstsubtag = isset($first['Subtag']) ? $first['Subtag'] : null;

    if (isset($item['Deprecated'])) {
        // Each optional part is a complete sentence, so the message reads
        // correctly whichever of them are present.
        $msg = $open.'<span class="stname">'.$item['Subtag'].'</span> is deprecated.';
        if (isset($item['Preferred-Value'])) {
            $msg .= ' Use <span class="stname">'.$item['Preferred-Value'].'</span> instead.';
        }
        if (isset($item['Comments'])) {
            $msg .= ' The entry for this tag has the following comments: <em>'.$item['Comments'].'</em>.';
        }
        $warnings .= $msg.'</p>';
    }

    if ($type == 'script' && isset($first['Suppress-Script']) && $first['Suppress-Script'] == $item['Subtag']) {
        $warnings .= $open.'The script tag <span class="stname">'.$item['Subtag']."</span> shouldn't be used with the language subtag <span class='stname'>".$firstsubtag."</span>.</p>";
    }

    // An extlang record without a Prefix gives nothing to compare against,
    // so no prefix message is produced for it.
    if ($type == 'extlang' && isset($item['Prefix'])) {
        if ($firstsubtag != $item['Prefix']) {
            $warnings .= '<p class="registryItem"><img src="images/error.png" alt="Error:" /> The extended language subtag <span class="stname">'.$item['Subtag']."</span> must be used with the language subtag <span class='stname'>".$item['Prefix']."</span>. Note, however, that it is better to just use the <span class='stname'>".$item['Subtag']."</span> language subtag, rather than <span class='stname'>".$item['Prefix'].'-'.$item['Subtag']."</span>.</p>";
        }
        else {
            $warnings .= $open.'It is usually better to just use the <span class="stname">'.$item['Subtag']."</span> language subtag, rather than <span class='stname'>".$item['Prefix'].'-'.$item['Subtag']."</span>.</p>";
        }
    }

    if ($type == 'script') {
        $warnings .= $open.'The script subtag <span class="stname">'.$item['Subtag']."</span> should not be used unless it adds some information that is needed to distinguish this language tag from another one. If you do use the script subtag, use it consistently in the context where the language tags are used.</p>";
    }
    if ($type == 'region') {
        $warnings .= $open.'Check that the region subtag <span class="stname">'.$item['Subtag']."</span> contributes information needed to distinguish this language tag from another one, otherwise leave it out. For example, <span class='stname'>en-GB</span> can be useful for spell-checking, but the region subtag in <span class='stname'>ja-JP</span> is unlikely to be useful unless you are intentionally contrasting it with Japanese spoken in other parts of the world.</p>";
    }
}
/**
 * Appends "check"-mode informational notes about one registry record to the
 * global $info string.
 *
 * A note is produced when the record's Scope is 'macrolanguage' (with a lookup
 * link for the encompassed languages when $macrolanguages lists them) or
 * 'collection'.
 *
 * @param array $item Registry record; 'Subtag' is read, and 'Scope' when present.
 * @return void
 */
function checkforinfo ($item) {
    GLOBAL $info, $macrolanguages;

    if (!isset($item['Scope'])) { return; }

    $open = '<p class="registryItem"><img src="images/comment.png" alt="Information:" /> ';
    $name = '<span class="stname">'.$item['Subtag'].'</span>';

    if ($item['Scope'] == 'macrolanguage') {
        $info .= $open.$name.' is a macrolanguage. You should consider whether you can find a more specific language for your purposes.';
        // Only list the encompassed languages when the mapping has an entry for
        // this subtag; the list is URL-encoded for use in the link.
        if (isset($macrolanguages[$item['Subtag']]) && is_array($macrolanguages[$item['Subtag']]) && count($macrolanguages[$item['Subtag']]) > 0) {
            $list = implode(' ', $macrolanguages[$item['Subtag']]);
            $info .= ' This macrolanguage encompasses <a href="/rishida/utils/subtags/index.php?lookup='.urlencode($list).'&amp;submit=Look+up">'.$list.'</a>.';
        }
        $info .= '</p>';
    }
    if ($item['Scope'] == 'collection') {
        $info .= $open.$name.' represents a collection of languages. Although a collection subtag can be used in the absence of a more specific tag, you should check whether there is a more specific language subtag. Unfortunately, the registry does not offer any suggestions to assist with this.</p>';
    }
}
?>
<?php
#################### DO THE SEARCHING #######################################
// Rendered HTML for each category of result. Filled in below and, for tag
// checks, by the well-formedness section that follows.
$languageList = '';
$scriptList = '';
$regionsList = '';
$grandList = '';
$redundantList = '';
$variantList = '';
$extlangList = '';
$langprocessed = false;
$listcount = 0;

if ($searchtype > lookup) { // ie. this is a simple listing of every subtag of one kind
    switch ($searchtype) {
        case language:
            foreach ($languages as $item) { $languageList .= makeListItem($item); $listcount++; }
            break;
        case script:
            foreach ($scripts as $item) { $scriptList .= makeListItem($item); $listcount++; }
            break;
        case region:
            foreach ($regions as $item) { $regionsList .= makeListItem($item); $listcount++; }
            break;
        case grand:
            foreach ($grandfathered as $item) { $grandList .= makeListItem($item); $listcount++; }
            break;
        case redundant:
            foreach ($redundant as $item) { $redundantList .= makeListItem($item); $listcount++; }
            break;
        case variant:
            foreach ($variant as $item) { $variantList .= makeListItem($item); $listcount++; }
            break;
        case extlang:
            foreach ($extlang as $item) { $extlangList .= makeListItem($item); $listcount++; }
            break;
        default:
            // The value may originate from the URL, so it is escaped before display.
            echo "Unknown Type: " . htmlspecialchars((string) $searchtype, ENT_QUOTES) . "<br />";
    }
}
elseif ($searchtype == find) { // ie. searching descriptions
    // An empty search text yields no results (and avoids calling strpos()
    // with an empty needle). Redundant tags are not searched.
    if ($searchtext !== '') {
        foreach ($languages as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $languageList .= makeListItem($item); }
        }
        foreach ($scripts as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $scriptList .= makeListItem($item); }
        }
        foreach ($regions as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $regionsList .= makeListItem($item); }
        }
        foreach ($grandfathered as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $grandList .= makeListItem($item); }
        }
        foreach ($variant as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $variantList .= makeListItem($item); }
        }
        foreach ($extlang as $item) {
            if (strpos(strtolower($item['Description']), $searchtext) !== false) { $extlangList .= makeListItem($item); }
        }
    }
}
elseif ($searchtype == check) { // tag parser
    $itemarray = array();
    $subtags = explode('-', $searchtext);
    if (count($subtags) > 1) {
        // $searchtext has been lower-cased, so the registry tag is folded too.
        // Strict comparison keeps numeric-looking strings from matching loosely.
        foreach ($grandfathered as $item) {
            if (strtolower($item['Tag']) === $searchtext) { $grandList .= makeListItem($item); }
        }
    }
    if ($grandList == '' && $searchtext != '') { // don't look up grandfathered tags any further
        // deal with the language one and remove from array to avoid looking up the same tag twice
        $langtag = array_shift($subtags);
        $found = false;
        foreach ($languages as $item) {
            if (strtolower($item['Subtag']) === $langtag) {
                // makeListItem() runs before the record is added to $itemarray,
                // as the warning checks it triggers read $itemarray.
                $languageList .= makeListItem($item);
                $found = true;
                $itemarray[] = $item;
            }
        }
        if (! $found) {
            // $langtag is user input: escape it before embedding it in markup.
            $errormsg .= addtoerrmsg("Language subtag <span class='stname'>".htmlspecialchars($langtag, ENT_QUOTES)."</span> not found.");
        }
    }
} // the rest of the tag parsing is done in the Check for Well-formedness section below
else { // tag list lookup
    $subtags = explode('-', $searchtext);
    // With nothing to look up (eg. no query at all) the registries are not scanned.
    if ($searchtext !== '') {
        if (count($subtags) > 1) {
            foreach ($grandfathered as $item) {
                if (strtolower($item['Tag']) === $searchtext) { $grandList .= makeListItem($item); }
            }
        }
        foreach ($subtags as $subtag) {
            // Every registry is consulted for every subtag, since the same
            // subtag can occur in more than one of them.
            foreach ($languages as $item) {
                if (strtolower($item['Subtag']) === $subtag) { $languageList .= makeListItem($item); }
            }
            foreach ($extlang as $item) {
                if (strtolower($item['Subtag']) === $subtag) { $extlangList .= makeListItem($item); }
            }
            foreach ($regions as $item) {
                if (strtolower($item['Subtag']) === $subtag) { $regionsList .= makeListItem($item); }
            }
            foreach ($scripts as $item) {
                if (strtolower($item['Subtag']) === $subtag) { $scriptList .= makeListItem($item); }
            }
            foreach ($variant as $item) {
                if (strtolower($item['Subtag']) === $subtag) { $variantList .= makeListItem($item); }
            }
        }
    }
}
/*
$nav = '';
*/
?>
<?php
#################### CHECK FOR WELL-FORMEDNESS #######################################
// Note to self: This code is not particularly efficient, since I added new ideas by bolting on code quickly. Could do with an overhaul,
// where everything is organized around the $itemarray.
// Only runs for a tag check, and only when the tag is not a grandfathered tag.
// Expects $subtags to hold the subtags that follow the language subtag, and
// $itemarray to hold the registry records collected so far.
if ($searchtype == check && $grandList == '' && $searchtext != '') {
    // read subtags into an array and determine their type
    // raise an error if there is more than one of any given type
    $subtagArray = array();
    $extlangfound = false;
    $scriptfound = false;
    $regionfound = false;
    $variantfound = false;
    $variantlist = '';
    $seenvariants = array(); // exact values already met, for duplicate detection
    for ($i=0; $i<count($subtags); $i++) {
        $stvalue = $subtags[$i];
        // Subtags are user input: this escaped copy is what goes into messages.
        $stshown = htmlspecialchars($stvalue, ENT_QUOTES);
        $sthasdigit = preg_match("/[0-9]/", $stvalue);

        // The type is inferred from the length and from whether a digit occurs.
        switch (strlen($stvalue)) {
            case 2:  $sttype = region; break;
            case 3:  $sttype = $sthasdigit ? region : extlang; break;
            case 4:  $sttype = $sthasdigit ? variant : script; break;
            default: $sttype = variant;
        }
        $subtagArray[$i] = array('posn' => $i, 'type' => $sttype, 'value' => $stvalue);

        switch ($sttype) {
            case region:
                if ($regionfound) { $errormsg .= addtoerrmsg("More than one region subtag: <span class='stname'>$stshown</span>"); }
                $regionfound = true;
                break;
            case extlang:
                if ($extlangfound) { $errormsg .= addtoerrmsg("More than one extlang subtag: <span class='stname'>$stshown</span>"); }
                $extlangfound = true;
                break;
            case script:
                if ($scriptfound) { $errormsg .= addtoerrmsg("More than one script subtag: <span class='stname'>$stshown</span>"); }
                $scriptfound = true;
                break;
            case variant:
                // Whole values are compared, so a variant that is merely a
                // substring of an earlier one is not reported as a duplicate.
                if (in_array($stvalue, $seenvariants, true)) {
                    $errormsg .= addtoerrmsg("Duplicate variant subtag: <span class='stname'>$stshown</span>");
                }
                else {
                    $seenvariants[] = $stvalue;
                    $variantlist .= $stvalue.'.';
                }
                break;
        }
    }

    // check that all subtags are meaningful
    // (makeListItem() is called before the record is added to $itemarray,
    // because the warning checks it triggers read $itemarray)
    foreach ($subtagArray as $subtag) {
        $found = false;
        $stshown = htmlspecialchars($subtag['value'], ENT_QUOTES);
        switch ($subtag['type']) {
            case extlang:
                foreach ($extlang as $item) {
                    if (strtolower($item['Subtag']) === $subtag['value']) { $extlangList .= makeListItem($item); $found = true; $itemarray[] = $item; }
                }
                if (! $found) { $errormsg .= addtoerrmsg("Extlang subtag <span class='stname'>".$stshown."</span> not found."); }
                break;
            case region:
                foreach ($regions as $item) {
                    if (strtolower($item['Subtag']) === $subtag['value']) { $regionsList .= makeListItem($item); $found = true; $itemarray[] = $item; }
                }
                if (! $found) { $errormsg .= addtoerrmsg("Region subtag <span class='stname'>".$stshown."</span> not found."); }
                break;
            case script:
                foreach ($scripts as $item) {
                    if (strtolower($item['Subtag']) === $subtag['value']) { $scriptList .= makeListItem($item); $found = true; $itemarray[] = $item; }
                }
                if (! $found) { $errormsg .= addtoerrmsg("Script subtag <span class='stname'>".$stshown."</span> not found."); }
                break;
            case variant:
                foreach ($variant as $item) {
                    if (strtolower($item['Subtag']) === $subtag['value']) { $variantList .= makeListItem($item); $found = true; $itemarray[] = $item; }
                }
                if (! $found) { $errormsg .= addtoerrmsg("Variant subtag <span class='stname'>".$stshown."</span> not found."); }
                break;
        }
    }

    // check that all subtags are correctly ordered
    // (the type constants increase in the required order: extlang, script, region, variant)
    $wrongorder = false;
    for ($i=0; $i<count($subtagArray)-1; $i++) {
        if ($subtagArray[$i]['type'] > $subtagArray[$i+1]['type']) { // there's a problem...
            $wrongorder = true;
        }
    }
    if ($wrongorder) {
        $sttypelabels = array(extlang => 'extlang', script => 'script', region => 'region', variant => 'variant');

        // the order as typed, and a record of which types occur at all
        $currentorder = '';
        $sttypespresent = array();
        foreach ($subtagArray as $subtag) {
            $currentorder .= ' - '.$sttypelabels[$subtag['type']].'<sub>'.htmlspecialchars($subtag['value'], ENT_QUOTES).'</sub>';
            $sttypespresent[$subtag['type']] = true;
        }

        // the order they should be in: each type that occurs, once, in rank order
        $expectedorder = '';
        foreach ($sttypelabels as $sttyperank => $sttypelabel) {
            if (isset($sttypespresent[$sttyperank])) { $expectedorder .= ' - '.$sttypelabel; }
        }

        $errormsg .= addtoerrmsg("Subtags incorrectly ordered. Current order:<br /> language<sub>".htmlspecialchars($langtag, ENT_QUOTES)."</sub>".$currentorder."<br />Expected order:<br /> language $expectedorder");
    }

    // check that variants are used correctly
    // (starts at 1: entry 0 of $itemarray is taken to be the language record)
    for ($i=1; $i<count($itemarray); $i++) {
        if ($itemarray[$i]['Type'] == 'variant') {
            if (!isset($itemarray[$i]['Prefix'])) {
                $warnings .= '<p class="registryItem"><img src="images/warning.png" alt="Warning:" /> Ensure that <span class="stname">'.$itemarray[$i]['Subtag']."</span> appears after any other variants with prefix requirements.</p>";
            }
            else {
                $prefixlist = explode(', ', $itemarray[$i]['Prefix']);
                $found = false;
                $variantsAtPrefixEnd = '';
                $prefixLangTag = '';
                foreach ($prefixlist as $prefix) {
                    // find out whether the last subtag in the prefix is a variant
                    $prefixtags = explode('-', $prefix);
                    $lastprefix = $prefixtags[count($prefixtags)-1];
                    if (strlen($lastprefix) >= 5) {
                        $variantsAtPrefixEnd .= $lastprefix.' ';
                        // check that this corresponds to the previous subtag
                        if (isset($itemarray[$i-1]) && $itemarray[$i-1]['Subtag'] == $lastprefix ) {
                            $found = true;
                            break;
                        }
                    }
                    else {
                        // check that each subtag in one of the prefixes appears in the previous items
                        $tagfound = 0;
                        if ( $prefixtags[0] == $itemarray[0]['Subtag'] ) { $tagfound++; }
                        for ($n=1; $n<count($prefixtags); $n++) {
                            for ($k=0; $k<$i; $k++) {
                                if ($prefixtags[$n] == $itemarray[$k]['Subtag']) { $tagfound++; }
                            }
                        }
                        if ($tagfound == count($prefixtags)) {
                            $found = true;
                            break;
                        }
                    }
                }
                if (!$found) {
                    if ($variantsAtPrefixEnd != '' && strpos($itemarray[$i]['Prefix'], ',') !== false) {
                        $errormsg .= addtoerrmsg("The variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should be used immediately after one of the following subtags: <span class='stname'>".$variantsAtPrefixEnd.'</span>.');
                    }
                    else if ($variantsAtPrefixEnd != '') {
                        $errormsg .= addtoerrmsg("The variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should be used immediately after <span class='stname'>".$variantsAtPrefixEnd.'</span>.');
                    }
                    else if (strpos($itemarray[$i]['Prefix'], ',') !== false) {
                        $errormsg .= addtoerrmsg("Subtags before the variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should include one of <span class='stname'>".str_replace('-','+',$itemarray[$i]['Prefix']).'</span>. <span style="font-size: 80%">(For each alternative, a single subtag or the first subtag before a + sign is a language tag. Any subtag listed after a + sign must also appear, but may be preceded or followed by other subtags, eg. zh+Latn matches zh-cmn-Latn-CN.)</span>');
                    }
                    else {
                        $errormsg .= addtoerrmsg("Subtags before the variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should include <span class='stname'>".str_replace('-','+',$itemarray[$i]['Prefix']).'</span>. <span style="font-size: 80%">(A single subtag or the first subtag before a + sign is a language tag. Any subtag listed after a + sign must also appear, but may be preceded or followed by other subtags, eg. zh+Latn matches zh-cmn-Latn-CN.)</span>');
                    }
                }
            }
        }
    }
}
// $errormsg .= addtoerrmsg("The variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should be used immediately after the subtag <span class='stname'>".$lastprefix.'</span>.');
// if (!$tagfound) {$errormsg .= addtoerrmsg("The variant <span class='stname'>".$itemarray[$i]['Subtag']."</span> should be used with the subtag <span class='stname'>".$currenttag.'</span>.'); }
?>
<?php
#################### CHECK USER DEFINED FOR WELL-FORMEDNESS #######################################
// Report every private-use subtag that is longer than 8 characters.
// $userdefined[0], when set, holds the private-use part of the query.
if (isset($userdefined[0])) {
    $usertags = explode('-', $userdefined[0]);
    foreach ($usertags as $subtag) {
        if (strlen($subtag) > 8) {
            // The subtag is user input, so it is escaped before going into the message.
            $errormsg .= addtoerrmsg("Private-use subtag <span class='stname'>".htmlspecialchars($subtag, ENT_QUOTES)."</span> too long. Should be 8 characters maximum.");
        }
    }
}
?>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>ishida >> utilities: Language subtag registry search</title>
<meta name="keywords" content="iana subtag registry search language codes tags subtags region script variant" />
<meta name="description" content="Search the IANA language subtag registry or list subtags by category." />
<link rel="stylesheet" type="text/css" href="/rishida/style/article-basic.css"/>
<link rel="top" title="Ishida home page" type="text/html" hreflang="en" href="/rishida/" />
<link rel="up" title="List of utilities" type="text/html" hreflang="en" href="/rishida/utilities" />
<!--<link title="___VERSION DESCN IN FOREIGN LANG." type="text/html" rel="alternate" hreflang="___LANG" href="___AND THE HREF" lang="___LANG" xml:lang="___LANG" />-->
<style type="text/css" media="all">
/* <![CDATA[ */
@import "/rishida/style/article-standards.css";
@import "/rishida/style/ri.css";
/* ]]> */
</style>
<link rel="stylesheet" type="text/css" href="style.css" />
<script type="text/javascript" src="functions.js">//</script>
</head>
<body onload="if (document.getElementById) { makeFriendly(); }">
<div id="wai-start" class="hide">
<p>Use accesskey "n" to jump to the <a href="#internal-links">internal navigation links</a> at any point. <a href="#contentstart">Skip to
the content start.</a></p>
</div>
<div id="site-navigation"><!--<p class="noprint" title="___LANG version.">> <a href="___HREF" lang="___LANG" xml:lang="___LANG">___LANGUAGE IN FOREIGN</a></p>-->
<img id="picture" alt="World map" src="/rishida/icons/world.gif" width="150" height="61"
style="margin-top: 1em; margin-bottom: 1em;"/></div>
<div class="sidebar">
<div class="section" style="font-size: 90%;">
<h2>Related links</h2>
<p><a target="_blank" href="http://www.w3.org/International/articles/language-tags/Overview.en.php">About language subtags</a></p>
<p><a target="_blank" href="<?php /* PHP_SELF can contain path text supplied in the URL, so escape it for the attribute */ echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES); ?>?source=1">PHP source code</a></p>
<p><a target="_blank" href="http://www.iana.org/assignments/language-subtag-registry">IANA subtag registry</a></p>
<h2>Notes</h2>
<p><strong><span class="notenumber">1</span> Find.</strong> The description is the name of the subtag, ie. the language, region, etc. You can search on any part of a description (eg. <a href="/rishida/utils/subtags/index.php?find=eng&submit=Find">eng</a> gives results that include Bengali, English and Tengwar). Note that some languages may be spelled differently, eg. <a href="/rishida/utils/subtags/index.php?find=panjabi&submit=Find">Panjabi</a> yields three results, but <a href="/rishida/utils/subtags/index.php?find=punjabi&submit=Find">Punjabi</a> only one.</p>
<p><strong><span class="notenumber">2</span> Look up. </strong> Looks up all forms of all subtags in the list, eg. it returns a language and extlang subtag for <a href="/rishida/utils/subtags/index.php?lookup=cmn&submit=Look+up">cmn</a>. Subtags can be separated by commas, semicolons, hyphens, or spaces, eg. <a href="/rishida/utils/subtags/index.php?lookup=en%3B+fr%2C+ca%2C+Bali-1901&submit=Look+up">en; fr, ca, Bali-1901</a>.<br />
Only exact matches of subtags are returned, ie. <a href="/rishida/utils/subtags/index.php?lookup=ak&submit=Look+up">ak</a> doesn't match <a href="/rishida/utils/subtags/index.php?lookup=akk&submit=Look+up">akk</a>. <br />
If there is no output for a tag, it was not a valid tag in the registry.</p>
<p><strong><span class="notenumber">3</span> Check.</strong> Looks up all subtags in a language tag (ie. a hyphen-separated list of subtags) and reports problems, if there are any, eg. <a href="/rishida/utils/subtags/index.php?check=de-419-DE-alt&submit=Check">de-419-DE-alt</a> and <a href="/rishida/utils/subtags/index.php?check=ms-cmn&submit=Check">ms-cmn</a>.<br />
Warnings are also displayed to guide you in language choices. Example: <a href="/rishida/utils/subtags/index.php?check=ms-min-latn&submit=Check">ms-min-Latn</a>.<br />
If no errors or warnings are reported, the language tag should be ok, though there may be some rare corner cases that are not detected.</p>
<p><strong><span class="notenumber">4</span> <img src="images/ethn.png" alt="Green button with E on it" /></strong> provides links from language subtag entries to the SIL Ethnologue. This is useful when trying to find a language. If there is no page for a given language tag, such as <a href="/rishida/utils/subtags/index.php?find=nn&submit=Find">nn</a>, click on the result to reveal the full record; if there is a macrolanguage specified, eg. 'no', that will probably yield an ethnologue entry.</p>
<p><strong><span class="notenumber">5</span> <img src="images/univ.png" alt="Lilac button with U on it" /></strong> provides links from script subtag entries to the most relevant Unicode script block in UniView.<br />
In some cases, there are additional Unicode areas dedicated to the same script. These can usually be easily found alongside the current block in UniView's pull-down control.
<br />
Some very large blocks such as Han, Hangul and Egyptian Hieroglyphs are not linked to. Other scripts do not yet have Unicode blocks.
</p>
<p><strong><span class="notenumber">6</span> Grandfathered tags</strong> are pre-RFC 4646 registrations of tags that cannot be completely composed
from the subtags in the current registry. If you search for a grandfathered tag it will be displayed in the results,
and the subtags that are in the current registry will also be shown.</p>
<p><strong><span class="notenumber">7</span> Redundant tags</strong> are pre-RFC 4646 registrations of tags that can now be formed by
combining separate subtags from the current registry. They do not show up in search results.</p>
<p><strong><span class="notenumber">8</span> Extension subtags</strong> are not supported in this version of the lookup tool.</p>
<!--p>The 'redundant' class of subtags are legacy subtags that are still in the registry but that can now be composed of </p-->
</div>
</div>
<div id="boilerplate">
<div id="line"> </div>
</div>
<div id="topbar">ishida >> utilities</div>
<div>
<div id="sitelinks" class="noprint">
<span><a href="/rishida/" title="Richard Ishida's home page">home</a>
<a href="/rishida/blog/" title="Richard Ishida's blog">blog</a>
<a href="/rishida/writing" title="Papers, articles, notes, etc">writings</a>
<a href="/rishida/utilities" title="Small utilities written in xhtml and javascript">utilities</a>
<a href="/rishida/photos/" title="Photos and video clips">photos</a>
<a href="/rishida/family.html" title="Introducing the Ishida family">family</a> </span>
</div>
<h1> Language Subtag Lookup</h1>
</div>
<div class="section">
<p> The IANA Language Subtag Registry is where you find the subtags that can make up a BCP 47 language tag (separated by hyphens). In a short while I plan to link to guidelines for creating subtags. However, this tool already provides information to guide your choices and lets you check a language tag.</p>
<p>Click on the results to see the raw IANA data for a subtag.</p>
<p class="explanation">This version has been updated to support the approximately 7800 new subtags, including extlang subtags, introduced in July 2009, and the tool now provides help in choosing subtags and a tag checker based on RFC 5646. Please report any bugs to <a href="mailto:ishida@w3.org">me</a>.</p>
<?php
# Search form. It submits back to this same script via GET; which button was
# pressed is read from the 'submit' parameter at the top of the file.
#
# Everything taken from the request ($_SERVER['PHP_SELF'] and the find /
# lookup / check parameters) is echoed back into HTML attributes, so it is
# passed through htmlspecialchars() with ENT_QUOTES first. Without that a
# crafted URL could close the attribute and inject markup or script.
# Parameters that arrive as arrays (e.g. ?find[]=x) are ignored rather than
# being printed as the literal text "Array".
# NOTE(review): 'UTF-8' assumes the page is served as UTF-8 - confirm.
?>
<form method="get" action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" name="searchlists" id="searchlists" >
<input style="display: none;" type="submit" value="" name="submit"/> <!-- this is a dummy to trigger an error message if a button isn't selected. -->
<p><strong>Find</strong> a subtag by searching for text in the descriptions <span class="explanation">See note 1.</span><br />
<input type="text" name="find" <?php if (isset($_GET['find']) && is_string($_GET['find'])) { echo "value='".htmlspecialchars($_GET['find'], ENT_QUOTES, 'UTF-8')."'"; } ?> />
<input type="submit" value="Find" name="submit"/>
</p>
<p><strong>Look up</strong> one or more subtags <span class="explanation">See note 2.</span><br />
<input type="text" name="lookup" <?php if (isset($_GET['lookup']) && is_string($_GET['lookup'])) { echo "value='".htmlspecialchars($_GET['lookup'], ENT_QUOTES, 'UTF-8')."'"; } ?> />
<input type="submit" value="Look up" name="submit"/>
</p>
<p><strong>List</strong> all tags of the following type <br />
<?php
# The option values are the numeric search-type constants defined at the top
# of the file. $searchtype may hold the raw string from $_GET['list'], so the
# comparisons below are intentionally loose (==) rather than strict.
?>
<select name="list">
<option value="0" <?php if ($searchtype==none){ echo 'selected="selected" ';}?> >Choose...</option>
<option value="4" <?php if ($searchtype==language){ echo 'selected="selected" ';}?> >Languages (takes a while!)</option>
<option value="5" <?php if ($searchtype==extlang){ echo 'selected="selected" ';}?> >Extlangs</option>
<option value="6" <?php if ($searchtype==script){ echo 'selected="selected" ';}?> >Scripts</option>
<option value="7" <?php if ($searchtype==region){ echo 'selected="selected" ';}?> >Regions</option>
<option value="8" <?php if ($searchtype==variant){ echo 'selected="selected" ';}?> >Variants</option>
<option value="9" <?php if ($searchtype==grand){ echo 'selected="selected" ';}?> >Grandfathered</option>
<option value="10" <?php if ($searchtype==redundant){ echo 'selected="selected" ';}?> >Redundant</option>
</select>
<input type="submit" value="List" name="submit"/>
</p>
<p>
<strong>Check</strong> a hyphen-separated tag <span class="explanation">See note 3.</span><br />
<input type="text" name="check" <?php if (isset($_GET['check']) && is_string($_GET['check'])) { echo "value='".htmlspecialchars($_GET['check'], ENT_QUOTES, 'UTF-8')."'"; }?> />
<input type="submit" value="Check" name="submit"/>
</p>
<?php # Messages collected earlier in the script are printed as-is; they are expected to already be HTML. ?>
<?php if ($errormsg != '') { echo $errormsg; } ?>
<?php if ($warnings != '') { echo $warnings; } ?>
<?php if ($info != '') { echo $info; } ?>
</form>
</div>
<?php
#################### OUTPUT THE RESULTS #######################################
# Prints one titled section per non-empty result list. When $searchtype is one
# of the "list" types (numeric value above 3) the heading also carries the
# number of subtags in $listcount.
#
# The language list is echoed immediately instead of being appended to
# $finalList, so $languagesShown records that it was printed. Without that
# flag a search that matched only language subtags would print the list and
# then the "No subtags found" heading underneath it.
$finalList = '';
$languagesShown = false;

if ($languageList != '') {
	echo "<h2>Language Codes ";
	if ($searchtype>3) { echo " (".$listcount." subtags)"; }
	echo "</h2><div id='listLanguages'>";
	echo $languageList;
	echo "</div>";
	$languagesShown = true;
	}

# Remaining sections, in display order: heading text, id of the wrapping div, list markup.
$sections = array(
	array('Extlang ', 'listExtlang', $extlangList),
	array('Script Codes ', 'listScripts', $scriptList),
	array('Geographic Regions ', 'listRegions', $regionsList),
	array('Grandfathered ', 'listGrand', $grandList),
	array('Redundant ', 'listRedundant', $redundantList),
	array('Variants ', 'listVariant', $variantList),
	);

foreach ($sections as $section) {
	list($heading, $divId, $items) = $section;
	if ($items == '') { continue; }
	$finalList .= "<h2>".$heading;
	# $listcount is only read for "list" searches, as in the language section above.
	if ($searchtype>3) { $finalList .= " (".$listcount." subtags)"; }
	$finalList .= "</h2><div id='".$divId."'>".$items."</div>";
	}

# Only report an empty result when nothing at all was printed, languages included.
if ($finalList == '' && !$languagesShown) { $finalList = '<h2>No subtags found</h2>'; }

# NOTE(review): $userdefined[0] is inserted into the page without escaping here;
# confirm that the code which fills it has already made it safe for HTML.
if (isset($userdefined[0])) { $finalList .= "<h2>User-defined subtags</h2><div id='userDefined'><div class='registryItem'><h3>$userdefined[0]</h3></div></div>"; }

echo $finalList;
?>
<div id="author">
<p>Developed by: <a href="http://www.w3.org/People/Ishida/">Richard Ishida</a>.<!-- [Translator: ___NAME, ____ORGANISATION.]--></p>
</div>
<div class="smallprint">
<p id="version" style="margin-bottom: 0;">Web app created 1 April 2007. Last update <span id="version-info"><!-- #BeginDate format:IS1m -->2009-09-10 18:42<!-- #EndDate --></span>
GMT</p>
<p class="copyright" style="margin-top: 0;">Copyright © 2007-2009 Richard Ishida.</p>
<!-- Temporary check on visitors -->
<!--?php
$subject = "IANA subtag search used: ".$searchtext;
$to = "ishida@w3.org";
$from = "ishida@w3.org";
$message = "User agent: ".$_SERVER['HTTP_USER_AGENT']."\n\n";
$message .= "Referer: ".$_SERVER['HTTP_REFERER']."\n\n";
$message .= "IP Address: ".$_SERVER['REMOTE_ADDR']."\n\n";
if ($_SERVER['REMOTE_ADDR'] != '86.5.91.129') { mail($to, $subject, $message, "From: $from"); }
?-->
</div>
<script src="http://www.google-analytics.com/urchin.js" type="text/javascript">
</script>
<script type="text/javascript">
_uacct = "UA-1938093-1";
urchinTracker();
</script>
</body>
</html>