<?php
/*
 * Kana table definition, handed to parse_tab() as one heredoc string.
 *
 * parse_tab() splits the text on newlines and then:
 *  - skips empty lines and lines that start with a space (comments);
 *  - reads the HIRAGANA / KATAKANA lines as base codes (a decimal column that
 *    is not used, then the unicode, sjis and jis0208 bases in hex);
 *  - reads "NNN+S: name name ..." lines as syllable names placed at
 *    base + NNN, advancing by S after every name;
 *  - reads "= KEY: RESULT" lines as romaji rules, where 0 in KEY stands for
 *    each vowel and x / X in RESULT for that vowel in lower / upper case.
 *
 * NOTE(review): the line breaks inside this heredoc look collapsed in this
 * copy of the file - verify against the deployed file before editing the table.
 */
parse_tab(<<<EOF Bases: unicode (hex) sjis jis0208 HIRAGANA 12353 0x3041 0x829F 0x2421 (note jis0208 only uses 0x21..0x7E) KATAKANA 12449 0x30A1 0x8340 0x2521
Base index and increment (lines beginning with space are comments) 000+2: a i u e o 001+2: A I U E O Input map. 0 represents a vowel (AIUEO), X its capital replacement, x its lowercase replacement. = 0: X = WH0: U x
010+2: KA KI KU KE KO = KY0: KI yx = K0: KX
011+2: GA GI GU GE GO = GY0: GI yx = G0: GX
020+2: SA SI SU SE SO = SHI: SI = SH0: SI yx = SY0: SI yx = SW0: SU x = CI: SI = S0: SX
021+2: ZA ZI ZU ZE ZO = ZY0: ZI yx = JI: ZI = JY0: ZI yx = J0: ZI yx = Z0: ZX = ZH0: ZI yx
030+2: TA TI 035+2: TU TE TO = CHI: TI = CH0: TI yx = TY0: TI yx = CY0: TI yx = TSU: TU = TS0: TU yx = TH0: TE yx = TW0: TO x = T0: TX
031+2: DA DI 036+2: DU DE DO = DY0: DI yx = DH0: DE yx = DW0: DO x = DZI: DI = D0: DX
034: tu 041+1: NA NI NU NE NO = N0: NX = NY0: NI yx
046+3: HA HI HU HE HO = HY0: HI yx = H0: HX = FY0: HU yx = FU: HU = FW0: HU x = F0: HU x
047+3: BA BI BU BE BO = BY0: BI yx = B0: BX note these use katakana! vu = U with " = VY0: vu yx = VU: vu = V0: vu x
048+3: PA PI PU PE PO = PY0: PI yx = P0: PX
061+1: MA MI MU ME MO = MY0: MI yx = M0: MX
066+2: ya yu yo 067+2: YA YU YO = YA: YA = YI: I = YU: YU = YE: I e = YO: YO
072+1: RA RI RU RE RO = RY0: RI yx = LY0: RI yx = R0: RX = L0: RX
077+1: wa WA WI WE WO (What is small wa? I don't know) = WA: WA = WI: U i = WYI: WI = WYE: WE = WE: U e = WO: WO
082: N = N: N
179: vu ka ke (katakana) 187: - (katakana prolonged voice mark) EOF );
/**
 * Read one "HIRAGANA ..." / "KATAKANA ..." base line.
 *
 * The line holds a word, a decimal number (skipped) and three hex codes:
 * Unicode, Shift_JIS and JIS X 0208.  The JIS code is converted to a linear
 * cell index (94 cells per row, both bytes counted from 0x21).
 *
 * @param string $line The base line.
 * @return array       Keys 'uni', 'sjis' (code values) and 'jis' (linear index).
 */
function parse_tab_bases($line)
{
    preg_match('/^[A-Z]* *[0-9]* *([x0-9A-F]*) *([x0-9A-F]*) *([x0-9A-F]*)/i', $line, $ma);

    $jis = hexdec($ma[3]);

    return array(
        'uni'  => hexdec($ma[1]),
        'sjis' => hexdec($ma[2]),
        'jis'  => (($jis % 256) - 0x21) + ((int)($jis / 256) - 0x21) * 0x5E,
    );
}

/**
 * Shift_JIS code of the character $offset positions after $base.
 *
 * Shift_JIS never uses 0x7F as a trail byte, so a run that starts below 0x7F
 * and reaches it must step over that byte (katakana MI is 0x837E, MU is
 * 0x8380).  Only runs that stay within the lead byte of $base are adjusted.
 *
 * @param int $base   Shift_JIS code of the first character of the run.
 * @param int $offset Distance from $base.
 * @return int        Shift_JIS code.
 */
function parse_tab_sjis($base, $offset)
{
    $code = $base + $offset;

    if ($base > 0xFF
        && ($base & 0xFF) < 0x7F
        && ($code & 0xFF) >= 0x7F
        && ($code >> 8) == ($base >> 8)) {
        $code++;
    }

    return $code;
}

/**
 * Parse the kana table definition and fill the global lookup tables.
 *
 * $str is split on "\n" (a trailing "\r" is dropped) and every line is one of:
 *  - empty, or starting with a space: ignored;
 *  - "HIRAGANA ..." / "KATAKANA ...": base codes, see parse_tab_bases();
 *  - "<index>[+<step>]: name name ...": every name is stored at base + index
 *    in all six code tables; index grows by <step> (0 if omitted) per name;
 *  - "<marker> <KEY>: <result>": romaji rule.  KEY is lower-cased, anything
 *    from " (" onwards is cut from the result.  A KEY containing "0" is
 *    expanded once per vowel (x / X in the result become the vowel in lower /
 *    upper case) and never overrides an existing rule; a KEY without "0" is
 *    stored as is and does override;
 *  - anything else is reported as an unrecognized line.
 *
 * Globals written: $trtab (romaji => syllable names), $uniHtab / $uniKtab,
 * $sjisHtab / $sjisKtab, $jisHtab / $jisKtab (syllable name => code for
 * hiragana / katakana; the jis tables hold linear cell indexes).
 *
 * @param string $str Table definition text.
 * @return void
 */
function parse_tab($str)
{
    global $trtab;
    global $uniHtab, $uniKtab;
    global $sjisHtab, $sjisKtab;
    global $jisHtab, $jisKtab;

    $trtab    = array();
    $uniHtab  = array();
    $uniKtab  = array();
    $sjisHtab = array();
    $sjisKtab = array();
    $jisHtab  = array();
    $jisKtab  = array();

    /* Bases stay at zero until the corresponding line has been seen */
    $baseH = array('uni' => 0, 'sjis' => 0, 'jis' => 0);
    $baseK = array('uni' => 0, 'sjis' => 0, 'jis' => 0);

    $vowels = array('a', 'i', 'u', 'e', 'o');

    foreach (explode("\n", $str) as $s) {
        /* Tolerate CRLF line endings */
        $s = rtrim($s, "\r");

        if ($s == "" || $s[0] == ' ') {
            continue;
        }

        if (preg_match('/^HIRAGANA/i', $s)) {
            $baseH = parse_tab_bases($s);
        } elseif (preg_match('/^KATAKANA/i', $s)) {
            $baseK = parse_tab_bases($s);
        } elseif (preg_match('/^[0-9]/', $s)) {
            /* "<index>+<step>: names"; a missing step scans as null, i.e. 0 */
            $nums = sscanf($s, '%d+%d');
            $n    = (int)$nums[0];
            $step = (int)$nums[1];

            $names = preg_replace('/^[^:]*: */', '', $s);
            foreach (explode(" ", $names) as $name) {
                $uniHtab[$name]  = $baseH['uni'] + $n;
                $uniKtab[$name]  = $baseK['uni'] + $n;
                $sjisHtab[$name] = parse_tab_sjis($baseH['sjis'], $n);
                $sjisKtab[$name] = parse_tab_sjis($baseK['sjis'], $n);
                $jisHtab[$name]  = $baseH['jis'] + $n;
                $jisKtab[$name]  = $baseK['jis'] + $n;
                $n += $step;
            }
        } elseif (preg_match('/[^ ]* .*:/', $s)) {
            /* $ma[1] is the leading marker (currently '='), which is not used */
            preg_match('/([^ ]*) *([^:]*): *(.*)/', $s, $ma);
            $key = strtolower($ma[2]);
            $res = preg_replace('/ *\(.*/', '', $ma[3]);

            if (!preg_match('/0/', $key)) {
                $trtab[$key] = $res;
            } else {
                foreach ($vowels as $v) {
                    $k = str_replace('0', $v, $key);
                    if (!isset($trtab[$k])) {
                        $trtab[$k] = str_replace('X', strtoupper($v), str_replace('x', $v, $res));
                    }
                }
            }
        } else {
            echo '<p>Unrecognized line in configuration: "', htmlspecialchars($s), '"<p>';
        }
    }
}
/**
 * Print a code as an HTML numeric character reference.
 *
 * @param int $c Code point.
 */
function unihtmldisp($c)
{
    echo '&#', $c, ';';
}

/**
 * Print a two-byte Shift_JIS code, high byte first.
 *
 * @param int $c 16-bit code.
 */
function sjisdisp($c)
{
    /* Integer division, as in the sibling functions; chr() must not get a float */
    echo chr((int)($c / 256)), chr($c % 256);
}

/**
 * Print a linear JIS X 0208 cell index (94 cells per row) as EUC-JP,
 * i.e. both bytes offset from 0xA1.
 *
 * @param int $c Linear cell index.
 */
function eucjpdisp($c)
{
    echo chr(0xA1 + (int)(($c) / 0x5E)), chr(0xA1 + ($c) % 0x5E);
}

/** Print the ISO-2022-JP sequence that switches to JIS X 0208 (ESC $ B). */
function iso2022begin()
{
    echo "\x1B\$B";
}

/**
 * Print a linear JIS X 0208 cell index as two 7-bit bytes offset from 0x21.
 * Call between iso2022begin() and iso2022end().
 *
 * @param int $c Linear cell index.
 */
function iso2022disp($c)
{
    echo chr(0x21 + (int)(($c) / 0x5E)), chr(0x21 + ($c) % 0x5E);
}

/** Print the ISO-2022-JP sequence that switches back to ASCII (ESC ( B). */
function iso2022end()
{
    echo "\x1B(B";
}
/**
 * Print a list of syllable names in the encoding selected by the global $dump.
 *
 * 'yi' and 'ye' are printed as 'i' and 'e'.  Empty names (explode() leaves one
 * after the trailing space of a syllable string) are skipped.  A name that is
 * missing from $uniHtab is reported as an unknown syllable and not printed.
 *
 * $dump values: 'shift_jis', 'euc-jp' / 'shodouka' (both EUC-JP),
 * 'iso-2022-jp' (output is wrapped in the shift sequences), 'text' (the names
 * themselves); anything else prints HTML numeric character references.
 *
 * @param array $tab        Syllable names.
 * @param bool  $iskatakana Use the katakana tables instead of the hiragana ones.
 * @return void
 */
function disptavut($tab, $iskatakana=false)
{
    global $dump;
    global $uniHtab, $uniKtab, $sjisHtab, $sjisKtab, $jisHtab, $jisKtab;

    $iso = ($dump == 'iso-2022-jp');

    if ($iso) {
        iso2022begin();
    }

    foreach ($tab as $s) {
        if ($s == 'yi') {
            $s = 'i';
        } elseif ($s == 'ye') {
            $s = 'e';
        }

        if (!strlen($s)) {
            continue;
        }

        if (empty($uniHtab[$s])) {
            /* Leave JIS mode while the ASCII message is printed, then re-enter it */
            if ($iso) {
                iso2022end();
            }
            echo '<p>Unknown syllable: "', htmlspecialchars($s), '"<p>';
            if ($iso) {
                iso2022begin();
            }
            continue;
        }

        switch ($dump) {
            case 'shift_jis':
                sjisdisp($iskatakana ? $sjisKtab[$s] : $sjisHtab[$s]);
                break;
            case 'shodouka':
            case 'euc-jp':
                eucjpdisp($iskatakana ? $jisKtab[$s] : $jisHtab[$s]);
                break;
            case 'iso-2022-jp':
                iso2022disp($iskatakana ? $jisKtab[$s] : $jisHtab[$s]);
                break;
            case 'text':
                print $s;
                break;
            default:
                unihtmldisp($iskatakana ? $uniKtab[$s] : $uniHtab[$s]);
                break;
        }
    }

    if ($iso) {
        iso2022end();
    }
}
/**
 * Convert a romaji string to kana and print the result.
 *
 * The input is lower-cased, circumflexed long vowels are spelled out and the
 * particles " o " / " wa " are rewritten to " wo " / " ha ".  The string is
 * then consumed left to right, longest match first (4 down to 1 bytes),
 * against the global $trtab.  Converted syllables are printed through
 * disptavut(); a byte that cannot be converted is echoed in red.
 *
 * A space that follows converted text is dropped; if the global $spacet is
 * non-empty the pending syllables are printed and $spacet is shown instead.
 * A doubled consonant letter (after folding ts/cch/ch/sh to one letter)
 * yields the small "tu".
 *
 * @param string $s          Romaji text.
 * @param bool   $iskatakana Passed on to disptavut().
 * @return array Syllable names produced for the whole input, in order, as
 *               split by explode(' ', ...) - so an empty string follows the
 *               last syllable.
 */
function tavuta($s, $iskatakana=false)
{
    global $trtab, $spacet;

    $s = strtolower($s);
    $s = str_replace('ô', 'ou', $s);
    $s = str_replace('â', 'aa', $s);
    $s = str_replace('ê', 'ee', $s);
    $s = str_replace('î', 'ii', $s);
    $s = str_replace('û', 'uu', $s);

    /* The exceptional particles */
    $s = str_replace(' o ', ' wo ', $s);
    $s = str_replace(' wa ', ' ha ', $s);

    $separator = (string)$spacet;

    $done = '';   /* syllables already printed, each followed by a space */
    $res  = '';   /* syllables waiting to be printed, same format */
    $len  = strlen($s);

    for ($a = 0; $a < $len; ) {
        /* Longest romaji match wins */
        for ($n = 4; $n >= 1; $n--) {
            $chunk = substr($s, $a, $n);
            if (isset($trtab[$chunk])) {
                $res .= $trtab[$chunk] . ' ';
                $a += $n;
                continue 2;
            }
        }

        /* Word break after converted text */
        if ($s[$a] == ' ' && strlen($res)) {
            if (strlen($separator)) {
                disptavut(explode(' ', $res), $iskatakana);
                $done .= $res;
                $res = '';
                echo '<span style="color:#40D060">', htmlspecialchars($separator), '</span>';
            }
            $a++;
            continue;
        }

        /* Doubled consonant: fold digraphs so that tch, cch, ssh, tts count as doubles */
        $tmp = substr(
            str_replace('sh', 's',
            str_replace('ch', 't',
            str_replace('cch', 'tch',
            str_replace('ts', 't', substr($s, $a, 3))))), 0, 2);
        if (strlen($tmp) == 2 && $tmp[0] == $tmp[1] && $tmp[0] >= 'a' && $tmp[0] <= 'z') {
            $res .= 'tu ';
            $a++;
            continue;
        }

        /* Not convertible: print what is pending, then show the byte in red */
        if (strlen($res)) {
            disptavut(explode(' ', $res), $iskatakana);
            $done .= $res;
            $res = '';
        }
        echo '<em><span style="color:red">', htmlspecialchars($s[$a]), '</span></em>';
        $a++;
    }

    if (strlen($res)) {
        disptavut(explode(' ', $res), $iskatakana);
    }

    return explode(' ', $done . $res);
}
/*
 * Request parameters.  Absent or non-string values become ''.
 *   dump   - output encoding
 *   src    - number of the table page to show
 *   s      - romaji text to convert
 *   spacet - word separator (field "spacet" of the form; read by tavuta())
 */
$dump   = (isset($_GET['dump'])   && is_string($_GET['dump']))   ? $_GET['dump']   : '';
$src    = (isset($_GET['src'])    && is_string($_GET['src']))    ? $_GET['src']    : '';
$s      = (isset($_GET['s'])      && is_string($_GET['s']))      ? $_GET['s']      : '';
$spacet = (isset($_GET['spacet']) && is_string($_GET['spacet'])) ? $_GET['spacet'] : '';
/* MAIN START */

/* $dump and $src come from the query string; $dumptxt is a string-safe copy */
$dumptxt = is_scalar($dump) ? (string)$dump : '';

/* Announce the requested charset; 'text' and 'shodouka' keep the default header */
if(strlen($dumptxt) && $dump != 'text' && $dump != 'shodouka')
    header('Content-type: text/html; charset='.$dumptxt);

if($dump == 'shodouka')
{
    include '/WWW/shodouka.php';
}

/* Common page head of the table pages, with links to the same table in every encoding */
if($src >= 1 && $src <= 7)
{
    /* Escaped for use inside the href attribute */
    $srclink = htmlspecialchars(urlencode(is_scalar($src) ? (string)$src : ''));

    echo '<html><head><title>Hiragana and katakana tables</title>',
         '</head>',
         '<body>';
    echo "<small>Character encoding: ", strlen($dumptxt) ? htmlspecialchars($dumptxt) : "default";
    echo "<br>Possible choices, depending on what your browser supports: ";
    foreach(array('unihtml'=>'html &#codes;',
                  'iso-2022-jp'=>'iso-2022-jp',
                  'euc-jp'=>'euc-JP',
                  'shift_jis'=>'shift_jis',
                  'text'=>'Syllable codes as ascii',
                  'shodouka'=>'Shodouka') as $tmp=>$tmp2)
        echo '<a href="hiragana.php?src=', $srclink, '&dump=', $tmp, '">', $tmp2, '</a> ';
    echo '</small>';
}
/**
 * Print the kana of one table cell in the encoding selected by $dump.
 *
 * Same output rules as disptavut(), without the unknown-syllable check:
 * 'yi' / 'ye' are printed as 'i' / 'e', and names starting with 'v' are always
 * taken from the hiragana-based tables.
 *
 * @param string $tavut      Space-separated syllable names.
 * @param mixed  $iskatakana True-ish to use the katakana tables.
 * @return void
 */
function disptablekana($tavut, $iskatakana)
{
    global $dump;
    global $uniHtab, $uniKtab, $sjisHtab, $sjisKtab, $jisHtab, $jisKtab;

    if($dump == 'iso-2022-jp') iso2022begin();
    foreach(explode(' ', $tavut) as $s)
    {
        if($s == 'yi') $s = 'i';
        elseif($s == 'ye') $s = 'e';

        $kk = $iskatakana;
        if($s !== '' && $s[0] == 'v') $kk = 0;

        switch($dump)
        {
            case 'shift_jis':
                sjisdisp($kk ? $sjisKtab[$s] : $sjisHtab[$s]);
                break;
            case 'shodouka':
            case 'euc-jp':
                eucjpdisp($kk ? $jisKtab[$s] : $jisHtab[$s]);
                break;
            case 'iso-2022-jp':
                iso2022disp($kk ? $jisKtab[$s] : $jisHtab[$s]);
                break;
            case 'text':
                print $s;
                break;
            default:
                unihtmldisp($kk ? $uniKtab[$s] : $uniHtab[$s]);
                break;
        }
    }
    if($dump == 'iso-2022-jp') iso2022end();
}

/*
 * Table pages, selected by $src.  Every case ends the script.
 *   1    - highlighted source code of this file
 *   2..6 - syllable tables in different orders / layouts
 *   7    - the romaji spellings of every syllable
 * Any other value falls through to the converter page that follows.
 */
switch($src)
{
    case 1:
        print '<table width="100%" border=1 cellspacing=0 cellpadding=0><tr><td bgcolor=white>';
        highlight_file('hiragana.php');
        print '</td></tr></table>';
        exit;

    case 2:
    case 3:
    case 4:
    case 5:
    case 6:
    {
        print '<table cellpadding=1 cellspacing=0>';
        $order = " in alphabetical order";

        if($src==2)
        {
            /* Resort by length: keys are left-padded to 3 characters (except 'n') */
            $trtab2 = array();
            foreach($trtab as $s => $s2)
            {
                if($s != 'n')
                    while(strlen($s) < 3) $s = " $s";
                $trtab2[$s] = $s2;
            }
        }
        elseif($src==3)
        {
            /* Not altered */
            $trtab2 = $trtab;
        }
        elseif($src==4)
        {
            /* Shorter list: drop multi-syllable results and the l-/c- spellings.
             * Earlier filter, kept for reference:
             * preg_match('/^(.[wy]|[tdw]h|[cs]h[aueo]|(ts|f)[aieo]|[jlv]|[yw][ei])/', $s)
             */
            foreach($trtab as $s => $s2)
                if(preg_match('/ /', $s2) || preg_match('/^[lc][aiueo]/', $s))
                    unset($trtab[$s]);
            $trtab2 = $trtab;
        }
        elseif($src==5 || $src==6)
        {
            /* Consonant rows in a fixed order.  Keys get a shrinking run of
             * leading spaces so that ksort() below keeps the insertion order. */
            $trtab2 = array();
            $c = 100;
            $rows = $src==5
                ? array('','k','g','s','z','t','d','h','b','p','m','n','r','y')
                : array('','k','g','s','z','m','t','d','n','h','b','p','r','w','y');
            foreach($rows as $row)
                foreach(array('a','i','u','e','o') as $v)
                {
                    $s3 = str_replace(array('hu','tu','si','zi','ti'),
                                      array('fu','tsu','shi','ji','chi'),
                                      $row.$v);
                    if(preg_match('/ye|yi|we|wi|wu/', $s3)) continue;
                    $trtab2[str_pad('',$c--) . $s3] = $trtab[$s3];
                }
            foreach(array('n','wa','wo','wyi','wye') as $extra)
                $trtab2[$extra] = $trtab[$extra];
            $order = "";
        }
        ksort($trtab2);

        for($iskatakana=0; $iskatakana<=1; $iskatakana++)
        {
            echo "<tr><td colspan=10><h1>Romanized ",
                 ($iskatakana ? "katakana" : "hiragana"),
                 " syllables$order.",
                 "</h1>";
            if($src==5)
            {
                echo '<small>';
                if($iskatakana)
                    print '(Used in writing foreign words (English) or emphasis)';
                else
                    print '(Used in writing Japanese words)';
                echo '</small>';
            }
            if($src==3)
            {
                print '(Note that this is not a pronounciation guide for English speakers. ' . "\n"
                    . 'English writing/pronounciation system is way too messed up.<br>See <a href="hiragana.php?src=7">table 7</a>, it explains better.)<p>';
            }
            echo "</td></tr>";

            /* Print the table: $c is the column, $rivinro the row number */
            $c = 0;
            $rivinro = 0;
            foreach($trtab2 as $tavu=>$tavut)
            {
                $style = 'font-size:130%'
                        .';padding-left:7px'
                        .';padding-right:7px';
                if($src == 5 || $src==6)
                {
                    /* Dashed borders that outline the groups of five */
                    $style .= ';border-top:1px dashed #002070';
                    if($src == 6)
                    {
                        if(!($c%5)) $style .= ';border-left:1px dashed #002070';
                        if($c == ($rivinro==4 ? 12 : 14)) $style .= ';border-right:1px dashed #002070';
                        if($rivinro > 2)
                            if($c >= ($rivinro==3 ? 13 : 0))
                                $style .= ';border-bottom:1px dashed #002070';
                    }
                    else
                    {
                        if(!($c%5)) $style .= ';border-left:1px dashed #002070';
                        if(($rivinro==4 && $c>=10)
                        || ($rivinro==5 && $c>=8)
                        || ($rivinro==6 && $c>=5)
                        || $rivinro==7) $style .= ';border-bottom:1px dashed #002070';
                        if($c==($rivinro==6 ? 7 : ($rivinro==0||$rivinro==7 ? 4 : ($rivinro==4 ? 14 : 9))))
                            $style .= ';border-right:1px dashed #002070';
                    }
                }

                if(!$c) print '<tr>';
                echo '<td style="', htmlspecialchars($style), '">';
                if($src != 5)
                    echo $tavu, '<br>';
                else
                    echo '<small>', trim($tavu), '</small><br>';

                disptablekana($tavut, $iskatakana);

                print '</td>';
                $c++;

                /* Row length depends on the layout */
                if($src == 5)
                {
                    $max = 10;
                    if($rivinro == 0) $max = 5;
                    elseif($rivinro == 4) $max = 15;
                    elseif($rivinro == 6) $max = 8;
                }
                elseif($src == 6)
                    $max = 15;
                else
                    $max = 10;

                if($c == $max)
                {
                    print '</tr>';
                    $c = 0;
                    $rivinro++;
                }
            }
            print '</tr>';
        }
        print '</table>';

        print '<p>No warranty. Data may be incorrect. If you detect errors, you are encouraged to fix them. See the <a href="hiragana.php?src=1">source code</a>.';
        print '<p>Written by <a href="mailto:bisqwit@iki.fi">Bisqwit</a> (<a href="https://iki.fi/bisqwit/">https://iki.fi/bisqwit/</a>).';
        echo '</body></html>';
        if($dump == 'shodouka') ShodoukaEnd();
        exit;
    }

    case 7:
    {
        /* Sort weight of a syllable name: its position in the reference list
         * (false when the name does not occur in it). */
        function tab7val($s)
        {
            return strpos ('A I U E O N a i u e o KA GA KI GI KU GU KE GE KO GO SA ZA SI ZI SU ZU SE ZE SO ZO TA DA TI DI TU DU TE DE TO DO NA NI NU NE NO HA BA PA HI BI PI HU BU PU HE BE PE HO BO PO MA MI MU ME MO RA RI RU RE RO WA WI WU WE WO YA YI YU YE YO ya yi yu ye yo vu ', $s);
        }

        /* Compare two syllable strings name by name using tab7val().
         * A name missing on one side weighs false, as it did when strpos()
         * was handed the undefined element. */
        function tab7sorter($a, $b)
        {
            $aa = explode(' ', $a);
            $bb = explode(' ', $b);
            $m = count($aa);
            if(count($bb) > $m) $m = count($bb);
            for($c=0; $c<$m; $c++)
            {
                $av = isset($aa[$c]) ? tab7val($aa[$c]) : false;
                $bv = isset($bb[$c]) ? tab7val($bb[$c]) : false;
                if($av < $bv) return -1;
                if($av > $bv) return 1;
            }
            return 0;
        }

        for($iskatakana=0; $iskatakana<=1; $iskatakana++)
        {
            /* Invert the rule table: syllable names => list of romaji spellings */
            $trtab2 = Array();
            foreach($trtab as $r=>$k)
            {
                if(!$iskatakana)
                {
                    /* Spellings left out of the hiragana table */
                    if(false
                    || preg_match('/^.w/', $r)
                    || preg_match('/^c[^h]/', $r)
                    || preg_match('/^[^sc]h/', $r)
                    || preg_match('/^ts[^u]/', $r)
                    || preg_match('/^dy[ou]/', $r)
                    || $r == 'yi'
                    || preg_match('/ y[ei]/', $k)
                    || preg_match('/ [aiueo]/', $k)
                    || preg_match('/^v/', $r)
                      ) continue;
                }
                $trtab2[$k][] = $r;
            }
            $trtab2['DI'][] = 'ji';
            $trtab2['DU'][] = 'zu';

            /* The callback is named by string; a bare word is an undefined constant */
            uksort($trtab2, 'tab7sorter');

            print '<h1>';
            print $iskatakana
                ? 'Katakana (used in foreign words)'
                : 'Hiragana (used in native words)';
            print ' - different ways to romanize.</h1>';
            print '<small>Note that this is not a pronounciation guide for English speakers.</small>';

            /* One buffered run of cells per syllable: the kana, then up to four spellings */
            $rivit = Array();
            foreach($trtab2 as $tavut => $r)
            {
                ob_start();
                print '<td align=left>';
                disptablekana($tavut, $iskatakana);
                print '</td>';

                sort($r);
                if(count($r)==2 && preg_match('/^[ld]/', $r[0]) && preg_match('/^[rzj]/', $r[1]))
                    $r = Array($r[1], $r[0]);
                for($c=0; $c<4; $c++)
                    echo '<td>', (isset($r[$c]) ? $r[$c] : ''), ' </td>';

                $rivi = ob_get_contents();
                ob_end_clean();
                $rivit[] = $rivi;
            }

            /* Lay the rows out in $sarac columns, filled column by column */
            $sarac = 6;
            $add = count($rivit)/$sarac;
            print '<table cellpadding=1 cellspacing=0 width="100%">';
            $lev = 100/$sarac;
            for($c=0; $c<=$add; $c++)
            {
                if(!$c)
                {
                    print '<tr>';
                    for($s=0; $s<$sarac; $s++)
                        echo '<td colspan=6 width="',$lev,'%"> </td>';
                    print '</tr>';
                }
                print '<tr>';
                for($s=0; $s<$sarac; $s++)
                {
                    $r = (int)($c + ($add)*$s);
                    $r2 = (int)( ($add)*($s+1));
                    if($c+1 <= $add || $r2 > $r)
                        print (isset($rivit[$r]) ? $rivit[$r] : '');
                    else
                        print '<td></td><td></td><td></td><td></td><td></td>';
                    print '<td width=50> </td>';
                }
                print '</tr>';
            }
            print '</table>';
        }

        print '<p>No warranty. Data may be incorrect. If you detect errors, you are encouraged to fix them. ' . "\n"
            . 'See the <a href="hiragana.php?src=1">source code</a>.';
        print '<p>Written by <a href="mailto:bisqwit@iki.fi">Bisqwit</a> (<a href="https://iki.fi/bisqwit/">https://iki.fi/bisqwit/</a>).';
        print '<p>See also: <a href="confuse.php">confusing hiragana and katakana</a>';
        echo '</body></html>';
        if($dump == 'shodouka') ShodoukaEnd();
        exit;
    }
}
/*
 * Converter page: the input form, links to the related tools and, when text
 * was submitted, its hiragana and katakana rendering plus a translation link.
 */
include '/WWW/headers.php';

headers('Romaji to hiragana converter');
print '<H1>Romaji to hiragana converter</H1>';
NavBar('');

/* String-safe copies of the request values for the form and the links */
$formtext  = (isset($s) && is_scalar($s)) ? (string)$s : '';
$formspace = (isset($spacet) && is_scalar($spacet)) ? (string)$spacet : '';

echo "Note: Type words 'wa' and 'o' as 'ha' and 'wo', because that's how they are written in japanese.<br>",
     '<form method=GET action="hiragana.php">',
     'Romaji: <input type=text name=s value="', htmlspecialchars($formtext), '"><br>',
     'Word separator: <input type=text size=6 name=spacet value="',htmlspecialchars($formspace), '"><br>',
     'Encoding: <select name=dump>';
foreach(array('unihtml'=>'html &#codes;',
              'iso-2022-jp'=>'iso-2022-jp',
              'euc-jp'=>'euc-JP',
              'shift_jis'=>'shift_jis',
              'text'=>'Syllable codes as ascii',
              'shodouka'=>'<a href="http://web.lfw.org/shodouka/">Shodouka</a>'
             ) as $tmp=>$tmp2)
{
    echo '<option value="', $tmp, '"';
    if($tmp == $dump) echo ' selected';
    echo '>', $tmp2, '</option>';
}
echo '</select><br>',
     '<input type=submit value="Convert">',
     '</form>';

echo "<br>If your text is ENGLISH, try <a href=\"https://bisqwit.iki.fi/source/deeng.html?engtext=". htmlspecialchars(urlencode($formtext))."\">english to romaji converter</a>.<br> If your text is otherwise NOT standard romaji (but Finnish for example), use <a href=\"japthing.php?s=",htmlspecialchars(urlencode($formtext)), "\">older converter</a> instead.<br> Those two also offer katakana.<br> If you want kanji! Use <a href=\"https://bisqwit.iki.fi/japtools/\">the dictionary</a> instead. <p> ";

if(strlen($formtext))
{
    echo '<p style="font-size:130%">', htmlspecialchars($formtext), ": (up:hiragana; down:katakana)<br> <br>\n";

    print '<span style="font-size:250%">';
    tavuta($s, false);
    print '</span>';

    echo '<hr>';

    print '<span style="font-size:250%">';
    $syllables = tavuta($s, true);
    print '</span>';
    echo '</p>';

    /* Build the hiragana text in EUC-JP from the syllable names, then turn it
     * into UTF-8 for the query string.  Names without a table entry are skipped. */
    ob_start();
    foreach($syllables as $syl)
    {
        if($syl == 'yi') $syl = 'i';
        elseif($syl == 'ye') $syl = 'e';
        if($syl && isset($jisHtab[$syl])) eucjpdisp($jisHtab[$syl]);
    }
    /* Best effort: a failed conversion leaves the text parameter empty */
    $kanatext = @iconv('euc-jp', 'utf-8', ob_get_contents());
    ob_end_clean();
    if($kanatext === false) $kanatext = '';

    $url = 'http://babelfish.altavista.com/babelfish/tr?lp=ja_en';
    $url .= '&tt=urltext&doit=done&intl=1&urltext='.urlencode($kanatext);
    echo '<a href="', htmlspecialchars($url), '">',
         'Translate using Babelfish',
         '</a>';
}
?> <p><small>Written by Bisqwit (<a href="https://iki.fi/bisqwit/">https://iki.fi/bisqwit/</a>)</small>
<p><b> <a href="hiragana.php?src=2">Syllable list</a> is also available. It lists all the katakana/hiragana syllables.</b><br> It is available <a href="hiragana.php?src=3">sorted another way</a> too. A <a href="hiragana.php?src=4">shorter list</a> is there too. Some <a href="hiragana.php?src=5">other</a> <a href="hiragana.php?src=6">versions</a> also... </p>
<?php
/* Full open tag: the short form is only parsed when short_open_tag is enabled */
Epilogue();
?>
Disclaimer: The accuracy of the operation of this converter is strictly limited to the progress of my Japanese studies.
<p> <em>If you find errors, please report them to <a href="http://bisqwit.stc.cx/">me</a> via <a href="mailto:bisqwit@iki.fi">email</a>.</em>
<p><a href="hiragana.php?src=1">Source code</a> is available (php). It might interest you, as it is short (~340 lines (well, once was)), but contains hiragana encoding in iso-2022-jp, euc-jp, shift_jis and unihtml (without external programs), plus a quite elegant parsed table for romaji to hiragana conversion. It could be easily extended to support different romajizations, and has a theoretic support for katakana. <p> <br><a href="http://bisqwit.stc.cx/%7Emog/hiragana.gif">These</a> <a href="http://bisqwit.stc.cx/%7Emog/katakana.gif">images</a> could also be helpful. <?php
/* Full open tag: the short form is only parsed when short_open_tag is enabled */
Footers();
if($dump == 'shodouka')ShodoukaEnd();
/* MAIN END */
|