/*
 * Old KOI-8 to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.
 *
 *   0x00-0x3F  sentinel -2
 *   0x40-0x5F  lowercase Cyrillic letters, in the encoding's own
 *              (non-alphabetical) order
 *   0x60-0x7F  uppercase Cyrillic letters, same order as the lowercase half
 *
 * NOTE(review): the meaning of the -2 sentinel is defined by the code that
 * consumes this table, which is not visible here.
 */
long oldkoi8tou[128] = {
    /* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x08 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x18 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x20 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x28 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x30 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x38 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x40 */ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    /* 0x48 */ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
    /* 0x50 */ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    /* 0x58 */ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
    /* 0x60 */ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    /* 0x68 */ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
    /* 0x70 */ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    /* 0x78 */ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
};
/*
 * CP866 (DOS Cyrillic) to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.  The values line up with the upper half of
 * the code page, i.e. index i describes byte 0x80 + i.
 *
 *   0x00-0x2F  Cyrillic A..YA (uppercase) and a..pe (lowercase)
 *   0x30-0x5F  shades, box-drawing characters and block elements
 *   0x60-0x6F  Cyrillic er..ya (lowercase)
 *   0x70-0x7F  IO/io, Ukrainian and Belarusian letters, symbols
 *
 * Two entries follow the Unicode Consortium's CP866 mapping rather than the
 * values this table previously carried:
 *   index 0x79 (byte 0xF9): U+2219 BULLET OPERATOR (was U+2022 BULLET)
 *   index 0x7F (byte 0xFF): U+00A0 NO-BREAK SPACE  (was the sentinel -1)
 */
long cp866tou[128] = {
    /* 0x00 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x08 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x10 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x18 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x20 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x28 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x30 */ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    /* 0x38 */ 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    /* 0x40 */ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    /* 0x48 */ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    /* 0x50 */ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    /* 0x58 */ 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    /* 0x60 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x68 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    /* 0x70 */ 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
    /* 0x78 */ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0
};
/*
 * CP1251 (Windows Cyrillic) to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.  The values line up with the upper half of
 * the code page, i.e. index i describes byte 0x80 + i.
 *
 *   0x00-0x3F  punctuation, symbols and non-Russian Cyrillic letters
 *   0x40-0x5F  Cyrillic A..YA (uppercase)
 *   0x60-0x7F  Cyrillic a..ya (lowercase)
 *
 * Index 0x08 (byte 0x88) is U+20AC EURO SIGN, which Windows-1251 assigns to
 * that position; the table previously carried the sentinel -1 there.
 * Index 0x18 (byte 0x98) is unassigned in the code page and keeps -1.
 */
long cp1251tou[128] = {
    /* 0x00 */ 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
    /* 0x08 */ 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
    /* 0x10 */ 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    /* 0x18 */ -1,     0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
    /* 0x20 */ 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
    /* 0x28 */ 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
    /* 0x30 */ 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
    /* 0x38 */ 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
    /* 0x40 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x48 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x50 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x58 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x60 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x68 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x70 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x78 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F
};
/*
 * Macintosh Cyrillic to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.  The values line up with the upper half of
 * the character set, i.e. index i describes byte 0x80 + i.
 *
 *   0x00-0x1F  Cyrillic A..YA (uppercase)
 *   0x20-0x5E  symbols, punctuation and non-Russian Cyrillic letters
 *   0x5F       Cyrillic ya (lowercase), stored apart from the other lowercase
 *   0x60-0x7E  Cyrillic a..yu (lowercase)
 *   0x7F       currency sign
 *
 * The row at index 0x50 (bytes 0xD0-0xD7) holds the dashes, quotation marks,
 * division sign and low double quote of the Mac OS Cyrillic set.  The table
 * previously carried a copy of a CP1251 row (0x00B0, 0x00B1, 0x0406, ...)
 * there, which duplicated characters already present elsewhere in this table
 * and left the dashes and quotes unreachable.
 *
 * NOTE(review): indices 0x35 (U+03BC), 0x46 (U+0394) and 0x4A (U+0020)
 * differ from Apple's published mapping (U+00B5, U+2206, U+00A0).  They look
 * like deliberate look-alike substitutions and are left unchanged -- confirm.
 */
long mactou[128] = {
    /* 0x00 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x08 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x10 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x18 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x20 */ 0x2020, 0x00B0, 0x0490, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x0406,
    /* 0x28 */ 0x00AE, 0x00A9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453,
    /* 0x30 */ 0x221E, 0x00B1, 0x2264, 0x2265, 0x0456, 0x03BC, 0x0491, 0x0408,
    /* 0x38 */ 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040A, 0x045A,
    /* 0x40 */ 0x0458, 0x0405, 0x00AC, 0x221A, 0x0192, 0x2248, 0x0394, 0x00AB,
    /* 0x48 */ 0x00BB, 0x2026, 0x0020, 0x040B, 0x045B, 0x040C, 0x045C, 0x0455,
    /* 0x50 */ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x201E,
    /* 0x58 */ 0x040E, 0x045E, 0x040F, 0x045F, 0x2116, 0x0401, 0x0451, 0x044F,
    /* 0x60 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x68 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x70 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x78 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x00A4
};
/*
 * Alternativnyj Variant (AV) to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.
 *
 *   0x00-0x2F  Cyrillic A..YA (uppercase) and a..pe (lowercase)
 *   0x30-0x5F  shades, box-drawing characters and block elements
 *   0x60-0x6F  Cyrillic er..ya (lowercase)
 *   0x70-0x71  IO / io
 *   0x72-0x75  combining acute/grave accents, below and above the letter
 *   0x76-0x7E  arrows and symbols
 *   0x7F       sentinel -1
 *
 * NOTE(review): the meaning of the -1 sentinel is defined by the code that
 * consumes this table, which is not visible here.
 */
long avtou[128] = {
    /* 0x00 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x08 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x10 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x18 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x20 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x28 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x30 */ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    /* 0x38 */ 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    /* 0x40 */ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    /* 0x48 */ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    /* 0x50 */ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    /* 0x58 */ 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    /* 0x60 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x68 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    /* 0x70 */ 0x0401, 0x0451, 0x0317, 0x0316, 0x0301, 0x0300, 0x2192, 0x2190,
    /* 0x78 */ 0x2193, 0x2191, 0x00F7, 0x00B1, 0x2116, 0x00A4, 0x25A0, -1
};
/* The interpretation of the four symbols following the second
alphabetic block in AV remains unclear. One suggestion was to treat
these as (non-spacing) grave and acute, as appearing above upper- or
lowercase letters, but the graphical rendering in Briabin's original
article makes clear that the distinction is between acute and grave,
above or below the letter: this is what the table now has.
But the preponderance of graphical symbols in AV suggests that the
intention was to provide facilities for character graphics, in which
case the interpretation is simply straight lines connecting two
adjacent midpoints of the bounding box. If the box is the unit
square, these would run from (.5,0) to (0,.5) and to (1,.5), and from
(.5,1) to (0,.5) and to (1,.5), in this order. (The line segments are
of course directionless.) Such symbols are not present in Unicode --
the closest things are 0x25de 0x25df 0x25dc 0x25dd (in this order) but
these are curved, not straight.
Whether the graphics or the accent usage is more prevalent in actual
usage only those plugged into the Russian PC community can tell. If
the graphics usage turns out to be prevalent, these four symbols would
be reasonable candidates for incorporation into Unicode, perhaps at
positions 0x25ef to 0x25f3. */
/*
 * Osnovnoj Variant (OV) to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.
 *
 *   0x00-0x2F  sentinel -2
 *   0x30-0x4F  Cyrillic A..YA (uppercase)
 *   0x50-0x6F  Cyrillic a..ya (lowercase)
 *   0x70-0x71  IO / io
 *   0x72-0x75  combining acute/grave accents, below and above the letter
 *   0x76-0x7E  arrows and symbols
 *   0x7F       sentinel -1
 *
 * NOTE(review): the meanings of the -1 and -2 sentinels are defined by the
 * code that consumes this table, which is not visible here.
 */
long ovtou[128] = {
    /* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x08 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x18 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x20 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x28 */ -2, -2, -2, -2, -2, -2, -2, -2,
    /* 0x30 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x38 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x40 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x48 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x50 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x58 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x60 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x68 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    /* 0x70 */ 0x0401, 0x0451, 0x0317, 0x0316, 0x0301, 0x0300, 0x2192, 0x2190,
    /* 0x78 */ 0x2193, 0x2191, 0x00F7, 0x00B1, 0x2116, 0x00A4, 0x25A0, -1
};
/* The same problem with the interpretation of 242-245 as in AV (these
rows are definitely identical). The low positions of OV are probably
identical to 176-223 in AV... */
/*
 * ISO 8859-5 to Unicode.
 *
 * 128-entry table.  The comment at the start of each row is the array index
 * of the first entry in that row.  The values line up with the upper half of
 * the character set, i.e. index i describes byte 0x80 + i.
 *
 *   0x00-0x1F  sentinel -1
 *   0x20-0x2F  no-break space, IO and other non-Russian uppercase letters,
 *              with the soft hyphen at 0x2D
 *   0x30-0x4F  Cyrillic A..YA (uppercase)
 *   0x50-0x6F  Cyrillic a..ya (lowercase)
 *   0x70-0x7F  numero sign, io and other non-Russian lowercase letters,
 *              with the section sign at 0x7D
 *
 * The last row follows ISO 8859-5: index 0x7B is U+045B (tshe) and index
 * 0x7D is U+00A7 (section sign), mirroring the soft hyphen's slot in the
 * uppercase row.  The table previously had U+00A7 at 0x7B and U+045D at
 * 0x7D, which dropped tshe and moved the section sign two positions early.
 *
 * NOTE(review): the meaning of the -1 sentinel is defined by the code that
 * consumes this table, which is not visible here.
 */
long newkoi8tou[128] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    /* 0x28 */ 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
    /* 0x30 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    /* 0x38 */ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    /* 0x40 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    /* 0x48 */ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    /* 0x50 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    /* 0x58 */ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    /* 0x60 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    /* 0x68 */ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    /* 0x70 */ 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    /* 0x78 */ 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
};
/* Use newkoi8tou in combination with isotoibm to derive the unicode
meaning of the Cyrillic range in the DKOI extension of EBCDIC. If
someone has DKOI-8 text available, I'd love to actually try... */
| Top of page |
ACKNOWLEDGEMENTS
Most of the information was provided by the following:
David J. Birnbaum at djbpitt+@pitt.edu
Bur Davis at bdavis@adobe.com
George Fowler at gfowler@ucs.indiana.edu
Richard B. Paine at RPAINE@CCNODE.Colorado.EDU
Slava Paperno at PAPY@CORNELLA.cit.cornell.edu
Glenn E. Thobe at thobe@getunx.info.com
Dimitri Vulis at DLV@CUNYVMS1.BITNET
Johan W. van Wingen (acknowledged in Dimitri Vulis' posting, but
no netaddress)
Thanks to all who contributed -- I am responsible for the errors that
still remain.
Andras Kornai (andras@calera.com, kornai@csli.stanford.edu)