# This file is charset2otp.py
#
# (c) 2004. Javier Bezos. License: LPPL.
#
# This file creates otp/ocp files for the charset
# mechanism of the Mem package.
#
# The code below avoids Python-2-only syntax; it is written to run on
# Python 2.6+ as well as Python 3.

import os
import sys


def _table_entries(enc, begin, end):
    """Return the body of the OTP table for bytes ``begin``..``end`` of *enc*.

    Each byte value is decoded with the Python codec *enc* (bytes that the
    codec cannot decode become U+FFFD because of the ``'replace'`` error
    handler) and emitted as ``@"XXXX``.  Entries are separated by ``", "``
    and a line break is inserted before every byte value that is a
    multiple of 8.  The result is an empty string when the range is empty.
    """
    pieces = []
    for code in range(begin, end + 1):
        if code % 8 == 0:
            # Start a new row: drop the blank that trails the previous
            # entry so rows do not end in whitespace.
            if pieces:
                pieces[-1] = pieces[-1].rstrip()
            pieces.append('\n')
        char = bytearray([code]).decode(enc, 'replace')
        pieces.append('@"%04X, ' % ord(char))
    # Strip the ", " that follows the last entry.
    return ''.join(pieces)[:-2]


def writeenc(enc, name=None, begin=None, end=None):
    """Write ``<name>.mtp`` for the 8-bit encoding *enc* and compile it.

    Parameters:
        enc   -- name of the Python codec used to map bytes to Unicode.
        name  -- base name of the generated file and of the table inside
                 it; defaults to *enc*.
        begin -- first byte value placed in the table (default 0x80).
        end   -- last byte value placed in the table (default 0xFF).
                 Passing 0 suppresses both the table and the rule that
                 looks bytes up in it.

    The file is created in the current directory.  Afterwards
    ``python ./mtp2ocp.py <name>`` is run through ``os.system``; its exit
    status is ignored.
    """
    if name is None:
        name = enc
    if begin is None:
        begin = 0x80
    if end is None:
        end = 0xFF

    print('Creating %s' % name)
    table = _table_entries(enc, begin, end)

    # The context manager closes the file even if one of the writes fails.
    with open('%s.mtp' % name, 'w') as out:
        out.write('%% This file is %s.mtp\n'
                  '%%\n'
                  '%% (c) 2004 Javier Bezos. License: LPPL\n'
                  '%% Please, send bug reports and comments to:\n'
                  '%% jbezos at wanadoo dot es\n'
                  '%%\n'
                  '%% It has been generated with the script charset2otp.py\n'
                  '%% and the built-in Unicode data in Python version:\n'
                  '%% %s\n\n' % (name, sys.version))
        out.write('input: 1;\n'
                  'output: 2;\n\n')
        if end != 0:
            out.write('tables:\n\n'
                      '%s[@"%X] = {%s};\n\n' % (name, end - begin + 1, table))
        # This literal is written as is (no %-formatting is applied), so
        # every '%%' below reaches the file as two percent signs.
        # NOTE(review): 'utf8' is declared under "states:" but none of the
        # rules below refers to it -- confirm against the upstream file.
        out.write('states: utf8;\n\n'
                  'expressions:\n\n'
                  '%% Built-in utf-8 parsing, so that we can escape to it,\n'
                  '%% as described below.\n\n'
                  ' @"1B => ;\n'
                  ' @"00-@"7F => \\1;\n'
                  ' (@"C0-@"DF)(@"80-@"BF)\n'
                  ' => #(((\\1-@"C0)*@"40) + (\\2-@"80));\n'
                  ' (@"E0-@"EF)(@"80-@"BF)(@"80-@"BF)\n'
                  ' => #(((\\1-@"E0)*@"1000) + ((\\2-@"80)*@"40) + (\\3-@"80));\n'
                  ' . => @"FFFD;\n\n'
                  '%% Use ("1B) to mark the beginning of a Unicode text\n'
                  '%% thus escaping from the current encoding. 00 means a\n'
                  '%% uft-16 code, 01 a utf-16 text, 02 a utf-8 text. \n'
                  '%% ends the block, too. Note we can be sure "1B is not\n'
                  '%% present in a utf-8 string, but that does not hold for\n'
                  '%% utf-16.\n\n'
                  '@"1B @"00 ^(@"1B)<1,> @"1B => "\\UseMemUnichar{" \\(*+2-1) "}";\n'
                  '@"1B @"01 ^(@"1B)<1,> @"1B => \\(*+2-1);\n'
                  '@"1B @"02 => ;\n\n')
        if end != 0:
            # Bytes inside the table range are translated through the table.
            out.write('@"%X-@"%X => #(%s[\\1 - @"%X]);\n'
                      % (begin, end, name, begin))
        # Every other byte is passed through unchanged.
        out.write('. => \\1;\n')

    # The file is closed before the converter script is asked to read it.
    os.system('python ./mtp2ocp.py %s' % name)
    #os.remove('@%s.otp' % name)


# FIXME (original author's note): wrong when end = FF.
# NOTE(review): the note does not say what goes wrong -- confirm.
writeenc('cp1251')
writeenc('cp1252', end=0x9F)
writeenc('cp1253')
writeenc('cp1256')
writeenc('cp1257')
writeenc('mac_roman', name='macstd')
writeenc('iso8859_1', name='isolat1', end=0)
writeenc('iso8859_2', name='isolat2', begin=0xA0)
# writeenc('iso8859_3', name='isolat3', begin=0xA0)  # Deprecated
writeenc('iso8859_4', name='isolat4', begin=0xA0)
writeenc('iso8859_5', name='isocyr', begin=0xA0)
writeenc('iso8859_6', name='isoara', begin=0xA0)
writeenc('iso8859_7', name='isoell', begin=0xA0)
writeenc('koi8_r', name='koi8ru', begin=0xA0)
writeenc('koi8_u', name='koi8uk', begin=0xA0)
# writeenc('ascii')  # Special case
# writeenc('asmo')   # Not available in Python