0

cyrillic_convert.py Python3 refactor

This commit is contained in:
Ben Steadman
2019-03-21 20:11:38 +00:00
parent 7a2b0e6061
commit 4e3a861c9c
3 changed files with 95 additions and 82 deletions

View File

@@ -1,92 +1,93 @@
#!/usr/bin/python #!/usr/bin/python3
"""Convert gibberish back into Cyrillic""" """Convert gibberish back into Cyrillic"""
import fileinput import fileinput
import os import argparse
import sys import sys
# Maps each "gibberish" character to the Cyrillic letter it stands for.
# Every key is a code point in U+00C0..U+00FF and its value is the code point
# exactly 0x350 higher (U+0410..U+044F): the 32 capital letters followed by
# the 32 small letters, in alphabetical order.
# NOTE(review): presumably this undoes Windows-1251 text that was decoded as
# Latin-1/CP1252 -- confirm against a real markers.js.
gibberish_to_cyrillic = {
    'À': 'А',
    'Á': 'Б',
    'Â': 'В',
    'Ã': 'Г',
    'Ä': 'Д',
    'Å': 'Е',
    'Æ': 'Ж',
    'Ç': 'З',
    'È': 'И',
    'É': 'Й',
    'Ê': 'К',
    'Ë': 'Л',
    'Ì': 'М',
    'Í': 'Н',
    'Î': 'О',
    'Ï': 'П',
    'Ð': 'Р',
    'Ñ': 'С',
    'Ò': 'Т',
    'Ó': 'У',
    'Ô': 'Ф',
    'Õ': 'Х',
    'Ö': 'Ц',
    '×': 'Ч',
    'Ø': 'Ш',
    'Ù': 'Щ',
    'Ú': 'Ъ',
    'Û': 'Ы',
    'Ü': 'Ь',
    'Ý': 'Э',
    'Þ': 'Ю',
    'ß': 'Я',
    'à': 'а',
    'á': 'б',
    'â': 'в',
    'ã': 'г',
    'ä': 'д',
    'å': 'е',
    'æ': 'ж',
    'ç': 'з',
    'è': 'и',
    'é': 'й',
    'ê': 'к',
    'ë': 'л',
    'ì': 'м',
    'í': 'н',
    'î': 'о',
    'ï': 'п',
    'ð': 'р',
    'ñ': 'с',
    'ò': 'т',
    'ó': 'у',
    'ô': 'ф',
    'õ': 'х',
    'ö': 'ц',
    '÷': 'ч',
    'ø': 'ш',
    'ù': 'щ',
    'ú': 'ъ',
    'û': 'ы',
    'ü': 'ь',
    'ý': 'э',
    'þ': 'ю',
    'ÿ': 'я',
}

# str.translate() needs a table keyed by code point; str.maketrans() converts
# the single-character keys above into their ordinals.
trans_table = str.maketrans(gibberish_to_cyrillic)
def convert(s: str) -> str:
    """Return *s* with every character listed in ``trans_table`` replaced.

    Characters that have no entry in the table are passed through unchanged,
    as ``str.translate`` does for unmapped code points.
    """
    return s.translate(trans_table)
if __name__ == '__main__':
    # argparse's default help formatter collapses whitespace in the
    # description, so the indentation inside this literal is not shown.
    description = """
    If you have signs that should be Cyrillic, but are instead gibberish,
    this script will convert it back to proper Cyrillic.
    """
    parser = argparse.ArgumentParser(description=description)
    # The positional's name is the attribute set on the parsed namespace;
    # metavar only controls how it is displayed in the help text.
    parser.add_argument('markers_file', metavar='markers.js')
    args = parser.parse_args()

    # With inplace=True, fileinput redirects stdout into the file being read,
    # so printing each converted line rewrites the file in place.  Lines keep
    # their own newline, hence end=''.
    # NOTE(review): the file is opened with the default text encoding; on
    # Python 3.10+ fileinput.input() accepts encoding= if markers.js is known
    # to be UTF-8.
    for line in fileinput.input(files=args.markers_file, inplace=True):
        print(convert(line), end='')
# No file argument given: exit, printing the usage text as the exit message.
if len(sys.argv) < 2:
    sys.exit(usage)
# Table of *escaped* text: each key is the six literal characters of an escape
# from \u00c0 through \u00ff, and its value is the escape for the code point
# 0x350 higher (\u0410 through \u044f).  Hex digits are lowercase.
# NOTE(review): presumably these are the escapes as they appear in the text of
# markers.js -- confirm against a real file.
gibberish_to_cyrillic = {
    "\\u{:04x}".format(code): "\\u{:04x}".format(code + 0x350)
    for code in range(0xC0, 0x100)
}
# Rewrite the input files in place: with inplace=1, fileinput redirects
# stdout into the file currently being read, so each line written here
# replaces the line that was just read.
for line in fileinput.FileInput(inplace=1):
    # .items() works on both Python 2 and 3; .iteritems() was removed in 3.
    for i, j in gibberish_to_cyrillic.items():
        line = line.replace(i, j)
    sys.stdout.write(line)

View File

@@ -16,6 +16,7 @@ from test_settings import SettingsTest
from test_tileset import TilesetTest from test_tileset import TilesetTest
from test_cache import TestLRU from test_cache import TestLRU
from test_contributors import TestContributors from test_contributors import TestContributors
from test_cyrillic_convert import TestCyrillicConvert
# DISABLE THIS BLOCK TO GET LOG OUTPUT FROM TILESET FOR DEBUGGING # DISABLE THIS BLOCK TO GET LOG OUTPUT FROM TILESET FOR DEBUGGING
if 0: if 0:

View File

@@ -0,0 +1,11 @@
import unittest
import tempfile
from contrib.cyrillic_convert import convert
class TestCyrillicConvert(unittest.TestCase):
    """Checks that ``convert`` turns gibberish sign text back into Cyrillic."""

    def test_convert(self):
        """Only the non-ASCII characters of the message should change."""
        gibberish = '{chunk: [-2, 0],y: 65,msg: "ðåëèãèè",x: -20,z: 4,type: "sign"}'
        cyrillic = '{chunk: [-2, 0],y: 65,msg: "религии",x: -20,z: 4,type: "sign"}'
        self.assertEqual(convert(gibberish), cyrillic)