0

cyrillic_convert.py Python3 refactor

This commit is contained in:
Ben Steadman
2019-03-21 20:11:38 +00:00
parent 7a2b0e6061
commit 4e3a861c9c
3 changed files with 95 additions and 82 deletions

View File

@@ -1,92 +1,93 @@
#!/usr/bin/python #!/usr/bin/python3
"""Convert gibberish back into Cyrillic""" """Convert gibberish back into Cyrillic"""
import fileinput import fileinput
import os import argparse
import sys import sys
usage = """
If you have signs that should be Cyrillic, but are instead gibberish,
this script will convert it back to proper Cyrillic.
usage: python %(script)s <markers.js>
ex. python %(script)s C:\\Inetpub\\www\\map\\markers.js
or %(script)s /srv/http/map/markers.js
""" % {'script': os.path.basename(sys.argv[0])}
if len(sys.argv) < 2:
sys.exit(usage)
gibberish_to_cyrillic = { gibberish_to_cyrillic = {
r"\u00c0": r"\u0410", 'À': 'А',
r"\u00c1": r"\u0411", 'Á': 'Б',
r"\u00c2": r"\u0412", 'Â': 'В',
r"\u00c3": r"\u0413", 'Ã': 'Г',
r"\u00c4": r"\u0414", 'Ä': 'Д',
r"\u00c5": r"\u0415", 'Å': 'Е',
r"\u00c6": r"\u0416", 'Æ': 'Ж',
r"\u00c7": r"\u0417", 'Ç': 'З',
r"\u00c8": r"\u0418", 'È': 'И',
r"\u00c9": r"\u0419", 'É': 'Й',
r"\u00ca": r"\u041a", 'Ê': 'К',
r"\u00cb": r"\u041b", 'Ë': 'Л',
r"\u00cc": r"\u041c", 'Ì': 'М',
r"\u00cd": r"\u041d", 'Í': 'Н',
r"\u00ce": r"\u041e", 'Î': 'О',
r"\u00cf": r"\u041f", 'Ï': 'П',
r"\u00d0": r"\u0420", 'Ð': 'Р',
r"\u00d1": r"\u0421", 'Ñ': 'С',
r"\u00d2": r"\u0422", 'Ò': 'Т',
r"\u00d3": r"\u0423", 'Ó': 'У',
r"\u00d4": r"\u0424", 'Ô': 'Ф',
r"\u00d5": r"\u0425", 'Õ': 'Х',
r"\u00d6": r"\u0426", 'Ö': 'Ц',
r"\u00d7": r"\u0427", '×': 'Ч',
r"\u00d8": r"\u0428", 'Ø': 'Ш',
r"\u00d9": r"\u0429", 'Ù': 'Щ',
r"\u00da": r"\u042a", 'Ú': 'Ъ',
r"\u00db": r"\u042b", 'Û': 'Ы',
r"\u00dc": r"\u042c", 'Ü': 'Ь',
r"\u00dd": r"\u042d", 'Ý': 'Э',
r"\u00de": r"\u042e", 'Þ': 'Ю',
r"\u00df": r"\u042f", 'ß': 'Я',
r"\u00e0": r"\u0430", 'à': 'а',
r"\u00e1": r"\u0431", 'á': 'б',
r"\u00e2": r"\u0432", 'â': 'в',
r"\u00e3": r"\u0433", 'ã': 'г',
r"\u00e4": r"\u0434", 'ä': 'д',
r"\u00e5": r"\u0435", 'å': 'е',
r"\u00e6": r"\u0436", 'æ': 'ж',
r"\u00e7": r"\u0437", 'ç': 'з',
r"\u00e8": r"\u0438", 'è': 'и',
r"\u00e9": r"\u0439", 'é': 'й',
r"\u00ea": r"\u043a", 'ê': 'к',
r"\u00eb": r"\u043b", 'ë': 'л',
r"\u00ec": r"\u043c", 'ì': 'м',
r"\u00ed": r"\u043d", 'í': 'н',
r"\u00ee": r"\u043e", 'î': 'о',
r"\u00ef": r"\u043f", 'ï': 'п',
r"\u00f0": r"\u0440", 'ð': 'р',
r"\u00f1": r"\u0441", 'ñ': 'с',
r"\u00f2": r"\u0442", 'ò': 'т',
r"\u00f3": r"\u0443", 'ó': 'у',
r"\u00f4": r"\u0444", 'ô': 'ф',
r"\u00f5": r"\u0445", 'õ': 'х',
r"\u00f6": r"\u0446", 'ö': 'ц',
r"\u00f7": r"\u0447", '÷': 'ч',
r"\u00f8": r"\u0448", 'ø': 'ш',
r"\u00f9": r"\u0449", 'ù': 'щ',
r"\u00fa": r"\u044a", 'ú': 'ъ',
r"\u00fb": r"\u044b", 'û': 'ы',
r"\u00fc": r"\u044c", 'ü': 'ь',
r"\u00fd": r"\u044d", 'ý': 'э',
r"\u00fe": r"\u044e", 'þ': 'ю',
r"\u00ff": r"\u044f" 'ÿ': 'я'
} }
# str.translate() looks characters up by code point, so re-key the
# character-to-character mapping by ord() of each gibberish character.
trans_table = {ord(k): v for k, v in gibberish_to_cyrillic.items()}
for line in fileinput.FileInput(inplace=1):
for i, j in gibberish_to_cyrillic.iteritems():
line = line.replace(i, j)
sys.stdout.write(line)
def convert(s):
    """Return a copy of ``s`` with gibberish characters turned into Cyrillic.

    Every character that has an entry in ``trans_table`` is replaced by its
    mapped value; all other characters are passed through unchanged.
    """
    cyrillic = s.translate(trans_table)
    return cyrillic
if __name__ == '__main__':
    # Command-line entry point: rewrite the given markers file in place,
    # converting every line with convert().
    description = """
    If you have signs that should be Cyrillic, but are instead gibberish,
    this script will convert it back to proper Cyrillic.
    """
    parser = argparse.ArgumentParser(description=description)
    # The positional is declared as 'file', so argparse stores its value on
    # args.file; metavar only changes the name displayed in the help text.
    parser.add_argument('file', metavar='markers.js')
    args = parser.parse_args()

    # With inplace=True, fileinput redirects standard output into the file
    # being read, so printing each converted line rewrites the file.
    for line in fileinput.input(files=args.file, inplace=True):
        # Each line still carries its own newline; end='' avoids doubling it.
        print(convert(line), end='')

View File

@@ -16,6 +16,7 @@ from test_settings import SettingsTest
from test_tileset import TilesetTest from test_tileset import TilesetTest
from test_cache import TestLRU from test_cache import TestLRU
from test_contributors import TestContributors from test_contributors import TestContributors
from test_cyrillic_convert import TestCyrillicConvert
# DISABLE THIS BLOCK TO GET LOG OUTPUT FROM TILESET FOR DEBUGGING # DISABLE THIS BLOCK TO GET LOG OUTPUT FROM TILESET FOR DEBUGGING
if 0: if 0:

View File

@@ -0,0 +1,11 @@
import unittest
import tempfile
from contrib.cyrillic_convert import convert
class TestCyrillicConvert(unittest.TestCase):
    """Unit tests for the convert() helper of contrib.cyrillic_convert."""

    def test_convert(self):
        # A marker entry whose sign message is gibberish must come back with
        # the message in Cyrillic and every other character untouched.
        garbled = '{chunk: [-2, 0],y: 65,msg: "ðåëèãèè",x: -20,z: 4,type: "sign"}'
        expected = '{chunk: [-2, 0],y: 65,msg: "религии",x: -20,z: 4,type: "sign"}'
        actual = convert(garbled)
        self.assertEqual(actual, expected)