diff options
Diffstat (limited to 'util/bin')
-rwxr-xr-x | util/bin/ukrepl | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/util/bin/ukrepl b/util/bin/ukrepl new file mode 100755 index 00000000..9b084027 --- /dev/null +++ b/util/bin/ukrepl @@ -0,0 +1,109 @@ +#!/usr/bin/python2 +# -*- coding: utf-8 -*- +import sys + +wont_change = { ' ' : ' ' , + '\n' : '\n' + } +def fixed_width_replace(char): #f + if char in wont_change: return unicode(char) + else: + try: + if not 32 < ord(char) < 126: raise Exception("not in range") + return unichr(0xFF00 + ord(char)-32) + except: + return char + +cyrillic_dict = { + 'A' : u'А', 'a' : 'а','Ä' : u'Ӓ', 'ä' : u'ӓ', + 'B' : u'В', + 'c' : u'с', + 'E' : u'Е', + 'e' : u'е', + 'H' : u'Н', + 'I' : u'І', 'i' : u'і', + 'j' : u'ј','J' : u'Ј', + 'K' : u'К', + 'M' : u'М', + 'O' : u'О', 'o' : u'о', 'Ö' : u'Ӧ', 'ö' : u'ӧ', + 'P' : u'Р', 'p' : u'р', + 'S' : u'Ѕ', + 'T' : u'г' + } + +def cyrillic_replace(char): #c + return cyrillic_dict.get(char,char) +historic_latin_dict = { + 'B' : u'Ɓ', + 'b' : u'ƅ', + 'u' : u'ư', + 'U' : u'Ư', + '' : 'Ǟ', + #'5' : 'ƽ', + 'o' : 'ơ', + 'O' : 'Ơ', + '5' : 'Ƽ' + } +def historic_latin(char): #H + return historic_latin_dict.get(char,char) +punctuation_dict = { + '!' : u'ǃ', + '\'': u'’', + '\"': u'ˮ', + '(' : u'⟨', + ')' : u'⟩', + ':' : u'ː', + ' ' : u' ', + # all different spaces, made for perfect trolling + #' ' : u' ', + #' ' : u' ', + #' ' : u'', + #'-' : u'‒', + #'-' : u'—', + #'-' : u'―', + #'-' : u'‐', + #'-' : u'⁃', + '-' : u'–', + '_' : u'−', + '~' : u'⁓', + #'~' : u'∼', + #'~' : u'〜', + + } +def punctuation(char): #p + return punctuation_dict.get(char,char) +def helpme(): + print "usage %s [modes]" % sys.argv[0] + print "modes:" + print " c -- cyrillic replace" + print " f -- fixed width" + print " p -- replace punctuation" + print " H -- replace with historic latin chars" + print " h -- this message" + sys.exit(0) + +#parsing happens inside the nested loop +modes = ''.join(sys.argv[1:]) +# sane defaults if no mode given +if not modes : modes = "f" + +if 'h' in modes: helpme() + +for line in sys.stdin: + for char in line: + for mode in modes: + if mode is 'c': + char = cyrillic_replace(char) + elif mode is 'f': + char = fixed_width_replace(char) + elif mode is 'H': + char = historic_latin(char) + elif mode is 'p': + char = punctuation(char) + else: + print "unknown mode %c" % mode + helpme() + try: + sys.stdout.write(char) + except: + sys.stdout.write(char.encode("utf-8")) |