Python: convert file encodings to UTF-8


Usage:

1
python conv.py /path/to/folder

Code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Author: Zeuxis.Lo
#

import sys
import os

class Conv(object):
    """Convert legacy-encoded text files in one folder to UTF-8, in place.

    The folder is taken from the first command-line argument. Only regular
    files directly inside that folder (no recursion) whose path ends in
    ``.cgi`` or ``.txt`` are processed.

    Caveat: the source encoding is guessed by trying ``ENCODINGS`` in order
    and keeping the first one that decodes without error. No check is made
    for input that is already UTF-8, so running the tool twice on the same
    file may re-encode it.
    """

    # Path suffixes that select a file for conversion.
    SUFFIXES = (".cgi", ".txt")

    # Candidate source encodings; the first one that decodes cleanly wins,
    # so the order is significant.
    ENCODINGS = ('windows-1253', 'iso-8859-7', 'cp950', 'big5')

    def __init__(self):
        """Read the target folder from ``sys.argv[1]``.

        Prints a hint and exits with status 0 when no path argument is given.
        """
        if len(sys.argv) < 2:
            print("Please enter path")
            sys.exit(0)
        self.folder = sys.argv[1]

    def run(self):
        """Convert every matching regular file found directly in the folder."""
        for entry in os.listdir(self.folder):
            full_path = os.path.join(self.folder, entry)
            if os.path.isfile(full_path) and full_path.endswith(self.SUFFIXES):
                self.convert(full_path)

    def convert(self, file_path):
        """Rewrite ``file_path`` as UTF-8, guessing its current encoding.

        Progress and failures are reported on stdout. If no candidate
        encoding fits, or the file is empty, the file is left untouched.

        Args:
            file_path: Path of the file to convert in place.
        """
        print("File Path: {0}".format(file_path))

        # Read raw bytes: decoding needs bytes, and binary mode behaves the
        # same on Python 2 and Python 3 (no implicit decoding or newline
        # translation). The context manager closes the handle promptly.
        with open(file_path, 'rb') as source:
            raw_content = source.read()

        decoded_content = None
        for encoding_name in self.ENCODINGS:
            try:
                decoded_content = raw_content.decode(encoding_name)
            except UnicodeDecodeError:
                # This candidate does not fit the bytes; try the next one.
                continue
            print("--> encode {0}".format(encoding_name))
            break
        else:
            # Loop finished without a successful decode.
            print("--> encode Fail")

        # Nothing to write for an undecodable or empty file.
        if not decoded_content:
            return

        try:
            # Encode before opening for writing so that a failure here
            # cannot leave the original file truncated.
            utf8_content = decoded_content.encode("UTF-8")
            with open(file_path, 'wb') as target:
                target.write(utf8_content)
        except (UnicodeError, IOError, OSError):
            print("--> convert Fail")
        else:
            print("--> convert OK")

if __name__ == "__main__":
    # Script entry point: build the converter from the command-line
    # argument, then process the folder.
    converter = Conv()
    converter.run()