-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate.py
More file actions
106 lines (86 loc) · 4.56 KB
/
translate.py
File metadata and controls
106 lines (86 loc) · 4.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import click
from lxml import etree
import xlrd, xlwt, mmap
from xml.dom import minidom
# Prefix-to-URI map handed to find() calls below (namespaces=nsmap) so that
# path expressions can address the xml:lang attribute.
nsmap = {"xml": "http://www.w3.org/XML/1998/namespace"}
def XLSDictReader(f, sheet_index=0):
    """Return a generator of dicts, one per data row of an Excel sheet.

    The first row of the sheet supplies the column names; each following
    row is turned into a ``{column name: cell value}`` mapping.

    f -- an open file object for the workbook; its contents are
         memory-mapped read-only through ``f.fileno()``.
    sheet_index -- zero-based index of the sheet to read (default 0).
    """
    contents = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    workbook = xlrd.open_workbook(file_contents=contents)
    worksheet = workbook.sheet_by_index(sheet_index)

    # Column index -> header text, taken from row 0.
    column_names = {}
    for col in range(worksheet.ncols):
        column_names[col] = worksheet.cell_value(0, col)

    def row_as_dict(row):
        # Pair every header with the value found in the same column of `row`.
        return {
            name: worksheet.cell_value(row, col)
            for col, name in column_names.items()
        }

    return (row_as_dict(row) for row in range(1, worksheet.nrows))
def get_text(element):
    """Return ``element.text``, or an empty string when *element* is None.

    The element's text is passed through untouched, so an element that
    exists but has no text still yields whatever ``.text`` holds.
    """
    return "" if element is None else element.text
# Thanks to https://infix.se/2007/02/06/gentlemen-indent-your-xml
def indent(elem, level=0):
    """Ensures the file is still indented with 4 spaces, as all
    the existing codelist files are.

    The tree is modified in place by rewriting whitespace-only ``text``
    and ``tail`` values; text or tails holding real content are left alone.

    elem -- the element whose subtree should be re-indented.
    level -- nesting depth of *elem*; callers normally leave this at 0.
    """
    step = "    "  # one indentation level: 4 spaces, matching the docstring
    i = "\n" + level * step
    if len(elem):
        # Text before the first child: newline plus the children's indent.
        if not elem.text or not elem.text.strip():
            elem.text = i + step
        for child in elem:
            indent(child, level + 1)
            # Each child is followed by a sibling at the same depth...
            if not child.tail or not child.tail.strip():
                child.tail = i + step
        # ...except the last one, which is followed by elem's closing tag
        # and therefore dedents back to elem's own level.
        last = elem[-1]
        if not last.tail or not last.tail.strip():
            last.tail = i
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: just make sure something follows on a new line.
        elem.tail = i
@click.group()
def main():
    # Root click command group. It does nothing itself; the sub-commands
    # in this file attach to it through the @main.command() decorator.
    # (A comment rather than a docstring, so the --help text is unchanged.)
    pass
@main.command()
@click.option('--existing_codelist_filename', help="The filename of the existing XML codelists file, e.g. 'Sector.xml'.")
@click.option('--output_filename', help="The output filename of the new XML codelists file incorporating the translations.")
@click.option('--new_translation_filename', help="The filename of the Excel translation of the existing codelists file.")
@click.option('--lang', help="The language of the translations file in lowercase, e.g. 'fr' for French.")
def merge_translations(existing_codelist_filename, output_filename, new_translation_filename, lang):
    """Merge translations from a provided Excel translations
    file into an existing XML file."""
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments.
    #
    # For every spreadsheet row, the codelist-item with the same code gets
    # its name/description narrative for `lang` created or overwritten.
    # Empty cells are skipped. The result is re-indented and written to
    # `output_filename` as UTF-8.
    #
    # Raises click.ClickException when a spreadsheet code has no matching
    # codelist-item in the XML file.

    def code_as_text(value):
        # xlrd reports numeric cells as floats, so a code typed as 11110
        # arrives as 11110.0; compare using its integer spelling instead.
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return "{}".format(value)

    def set_narrative(item, field, text):
        # Create or update <field>/<narrative xml:lang=lang> under `item`.
        narrative = item.find(
            '{}/narrative[@xml:lang="{}"]'.format(field, lang),
            namespaces=nsmap,
        )
        if narrative is None:
            narrative = etree.SubElement(item.find(field), "narrative")
            narrative.set('{http://www.w3.org/XML/1998/namespace}lang', lang)
        narrative.text = text

    parser = etree.XMLParser(remove_blank_text=True)
    # Binary mode leaves decoding to the XML parser; `with` closes the handle.
    with open(existing_codelist_filename, "rb") as codelist_xml_file:
        codelist_xml = etree.parse(codelist_xml_file, parser)

    # .xls workbooks are binary. Read all rows while the file is still open.
    with open(new_translation_filename, "rb") as translated_file:
        codes = list(XLSDictReader(translated_file))

    for one_code in codes:
        wanted = code_as_text(one_code["code"])
        # The code is passed as an XPath variable rather than formatted into
        # the expression, so quotes in a code cannot break the query.
        matches = codelist_xml.xpath(
            "/codelist/codelist-items/codelist-item[code/text()=$code]",
            code=wanted,
        )
        if not matches:
            raise click.ClickException(
                "Code '{}' from '{}' was not found in '{}'.".format(
                    wanted, new_translation_filename, existing_codelist_filename
                )
            )
        the_code = matches[0]
        if one_code["name"]:
            set_narrative(the_code, 'name', one_code["name"])
        if one_code["description"]:
            set_narrative(the_code, 'description', one_code["description"])

    indent(codelist_xml.getroot())
    # Translations are rarely pure ASCII; write UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    with open(output_filename, 'w', encoding="utf-8") as outf:
        outf.write("{}\n".format(etree.tostring(codelist_xml, encoding="unicode")))
@main.command()
@click.option('--existing_codelist_filename', help="The filename of the existing XML codelists file, e.g. 'Sector.xml'.")
@click.option('--output_filename', help="The output filename of the Excel translations into the desired language.")
@click.option('--lang', help="The language of the translations file in lowercase, e.g. 'fr' for French.")
def generate_translations(existing_codelist_filename, output_filename, lang):
    """Generate an Excel translations file from an existing
    XML file"""
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Writes one header row (code, name, description) followed by one row
    # per codelist-item. The name and description columns hold the text of
    # the narrative whose xml:lang equals `lang`, or an empty string when
    # no such narrative exists.
    parser = etree.XMLParser(remove_blank_text=True)
    # Binary mode leaves decoding to the XML parser; `with` closes the handle.
    with open(existing_codelist_filename, "rb") as codelist_xml_file:
        codelist_xml = etree.parse(codelist_xml_file, parser)

    # The text encoding is a Workbook constructor option; save() accepts
    # only the destination.
    wb = xlwt.Workbook(encoding="utf-8")
    sheet = wb.add_sheet('Sheet 1')
    for column, title in enumerate(('code', 'name', 'description')):
        sheet.write(0, column, title)

    name_path = 'name/narrative[@xml:lang="{}"]'.format(lang)
    description_path = 'description/narrative[@xml:lang="{}"]'.format(lang)
    items = codelist_xml.xpath("/codelist/codelist-items/codelist-item")
    # Row 0 is the header, so data rows start at 1.
    for row, code in enumerate(items, start=1):
        sheet.write(row, 0, get_text(code.find('code')))
        sheet.write(row, 1, get_text(code.find(name_path, namespaces=nsmap)))
        sheet.write(row, 2, get_text(code.find(description_path, namespaces=nsmap)))
    wb.save(output_filename)
# Run the click command group only when this file is executed as a script.
if __name__ == '__main__':
    main()