#!/bin/sh
''''exec python3 -s -- "$0" "$@" # '''
#
# Copyright (c) 2010-2018 Shaun McCance <shaunm@gnome.org>
#
# ITS Tool program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# ITS Tool is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with ITS Tool; if not, write to the Free Software Foundation, 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
#
from __future__ import print_function
from __future__ import unicode_literals
# Program version string.
VERSION="2.0.6"
# Data directory configured for this installation; it is added to the search
# path for built-in ITS rule files when XDG_DATA_DIRS is unset.
DATADIR="/snap/gnome-42-2204-sdk/current/usr/share"
import gettext
import hashlib
import libxml2
import optparse
import os
import os.path
import re
import sys
import time
import io
# Python 2/3 compatibility layer.  It provides:
#   string_types -- tuple usable with isinstance() to recognise strings
#   ustr         -- callable that coerces a value to text
#   ustr_type    -- the text type itself
#   pr_str       -- converts a value into something print() can emit
#   uout         -- output stream that accepts text
PY3 = sys.version_info[0] == 3
if PY3:
    string_types = (str,)
    ustr_type = str
    uout = sys.stdout

    def ustr(s, encoding=None):
        """Return *s* as text, decoding with *encoding* when one is given."""
        if isinstance(s, str):
            return s
        if encoding:
            return str(s, encoding)
        return str(s)

    def pr_str(s):
        """Return a string that can be safely print()ed"""
        # print() accepts the value unchanged here, so nothing to convert.
        return s
else:
    import codecs

    string_types = (basestring,)
    ustr_type = unicode
    ustr = unicode
    uout = codecs.getwriter('utf-8')(sys.stdout)

    def pr_str(s):
        """Return a string that can be safely print()ed"""
        if not isinstance(s, str):
            # print may fail on unicode when the output encoding cannot be
            # detected, so always hand it UTF-8 encoded bytes.
            return unicode.encode(s, 'utf-8')
        # A byte string can be printed as it is.
        return s
# XML namespace URIs used throughout this file.
# W3C Internationalization Tag Set (its:rules, its:translateRule, ...).
NS_ITS = 'http://www.w3.org/2005/11/its'
# itstool-specific extension rules and attributes (itst:dropRule, itst:credits, ...).
NS_ITST = 'http://itstool.org/extensions/'
# Namespace bound to the '_' prefix for placeholder elements inside messages.
NS_BLANK = 'http://itstool.org/extensions/blank/'
# XLink, used for the href attribute on its:rules elements.
NS_XLINK = 'http://www.w3.org/1999/xlink'
# The reserved XML namespace (the xml: prefix).
NS_XML = 'http://www.w3.org/XML/1998/namespace'
class NoneTranslations:
    """Translations stand-in whose every lookup returns None.

    It offers the same lookup method names as a gettext translations
    object, so it can be passed wherever one is expected.
    """

    def _no_message(self, message):
        # Singular lookup: nothing is ever translated.
        return None

    def _no_plural(self, msgid1, msgid2, n):
        # Plural lookup: nothing is ever translated.
        return None

    gettext = lgettext = ugettext = _no_message
    ngettext = lngettext = ungettext = _no_plural
class MessageList (object):
    """Ordered collection of messages plus an index from XML node to message."""

    def __init__ (self):
        self._has_credits = False
        self._by_node = {}
        self._messages = []

    def add_message (self, message, node):
        """Append *message*; when *node* is given, index the message by it."""
        self._messages.append(message)
        if node is None:
            return
        self._by_node[node] = message

    def add_credits(self):
        """Append the translator-credits message (only on the first call)."""
        if not self._has_credits:
            credit_msg = Message()
            credit_msg.set_context('_')
            credit_msg.add_text('translator-credits')
            credit_msg.add_comment(Comment('Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2'))
            self._messages.append(credit_msg)
            self._has_credits = True

    def get_message_by_node (self, node):
        """Return the message indexed by *node*, or None."""
        return self._by_node.get(node)

    def get_nodes_with_messages (self):
        """Return a new list of every node that has a message."""
        return list(self._by_node)

    def output (self, out):
        """Write all messages to *out* as a PO template.

        Messages sharing the same (context, string) pair are folded into the
        first occurrence, which receives the later ones' sources, markers,
        comments, id values, preserve-space flag and locale filter.
        """
        unique = []
        first_seen = {}
        for msg in self._messages:
            key = (msg.get_context(), msg.get_string())
            if key not in first_seen:
                unique.append(msg)
                first_seen[key] = msg
                continue
            kept = first_seen[key]
            for source in msg.get_sources():
                kept.add_source(source)
            for marker in msg.get_markers():
                kept.add_marker(marker)
            for comment in msg.get_comments():
                kept.add_comment(comment)
            for idvalue in msg.get_id_values():
                kept.add_id_value(idvalue)
            if msg.get_preserve_space():
                kept.set_preserve_space()
            if msg.get_locale_filter() is not None:
                current = kept.get_locale_filter()
                if current is None:
                    kept.set_locale_filter(msg.get_locale_filter())
                else:
                    # NOTE(review): this stores a plain string, while
                    # Message.format() indexes the filter as a pair;
                    # confirm the intended merged representation.
                    kept.set_locale_filter('%s, %s' % (current, msg.get_locale_filter()))
        header = (
            'msgid ""\n',
            'msgstr ""\n',
            '"Project-Id-Version: PACKAGE VERSION\\n"\n',
            '"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z"),
            '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n',
            '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n',
            '"Language-Team: LANGUAGE <LL@li.org>\\n"\n',
            '"MIME-Version: 1.0\\n"\n',
            '"Content-Type: text/plain; charset=UTF-8\\n"\n',
            '"Content-Transfer-Encoding: 8bit\\n"\n',
            '\n',
        )
        for line in header:
            out.write(line)
        for msg in unique:
            out.write(msg.format())
            out.write('\n')
class Comment (object):
    """A translator comment with an optional list of marker strings."""

    def __init__ (self, text):
        self._text = ustr(text)
        assert text is not None
        self._markers = []

    def add_marker (self, marker):
        """Record *marker* for this comment."""
        self._markers.append(marker)

    def get_markers (self):
        """Return the list of recorded markers."""
        return self._markers

    def get_text (self):
        """Return the comment text."""
        return self._text

    def format (self):
        """Render the comment as '#.' extracted-comment lines.

        Each distinct marker is emitted once.  Multi-line text is emitted
        line by line after skipping leading empty lines; single-line text is
        wrapped at spaces so lines stay within 72 characters where possible.
        """
        pieces = []
        emitted = set()
        for marker in self._markers:
            if marker in emitted:
                continue
            pieces.append('#. (itstool) comment: ' + marker + '\n')
            emitted.add(marker)
        if '\n' in self._text:
            lines = self._text.split('\n')
            # Leading empty lines are dropped; later ones are kept.
            start = 0
            while start < len(lines) and lines[start] == '':
                start += 1
            for line in lines[start:]:
                pieces.append('#. %s\n' % line)
        else:
            remaining = self._text
            while len(remaining) > 72:
                # Prefer the last space inside the limit, else the first
                # space anywhere; with no space at all the text stays whole.
                cut = remaining.rfind(' ', 0, 72)
                if cut == -1:
                    cut = remaining.find(' ')
                    if cut == -1:
                        break
                pieces.append('#. %s\n' % remaining[:cut])
                remaining = remaining[cut + 1:]
            pieces.append('#. %s\n' % remaining)
        return ''.join(pieces)
class Placeholder (object):
    """Pairs an XML node with its element name converted to text."""

    def __init__ (self, node):
        # The name is decoded as UTF-8 when it is not already text.
        self.name = ustr(node.name, 'utf-8')
        self.node = node
class Message (object):
    """One translatable message: its markup string plus PO metadata.

    The message body is kept as a list whose items are either text
    fragments (already XML-escaped markup) or Placeholder objects.  The
    other attributes hold the context, source locations, markers, id
    values, locale filter, comments and the preserve-space flag that
    format() writes out as a PO entry.
    """

    def __init__ (self):
        self._message = []
        self._empty = True
        self._ctxt = None
        self._placeholders = []
        self._sources = []
        self._markers = []
        self._id_values = []
        self._locale_filter = None
        self._comments = []
        self._preserve = False

    def __repr__(self):
        if self._empty:
            return "Empty message"
        return self.get_string()

    def escape (self, text):
        """Escape *text* for use inside a double-quoted PO string."""
        return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")

    def _open_text_fragment (self):
        # Make sure the last item of the body is a text fragment that
        # markup can be appended to.
        if len(self._message) == 0 or not isinstance(self._message[-1], string_types):
            self._message.append('')

    def add_text (self, text):
        """Append character data, escaping XML special characters.

        The message string is later re-parsed as XML, so '&', '<' and '>'
        coming from text content must be written as entities.  Any
        non-whitespace text marks the message as non-empty.
        """
        self._open_text_fragment()
        if not isinstance(text, ustr_type):
            text = ustr(text, 'utf-8')
        self._message[-1] += text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        if re.sub(r'\s+', ' ', text).strip() != '':
            self._empty = False

    def add_entity_ref (self, name):
        """Append an entity reference '&name;' and mark the message non-empty."""
        self._message.append('&' + name + ';')
        self._empty = False

    def add_placeholder (self, node):
        """Append a Placeholder for *node* to the body."""
        holder = Placeholder(node)
        self._placeholders.append(holder)
        self._message.append(holder)

    def get_placeholder (self, name):
        """Return the placeholder whose generated name '<tag>-<n>' equals *name*.

        Numbering starts at 1 and follows insertion order, matching the
        names produced by get_string().  Returns None when nothing matches.
        """
        for index, holder in enumerate(self._placeholders, 1):
            if '%s-%i' % (holder.name, index) == ustr(name, 'utf-8'):
                return holder
        return None

    def add_start_tag (self, node):
        """Append the start tag (or empty-element tag) of *node* with its attributes."""
        self._open_text_fragment()
        if node.ns() is not None and node.ns().name is not None:
            self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
        else:
            self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
        for prop in xml_attr_iter(node):
            name = prop.name
            if prop.ns() is not None:
                name = prop.ns().name + ':' + name
            atval = prop.content
            if not isinstance(atval, ustr_type):
                atval = ustr(atval, 'utf-8')
            # Attribute values sit inside double quotes in XML markup, so
            # the quote character must be escaped as well.
            atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
            # Append to the current text fragment; using += on the list
            # itself would add the attribute one character at a time.
            self._message[-1] += " %s=\"%s\"" % (name, atval)
        if node.children is not None:
            self._message[-1] += '>'
        else:
            self._message[-1] += '/>'

    def add_end_tag (self, node):
        """Append the end tag of *node*; childless nodes were already self-closed."""
        if node.children is not None:
            self._open_text_fragment()
            if node.ns() is not None and node.ns().name is not None:
                self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
            else:
                self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))

    def is_empty (self):
        return self._empty

    def get_context (self):
        return self._ctxt

    def set_context (self, ctxt):
        self._ctxt = ctxt

    def add_source (self, source):
        """Record a source location, converted to text if necessary."""
        if not isinstance(source, ustr_type):
            source = ustr(source, 'utf-8')
        self._sources.append(source)

    def get_sources (self):
        return self._sources

    def add_marker (self, marker):
        """Record a marker, converted to text if necessary."""
        if not isinstance(marker, ustr_type):
            marker = ustr(marker, 'utf-8')
        self._markers.append(marker)

    def get_markers (self):
        return self._markers

    def add_id_value(self, id_value):
        self._id_values.append(id_value)

    def get_id_values(self):
        return self._id_values

    def add_comment (self, comment):
        """Record *comment* unless it is None."""
        if comment is not None:
            self._comments.append(comment)

    def get_comments (self):
        return self._comments

    def get_string (self):
        """Return the message text with placeholders as '<_:name-N/>' elements.

        Unless preserve-space is set, runs of whitespace collapse to one
        space and the result is stripped.
        """
        message = ''
        placeholder = 1
        for msg in self._message:
            if isinstance(msg, string_types):
                message += msg
            elif isinstance(msg, Placeholder):
                message += '<_:%s-%i/>' % (msg.name, placeholder)
                placeholder += 1
        if not self._preserve:
            message = re.sub(r'\s+', ' ', message).strip()
        return message

    def get_preserve_space (self):
        return self._preserve

    def set_preserve_space (self, preserve=True):
        self._preserve = preserve

    def get_locale_filter(self):
        return self._locale_filter

    def set_locale_filter(self, locale):
        self._locale_filter = locale

    def format (self):
        """Return this message as a PO entry with an empty msgstr."""
        ret = ''
        seen_markers = set()
        for marker in self._markers:
            if marker not in seen_markers:
                ret += '#. (itstool) path: ' + marker + '\n'
                seen_markers.add(marker)
        for idvalue in self._id_values:
            ret += '#. (itstool) id: ' + idvalue + '\n'
        if self._locale_filter is not None:
            # The filter is indexed as a pair: item 0 is the locale list,
            # item 1 the filter type.
            ret += '#. (itstool) ' + self._locale_filter[1] + ' locale: ' + self._locale_filter[0] + '\n'
        # Comments with identical text are merged; the first keeps the
        # markers of the later ones.
        comments = []
        commentsdict = {}
        for comment in self._comments:
            key = comment.get_text()
            if key in commentsdict:
                for marker in comment.get_markers():
                    commentsdict[key].add_marker(marker)
            else:
                comments.append(comment)
                commentsdict[key] = comment
        for i, comment in enumerate(comments):
            if i != 0:
                ret += '#.\n'
            ret += comment.format()
        for source in self._sources:
            ret += '#: %s\n' % source
        if self._preserve:
            ret += '#, no-wrap\n'
        if self._ctxt is not None:
            ret += 'msgctxt "%s"\n' % self._ctxt
        message = self.get_string()
        if self._preserve:
            # Multi-line form: one quoted string per line, with an explicit
            # \n on every line except the last.
            ret += 'msgid ""\n'
            lines = message.split('\n')
            last = len(lines) - 1
            for no, line in enumerate(lines):
                if no == last:
                    ret += '"%s"\n' % self.escape(line)
                else:
                    ret += '"%s\\n"\n' % self.escape(line)
        else:
            ret += 'msgid "%s"\n' % self.escape(message)
        ret += 'msgstr ""\n'
        return ret
def xml_child_iter (node):
    """Yield each child of *node*, following the children's ``next`` links."""
    current = node.children
    while True:
        if current is None:
            return
        yield current
        # The link is read only after the consumer resumes the generator.
        current = current.next
def xml_attr_iter (node):
    """Yield each attribute of *node*, starting from ``node.get_properties()``."""
    current = node.get_properties()
    while True:
        if current is None:
            return
        yield current
        current = current.next
def xml_is_ns_name (node, ns, name):
    """Tell whether *node* is an element called *name* in namespace URI *ns*."""
    if node.type != 'element' or node.name != name:
        return False
    node_ns = node.ns()
    if node_ns is None:
        return False
    return node_ns.content == ns
def xml_get_node_path(node):
    """Return a path for *node* with a positional index on every element step.

    The built-in nodePath() method only adds numeric indexes when they are
    needed for disambiguation; this variant always adds them for elements
    whose parent is an element.  Attribute steps get an '@' prefix.
    """
    own_ns = node.ns()
    if own_ns is not None and own_ns.name is not None:
        step = own_ns.name + ':' + node.name
    else:
        step = node.name
    if node.type == 'attribute':
        step = '@' + step
    step = '/' + step
    parent_is_element = node.parent.type == 'element'
    if node.type == 'element' and parent_is_element:
        # Position among preceding element siblings that share both the
        # local name and the namespace prefix (or both have no namespace).
        index = 1
        sibling = node.previousElementSibling()
        while sibling is not None:
            if sibling.name == node.name:
                sibling_ns = sibling.ns()
                if sibling_ns is None:
                    same = own_ns is None
                else:
                    same = own_ns is not None and sibling_ns.name == own_ns.name
                if same:
                    index += 1
            sibling = sibling.previousElementSibling()
        step = '%s[%i]' % (step, index)
    if parent_is_element:
        step = xml_get_node_path(node.parent) + step
    return step
def xml_error_catcher(doc, error):
    """Append *error*, preceded by a space, to ``doc._xml_err``.

    Registered with libxml2 as the error handler, with the Document as the
    first argument.
    """
    fragment = " %s" % error
    doc._xml_err = doc._xml_err + fragment
def fix_node_ns (node, nsdefs):
    """Remove namespace declarations on *node* that repeat an inherited one.

    *nsdefs* maps prefix to URI for the declarations in scope from the
    ancestors.  A declaration on *node* that binds the same prefix to the
    same URI is removed; any other declaration is added to the mapping
    passed down to the element children, which are processed recursively.
    """
    in_scope = nsdefs.copy()
    current = node.nsDefs()
    while current is not None:
        # Fetch the link first: the declaration may be removed below.
        following = current.next
        prefix = current.name
        uri = current.content
        if prefix in nsdefs and nsdefs[prefix] == uri:
            node.removeNsDef(uri)
        else:
            in_scope[prefix] = uri
        current = following
    for child in xml_child_iter(node):
        if child.type != 'element':
            continue
        fix_node_ns(child, in_scope)
class LocNote (object):
    """A localization note, given either as text or as a reference.

    locnote      -- the note text, or None
    locnoteref   -- a reference (link) to the note, or None
    locnotetype  -- 'alert' or 'description'; anything else becomes
                    'description'
    space        -- when true, repr() returns the note text unchanged
                    instead of collapsing whitespace
    """

    def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False):
        self.locnote = locnote
        self.locnoteref = locnoteref
        self.locnotetype = locnotetype
        if self.locnotetype != 'alert':
            self.locnotetype = 'description'
        self._preserve_space=space

    def __repr__(self):
        """Return the note as display text ('' when neither form is set)."""
        if self.locnote is not None:
            if self._preserve_space:
                return self.locnote
            # Raw string: '\s' in a plain literal is an invalid escape.
            return re.sub(r'\s+', ' ', self.locnote).strip()
        if self.locnoteref is not None:
            return '(itstool) link: ' + re.sub(r'\s+', ' ', self.locnoteref).strip()
        return ''
class Document (object):
def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
    """Parse *filename* and collect the ITS rules it carries or links to.

    filename      -- path of the XML document to parse
    messages      -- message container stored as self._msgs
    load_dtd      -- when true, the parser loads the DTD subset
    keep_entities -- when true, the DTD is loaded and entity references
                     are left unexpanded; otherwise entities are replaced

    Writes an error to stderr and exits with status 1 when the file cannot
    be opened, when a linked ITS file cannot be located, or when libxml2
    reported errors during parsing.
    """
    self._xml_err = ''
    # libxml2 errors are accumulated in self._xml_err by xml_error_catcher.
    libxml2.registerErrorHandler(xml_error_catcher, self)
    try:
        ctxt = libxml2.createFileParserCtxt(filename)
    except:
        sys.stderr.write('Error: cannot open XML file %s\n' % filename)
        sys.exit(1)
    ctxt.lineNumbers(1)
    self._load_dtd = load_dtd
    self._keep_entities = keep_entities
    if load_dtd:
        ctxt.loadSubset(1)
    if keep_entities:
        ctxt.loadSubset(1)
        ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
        ctxt.replaceEntities(0)
    else:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    self._filename = filename
    self._doc = ctxt.doc()
    # its:rules elements found in the document, plus the root elements of
    # ITS files linked from them; consumed by apply_local_its_rules().
    self._localrules = []
    def pre_process (node):
        # Recursively walk the tree: expand text-mode XIncludes and gather
        # local and linked ITS rules.
        for child in xml_child_iter(node):
            if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
                if child.nsProp('parse', None) == 'text':
                    child.xincludeProcessTree()
            elif xml_is_ns_name(child, NS_ITS, 'rules'):
                if child.hasNsProp('href', NS_XLINK):
                    # Linked rules file: look next to the document first,
                    # then in each directory of opts.itspath.
                    # NOTE(review): opts is a global defined outside this
                    # method.
                    href = child.nsProp('href', NS_XLINK)
                    fileref = os.path.join(os.path.dirname(filename), href)
                    if not os.path.exists(fileref):
                        if opts.itspath is not None:
                            for pathdir in opts.itspath:
                                fileref = os.path.join(pathdir, href)
                                if os.path.exists(fileref):
                                    break
                    if not os.path.exists(fileref):
                        sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
                        sys.exit(1)
                    hctxt = libxml2.createFileParserCtxt(fileref)
                    hctxt.replaceEntities(1)
                    hctxt.parseDocument()
                    root = hctxt.doc().getRootElement()
                    version = None
                    if root.hasNsProp('version', None):
                        version = root.nsProp('version', None)
                    else:
                        sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                                         os.path.basename(href))
                    # A missing version only warns; an unknown one skips
                    # the file.
                    if version is not None and version not in ('1.0', '2.0'):
                        sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                                         (os.path.basename(href), root.nsProp('version', None)))
                    else:
                        self._localrules.append(root)
                # The embedded rules element itself: take its own version
                # attribute, else the its:version attribute of the root.
                version = None
                if child.hasNsProp('version', None):
                    version = child.nsProp('version', None)
                else:
                    root = child.doc.getRootElement()
                    if root.hasNsProp('version', NS_ITS):
                        version = root.nsProp('version', NS_ITS)
                    else:
                        sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
                if version is not None and version not in ('1.0', '2.0'):
                    sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
                                     version)
                else:
                    self._localrules.append(child)
            pre_process(child)
    pre_process(self._doc)
    try:
        self._check_errors()
    except libxml2.parserError as e:
        sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
        sys.exit(1)
    self._msgs = messages
    # Per-node results of applying ITS rules, keyed by XML node.
    self._its_translate_nodes = {}
    self._its_within_text_nodes = {}
    self._its_locale_filters = {}
    self._its_id_values = {}
    self._its_loc_notes = {}
    self._its_preserve_space_nodes = {}
    self._itst_drop_nodes = {}
    self._itst_contexts = {}
    self._its_lang = {}
    self._itst_lang_attr = {}
    # (target node, itst:credits rule) pair, or None when no rule matched.
    self._itst_credits = None
    self._its_externals = {}
def __del__ (self):
self._doc.freeDoc()
def _check_errors(self):
if self._xml_err:
raise libxml2.parserError(self._xml_err)
def register_its_params(self, xpath, rules, params={}):
    """Register each its:param child of *rules* as an XPath variable.

    A value supplied in *params* under the parameter's name takes
    precedence over the element's own content.
    """
    for child in xml_child_iter(rules):
        if not xml_is_ns_name(child, NS_ITS, 'param'):
            continue
        name = child.nsProp('name', None)
        # The element content is only read when no override was supplied.
        value = params[name] if name in params else child.getContent()
        xpath.xpathRegisterVariable(name, None, value)
def apply_its_rule(self, rule, xpath):
    """Apply one ITS (or itstool extension) rule element to the document.

    rule  -- a child node of an its:rules element; non-element nodes and
             unrecognised rule elements are ignored
    xpath -- XPath context used to evaluate the rule's selectors and
             pointers

    The nodes selected by the rule are recorded in the matching per-node
    dictionary on self (translate, withinText, preserve space, locale
    filter, drop, id value, context, localization notes, language,
    credits, external references).  A later rule overwrites the entry an
    earlier one stored for the same node, except localization notes,
    which accumulate in a list.
    """
    if rule.type != 'element':
        return
    if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                self._its_translate_nodes[node] = rule.nsProp('translate', None)
    elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
    elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
        # itstool extension form: preserveSpace="yes" is stored as 'preserve'.
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                val = rule.nsProp('preserveSpace', None)
                if val == 'yes':
                    self._its_preserve_space_nodes[node] = 'preserve'
    elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
        # Standard form: the value of the space attribute is stored as is.
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
    elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
        if rule.nsProp('selector', None) is not None:
            # Missing attributes default to the list '*' and type 'include'.
            if rule.hasNsProp('localeFilterList', None):
                lst = rule.nsProp('localeFilterList', None)
            else:
                lst = '*'
            if rule.hasNsProp('localeFilterType', None):
                typ = rule.nsProp('localeFilterType', None)
            else:
                typ = 'include'
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                self._its_locale_filters[node] = (lst, typ)
    elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                self._itst_drop_nodes[node] = rule.nsProp('drop', None)
    elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
        sel = rule.nsProp('selector', None)
        idv = rule.nsProp('idValue', None)
        if sel is not None and idv is not None:
            for node in self._try_xpath_eval(xpath, sel):
                # Evaluate idValue relative to the selected node, then
                # restore the previous context node.
                try:
                    oldnode = xpath.contextNode()
                except:
                    oldnode = None
                xpath.setContextNode(node)
                idvalue = self._try_xpath_eval(xpath, idv)
                if isinstance(idvalue, string_types):
                    self._its_id_values[node] = idvalue
                else:
                    # Node-set result: only the first node's content is used.
                    for val in idvalue:
                        self._its_id_values[node] = val.content
                        break
                xpath.setContextNode(oldnode)
        pass
    elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                # A literal context attribute wins over contextPointer.
                if rule.hasNsProp('context', None):
                    self._itst_contexts[node] = rule.nsProp('context', None)
                elif rule.hasNsProp('contextPointer', None):
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
                    if isinstance(ctxt, string_types):
                        self._itst_contexts[node] = ctxt
                    else:
                        # Node-set result: only the first node's content is used.
                        for ctxt in ctxt:
                            self._itst_contexts[node] = ctxt.content
                            break
                    xpath.setContextNode(oldnode)
    elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
        # A note shared by all selected nodes comes from an its:locNote
        # child or, failing that, from the locNoteRef attribute.
        locnote = None
        notetype = rule.nsProp('locNoteType', None)
        for child in xml_child_iter(rule):
            if xml_is_ns_name(child, NS_ITS, 'locNote'):
                locnote = LocNote(locnote=child.content, locnotetype=notetype)
                break
        if locnote is None:
            if rule.hasNsProp('locNoteRef', None):
                locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
        if rule.nsProp('selector', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                if locnote is not None:
                    self._its_loc_notes.setdefault(node, []).append(locnote)
                else:
                    # No shared note: resolve a per-node note through
                    # locNotePointer (text) or locNoteRefPointer (reference).
                    if rule.hasNsProp('locNotePointer', None):
                        sel = rule.nsProp('locNotePointer', None)
                        ref = False
                    elif rule.hasNsProp('locNoteRefPointer', None):
                        sel = rule.nsProp('locNoteRefPointer', None)
                        ref = True
                    else:
                        continue
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    note = self._try_xpath_eval(xpath, sel)
                    if isinstance(note, string_types):
                        if ref:
                            nodenote = LocNote(locnoteref=note, locnotetype=notetype)
                        else:
                            nodenote = LocNote(locnote=note, locnotetype=notetype)
                        self._its_loc_notes.setdefault(node, []).append(nodenote)
                    else:
                        # Node-set result: only the first node is used.
                        for note in note:
                            if ref:
                                nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
                            else:
                                nodenote = LocNote(locnote=note.content, locnotetype=notetype,
                                                   space=self.get_preserve_space(note))
                            self._its_loc_notes.setdefault(node, []).append(nodenote)
                            break
                    xpath.setContextNode(oldnode)
    elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
        if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
            for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                try:
                    oldnode = xpath.contextNode()
                except:
                    oldnode = None
                xpath.setContextNode(node)
                res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
                if len(res) > 0:
                    self._its_lang[node] = res[0].content
                # We need to construct language attributes, not just read
                # language information. Technically, langPointer could be
                # any XPath expression. But if it looks like an attribute
                # accessor, just use the attribute name.
                if rule.nsProp('langPointer', None)[0] == '@':
                    self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
                xpath.setContextNode(oldnode)
    elif xml_is_ns_name(rule, NS_ITST, 'credits'):
        if rule.nsProp('appendTo', None) is not None:
            # Only the first node matched by appendTo is remembered.
            for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
                self._itst_credits = (node, rule)
                break
    elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
          xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
        sel = rule.nsProp('selector', None)
        # The two rule forms name their pointer attribute differently.
        if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
            ptr = rule.nsProp('externalResourceRefPointer', None)
        else:
            ptr = rule.nsProp('refPointer', None)
        if sel is not None and ptr is not None:
            for node in self._try_xpath_eval(xpath, sel):
                try:
                    oldnode = xpath.contextNode()
                except:
                    oldnode = None
                xpath.setContextNode(node)
                res = self._try_xpath_eval(xpath, ptr)
                if len(res) > 0:
                    self._its_externals[node] = res[0].content
                xpath.setContextNode(oldnode)
def apply_its_rules(self, builtins, params={}):
if builtins:
dirs = []
ddir = os.getenv('XDG_DATA_HOME', '')
if ddir == '':
ddir = os.path.join(os.path.expanduser('~'), '.local', 'share')
dirs.append(ddir)
ddir = os.getenv('XDG_DATA_DIRS', '')
if ddir == '':
if DATADIR not in ('/snap/gnome-42-2204-sdk/current/usr/local/share', '/snap/gnome-42-2204-sdk/current/usr/share'):
ddir += DATADIR + ':'
ddir += '/snap/gnome-42-2204-sdk/current/usr/local/share:/snap/gnome-42-2204-sdk/current/usr/share'
dirs.extend(ddir.split(':'))
ddone = {}
for ddir in dirs:
itsdir = os.path.join(ddir, 'itstool', 'its')
if not os.path.exists(itsdir):
continue
for dfile in os.listdir(itsdir):
if dfile.endswith('.its'):
if not ddone.get(dfile, False):
self.apply_its_file(os.path.join(itsdir, dfile), params=params)
ddone[dfile] = True
self.apply_local_its_rules(params=params)
def apply_its_file(self, filename, params={}):
    """Apply the rules of the external ITS file *filename* to the document.

    The file is ignored when its root element is not its:rules, when its
    version attribute is present but neither '1.0' nor '2.0', or when it
    has itst:match elements and none of their selectors matches this
    document.  Otherwise every child element of the root is applied via
    apply_its_rule(), with *params* overriding its:param defaults.
    """
    def new_context(start):
        # Create an XPath context on the target document and register each
        # prefixed namespace in scope at *start*.  The walk goes from
        # *start* up through its ancestors, so the innermost declaration
        # of a prefix wins.
        context = self._doc.xpathNewContext()
        seen = set()
        par = start
        while par is not None:
            nsdef = par.nsDefs()
            while nsdef is not None:
                if nsdef.name is not None and nsdef.name not in seen:
                    seen.add(nsdef.name)
                    context.xpathRegisterNs(nsdef.name, nsdef.content)
                nsdef = nsdef.next
            par = par.parent
        return context

    doc = libxml2.parseFile(filename)
    root = doc.getRootElement()
    if not xml_is_ns_name(root, NS_ITS, 'rules'):
        # No rule node from this file is retained, so it can be freed.
        doc.freeDoc()
        return
    version = None
    if root.hasNsProp('version', None):
        version = root.nsProp('version', None)
    else:
        sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
                         os.path.basename(filename))
    if version is not None and version not in ('1.0', '2.0'):
        sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
                         (os.path.basename(filename), root.nsProp('version', None)))
        doc.freeDoc()
        return
    # Without any itst:match element the file always applies; otherwise at
    # least one selector has to select something in this document.
    matched = True
    for match in xml_child_iter(root):
        if xml_is_ns_name(match, NS_ITST, 'match'):
            matched = False
            xpath = new_context(match)
            if match.hasNsProp('selector', None):
                if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
                    matched = True
                    break
    if not matched:
        doc.freeDoc()
        return
    # The parsed file is deliberately not freed below: apply_its_rule() may
    # keep a reference to a rule node (the itst:credits rule).
    for rule in xml_child_iter(root):
        if rule.type != 'element':
            # apply_its_rule() ignores non-element nodes anyway.
            continue
        # Namespaces are resolved from the rule itself, so prefixes declared
        # on the rule element are available to its selectors.
        xpath = new_context(rule)
        self.register_its_params(xpath, root, params=params)
        self.apply_its_rule(rule, xpath)
def apply_local_its_rules(self, params={}):
for rules in self._localrules:
def reg_ns(xpath, node):
if node.parent is not None:
reg_ns(xpath, node.parent)
nsdef = node.nsDefs()
while nsdef is not None:
if nsdef.name is not None:
xpath.xpathRegisterNs(nsdef.name, nsdef.content)
nsdef = nsdef.next
xpath = self._doc.xpathNewContext()
reg_ns(xpath, rules)
self.register_its_params(xpath, rules, params=params)
for rule in xml_child_iter(rules):
if rule.type != 'element':
continue
if rule.nsDefs() is not None:
rule_xpath = self._doc.xpathNewContext()
reg_ns(rule_xpath, rule)
self.register_its_params(rule_xpath, rules, params=params)
else:
rule_xpath = xpath
self.apply_its_rule(rule, rule_xpath)
def _append_credits(self, parent, node, trdata):
    """Instantiate the credits template *node* under *parent*.

    trdata is a tuple (name, email, years), extended with a fourth item,
    the current year, while inside an itst:for-each over 'years'.
    itst:value-of inserts the selected field as text; any other node is
    copied and its children are processed recursively.
    """
    if xml_is_ns_name(node, NS_ITST, 'for-each'):
        if node.nsProp('select', None) == 'years':
            for year in trdata[2].split(','):
                with_year = trdata + (year.strip(),)
                for child in xml_child_iter(node):
                    self._append_credits(parent, child, with_year)
        return
    if xml_is_ns_name(node, NS_ITST, 'value-of'):
        select = node.nsProp('select', None)
        positions = {'name': 0, 'email': 1, 'years': 2}
        val = None
        if select in positions:
            val = trdata[positions[select]]
        elif select == 'year' and len(trdata) == 4:
            val = trdata[3]
        if val is not None:
            if not PY3:
                val = val.encode('utf-8')
            parent.addContent(val)
        return
    newnode = node.copyNode(2)
    parent.addChild(newnode)
    for child in xml_child_iter(node):
        self._append_credits(newnode, child, trdata)
def merge_credits(self, translations, language, node):
if self._itst_credits is None:
return
# Dear Python, please implement pgettext.
# http://bugs.python.org/issue2504
# Sincerely, Shaun
trans = translations.ugettext('_\x04translator-credits')
if trans is None or trans == 'translator-credits':
return
regex = re.compile('(.*) \<(.*)\>, (.*)')
for credit in trans.split('\n'):
match = regex.match(credit)
if not match:
continue
trdata = match.groups()
for node in xml_child_iter(self._itst_credits[1]):
self._append_credits(self._itst_credits[0], node, trdata)
def join_translations(self, translations, node=None, strict=False):
is_root = False
if node is None:
is_root = True
self.generate_messages(comments=False)
node = self._doc.getRootElement()
if node is None or node.type != 'element':
return
if self.get_itst_drop(node) == 'yes':
prev = node.prev
node.unlinkNode()
node.freeNode()
if prev is not None and prev.isBlankNode():
prev.unlinkNode()
prev.freeNode()
return
msg = self._msgs.get_message_by_node(node)
if msg is None:
self.translate_attrs(node, node)
children = [child for child in xml_child_iter(node)]
for child in children:
self.join_translations(translations, node=child, strict=strict)
else:
prevnode = None
if node.prev is not None and node.prev.type == 'text':
prevtext = node.prev.content
if re.sub('\s+', '', prevtext) == '':
prevnode = node.prev
for lang in sorted(list(translations.keys()), reverse=True):
locale = self.get_its_locale_filter(node)
lmatch = match_locale_list(locale[0], lang)
if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch):
continue
newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
if newnode != node:
newnode.setProp('xml:lang', lang)
node.addNextSibling(newnode)
if prevnode is not None:
node.addNextSibling(prevnode.copyNode(0))
if is_root:
# Because of the way we create nodes and rewrite the document,
# we end up with lots of redundant namespace definitions. We
# kill them off in one fell swoop at the end.
fix_node_ns(node, {})
self._check_errors()
def merge_translations(self, translations, language, node=None, strict=False):
    """Replace message nodes in the document with their translations.

    translations -- translations object used for lookups
    language     -- target language code; used for locale filtering and
                    for the language attribute on the root (may be None)
    node         -- subtree to process; None means the whole document, in
                    which case messages are generated first
    strict       -- passed to get_translated()
    """
    is_root = False
    if node is None:
        is_root = True
        self.generate_messages(comments=False)
        node = self._doc.getRootElement()
    if node is None or node.type != 'element':
        return
    # Decide whether the locale filter excludes this node for *language*.
    drop = False
    locale = self.get_its_locale_filter(node)
    if locale[1] == 'include':
        if locale[0] != '*':
            if not match_locale_list(locale[0], language):
                drop = True
    elif locale[1] == 'exclude':
        if match_locale_list(locale[0], language):
            drop = True
    if self.get_itst_drop(node) == 'yes' or drop:
        # Remove the node, and a blank node directly before it.
        prev = node.prev
        node.unlinkNode()
        node.freeNode()
        if prev is not None and prev.isBlankNode():
            prev.unlinkNode()
            prev.freeNode()
        return
    if is_root:
        self.merge_credits(translations, language, node)
    msg = self._msgs.get_message_by_node(node)
    if msg is None:
        # Not a message itself: translate attributes and recurse.  The
        # children are snapshotted because recursion may replace them.
        self.translate_attrs(node, node)
        children = [child for child in xml_child_iter(node)]
        for child in children:
            self.merge_translations(translations, language, node=child, strict=strict)
    else:
        newnode = self.get_translated(node, translations, strict=strict, lang=language)
        if newnode != node:
            self.translate_attrs(node, newnode)
            node.replaceNode(newnode)
    if is_root:
        # Apply language attributes to untranslated nodes. We don't do
        # this before processing, because then these attributes would
        # be copied into the new nodes. We apply the attribute without
        # checking whether it was translated, because any that were will
        # just be floating around, unattached to a document.
        for lcnode in self._msgs.get_nodes_with_messages():
            attr = self._itst_lang_attr.get(lcnode)
            if attr is None:
                continue
            # The original language is the one recorded for the node or
            # for its nearest ancestor that has one.
            origlang = None
            lcpar = lcnode
            while lcpar is not None:
                origlang = self._its_lang.get(lcpar)
                if origlang is not None:
                    break
                lcpar = lcpar.parent
            if origlang is not None:
                lcnode.setProp(attr, origlang)
        # And then set the language attribute on the root node.
        if language is not None:
            attr = self._itst_lang_attr.get(node)
            if attr is not None:
                node.setProp(attr, language)
        # Because of the way we create nodes and rewrite the document,
        # we end up with lots of redundant namespace definitions. We
        # kill them off in one fell swoop at the end.
        fix_node_ns(node, {})
        self._check_errors()
def translate_attrs(self, oldnode, newnode):
    """Set translated values on *newnode* for *oldnode*'s translatable attributes.

    An attribute is translatable when an ITS rule recorded translate='yes'
    for it.  An attribute with no non-empty translation is left as it is.
    """
    # NOTE(review): translations is not a parameter here; the name resolves
    # to a module-level global that must exist when this runs. Confirm
    # against the callers.
    # Collect first: newnode may be oldnode, and setProp could disturb the
    # attribute list while it is being walked.
    translatable = []
    for attr in xml_attr_iter(oldnode):
        if self._its_translate_nodes.get(attr, 'no') == 'yes':
            translatable.append(attr)
    for attr in translatable:
        source = attr.get_content()
        if not PY3:
            source = source.decode('utf-8')
        translated = translations.ugettext(source)
        if not translated:
            continue
        if not PY3:
            translated = translated.encode('utf-8')
        newnode.setProp(attr.name, translated)
def get_translated (self, node, translations, strict=False, lang=None):
    """Return a translated copy of *node*, or *node* itself when unavailable.

    node         -- node that may have a message recorded for it
    translations -- translations object used for the lookup
    strict       -- when true, re-raise merge errors instead of warning
    lang         -- language code, only used in warning messages

    The translation is parsed as an XML fragment wrapped in an element
    named like *node*, placeholder elements are replaced by (translated)
    copies of the nodes they stand for, and the result is attached to a
    shallow copy of *node*.
    """
    msg = self._msgs.get_message_by_node(node)
    if msg is None:
        return node
    msgstr = msg.get_string()
    # Dear Python, please implement pgettext.
    # http://bugs.python.org/issue2504
    # Sincerely, Shaun
    if msg.get_context() is not None:
        msgstr = msg.get_context() + '\x04' + msgstr
    trans = translations.ugettext(msgstr)
    if trans is None:
        return node
    # Collect every namespace declaration in scope at the node (outermost
    # first, so inner ones override) to declare them on the wrapper.
    nss = {}
    def reg_ns(node, nss):
        if node.parent is not None:
            reg_ns(node.parent, nss)
        nsdef = node.nsDefs()
        while nsdef is not None:
            nss[nsdef.name] = nsdef.content
            nsdef = nsdef.next
    reg_ns(node, nss)
    # Placeholders in messages are written with the '_' prefix.
    nss['_'] = NS_BLANK
    # Start the fragment with the document's internal subset when it can
    # be serialized.
    try:
        blurb = node.doc.intSubset().serialize('utf-8')
    except Exception:
        blurb = ''
    blurb += '<' + ustr(node.name, 'utf-8')
    for nsname in list(nss.keys()):
        if nsname is None:
            blurb += ' xmlns="%s"' % nss[nsname]
        else:
            blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
    blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8'))
    if not PY3:
        blurb = blurb.encode('utf-8')
    # Parse with the same DTD/entity settings as the main document.
    ctxt = libxml2.createDocParserCtxt(blurb)
    if self._load_dtd:
        ctxt.loadSubset(1)
    if self._keep_entities:
        ctxt.loadSubset(1)
        ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
        ctxt.replaceEntities(0)
    else:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    trnode = ctxt.doc().getRootElement()
    try:
        self._check_errors()
    except libxml2.parserError:
        if strict:
            raise
        else:
            sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
                    (lang + ' ') if lang is not None else '',
                    msgstr)))
            self._xml_err = ''
            # NOTE(review): unlike the failure path after scan_node below,
            # the temporary document is not freed here; confirm whether
            # that is intended.
            return node
    def scan_node(node):
        # Replace each placeholder element in the parsed translation with
        # the node it stands for, then descend into element children.
        children = [child for child in xml_child_iter(node)]
        for child in children:
            if child.type != 'element':
                continue
            if child.ns() is not None and child.ns().content == NS_BLANK:
                ph_node = msg.get_placeholder(child.name).node
                if self.has_child_elements(ph_node):
                    # Translate inside the original node, then copy it in.
                    self.merge_translations(translations, None, ph_node, strict=strict)
                    newnode = ph_node.copyNode(1)
                    newnode.setTreeDoc(self._doc)
                    child.replaceNode(newnode)
                else:
                    repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
                    child.replaceNode(repl)
            scan_node(child)
    try:
        scan_node(trnode)
    except:
        if strict:
            raise
        else:
            sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
                (lang + ' ') if lang is not None else '',
                msgstr)))
            self._xml_err = ''
            ctxt.doc().freeDoc()
            return node
    # Build the result: a copy of the node made with copyNode(2), plus
    # copies of the translated children, all moved into the main document.
    retnode = node.copyNode(2)
    retnode.setTreeDoc(self._doc)
    for child in xml_child_iter(trnode):
        newnode = child.copyNode(1)
        newnode.setTreeDoc(self._doc)
        retnode.addChild(newnode)

    ctxt.doc().freeDoc()
    return retnode
def generate_messages(self, comments=True):
    """Extract the messages of the whole document into self._msgs.

    When self._itst_credits is set, the message list is first asked to add
    its credits entry.  Extraction then starts at the first element found
    among the document's children; nothing happens if there is none.
    comments is forwarded to generate_message().
    """
    if self._itst_credits is not None:
        self._msgs.add_credits()
    # Only the first element child of the document is processed.
    first_element = next(
        (kid for kid in xml_child_iter(self._doc) if kid.type == 'element'),
        None)
    if first_element is not None:
        self.generate_message(first_element, None, comments=comments)
def generate_message (self, node, msg, comments=True, path=None):
# Recursively turn node and its descendants into translatable messages.
#
# msg is the message the caller is currently building (None when there is
# none); text, entity references, inline tags and placeholders are appended
# to it.  An element that starts a new unit gets a fresh Message, which is
# registered in self._msgs once its subtree has been processed.  comments
# enables attaching localization notes.  path is the '/'-joined chain of
# within-text element names and is only used in comment markers.
if node.type in ('text', 'cdata') and msg is not None:
msg.add_text(node.content)
return
# NOTE(review): msg can be None here (None is passed down below an
# untranslatable element), in which case add_entity_ref would fail -
# confirm whether a guard is needed.
if node.type == 'entity_ref':
msg.add_entity_ref(node.name);
if node.type != 'element':
return
# Elements dropped via itst:drop="yes" or a drop rule produce nothing.
if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
return
if self._itst_drop_nodes.get(node, 'no') == 'yes':
return
# Skip elements whose locale filter includes nothing or excludes everything.
locfil = self.get_its_locale_filter(node)
if locfil == ('', 'include') or locfil == ('*', 'exclude'):
return
if path is None:
path = ''
translate = self.get_its_translate(node)
withinText = False
# An untranslatable element becomes a placeholder in the enclosing message
# (if any) and its subtree is visited without a current message.
if translate == 'no':
if msg is not None:
msg.add_placeholder(node)
is_unit = False
msg = None
else:
# A node is a unit when there is no enclosing message or when ITS does
# not mark it as within text.
is_unit = msg is None or self.is_translation_unit(node)
if is_unit:
# The new unit is represented by a placeholder in the enclosing message.
if msg is not None:
msg.add_placeholder(node)
msg = Message()
# Context: the itst:context attribute wins over a context set by rules.
ctxt = None
if node.hasNsProp('context', NS_ITST):
ctxt = node.nsProp('context', NS_ITST)
if ctxt is None:
ctxt = self._itst_contexts.get(node)
if ctxt is not None:
msg.set_context(ctxt)
# Id values are recorded as '<file basename>#<id>'.
idvalue = self.get_its_id_value(node)
if idvalue is not None:
basename = os.path.basename(self._filename)
msg.add_id_value(basename + '#' + idvalue)
if self.get_preserve_space(node):
msg.set_preserve_space()
# Only a non-default locale filter is stored on the message.
if self.get_its_locale_filter(node) != ('*', 'include'):
msg.set_locale_filter(self.get_its_locale_filter(node))
msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8')))
else:
# Within-text element: it stays inline in the enclosing message.
withinText = True
msg.add_start_tag(node)
if not withinText:
# Add msg for translatable node attributes
for attr in xml_attr_iter(node):
if self._its_translate_nodes.get(attr, 'no') == 'yes':
attr_msg = Message()
if self.get_preserve_space(attr):
attr_msg.set_preserve_space()
attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name))
attr_msg.add_text(attr.content)
if comments:
for locnote in self.get_its_loc_notes(attr):
comment = Comment(locnote)
comment.add_marker ('%s/%s@%s' % (
node.parent.name, node.name, attr.name))
attr_msg.add_comment(comment)
self._msgs.add_message(attr_msg, attr)
# Attach localization notes to the current message.  Starting at node, the
# loop moves to the parent until some node yields a note; when the node is
# not a unit it stops after the first node.  Notes collected for a
# within-text element carry a marker built from path.
if comments and msg is not None:
cnode = node
while cnode is not None:
hasnote = False
for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)):
comment = Comment(locnote)
if withinText:
comment.add_marker('.%s/%s' % (path, cnode.name))
msg.add_comment(comment)
hasnote = True
if hasnote or not is_unit:
break
cnode = cnode.parent
# External resource references on the element and on each attribute get
# their own messages.
self.generate_external_resource_message(node)
for attr in xml_attr_iter(node):
self.generate_external_resource_message(attr)
# NOTE(review): msg is None when translate == 'no', so an attribute with
# an id value on such an element would fail here - confirm.
idvalue = self.get_its_id_value(attr)
if idvalue is not None:
basename = os.path.basename(self._filename)
msg.add_id_value(basename + '#' + idvalue)
if withinText:
path = path + '/' + node.name
for child in xml_child_iter(node):
self.generate_message(child, msg, comments=comments, path=path)
# NOTE(review): translate is a string here, so this test is true for any
# non-empty value including 'no' - confirm that this is intended.
if translate:
if is_unit and not msg.is_empty():
self._msgs.add_message(msg, node)
elif msg is not None:
msg.add_end_tag(node)
def generate_external_resource_message(self, node):
    """Register a message that tracks an external resource referenced by node.

    Does nothing unless node has an entry in self._its_externals, and
    nothing when the owning element (node itself, or the parent when node
    is not an element) resolves to translate == 'no'.  Otherwise a message
    with context '_' and text "external ref='<ref>' md5='<hash>'" is added
    to self._msgs.  The hash is the MD5 of the referenced file, resolved
    relative to the directory of self._filename; if the file cannot be
    read the hash is the literal '__failed__'.
    """
    if node not in self._its_externals:
        return
    resref = self._its_externals[node]
    if node.type == 'element':
        translate = self.get_its_translate(node)
        marker = '%s/%s' % (node.parent.name, node.name)
    else:
        translate = self.get_its_translate(node.parent)
        marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name)
    if translate == 'no':
        return
    msg = Message()
    try:
        fullfile = os.path.join(os.path.dirname(self._filename), resref)
        # The with-block closes the file even when read() fails.
        with open(fullfile, 'rb') as filefp:
            filemd5 = hashlib.md5(filefp.read()).hexdigest()
    except Exception:
        # Best effort: a missing or unreadable resource must not abort extraction.
        filemd5 = '__failed__'
    txt = "external ref='%s' md5='%s'" % (resref, filemd5)
    msg.set_context('_')
    msg.add_text(txt)
    msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
    msg.add_marker(marker)
    msg.add_comment(Comment('This is a reference to an external file such as an image or'
                            ' video. When the file changes, the md5 hash will change to'
                            ' let you know you need to update your localized copy. The'
                            ' msgstr is not used at all. Set it to whatever you like'
                            ' once you have updated your copy of the file.'))
    self._msgs.add_message(msg, None)
def is_translation_unit (self, node):
    """Return True unless node's ITS withinText value is 'yes'."""
    within_text = self.get_its_within_text(node)
    return not (within_text == 'yes')
def has_child_elements(self, node):
    """Return the number of element children of node (0 when it has none)."""
    element_count = 0
    for kid in xml_child_iter(node):
        if kid.type == 'element':
            element_count += 1
    return element_count
def get_preserve_space (self, node):
    """Return True when whitespace must be preserved for node.

    Walks from node up through its ancestors for as long as they are
    attributes or elements.  The first node that reports
    getSpacePreserve() == 1 yields True; the first node with an entry in
    self._its_preserve_space_nodes decides the result (True only for
    'preserve').  Returns False when the walk ends without a decision.
    """
    rules = self._its_preserve_space_nodes
    current = node
    while current.type in ('attribute', 'element'):
        if current.getSpacePreserve() == 1:
            return True
        if current in rules:
            return rules[current] == 'preserve'
        current = current.parent
    return False
def get_its_translate(self, node):
    """Return the ITS translate value in effect for node.

    For each node examined the value is taken, in this order, from the
    its:translate attribute, from an unprefixed translate attribute on an
    its:span element, or from self._its_translate_nodes.  Without a value,
    an attribute yields 'no'; an element defers to its parent element;
    and a node whose parent is not an element yields 'yes'.
    """
    current = node
    while True:
        if current.hasNsProp('translate', NS_ITS):
            value = current.nsProp('translate', NS_ITS)
        elif (xml_is_ns_name(current, NS_ITS, 'span')
              and current.hasNsProp('translate', None)):
            value = current.nsProp('translate', None)
        else:
            value = self._its_translate_nodes.get(current)
        if value is not None:
            return value
        if current.type == 'attribute':
            return 'no'
        if current.parent.type != 'element':
            return 'yes'
        # Nothing set here: inherit from the parent element.
        current = current.parent
def get_its_within_text(self, node):
    """Return the ITS withinText value for node: 'yes', 'nested' or 'no'.

    A local its:withinText attribute (or an unprefixed withinText on an
    its:span element) is used when present, with any value other than
    'yes' or 'nested' mapped to 'no'.  Otherwise the value recorded in
    self._its_within_text_nodes is returned, defaulting to 'no'.
    """
    if node.hasNsProp('withinText', NS_ITS):
        attr_ns = NS_ITS
    elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None):
        attr_ns = None
    else:
        return self._its_within_text_nodes.get(node, 'no')
    local_value = node.nsProp('withinText', attr_ns)
    return local_value if local_value in ('yes', 'nested') else 'no'
def get_its_locale_filter(self, node):
    """Return the (localeFilterList, localeFilterType) pair in effect for node.

    Sources are tried in this order: its:localeFilter* attributes on node,
    unprefixed localeFilter* attributes when node is an its:span element,
    the entry for node in self._its_locale_filters, and finally the value
    of the parent element.  When only one of the two attributes is present
    the list defaults to '*' and the type to 'include'.  The overall
    default is ('*', 'include').
    """
    def local_filter(attr_ns):
        # Read both attributes in namespace attr_ns; None if neither is set.
        has_list = node.hasNsProp('localeFilterList', attr_ns)
        has_type = node.hasNsProp('localeFilterType', attr_ns)
        if not (has_list or has_type):
            return None
        lst = node.nsProp('localeFilterList', attr_ns) if has_list else '*'
        typ = node.nsProp('localeFilterType', attr_ns) if has_type else 'include'
        return (lst, typ)

    found = local_filter(NS_ITS)
    if found is None and xml_is_ns_name(node, NS_ITS, 'span'):
        found = local_filter(None)
    if found is not None:
        return found
    if node in self._its_locale_filters:
        return self._its_locale_filters[node]
    if node.parent.type == 'element':
        return self.get_its_locale_filter(node.parent)
    return ('*', 'include')
def get_itst_drop(self, node):
    """Return 'yes' when node is dropped, otherwise 'no'.

    A node is dropped when it carries itst:drop="yes" or when
    self._itst_drop_nodes records 'yes' for it.
    """
    dropped_locally = (node.hasNsProp('drop', NS_ITST)
                       and node.nsProp('drop', NS_ITST) == 'yes')
    if dropped_locally or self._itst_drop_nodes.get(node, 'no') == 'yes':
        return 'yes'
    return 'no'
def get_its_id_value(self, node):
    """Return node's id value: its xml:id attribute if present, otherwise
    the entry in self._its_id_values (None when there is none)."""
    if not node.hasNsProp('id', NS_XML):
        return self._its_id_values.get(node, None)
    return node.nsProp('id', NS_XML)
def get_its_loc_notes(self, node, inherit=True):
    """Return the list of localization notes that apply to node.

    A note given by local attributes comes first: its:locNote /
    its:locNoteRef (with its:locNoteType), or the unprefixed equivalents
    when node is an its:span element and carries no its:-prefixed note
    attribute.  locNote takes precedence over locNoteRef.  The notes
    recorded in self._its_loc_notes follow in reverse order.  When the
    result is empty, inherit is true and node is a non-attribute whose
    parent is an element, the parent's notes are returned instead.
    """
    def has_note_attrs(attr_ns):
        return (node.hasNsProp('locNote', attr_ns)
                or node.hasNsProp('locNoteRef', attr_ns)
                or node.hasNsProp('locNoteType', attr_ns))

    if has_note_attrs(NS_ITS):
        attr_ns, has_local = NS_ITS, True
    elif xml_is_ns_name(node, NS_ITS, 'span'):
        attr_ns, has_local = None, has_note_attrs(None)
    else:
        attr_ns, has_local = None, False

    notes = []
    if has_local:
        note_type = node.nsProp('locNoteType', attr_ns)
        if node.hasNsProp('locNote', attr_ns):
            notes.append(LocNote(locnote=node.nsProp('locNote', attr_ns),
                                 locnotetype=note_type))
        elif node.hasNsProp('locNoteRef', attr_ns):
            notes.append(LocNote(locnoteref=node.nsProp('locNoteRef', attr_ns),
                                 locnotetype=note_type))
    notes.extend(reversed(self._its_loc_notes.get(node, [])))

    if (not notes and inherit and node.type != 'attribute'
            and node.parent is not None and node.parent.type == 'element'):
        return self.get_its_loc_notes(node.parent)
    return notes
def output_test_data(self, category, out, node=None):
    """Write conformance-test output for one ITS data category.

    Starting at node (the document's root element when node is None), one
    line is written to out for the node, then recursively for each of its
    attributes (sorted by their ustr() form) and each element child.  A
    line is the node path, followed by a tab and the category's values
    when there are any, terminated by CRLF.  An unrecognized category
    prints an error to stderr and exits with status 1.
    """
    if node is None:
        node = self._doc.getRootElement()
    fields = ''
    if category == 'translate':
        fields = 'translate="%s"' % self.get_its_translate(node)
    elif category == 'withinText':
        # Attributes produce no withinText value.
        if node.type != 'attribute':
            fields = 'withinText="%s"' % self.get_its_within_text(node)
    elif category == 'localeFilter':
        fields = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node)
    elif category == 'locNote':
        notes = self.get_its_loc_notes(node)
        if len(notes) > 0:
            first = notes[0]
            if first.locnote is not None:
                fields = 'locNote="%s"\tlocNoteType="%s"' % (ustr(first), first.locnotetype)
            elif first.locnoteref is not None:
                fields = 'locNoteRef="%s"\tlocNoteType="%s"' % (first.locnoteref, first.locnotetype)
    elif category == 'externalResourceRef':
        resref = self._its_externals.get(node, '')
        if resref != '':
            fields = 'externalResourceRef="%s"' % resref
    elif category == 'idValue':
        idvalue = self.get_its_id_value(node)
        if idvalue is not None:
            fields = 'idValue="%s"' % idvalue
    elif category == 'preserveSpace':
        if self.get_preserve_space(node):
            fields = 'space="preserve"'
        else:
            fields = 'space="default"'
    else:
        sys.stderr.write('Error: Unrecognized category %s\n' % category)
        sys.exit(1)

    if fields == '':
        out.write('%s\r\n' % (xml_get_node_path(node)))
    else:
        out.write('%s\t%s\r\n' % (xml_get_node_path(node), fields))

    for attr in sorted(xml_attr_iter(node), key=ustr):
        self.output_test_data(category, out, attr)
    element_children = (kid for kid in xml_child_iter(node) if kid.type == 'element')
    for kid in element_children:
        self.output_test_data(category, out, kid)
@staticmethod
def _try_xpath_eval (xpath, expr):
    """Evaluate expr with the XPath context xpath, tolerating bad expressions.

    Returns the result of xpath.xpathEval(expr).  If the evaluation raises,
    a warning naming the expression is written to stderr and an empty list
    is returned.  Only Exception subclasses are handled, so
    KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        return xpath.xpathEval(expr)
    except Exception:
        sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
        return []
def match_locale_list(extranges, locale):
    """Return True if locale matches any range in the comma-separated extranges.

    Each range is stripped of surrounding whitespace and tested with
    match_locale().  An empty or whitespace-only list matches nothing.
    """
    if not extranges.strip():
        return False
    return any(match_locale(candidate.strip(), locale)
               for candidate in extranges.split(','))
def match_locale(extrange, locale):
    """Return True if the language tag locale matches the range extrange.

    Implements extended filtering for extended language ranges as defined
    by RFC 4647 (part of BCP 47), section 3.3.2:
    http://tools.ietf.org/html/rfc4647#section-3.3.2

    Both arguments are '-'-separated subtag strings and are compared
    case-insensitively.  '*' in the range is a wildcard.
    """
    range_tags = [tag.lower() for tag in extrange.split('-')]
    locale_tags = [tag.lower() for tag in locale.split('-')]
    # Step 2: the first subtags must match (a leading '*' matches anything) ...
    if range_tags[0] not in ('*', locale_tags[0]):
        return False
    # ... and matching then continues with the second subtag of BOTH lists.
    # Restarting at index 0 would let a leading '*' leave the tag's primary
    # subtag unconsumed, so that e.g. '*-de' would match 'de'.
    range_idx = locale_idx = 1
    while range_idx < len(range_tags):
        if range_tags[range_idx] == '*':
            # 3.A: a wildcard matches zero or more subtags; skip it.
            range_idx += 1
            continue
        if locale_idx >= len(locale_tags):
            # 3.B: the range wants more subtags than the tag has.
            return False
        if range_tags[range_idx] == locale_tags[locale_idx]:
            # 3.C: subtags match; advance in both lists.
            range_idx += 1
            locale_idx += 1
            continue
        if len(locale_tags[locale_idx]) == 1:
            # 3.D: a singleton (such as 'x') in the tag may not be skipped.
            return False
        # 3.E: skip the non-matching tag subtag and try the next one.
        locale_idx += 1
    return True
# POSIX-style locale name: language, optional _TERRITORY, optional @modifier,
# optional .codeset.  Written as a raw string so that '\.' reaches the regex
# engine without being an invalid escape sequence in the string literal.
_locale_pattern = re.compile(r'([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')
# POSIX modifiers that denote a script, mapped to the BCP 47 script subtag.
_locale_scripts = {
    '@cyrillic': 'Cyrl',
    '@devanagari': 'Deva',
    '@latin': 'Latn',
    '@shaw': 'Shaw',
}
def convert_locale (locale):
    """Convert a POSIX-style locale name to a BCP 47 language tag.

    The language part is lowercased.  A modifier naming a known script is
    appended as a script subtag, then the territory is appended in upper
    case.  Any other modifier except '@euro' is appended last, lowercased,
    as a variant.  Text that does not match the pattern is returned
    unchanged.  Examples: 'sr_RS@latin' -> 'sr-Latn-RS',
    'de_DE@euro' -> 'de-DE'.
    """
    # Automatically convert POSIX-style locales to BCP47
    match = _locale_pattern.match(locale)
    if match is None:
        return locale
    language, territory, modifier = match.group(1, 2, 3)
    ret = language.lower()
    script = _locale_scripts.get(modifier)
    if script is not None:
        ret += '-' + script
        # The modifier has been consumed as a script; do not repeat it as a variant.
        modifier = None
    if territory is not None:
        ret += '-' + territory[1:].upper()
    if modifier is not None and modifier != '@euro':
        ret += '-' + modifier[1:].lower()
    return ret
if __name__ == '__main__':
    # Command-line entry point.  Three modes, selected by the options:
    #   (default)  extract messages from the XML files into a PO template,
    #              or emit conformance-test data with -t;
    #   -m FILE    merge the translations of one MO file into each XML file;
    #   -j FILE    join several MO files with one XML file.
    options = optparse.OptionParser()
    options.set_usage('\n itstool [OPTIONS] [XMLFILES]\n' +
                      ' itstool -m <MOFILE> [OPTIONS] [XMLFILES]\n' +
                      ' itstool -j <XMLFILE> [OPTIONS] [MOFILES]')
    options.add_option('-i', '--its',
                       action='append',
                       dest='itsfile',
                       metavar='ITS',
                       help='Load the ITS rules in the file ITS (can specify multiple times)')
    options.add_option('-l', '--lang',
                       dest='lang',
                       default=None,
                       metavar='LANGUAGE',
                       help='Explicitly set the language code for output file')
    options.add_option('-j', '--join',
                       dest='join',
                       metavar='FILE',
                       help='Join multiple MO files with the XML file FILE and output XML file')
    options.add_option('-m', '--merge',
                       dest='merge',
                       metavar='FILE',
                       help='Merge from a PO or MO file FILE and output XML files')
    options.add_option('-n', '--no-builtins',
                       action='store_true',
                       dest='nobuiltins',
                       default=False,
                       help='Do not apply the built-in ITS rules')
    options.add_option('-o', '--output',
                       dest='output',
                       default=None,
                       metavar='OUT',
                       help='Output PO files to file OUT or XML files in directory OUT')
    options.add_option('--path',
                       action='append',
                       dest='itspath',
                       default=None,
                       metavar='PATHS',
                       help='Extra path where ITS files may be found (can specify multiple times)')
    options.add_option('-s', '--strict',
                       action='store_true',
                       dest='strict',
                       default=False,
                       help='Exit with error when PO files contain broken XML')
    options.add_option('-d', '--load-dtd',
                       action='store_true',
                       dest='load_dtd',
                       default=False,
                       help='Load external DTDs used by input XML')
    options.add_option('-k', '--keep-entities',
                       action='store_true',
                       dest='keep_entities',
                       default=False,
                       help='Keep entity reference unexpanded')
    options.add_option('-p', '--param',
                       action='append',
                       dest='params',
                       default=[],
                       nargs=2,
                       metavar='NAME VALUE',
                       help='Define the ITS parameter NAME to the value VALUE (can specify multiple times)')
    options.add_option('-t', '--test',
                       dest='test',
                       default=None,
                       metavar='CATEGORY',
                       help='Generate conformance test output for CATEGORY')
    options.add_option('-v', '--version',
                       action='store_true',
                       dest='version',
                       default=False,
                       help='Print itstool version and exit')
    # sys.argv is passed whole, so args[0] is the program name and the
    # positional file arguments are args[1:].
    (opts, args) = options.parse_args(sys.argv)

    if opts.version:
        print('itstool %s' % VERSION)
        sys.exit(0)

    params = {}
    for name, value in opts.params:
        params[name] = value

    if opts.merge is None and opts.join is None:
        # Extraction mode: collect the messages of all input files into one list.
        messages = MessageList()
        for filename in args[1:]:
            doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
            doc.apply_its_rules(not(opts.nobuiltins), params=params)
            if opts.itsfile is not None:
                for itsfile in opts.itsfile:
                    doc.apply_its_file(itsfile, params=params)
            if opts.test is None:
                doc.generate_messages()
        if opts.output is None or opts.output == '-':
            out = uout
        else:
            try:
                out = io.open(opts.output, 'wt', encoding='utf-8')
            except Exception:
                sys.stderr.write('Error: Cannot write to file %s\n' % opts.output)
                sys.exit(1)
        if opts.test is not None:
            # Test data is produced for the last document that was loaded.
            doc.output_test_data(opts.test, out)
        else:
            messages.output(out)
        out.flush()
    elif opts.merge is not None:
        # Merge mode.  The catalogue is bound to the module-level name
        # `translations`, which Document.translate_attrs reads as a global.
        try:
            # GNUTranslations parses the whole file in its constructor, so
            # the handle can be closed straight away.
            with open(opts.merge, 'rb') as mofile:
                translations = gettext.GNUTranslations(mofile)
        except Exception:
            sys.stderr.write('Error: cannot open mo file %s\n' % opts.merge)
            sys.exit(1)
        if PY3:
            translations.ugettext = translations.gettext
        translations.add_fallback(NoneTranslations())
        if opts.lang is None:
            # Derive the language from the MO file name, e.g. de_DE.mo -> de-DE.
            opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0])
        # out is either a directory name (one output file per input file) or
        # an already open binary stream (single input file only).
        if opts.output is None:
            out = './'
        elif os.path.isdir(opts.output):
            out = opts.output
        elif len(args) == 2:
            if opts.output == '-':
                # The serialized XML is bytes; on Python 3 sys.stdout accepts
                # only text, so write through its underlying binary buffer.
                out = getattr(sys.stdout, 'buffer', sys.stdout)
            else:
                out = open(opts.output, 'wb')
        else:
            sys.stderr.write('Error: Non-directory output for multiple files\n')
            sys.exit(1)
        for filename in args[1:]:
            messages = MessageList()
            doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
            doc.apply_its_rules(not(opts.nobuiltins), params=params)
            if opts.itsfile is not None:
                for itsfile in opts.itsfile:
                    doc.apply_its_file(itsfile, params=params)
            try:
                doc.merge_translations(translations, opts.lang, strict=opts.strict)
            except Exception as e:
                # Report the failure, then let the exception propagate as it
                # always has (previously the message after a bare `raise`
                # could never be reached).
                sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e))
                raise
            serialized = doc._doc.serialize('utf-8')
            if PY3:
                # For some reason, under py3, our serialized data is returned as a str.
                # Let's encode it to bytes
                serialized = serialized.encode('utf-8')
            if isinstance(out, string_types):
                # Directory output: the file is named after the input file.
                with open(os.path.join(out, os.path.basename(filename)), 'wb') as fout:
                    fout.write(serialized)
            else:
                out.write(serialized)
                out.flush()
    elif opts.join is not None:
        # Join mode: one catalogue per MO file, keyed by the language derived
        # from the MO file name.
        translations = {}
        for filename in args[1:]:
            try:
                with open(filename, 'rb') as mofile:
                    thistr = gettext.GNUTranslations(mofile)
            except Exception:
                sys.stderr.write('Error: cannot open mo file %s\n' % filename)
                sys.exit(1)
            thistr.add_fallback(NoneTranslations())
            if PY3:
                thistr.ugettext = thistr.gettext
            lang = convert_locale(os.path.splitext(os.path.basename(filename))[0])
            translations[lang] = thistr
        if opts.output is None:
            # Bytes are written below, so use the binary buffer on Python 3.
            out = getattr(sys.stdout, 'buffer', sys.stdout)
        elif os.path.isdir(opts.output):
            # The output is the joined XML document, so it is named after the
            # XML input, not after whichever MO file was read last.
            out = open(os.path.join(opts.output, os.path.basename(opts.join)), 'wb')
        else:
            out = open(opts.output, 'wb')
        messages = MessageList()
        doc = Document(opts.join, messages)
        doc.apply_its_rules(not(opts.nobuiltins), params=params)
        if opts.itsfile is not None:
            for itsfile in opts.itsfile:
                doc.apply_its_file(itsfile, params=params)
        doc.join_translations(translations, strict=opts.strict)
        serialized = doc._doc.serialize('utf-8')
        if PY3:
            # For some reason, under py3, our serialized data is returned as a str.
            # Let's encode it to bytes
            serialized = serialized.encode('utf-8')
        out.write(serialized)
        out.flush()