| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 try:
26 from lxml import etree
27 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
28 getXMLspace, setXMLspace, namespaced
29 except ImportError, e:
30 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
31
32 from translate.storage import base
33 from translate.lang import data
34
35
37 """generate match objects for all L{re_obj} matches in L{text}."""
38 start = 0
39 max = len(text)
40 while start < max:
41 m = re_obj.search(text, start)
42 if not m:
43 break
44 yield m
45 start = m.end()
46
#TODO: we can now do better with our proper placeables support
# Patterns (one capturing group each) for the inline markers that are turned
# into placeholder elements.  All of them are raw strings so that the
# backslashes reach the regular expression engine unchanged.
placeholders = [
    # printf-style conversions such as %s or %d
    r'(%[diouxXeEfFgGcrs])',
    # one or more backslashes plus the character following them
    r'(\\+.?)',
    # positional long-hex conversion, e.g. %1$lx.  The "$" must be escaped:
    # unescaped it is an end-of-string anchor and the pattern never matches.
    r'(%[0-9]\$lx)',
    # any other positional conversion, e.g. %1$s.  The "lx" case is excluded
    # here so that it is not reported twice as two overlapping matches.
    r'(%[0-9]\$(?!lx)[a-z])',
    # anything that looks like a markup tag
    r'(<.+?>)',
]
re_placeholders = [re.compile(ph) for ph in placeholders]
51
52
def _getPhMatches(text):
    """Return a list of regexp match objects for all placeholders in
    L{text}, ordered by the position at which each match starts.

    @param text: the string to scan for placeholders.
    @return: a list of match objects; empty if nothing matched.
    """
    matches = []
    for re_ph in re_placeholders:
        matches.extend(_findAllMatches(text, re_ph))

    # Every pattern is scanned on its own, so the combined results have to
    # be put back into textual order.  A key function is used instead of a
    # cmp-style comparison function: it gives the same stable ordering and
    # does not depend on the cmp() builtin.
    matches.sort(key=lambda match: match.start())
    return matches
63
64
66 """
67 A single unit in the file. Provisional work is done to make several
68 languages possible.
69 """
70
71 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
72 rootNode = ""
73 # The name of the per language element of this unit type:(termEntry, tu,
74 # trans-unit)
75 languageNode = ""
76 #The name of the innermost element of this unit type:(term, seg)
77 textNode = ""
78
79 namespace = None
80 _default_xml_space = "preserve"
81 """The default handling of spacing in the absence of an xml:space
82 attribute.
83
84 This is mostly for correcting XLIFF behaviour."""
85
87 """Constructs a unit containing the given source string"""
88 self._rich_source = None
89 self._rich_target = None
90 if empty:
91 self._state_n = 0
92 return
93 self.xmlelement = etree.Element(self.namespaced(self.rootNode))
94 #add descrip, note, etc.
95 super(LISAunit, self).__init__(source)
96
98 """Compares two units"""
99 if not isinstance(other, LISAunit):
100 return super(LISAunit, self).__eq__(other)
101 languageNodes = self.getlanguageNodes()
102 otherlanguageNodes = other.getlanguageNodes()
103 if len(languageNodes) != len(otherlanguageNodes):
104 return False
105 for i in range(len(languageNodes)):
106 mytext = self.getNodeText(languageNodes[i],
107 getXMLspace(self.xmlelement,
108 self._default_xml_space))
109 othertext = other.getNodeText(otherlanguageNodes[i],
110 getXMLspace(self.xmlelement,
111 self._default_xml_space))
112 if mytext != othertext:
113 #TODO:^ maybe we want to take children and notes into account
114 return False
115 return True
116
118 """Returns name in Clark notation.
119
120 For example namespaced("source") in an XLIFF document might return::
121 {urn:oasis:names:tc:xliff:document:1.1}source
122 This is needed throughout lxml.
123 """
124 return namespaced(self.namespace, name)
125
127 languageNodes = self.getlanguageNodes()
128 if len(languageNodes) > 0:
129 self.xmlelement.replace(languageNodes[0], dom_node)
130 else:
131 self.xmlelement.append(dom_node)
132
135 source_dom = property(get_source_dom, set_source_dom)
136
138 if self._rich_source is not None:
139 self._rich_source = None
140 text = data.forceunicode(text)
141 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
142
144 return self.getNodeText(self.source_dom,
145 getXMLspace(self.xmlelement,
146 self._default_xml_space))
147 source = property(getsource, setsource)
148
150 languageNodes = self.getlanguageNodes()
151 assert len(languageNodes) > 0
152 if dom_node is not None:
153 if append or len(languageNodes) == 0:
154 self.xmlelement.append(dom_node)
155 else:
156 self.xmlelement.insert(1, dom_node)
157 if not append and len(languageNodes) > 1:
158 self.xmlelement.remove(languageNodes[1])
159
161 if lang:
162 return self.getlanguageNode(lang=lang)
163 else:
164 return self.getlanguageNode(lang=None, index=1)
165 target_dom = property(get_target_dom)
166
168 """Sets the "target" string (second language), or alternatively
169 appends to the list"""
170 #XXX: we really need the language - can't really be optional, and we
171 # need to propagate it
172 if self._rich_target is not None:
173 self._rich_target = None
174 text = data.forceunicode(text)
175 # Firstly deal with reinitialising to None or setting to identical
176 # string
177 if self.gettarget() == text:
178 return
179 languageNode = self.get_target_dom(None)
180 if not text is None:
181 if languageNode is None:
182 languageNode = self.createlanguageNode(lang, text, "target")
183 self.set_target_dom(languageNode, append)
184 else:
185 if self.textNode:
186 terms = languageNode.iter(self.namespaced(self.textNode))
187 try:
188 languageNode = terms.next()
189 except StopIteration, e:
190 pass
191 languageNode.text = text
192 else:
193 self.set_target_dom(None, False)
194
196 """retrieves the "target" text (second entry), or the entry in the
197 specified language, if it exists"""
198 return self.getNodeText(self.get_target_dom(lang),
199 getXMLspace(self.xmlelement,
200 self._default_xml_space))
201 target = property(gettarget, settarget)
202
204 """Returns a xml Element setup with given parameters to represent a
205 single language entry. Has to be overridden."""
206 return None
207
209 """Create the text node in parent containing all the ph tags"""
210 matches = _getPhMatches(text)
211 if not matches:
212 parent.text = text
213 return
214
215 # Now we know there will definitely be some ph tags
216 start = matches[0].start()
217 pretext = text[:start]
218 if pretext:
219 parent.text = pretext
220 lasttag = parent
221 for i, m in enumerate(matches):
222 #pretext
223 pretext = text[start:m.start()]
224 # this will never happen with the first ph tag
225 if pretext:
226 lasttag.tail = pretext
227 #ph node
228 phnode = etree.SubElement(parent, self.namespaced("ph"))
229 phnode.set("id", str(i+1))
230 phnode.text = m.group()
231 lasttag = phnode
232 start = m.end()
233 #post text
234 if text[start:]:
235 lasttag.tail = text[start:]
236
238 """Returns a list of all nodes that contain per language information.
239 """
240 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
241
243 """Retrieves a languageNode either by language or by index"""
244 if lang is None and index is None:
245 raise KeyError("No criterea for languageNode given")
246 languageNodes = self.getlanguageNodes()
247 if lang:
248 for set in languageNodes:
249 if getXMLlang(set) == lang:
250 return set
251 else:#have to use index
252 if index >= len(languageNodes):
253 return None
254 else:
255 return languageNodes[index]
256 return None
257
259 """Retrieves the term from the given languageNode"""
260 if languageNode is None:
261 return None
262 if self.textNode:
263 terms = languageNode.iterdescendants(self.namespaced(self.textNode))
264 if terms is None:
265 return None
266 else:
267 return getText(terms.next(), xml_space)
268 else:
269 return getText(languageNode, xml_space)
270
274
277
278 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
279 lambda self, value: self._set_property(self.namespaced('xid'), value))
280
281 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
282 lambda self, value: self._set_property(self.namespaced('rid'), value))
283
288 createfromxmlElement = classmethod(createfromxmlElement)
289
290
292 """A class representing a file store for one of the LISA file formats."""
293 UnitClass = LISAunit
294 #The root node of the XML document:
295 rootNode = ""
296 #The root node of the content section:
297 bodyNode = ""
298 #The XML skeleton to use for empty construction:
299 XMLskeleton = ""
300
301 namespace = None
302
303 - def __init__(self, inputfile=None, sourcelanguage='en',
304 targetlanguage=None, unitclass=None):
305 super(LISAfile, self).__init__(unitclass=unitclass)
306 if inputfile is not None:
307 self.parse(inputfile)
308 assert self.document.getroot().tag == self.namespaced(self.rootNode)
309 else:
310 # We strip out newlines to ensure that spaces in the skeleton
311 # don't interfere with the pretty printing of lxml
312 self.parse(self.XMLskeleton.replace("\n", ""))
313 self.setsourcelanguage(sourcelanguage)
314 self.settargetlanguage(targetlanguage)
315 self.addheader()
316 self._encoding = "UTF-8"
317
321
323 """Returns name in Clark notation.
324
325 For example namespaced("source") in an XLIFF document might return::
326 {urn:oasis:names:tc:xliff:document:1.1}source
327 This is needed throughout lxml.
328 """
329 return namespaced(self.namespace, name)
330
332 """Initialises self.body so it never needs to be retrieved from the
333 XML again."""
334 self.namespace = self.document.getroot().nsmap.get(None, None)
335 self.body = self.document.find('//%s' % self.namespaced(self.bodyNode))
336
338 #TODO: maybe this should rather be called addsourcestring or something?
339 """Adds and returns a new unit with the given string as first entry."""
340 newunit = self.UnitClass(source)
341 self.addunit(newunit)
342 return newunit
343
345 unit.namespace = self.namespace
346 super(LISAfile, self).addunit(unit)
347 if new:
348 self.body.append(unit.xmlelement)
349
351 """Converts to a string containing the file's XML"""
352 return etree.tostring(self.document, pretty_print=True,
353 xml_declaration=True, encoding='utf-8')
354
356 """Populates this object from the given xml string"""
357 if not hasattr(self, 'filename'):
358 self.filename = getattr(xml, 'name', '')
359 if hasattr(xml, "read"):
360 xml.seek(0)
361 posrc = xml.read()
362 xml = posrc
363 if etree.LXML_VERSION >= (2, 1, 0):
364 #Since version 2.1.0 we can pass the strip_cdata parameter to
365 #indicate that we don't want cdata to be converted to raw XML
366 parser = etree.XMLParser(strip_cdata=False)
367 else:
368 parser = etree.XMLParser()
369 self.document = etree.fromstring(xml, parser).getroottree()
370 self._encoding = self.document.docinfo.encoding
371 self.initbody()
372 assert self.document.getroot().tag == self.namespaced(self.rootNode)
373 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
374 term = self.UnitClass.createfromxmlElement(entry)
375 self.addunit(term, new=False)
376
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Nov 19 17:48:12 2010 | http://epydoc.sourceforge.net |