| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 import re
34
35 from translate.filters import helpers
36 from translate.filters import decoration
37 from translate.filters import prefilters
38 from translate.filters import spelling
39 from translate.lang import factory
40 from translate.lang import data
# The import of xliff could fail if the user doesn't have lxml installed. For
# now we try to continue gracefully to help users who aren't interested in
# support for XLIFF or other XML formats.
try:
    from translate.storage import xliff
except ImportError:
    # The exception object itself is not needed; XLIFF support is optional.
    xliff = None
# The import of xliff fails silently in the absence of lxml if another module
# already tried to import it unsuccessfully, so let's make 100% sure:
if not hasattr(xliff, "xliffunit"):
    xliff = None
52
53 # These are some regular expressions that are compiled for use in some tests
54
55 # printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't
56 # cover everything. We leave \w instead of specifying the exact letters, as
57 # this should capture printf types defined on other platforms.
58 # Extended to support Python named format specifiers.
# Groups: "ord" is the positional index of "%1$s", "key" is the name in
# "%(name)s", "fullvar" is flags + width + precision + length + type, and
# "type" is the conversion letter.
# The length modifiers must be a real alternation (unescaped "|"); the longer
# spellings come first so that "%lld" and "%hhd" leave the conversion letter
# in "type" instead of consuming it as a second modifier character.
printf_pat = re.compile(
    r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?'
    r'(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|h|ll|l)?(?P<type>[\w%])))'
)
60
# The name of the XML tag
tagname_re = re.compile(r"<[\s]*([\w\/]*)")

# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
# Groups: attribute name, quoted value, then the value again in whichever of
# the double-quoted / single-quoted alternatives matched.
property_re = re.compile(r""" (\w*)=((\\?".*?\\?")|(\\?'.*?\\?'))""")

# The whole tag
tag_re = re.compile(r"<[^>]+>")

gconf_attribute_re = re.compile(r'"[a-z_]+?"')
72
73
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    The name is whatever tagname_re captures after the opening "<" and any
    whitespace (word characters and "/").

    @param string: text that starts with a tag
    @return: the captured tag name (may be empty)
    @raise AttributeError: if string does not start with "<", because the
        regular expression then fails to match
    """
    # group(1) is the only capturing group in tagname_re
    return tagname_re.match(string).group(1)
77
78
def intuplelist(pair, list):
    """Tests to see if pair == (a,b,c) is in list, but handles None entries in
    list as wildcards (only allowed in positions "a" and "c"). We take a
    shortcut by only considering "c" if "b" has already matched.

    @param pair: a 3-tuple (a, b, c)
    @param list: iterable of 3-tuple patterns
    @return: the first pattern that matches pair, or pair itself when nothing
        matches or when pair is a bare tag name (b and c both None)
    """
    a, b, c = pair
    if (b, c) == (None, None):
        #This is a tagname
        return pair
    for pattern in list:
        x, y, z = pattern
        # "a" may be wildcarded with None in the pattern; "b" must be equal
        if (x, y) in [(a, b), (None, b)]:
            # "c" may be wildcarded with None in the pattern
            if z in [None, c]:
                return pattern
    return pair
93
94
96 """Returns all the properties in the XML/HTML tag string as
97 (tagname, propertyname, propertyvalue), but ignore those combinations
98 specified in ignore."""
99 properties = []
100 for string in strings:
101 tag = tagname(string)
102 properties += [(tag, None, None)]
103 #Now we isolate the attribute pairs.
104 pairs = property_re.findall(string)
105 for property, value, a, b in pairs:
106 #Strip the quotes:
107 value = value[1:-1]
108
109 canignore = False
110 if (tag, property, value) in ignore or \
111 intuplelist((tag, property, value), ignore) != (tag, property, value):
112 canignore = True
113 break
114 if not canignore:
115 properties += [(tag, property, value)]
116 return properties
117
118
120 """This exception signals that a Filter didn't pass, and gives an
121 explanation or a comment"""
122
124 if not isinstance(messages, list):
125 messages = [messages]
126 assert isinstance(messages[0], unicode) # Assumption: all of same type
127 joined = u", ".join(messages)
128 Exception.__init__(self, joined)
129 # Python 2.3 doesn't have .args
130 if not hasattr(self, "args"):
131 self.args = joined
132
133
135 """This exception signals that a Filter didn't pass, and the bad translation
136 might break an application (so the string will be marked fuzzy)"""
137 pass
138
139 #(tag, attribute, value) specifies a certain attribute which can be changed/
140 #ignored if it exists inside tag. In the case where there is a third element
141 #in the tuple, it indicates a property value that can be ignored if present
142 #(like defaults, for example)
143 #If a certain item is None, it indicates that it is relevant for all values of
144 #the property/tag that is specified as None. A non-None value of "value"
145 #indicates that the value of the attribute must be taken into account.
146 common_ignoretags = [(None, "xml-lang", None)]
147 common_canchangetags = [("img", "alt", None),
148 (None, "title", None),
149 (None, "dir", None),
150 (None, "lang", None),
151 ]
152 # Actually the title attribute is allowed on many tags in HTML (but probably not all)
153
154
156 """object representing the configuration of a checker"""
157
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None,
             validchars=None, punctuation=None, endpunctuation=None,
             ignoretags=None, canchangetags=None, criticaltests=None,
             credit_sources=None):
    """Build the configuration from the optional settings.

    Every argument defaults to None.

    @param targetlanguage: language code handed to updatetargetlanguage()
    @param accelmarkers: list of accelerator markers (None -> empty list)
    @param varmatches: list of variable delimiters (None -> empty list)
    @param notranslatewords: words stored as keys of self.notranslatewords
    @param musttranslatewords: words stored as keys of
        self.musttranslatewords
    @param validchars: characters added to self.validcharsmap
    @param punctuation: overrides the target language's punctuation
    @param endpunctuation: overrides the target language's sentenceend
    @param ignoretags: overrides common_ignoretags
    @param canchangetags: overrides common_canchangetags
    @param criticaltests: list of test names (None -> empty list)
    @param credit_sources: list (None -> empty list)
    """
    # Init lists
    self.accelmarkers = self._init_list(accelmarkers)
    self.varmatches = self._init_list(varmatches)
    self.criticaltests = self._init_list(criticaltests)
    self.credit_sources = self._init_list(credit_sources)
    # Lang data
    self.targetlanguage = targetlanguage
    # Sets self.lang, which the defaults below read from.
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')
    # Inits with default values
    # NOTE(review): the language defaults only apply if
    # data.normalized_unicode() passes None through unchanged - confirm.
    self.punctuation = self._init_default(data.normalized_unicode(punctuation),
                                          self.lang.punctuation)
    self.endpunctuation = self._init_default(data.normalized_unicode(endpunctuation),
                                             self.lang.sentenceend)
    self.ignoretags = self._init_default(ignoretags, common_ignoretags)
    self.canchangetags = self._init_default(canchangetags, common_canchangetags)
    # Other data
    # TODO: allow user configuration of untranslatable words
    self.notranslatewords = dict.fromkeys([data.normalized_unicode(key) for key in self._init_list(notranslatewords)])
    self.musttranslatewords = dict.fromkeys([data.normalized_unicode(key) for key in self._init_list(musttranslatewords)])
    validchars = data.normalized_unicode(validchars)
    self.validcharsmap = {}
    self.updatevalidchars(validchars)
186
188 """initialise configuration paramaters that are lists
189
190 @type list: List
191 @param list: None (we'll initialise a blank list) or a list paramater
192 @rtype: List
193 """
194 if list is None:
195 list = []
196 return list
197
199 """initialise parameters that can have default options
200
201 @param param: the user supplied paramater value
202 @param default: default values when param is not specified
203 @return: the paramater as specified by the user of the default settings
204 """
205 if param is None:
206 return default
207 return param
208
210 """combines the info in otherconfig into this config object"""
211 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
212 self.updatetargetlanguage(self.targetlanguage)
213 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
214 self.varmatches.extend(otherconfig.varmatches)
215 self.notranslatewords.update(otherconfig.notranslatewords)
216 self.musttranslatewords.update(otherconfig.musttranslatewords)
217 self.validcharsmap.update(otherconfig.validcharsmap)
218 self.punctuation += otherconfig.punctuation
219 self.endpunctuation += otherconfig.endpunctuation
220 #TODO: consider also updating in the following cases:
221 self.ignoretags = otherconfig.ignoretags
222 self.canchangetags = otherconfig.canchangetags
223 self.criticaltests.extend(otherconfig.criticaltests)
224 self.credit_sources = otherconfig.credit_sources
225
227 """updates the map that eliminates valid characters"""
228 if validchars is None:
229 return True
230 validcharsmap = dict([(ord(validchar), None) for validchar in data.normalized_unicode(validchars)])
231 self.validcharsmap.update(validcharsmap)
232
234 """Updates the target language in the config to the given target
235 language"""
236 self.lang = factory.getlanguage(langcode)
237
238
240
def cache_results(f):
    """Wrap the one-argument method f so that its results are memoised.

    Results are kept in the instance's results_cache dictionary under the
    key (f.__name__, param1), so the instance must provide that attribute
    and param1 must be hashable.

    @param f: a method taking (self, param1)
    @return: the caching wrapper, carrying f's docstring
    """

    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        else:
            value = f(self, param1)
            res_cache[key] = value
            return value
    # Keep the wrapped method's documentation visible on the wrapper.
    cached_f.__doc__ = f.__doc__
    return cached_f
251
252
254 """Parent Checker class which does the checking based on functions available
255 in derived classes."""
256 preconditions = {}
257
def __init__(self, checkerconfig=None, excludefilters=None,
             limitfilters=None, errorhandler=None):
    """Set up the checker and work out which filters it will run.

    @param checkerconfig: configuration to use; a fresh CheckerConfig() is
        created when None
    @param excludefilters: passed on to getfilters()
    @param limitfilters: passed on to getfilters()
    @param errorhandler: stored on the instance as self.errorhandler
    """
    self.errorhandler = errorhandler
    if checkerconfig is None:
        self.setconfig(CheckerConfig())
    else:
        self.setconfig(checkerconfig)
    # exclude functions defined in UnitChecker from being treated as tests.
    self.helperfunctions = {}
    for functionname in dir(UnitChecker):
        function = getattr(self, functionname)
        if callable(function):
            self.helperfunctions[functionname] = function
    # helperfunctions must be filled in before getfilters() is called
    self.defaultfilters = self.getfilters(excludefilters, limitfilters)
    self.results_cache = {}
273
275 """returns dictionary of available filters, including/excluding those in
276 the given lists"""
277 filters = {}
278 if limitfilters is None:
279 # use everything available unless instructed
280 limitfilters = dir(self)
281 if excludefilters is None:
282 excludefilters = {}
283 for functionname in limitfilters:
284 if functionname in excludefilters:
285 continue
286 if functionname in self.helperfunctions:
287 continue
288 if functionname == "errorhandler":
289 continue
290 filterfunction = getattr(self, functionname, None)
291 if not callable(filterfunction):
292 continue
293 filters[functionname] = filterfunction
294 return filters
295
297 """sets the accelerator list"""
298 self.config = config
299 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
300 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
301 for startmatch, endmatch in self.config.varmatches]
302 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch,
303 prefilters.varnone)
304 for startmatch, endmatch in self.config.varmatches]
305
307 """Sets the filename that a checker should use for evaluating
308 suggestions."""
309 self.suggestion_store = store
310 if self.suggestion_store:
311 self.suggestion_store.require_index()
312
316 filtervariables = cache_results(filtervariables)
317
321 removevariables = cache_results(removevariables)
322
324 """filter out accelerators from str1"""
325 return helpers.multifilter(str1, self.accfilters, None)
326 filteraccelerators = cache_results(filteraccelerators)
327
329 """filter out accelerators from str1"""
330 return helpers.multifilter(str1, self.accfilters, acceptlist)
331
333 """replaces words with punctuation with their unpunctuated
334 equivalents"""
335 return prefilters.filterwordswithpunctuation(str1)
336 filterwordswithpunctuation = cache_results(filterwordswithpunctuation)
337
341 filterxml = cache_results(filterxml)
342
344 """Runs the given test on the given unit.
345
346 Note that this can raise a FilterFailure as part of normal operation"""
347 return test(unit)
348
350 """run all the tests in this suite, return failures as testname,
351 message_or_exception"""
352 self.results_cache = {}
353 failures = {}
354 ignores = self.config.lang.ignoretests[:]
355 functionnames = self.defaultfilters.keys()
356 priorityfunctionnames = self.preconditions.keys()
357 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
358 for functionname in priorityfunctionnames + otherfunctionnames:
359 if functionname in ignores:
360 continue
361 filterfunction = getattr(self, functionname, None)
362 # this filterfunction may only be defined on another checker if
363 # using TeeChecker
364 if filterfunction is None:
365 continue
366 filtermessage = filterfunction.__doc__
367 try:
368 filterresult = self.run_test(filterfunction, unit)
369 except FilterFailure, e:
370 filterresult = False
371 filtermessage = e.args[0]
372 except Exception, e:
373 if self.errorhandler is None:
374 raise ValueError("error in filter %s: %r, %r, %s" % \
375 (functionname, unit.source, unit.target, e))
376 else:
377 filterresult = self.errorhandler(functionname, unit.source,
378 unit.target, e)
379 if not filterresult:
380 # we test some preconditions that aren't actually a cause for
381 # failure
382 if functionname in self.defaultfilters:
383 failures[functionname] = filtermessage
384 if functionname in self.preconditions:
385 for ignoredfunctionname in self.preconditions[functionname]:
386 ignores.append(ignoredfunctionname)
387 self.results_cache = {}
388 return failures
389
390
392 """A checker that passes source and target strings to the checks, not the
393 whole unit.
394
395 This provides some speedup and simplifies testing."""
396
def __init__(self, checkerconfig=None, excludefilters=None,
             limitfilters=None, errorhandler=None):
    """Pass all arguments unchanged to the parent class constructor."""
    super(TranslationChecker, self).__init__(checkerconfig, excludefilters,
                                             limitfilters, errorhandler)
401
403 """Runs the given test on the given unit.
404
405 Note that this can raise a FilterFailure as part of normal operation."""
406 if self.hasplural:
407 filtermessages = []
408 filterresult = True
409 for pluralform in unit.target.strings:
410 try:
411 if not test(self.str1, unicode(pluralform)):
412 filterresult = False
413 except FilterFailure, e:
414 filterresult = False
415 filtermessages.append(unicode(e.args))
416 if not filterresult and filtermessages:
417 raise FilterFailure(filtermessages)
418 else:
419 return filterresult
420 else:
421 return test(self.str1, self.str2)
422
424 """Do some optimisation by caching some data of the unit for the benefit
425 of run_test()."""
426 self.str1 = data.normalized_unicode(unit.source) or u""
427 self.str2 = data.normalized_unicode(unit.target) or u""
428 self.hasplural = unit.hasplural()
429 self.locations = unit.getlocations()
430 return super(TranslationChecker, self).run_filters(unit)
431
432
434 """A Checker that controls multiple checkers."""
435
def __init__(self, checkerconfig=None, excludefilters=None,
             limitfilters=None, checkerclasses=None, errorhandler=None,
             languagecode=None):
    """construct a TeeChecker from the given checkers

    @param checkerconfig: configuration handed to every checker class; also
        becomes self.config when it is true, otherwise the first checker's
        config is used
    @param excludefilters: handed to every checker class and to getfilters()
    @param limitfilters: handed to every checker class and to getfilters()
    @param checkerclasses: classes to instantiate; [StandardChecker] when
        None
    @param errorhandler: handed to every checker class
    @param languagecode: when true, every checker's config is switched to
        this target language
    """
    self.limitfilters = limitfilters
    if checkerclasses is None:
        checkerclasses = [StandardChecker]
    self.checkers = [checkerclass(checkerconfig=checkerconfig,
                                  excludefilters=excludefilters,
                                  limitfilters=limitfilters,
                                  errorhandler=errorhandler) for checkerclass in checkerclasses]
    if languagecode:
        for checker in self.checkers:
            checker.config.updatetargetlanguage(languagecode)
        # Let's hook up the language specific checker
        # NOTE(review): this hookup is assumed to sit inside the
        # "if languagecode" branch - confirm against the pristine file.
        lang_checker = self.checkers[0].config.lang.checker
        if lang_checker:
            self.checkers.append(lang_checker)

    self.combinedfilters = self.getfilters(excludefilters, limitfilters)
    self.config = checkerconfig or self.checkers[0].config
457
459 """returns dictionary of available filters, including/excluding those in
460 the given lists"""
461 if excludefilters is None:
462 excludefilters = {}
463 filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
464 self.combinedfilters = {}
465 for filters in filterslist:
466 self.combinedfilters.update(filters)
467 # TODO: move this somewhere more sensible (a checkfilters method?)
468 if limitfilters is not None:
469 for filtername in limitfilters:
470 if not filtername in self.combinedfilters:
471 import sys
472 print >> sys.stderr, "warning: could not find filter %s" % filtername
473 return self.combinedfilters
474
476 """run all the tests in the checker's suites"""
477 failures = {}
478 for checker in self.checkers:
479 failures.update(checker.run_filters(unit))
480 return failures
481
483 """Sets the filename that a checker should use for evaluating
484 suggestions."""
485 for checker in self.checkers:
486 checker.setsuggestionstore(store)
487
488
490 """The basic test suite for source -> target translations."""
491
493 """checks whether a string has been translated at all"""
494 str2 = prefilters.removekdecomments(str2)
495 return not (len(str1.strip()) > 0 and len(str2) == 0)
496
498 """checks whether a translation is basically identical to the original
499 string"""
500 str1 = self.filteraccelerators(self.removevariables(str1)).strip()
501 str2 = self.filteraccelerators(self.removevariables(str2)).strip()
502 if len(str1) < 2:
503 return True
504 # If the whole string is uppercase, or nothing in the string can go
505 # towards uppercase, let's assume there is nothing translatable
506 # TODO: reconsider
507 if (str1.isupper() or str1.upper() == str1) and str1 == str2:
508 return True
509 if self.config.notranslatewords:
510 words1 = str1.split()
511 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
512 #currently equivalent to:
513 # if len(words1) == 1 and words1[0] in self.config.notranslatewords:
514 #why do we only test for one notranslate word?
515 return True
516 # we could also check for things like str1.isnumeric(), but the test
517 # above (str1.upper() == str1) makes this unnecessary
518 if str1.lower() == str2.lower():
519 raise FilterFailure(u"please translate")
520 return True
521
523 """checks whether a translation only contains spaces"""
524 len1 = len(str1.strip())
525 len2 = len(str2.strip())
526 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
527
529 """checks whether a translation is much shorter than the original
530 string"""
531 len1 = len(str1.strip())
532 len2 = len(str2.strip())
533 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
534
536 """checks whether a translation is much longer than the original
537 string"""
538 len1 = len(str1.strip())
539 len2 = len(str2.strip())
540 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
541
543 """checks whether escaping is consistent between the two strings"""
544 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
545 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word])
546 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word])
547 raise SeriousFilterFailure(u"escapes in original (%s) don't match "
548 "escapes in translation (%s)" %
549 (escapes1, escapes2))
550 else:
551 return True
552
554 """checks whether newlines are consistent between the two strings"""
555 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
556 raise FilterFailure(u"line endings in original don't match "
557 "line endings in translation")
558 else:
559 return True
560
562 """checks whether tabs are consistent between the two strings"""
563 if not helpers.countmatch(str1, str2, "\t"):
564 raise SeriousFilterFailure(u"tabs in original don't match "
565 "tabs in translation")
566 else:
567 return True
568
570 """checks whether singlequoting is consistent between the two strings"""
571 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
572 str1 = self.config.lang.punctranslate(str1)
573 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
574 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
575
577 """checks whether doublequoting is consistent between the two strings"""
578 str1 = self.filteraccelerators(self.filtervariables(str1))
579 str1 = self.filterxml(str1)
580 str1 = self.config.lang.punctranslate(str1)
581 str2 = self.filteraccelerators(self.filtervariables(str2))
582 str2 = self.filterxml(str2)
583 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«",
584 u"»", u"“", u"”"))
585
587 """checks for bad double-spaces by comparing to original"""
588 str1 = self.filteraccelerators(str1)
589 str2 = self.filteraccelerators(str2)
590 return helpers.countmatch(str1, str2, u" ")
591
593 """checks for bad spacing after punctuation"""
594 # Convert all nbsp to space, and just check spaces. Useful intermediate
595 # step to stricter nbsp checking?
596 str1 = self.filteraccelerators(self.filtervariables(str1))
597 str1 = self.config.lang.punctranslate(str1)
598 str1 = str1.replace(u"\u00a0", u" ")
599 if str1.find(u" ") == -1:
600 return True
601 str2 = self.filteraccelerators(self.filtervariables(str2))
602 str2 = str2.replace(u"\u00a0", u" ")
603 for puncchar in self.config.punctuation:
604 plaincount1 = str1.count(puncchar)
605 if not plaincount1:
606 continue
607 plaincount2 = str2.count(puncchar)
608 if plaincount1 != plaincount2:
609 continue
610 spacecount1 = str1.count(puncchar + u" ")
611 spacecount2 = str2.count(puncchar + u" ")
612 if spacecount1 != spacecount2:
613 # handle extra spaces that are because of transposed punctuation
614 if abs(spacecount1 - spacecount2) == 1 and str1.endswith(puncchar) != str2.endswith(puncchar):
615 continue
616 return False
617 return True
618
620 """checks whether printf format strings match"""
621 count1 = count2 = plural = None
622 # self.hasplural only set by run_filters, not always available
623 if 'hasplural' in self.__dict__:
624 plural = self.hasplural
625 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
626 count2 = var_num2 + 1
627 str2key = match2.group('key')
628 if match2.group('ord'):
629 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
630 count1 = var_num1 + 1
631 if int(match2.group('ord')) == var_num1 + 1:
632 if match2.group('fullvar') != match1.group('fullvar'):
633 return 0
634 elif str2key:
635 str1key = None
636 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
637 count1 = var_num1 + 1
638 if match1.group('key') and str2key == match1.group('key'):
639 str1key = match1.group('key')
640 # '%.0s' "placeholder" in plural will match anything
641 if plural and match2.group('fullvar') == '.0s':
642 continue
643 if match1.group('fullvar') != match2.group('fullvar'):
644 return 0
645 if str1key == None:
646 return 0
647 else:
648 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
649 count1 = var_num1 + 1
650 # '%.0s' "placeholder" in plural will match anything
651 if plural and match2.group('fullvar') == '.0s':
652 continue
653 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
654 return 0
655
656 if count2 is None:
657 if list(printf_pat.finditer(str1)):
658 return 0
659
660 if (count1 or count2) and (count1 != count2):
661 return 0
662 return 1
663
665 """checks whether accelerators are consistent between the two strings"""
666 str1 = self.filtervariables(str1)
667 str2 = self.filtervariables(str2)
668 messages = []
669 for accelmarker in self.config.accelmarkers:
670 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
671 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
672 count1, countbad1 = counter1(str1)
673 count2, countbad2 = counter2(str2)
674 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
675 accel2, bad2 = getaccel(str2)
676 if count1 == count2:
677 continue
678 if count1 == 1 and count2 == 0:
679 if countbad2 == 1:
680 messages.append(u"accelerator %s appears before an invalid "
681 "accelerator character '%s' (eg. space)" %
682 (accelmarker, bad2[0]))
683 else:
684 messages.append(u"accelerator %s is missing from translation" %
685 accelmarker)
686 elif count1 == 0:
687 messages.append(u"accelerator %s does not occur in original "
688 "and should not be in translation" % accelmarker)
689 elif count1 == 1 and count2 > count1:
690 messages.append(u"accelerator %s is repeated in translation" %
691 accelmarker)
692 else:
693 messages.append(u"accelerator %s occurs %d time(s) in original "
694 "and %d time(s) in translation" %
695 (accelmarker, count1, count2))
696 if messages:
697 if "accelerators" in self.config.criticaltests:
698 raise SeriousFilterFailure(messages)
699 else:
700 raise FilterFailure(messages)
701 return True
702
703 # def acceleratedvariables(self, str1, str2):
704 # """checks that no variables are accelerated"""
705 # messages = []
706 # for accelerator in self.config.accelmarkers:
707 # for variablestart, variableend in self.config.varmatches:
708 # error = accelerator + variablestart
709 # if str1.find(error) >= 0:
710 # messages.append(u"original has an accelerated variable")
711 # if str2.find(error) >= 0:
712 # messages.append(u"translation has an accelerated variable")
713 # if messages:
714 # raise FilterFailure(messages)
715 # return True
716
718 """checks whether variables of various forms are consistent between the
719 two strings"""
720 messages = []
721 mismatch1, mismatch2 = [], []
722 varnames1, varnames2 = [], []
723 for startmarker, endmarker in self.config.varmatches:
724 varchecker = decoration.getvariables(startmarker, endmarker)
725 if startmarker and endmarker:
726 if isinstance(endmarker, int):
727 redecorate = lambda var: startmarker + var
728 else:
729 redecorate = lambda var: startmarker + var + endmarker
730 elif startmarker:
731 redecorate = lambda var: startmarker + var
732 else:
733 redecorate = lambda var: var
734 vars1 = varchecker(str1)
735 vars2 = varchecker(str2)
736 if vars1 != vars2:
737 # we use counts to compare so we can handle multiple variables
738 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], \
739 [var for var in vars2 if vars1.count(var) < vars2.count(var)]
740 # filter variable names we've already seen, so they aren't
741 # matched by more than one filter...
742 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
743 varnames1.extend(vars1)
744 varnames2.extend(vars2)
745 vars1 = map(redecorate, vars1)
746 vars2 = map(redecorate, vars2)
747 mismatch1.extend(vars1)
748 mismatch2.extend(vars2)
749 if mismatch1:
750 messages.append(u"do not translate: %s" % u", ".join(mismatch1))
751 elif mismatch2:
752 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
753 if messages and mismatch1:
754 raise SeriousFilterFailure(messages)
755 elif messages:
756 raise FilterFailure(messages)
757 return True
758
760 """checks that function names are not translated"""
761 # We can't just use helpers.funcmatch() since it doesn't ignore order
762 return not set(decoration.getfunctions(str1)).symmetric_difference(set(decoration.getfunctions(str2)))
763
765 """checks that emails are not translated"""
766 return helpers.funcmatch(str1, str2, decoration.getemails)
767
769 """checks that URLs are not translated"""
770 return helpers.funcmatch(str1, str2, decoration.geturls)
771
773 """checks whether numbers of various forms are consistent between the
774 two strings"""
775 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
776
778 """checks whether whitespace at the beginning of the strings matches"""
779 return helpers.funcmatch(str1, str2, decoration.spacestart)
780
782 """checks whether whitespace at the end of the strings matches"""
783 str1 = self.config.lang.punctranslate(str1)
784 return helpers.funcmatch(str1, str2, decoration.spaceend)
785
787 """checks whether punctuation at the beginning of the strings match"""
788 str1 = self.filterxml(self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))))
789 str1 = self.config.lang.punctranslate(str1)
790 str2 = self.filterxml(self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))))
791 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
792
794 """checks whether punctuation at the end of the strings match"""
795 str1 = self.filtervariables(str1)
796 str1 = self.config.lang.punctranslate(str1)
797 str2 = self.filtervariables(str2)
798 str1 = str1.rstrip()
799 str2 = str2.rstrip()
800 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
801
803 """checks that strings that are purely punctuation are not changed"""
804 # this test is a subset of startandend
805 if (decoration.ispurepunctuation(str1)):
806 return str1 == str2
807 else:
808 return not decoration.ispurepunctuation(str2)
809
811 """checks that the number of brackets in both strings match"""
812 str1 = self.filtervariables(str1)
813 str2 = self.filtervariables(str2)
814 messages = []
815 missing = []
816 extra = []
817 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
818 count1 = str1.count(bracket)
819 count2 = str2.count(bracket)
820 if count2 < count1:
821 missing.append(u"'%s'" % bracket)
822 elif count2 > count1:
823 extra.append(u"'%s'" % bracket)
824 if missing:
825 messages.append(u"translation is missing %s" % u", ".join(missing))
826 if extra:
827 messages.append(u"translation has extra %s" % u", ".join(extra))
828 if messages:
829 raise FilterFailure(messages)
830 return True
831
833 """checks that the number of sentences in both strings match"""
834 str1 = self.filteraccelerators(str1)
835 str2 = self.filteraccelerators(str2)
836 sentences1 = len(self.config.sourcelang.sentences(str1))
837 sentences2 = len(self.config.lang.sentences(str2))
838 if not sentences1 == sentences2:
839 raise FilterFailure(u"The number of sentences differ: "
840 "%d versus %d" % (sentences1, sentences2))
841 return True
842
844 """checks that options are not translated"""
845 str1 = self.filtervariables(str1)
846 for word1 in str1.split():
847 if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
848 parts = word1.split(u"=")
849 if not parts[0] in str2:
850 raise FilterFailure(u"The option %s does not occur or is "
851 "translated in the translation." % parts[0])
852 if len(parts) > 1 and parts[1] in str2:
853 raise FilterFailure(u"The parameter %(param)s in option %(option)s "
854 "is not translated." % {"param": parts[1],
855 "option": parts[0]})
856 return True
857
859 """checks that the message starts with the correct capitalisation"""
860 str1 = self.filteraccelerators(str1)
861 str2 = self.filteraccelerators(str2)
862 if len(str1) > 1 and len(str2) > 1:
863 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
864 if len(str1) == 0 and len(str2) == 0:
865 return True
866 if len(str1) == 0 or len(str2) == 0:
867 return False
868 return True
869
871 """checks the capitalisation of two strings isn't wildly different"""
872 str1 = self.removevariables(str1)
873 str2 = self.removevariables(str2)
874 # TODO: review this. The 'I' is specific to English, so it probably
875 # serves no purpose to get sourcelang.sentenceend
876 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", str1)
877 capitals1 = helpers.filtercount(str1, unicode.isupper)
878 capitals2 = helpers.filtercount(str2, unicode.isupper)
879 alpha1 = helpers.filtercount(str1, unicode.isalpha)
880 alpha2 = helpers.filtercount(str2, unicode.isalpha)
881 # Capture the all caps case
882 if capitals1 == alpha1:
883 return capitals2 == alpha2
884 # some heuristic tests to try and see that the style of capitals is
885 # vaguely the same
886 if capitals1 == 0 or capitals1 == 1:
887 return capitals2 == capitals1
888 elif capitals1 < len(str1) / 10:
889 return capitals2 <= len(str2) / 8
890 elif len(str1) < 10:
891 return abs(capitals1 - capitals2) < 3
892 elif capitals1 > len(str1) * 6 / 10:
893 return capitals2 > len(str2) * 6 / 10
894 else:
895 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
896
898 """checks that acronyms that appear are unchanged"""
899 acronyms = []
900 allowed = []
901 for startmatch, endmatch in self.config.varmatches:
902 allowed += decoration.getvariables(startmatch, endmatch)(str1)
903 allowed += self.config.musttranslatewords.keys()
904 str1 = self.filteraccelerators(self.filtervariables(str1))
905 iter = self.config.lang.word_iter(str1)
906 str2 = self.filteraccelerators(self.filtervariables(str2))
907 #TODO: strip XML? - should provide better error messages
908 # see mail/chrome/messanger/smime.properties.po
909 #TODO: consider limiting the word length for recognising acronyms to
910 #something like 5/6 characters
911 for word in iter:
912 if word.isupper() and len(word) > 1 and word not in allowed:
913 if str2.find(word) == -1:
914 acronyms.append(word)
915 if acronyms:
916 raise FilterFailure(u"acronyms should not be translated: %s" %
917 u", ".join(acronyms))
918 return True
919
921 """checks for repeated words in the translation"""
922 lastword = ""
923 without_newlines = "\n".join(str2.split("\n"))
924 words = self.filteraccelerators(self.removevariables(self.filterxml(without_newlines))).replace(u".", u"").lower().split()
925 for word in words:
926 if word == lastword and word not in self.config.lang.validdoublewords:
927 raise FilterFailure(u"The word '%s' is repeated" % word)
928 lastword = word
929 return True
930
932 """checks that words configured as untranslatable appear in the
933 translation too"""
934 if not self.config.notranslatewords:
935 return True
936 str1 = self.filtervariables(str1)
937 str2 = self.filtervariables(str2)
938 #The above is full of strange quotes and things in utf-8 encoding.
939 #single apostrophe perhaps problematic in words like "doesn't"
940 for seperator in self.config.punctuation:
941 str1 = str1.replace(seperator, u" ")
942 str2 = str2.replace(seperator, u" ")
943 words1 = self.filteraccelerators(str1).split()
944 words2 = self.filteraccelerators(str2).split()
945 stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2]
946 if stopwords:
947 raise FilterFailure(u"do not translate: %s" %
948 (u", ".join(stopwords)))
949 return True
950
952 """checks that words configured as definitely translatable don't appear
953 in the translation"""
954 if not self.config.musttranslatewords:
955 return True
956 str1 = self.removevariables(str1)
957 str2 = self.removevariables(str2)
958 # The above is full of strange quotes and things in utf-8 encoding.
959 # single apostrophe perhaps problematic in words like "doesn't"
960 for seperator in self.config.punctuation:
961 str1 = str1.replace(seperator, u" ")
962 str2 = str2.replace(seperator, u" ")
963 words1 = self.filteraccelerators(str1).split()
964 words2 = self.filteraccelerators(str2).split()
965 stopwords = [word for word in words1 if word in self.config.musttranslatewords and word in words2]
966 if stopwords:
967 raise FilterFailure(u"please translate: %s" % (u", ".join(stopwords)))
968 return True
969
971 """checks that only characters specified as valid appear in the
972 translation"""
973 if not self.config.validcharsmap:
974 return True
975 invalid1 = str1.translate(self.config.validcharsmap)
976 invalid2 = str2.translate(self.config.validcharsmap)
977 invalidchars = [u"'%s' (\\u%04x)" % (invalidchar, ord(invalidchar)) for invalidchar in invalid2 if invalidchar not in invalid1]
978 if invalidchars:
979 raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
980 return True
981
983 """checks that file paths have not been translated"""
984 for word1 in self.filteraccelerators(str1).split():
985 if word1.startswith(u"/"):
986 if not helpers.countsmatch(str1, str2, (word1,)):
987 return False
988 return True
989
1017
1019 """checks to ensure that no KDE style comments appear in the
1020 translation"""
1021 return str2.find(u"\n_:") == -1 and not str2.startswith(u"_:")
1022
1024 """checks for Gettext compendium conflicts (#-#-#-#-#)"""
1025 return str2.find(u"#-#-#-#-#") == -1
1026
1028 """checks for English style plural(s) for you to review"""
1029
1030 def numberofpatterns(string, patterns):
1031 number = 0
1032 for pattern in patterns:
1033 number += len(re.findall(pattern, string))
1034 return number
1035
1036 sourcepatterns = ["\(s\)"]
1037 targetpatterns = ["\(s\)"]
1038 sourcecount = numberofpatterns(str1, sourcepatterns)
1039 targetcount = numberofpatterns(str2, targetpatterns)
1040 if self.config.lang.nplurals == 1:
1041 return not targetcount
1042 return sourcecount == targetcount
1043
1045 """checks words that don't pass a spell check"""
1046 if not self.config.targetlanguage:
1047 return True
1048 if not spelling.available:
1049 return True
1050 # TODO: filterxml?
1051 str1 = self.filteraccelerators_by_list(self.filtervariables(str1),
1052 self.config.sourcelang.validaccel)
1053 str2 = self.filteraccelerators_by_list(self.filtervariables(str2),
1054 self.config.lang.validaccel)
1055 ignore1 = []
1056 messages = []
1057 for word, index, suggestions in spelling.check(str1, lang="en"):
1058 ignore1.append(word)
1059 for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
1060 if word in self.config.notranslatewords:
1061 continue
1062 if word in ignore1:
1063 continue
1064 # hack to ignore hyphenisation rules
1065 if word in suggestions:
1066 continue
1067 messages.append(u"check spelling of %s (could be %s)" %
1068 (word, u" / ".join(suggestions[:5])))
1069 if messages:
1070 raise FilterFailure(messages)
1071 return True
1072
1074 """checks for messages containing translation credits instead of normal
1075 translations."""
1076 return not str1 in self.config.credit_sources
1077
# Precondition table: if the filter named by a key is run and fails, the
# tests listed in its value are ignored.  The order of keys and of the items
# inside each tuple is kept exactly as originally written.
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "endwhitespace", "startwhitespace", "escapes",
        "doublequoting", "singlequoting", "filepaths", "purepunc",
        "doublespacing", "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords", "emails",
        "simpleplurals", "urls", "printf", "tabs",
        "newlines", "functions", "options", "blank",
        "nplurals", "gconf",
    ),
    "blank": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "endwhitespace", "startwhitespace", "escapes",
        "doublequoting", "singlequoting", "filepaths", "purepunc",
        "doublespacing", "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords", "emails",
        "simpleplurals", "urls", "printf", "tabs",
        "newlines", "functions", "options", "gconf",
    ),
    "credits": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing", "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "purepunc": ("startcaps", "options"),
    # The entry below stays disabled.  It has caused problems since
    # Python 2.6: startcaps is now seen as an important test to always
    # execute, and it could now be run before it is blocked by a failing
    # "untranslated" or "blank" test.  This is probably due to a slightly
    # different implementation of the internal dict handling since
    # Python 2.6.  We should never have relied on this ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes", "numbers",
        "startpunc", "long", "variables", "startcaps",
        "sentencecount", "simplecaps", "doublespacing", "endpunc",
        "xmltags", "startwhitespace", "endwhitespace", "singlequoting",
        "doublequoting", "filepaths", "purepunc", "doublewords",
        "printf",
    ),
}
1130
1131 # code to actually run the tests (use unittest?)
1132
# Settings merged into a checker's configuration through
# checkerconfig.update(openofficeconfig).
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    # (start, end) marker pairs describing variables.
    varmatches=[
        ("&", ";"),
        ("%", "%"),
        ("%", None),
        ("%", 0),
        ("$(", ")"),
        ("$", "$"),
        ("${", "}"),
        ("#", "#"),
        ("#", 1),
        ("#", 0),
        ("($", ")"),
        ("$[", "]"),
        ("[", "]"),
        ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
1142
1143
1145
1147 checkerconfig = kwargs.get("checkerconfig", None)
1148 if checkerconfig is None:
1149 checkerconfig = CheckerConfig()
1150 kwargs["checkerconfig"] = checkerconfig
1151 checkerconfig.update(openofficeconfig)
1152 StandardChecker.__init__(self, **kwargs)
1153
# Settings merged into a checker's configuration through
# checkerconfig.update(mozillaconfig).
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    # (start, end) marker pairs describing variables.
    varmatches=[
        ("&", ";"),
        ("%", "%"),
        ("%", 1),
        ("$", "$"),
        ("$", None),
        ("#", 1),
        ("${", "}"),
        ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)
1160
1161
1163
1165 checkerconfig = kwargs.get("checkerconfig", None)
1166 if checkerconfig is None:
1167 checkerconfig = CheckerConfig()
1168 kwargs["checkerconfig"] = checkerconfig
1169 checkerconfig.update(mozillaconfig)
1170 StandardChecker.__init__(self, **kwargs)
1171
1179
# Settings merged into a checker's configuration through
# checkerconfig.update(drupalconfig).
drupalconfig = CheckerConfig(
    # (start, end) marker pairs describing variables.
    varmatches=[
        ("%", None),
        ("@", None),
        ("!", None),
    ],
)
1183
1184
1186
1188 checkerconfig = kwargs.get("checkerconfig", None)
1189 if checkerconfig is None:
1190 checkerconfig = CheckerConfig()
1191 kwargs["checkerconfig"] = checkerconfig
1192 checkerconfig.update(drupalconfig)
1193 StandardChecker.__init__(self, **kwargs)
1194
# Settings merged into a checker's configuration through
# checkerconfig.update(gnomeconfig).
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    # (start, end) marker pairs describing variables.
    varmatches=[
        ("%", 1),
        ("$(", ")"),
    ],
    # Source strings that hold translator credits rather than a normal
    # translatable message.
    credit_sources=[u"translator-credits"],
)
1200
1201
1203
1205 checkerconfig = kwargs.get("checkerconfig", None)
1206 if checkerconfig is None:
1207 checkerconfig = CheckerConfig()
1208 kwargs["checkerconfig"] = checkerconfig
1209 checkerconfig.update(gnomeconfig)
1210 StandardChecker.__init__(self, **kwargs)
1211
1213 """Checks if we have any gconf config settings translated."""
1214 for location in self.locations:
1215 if location.find('schemas.in') != -1:
1216 gconf_attributes = gconf_attribute_re.findall(str1)
1217 #stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2]
1218 stopwords = [word for word in gconf_attributes if word[1:-1] not in str2]
1219 if stopwords:
1220 raise FilterFailure(u"do not translate gconf attribute: %s" %
1221 (u", ".join(stopwords)))
1222 return True
1223
# Settings merged into a checker's configuration through
# checkerconfig.update(kdeconfig).
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    # (start, end) marker pairs describing variables.
    varmatches=[("%", 1)],
    # Source strings that hold translator credits rather than a normal
    # translatable message.
    credit_sources=[
        u"Your names",
        u"Your emails",
        u"ROLES_OF_TRANSLATORS",
    ],
)
1229
1230
1232
1234 # TODO allow setup of KDE plural and translator comments so that they do
1235 # not create false positives
1236 checkerconfig = kwargs.get("checkerconfig", None)
1237 if checkerconfig is None:
1238 checkerconfig = CheckerConfig()
1239 kwargs["checkerconfig"] = checkerconfig
1240 checkerconfig.update(kdeconfig)
1241 StandardChecker.__init__(self, **kwargs)
1242
# Settings merged into a checker's configuration through
# checkerconfig.update(cclicenseconfig).
cclicenseconfig = CheckerConfig(
    # (start, end) marker pairs describing variables.
    varmatches=[("@", "@")],
)
1244
1245
1247
1249 checkerconfig = kwargs.get("checkerconfig", None)
1250 if checkerconfig is None:
1251 checkerconfig = CheckerConfig()
1252 kwargs["checkerconfig"] = checkerconfig
1253 checkerconfig.update(cclicenseconfig)
1254 StandardChecker.__init__(self, **kwargs)
1255
# Lookup table from a project name to the checker class used for it.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    # "kde" and "wx" share the same checker class.
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
}
1265
1266
1268 """The standard checks for common checks on translation units."""
1269
1273
1277
1279 """Checks for the correct number of noun forms for plural
1280 translations."""
1281 if unit.hasplural():
1282 # if we don't have a valid nplurals value, don't run the test
1283 nplurals = self.config.lang.nplurals
1284 if nplurals > 0:
1285 return len(unit.target.strings) == nplurals
1286 return True
1287
1289 """Checks if there is at least one suggested translation for this
1290 unit."""
1291 self.suggestion_store = getattr(self, 'suggestion_store', None)
1292 suggestions = []
1293 if self.suggestion_store:
1294 suggestions = self.suggestion_store.findunits(unit.source)
1295 elif xliff and isinstance(unit, xliff.xliffunit):
1296 # TODO: we probably want to filter them somehow
1297 suggestions = unit.getalttrans()
1298 return not bool(suggestions)
1299
1300
1302 """verifies that the tests pass for a pair of strings"""
1303 from translate.storage import base
1304 str1 = data.normalized_unicode(str1)
1305 str2 = data.normalized_unicode(str2)
1306 unit = base.TranslationUnit(str1)
1307 unit.target = str2
1308 checker = StandardChecker(excludefilters=ignorelist)
1309 failures = checker.run_filters(unit)
1310 for test in failures:
1311 print "failure: %s: %s\n %r\n %r" % \
1312 (test, failures[test], str1, str2)
1313 return failures
1314
1315
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Every (source, target) pair in C{pairs} is handed to L{runtests}, which
    prints and returns the failures found for that pair.  A pair is counted
    as passed only when that result is empty.  A summary line with the
    number of passed pairs is printed at the end.

    @param pairs: a sequence of (str1, str2) tuples; it must support len()
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests() returns the failures, so a truthy result means the pair
        # FAILED.  The earlier code incremented the counter on a truthy
        # result and therefore reported the failing pairs as passed.
        if not runtests(str1, str2):
            passed += 1
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1324
1325
if __name__ == '__main__':
    # Ad-hoc (source, target) sample pairs that are fed through the checks
    # when this module is executed directly as a script.
    testset = [
        (r"simple",
         r"somple"),
        (r"\this equals \that",
         r"does \this equal \that?"),
        (r"this \'equals\' that",
         r"this 'equals' that"),
        (r" start and end! they must match.",
         r"start and end! they must match."),
        (r"check for matching %variables marked like %this",
         r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this",
         r"%that %variable is marked"),
        (r"check for mismatching %variables% too",
         r"how many %variable% are marked"),
        (r"%% %%",
         r"%%"),
        (r"Row: %1, Column: %2",
         r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase",
         r"it is all lowercase"),
        (r"simple lowercase",
         r"It Is All Lowercase"),
        (r"Simple First Letter Capitals",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"ALL CAPITALS"),
        (r"forgot to translate",
         r" "),
    ]
    batchruntests(testset)
1348
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Nov 19 17:48:57 2010 | http://epydoc.sourceforge.net |