Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  import copy 
 26  import cStringIO 
 27  import re 
 28  import urllib 
 29   
 30  from translate.lang import data 
 31  from translate.misc.multistring import multistring 
 32  from translate.misc import quote 
 33  from translate.misc import textwrap 
 34  from translate.storage import pocommon, base, poparser 
 35  from translate.storage.pocommon import encodingToUse 
 36   
 37  lsep = "\n#: " 
 38  """Seperator for #: entries""" 
 39   
 40  # general functions for quoting / unquoting po strings 
 41   
 42  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 43  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 44   
 45   
def escapeforpo(line):
    """Escape the special characters of one line for the po format.

    Every character that is a key of C{po_escape_map} is replaced by its
    escaped spelling.  The line is assumed to hold no newline.

    @param line: unescaped text
    @return: the escaped text
    """
    # Collect the position of every character that needs escaping.
    positions = []
    for special_char in po_escape_map:
        positions.extend(quote.find_all(line, special_char))

    # Walk the distinct positions left to right, copying the plain text in
    # between and substituting the escape for each special character.
    pieces = []
    cursor = 0
    for position in sorted(set(positions)):
        pieces.append(line[cursor:position])
        pieces.append(po_escape_map[line[position:position + 1]])
        cursor = position + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
64 65
def unescapehandler(escape):
    """Translate one po escape sequence into the character it stands for.

    Sequences that are not listed in C{po_unescape_map} are returned
    unchanged.
    """
    try:
        return po_unescape_map[escape]
    except KeyError:
        return escape
68 69
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without touching its whitespace.

    @return: the list of wrapped chunks
    """
    chunks = textwrap.wrap(line, 76, replace_whitespace=False,
                           expand_tabs=False, drop_whitespace=False)

    # A continuation chunk should not start with a space: hand a leading
    # space over to the end of the chunk that precedes it.
    for position in range(1, len(chunks)):
        chunk = chunks[position]
        if chunk.startswith(' '):
            chunks[position] = chunk[1:]
            chunks[position - 1] += ' '
    return chunks
84 85
def quoteforpo(text):
    """Quote the given text for a PO file, returning quoted and escaped lines.

    Text that spans several lines, or whose single line is longer than 71
    characters, is wrapped with L{wrapline}; every chunk is escaped with
    L{escapeforpo} and enclosed in double quotes.

    @param text: the unescaped text, or None
    @return: a list of quoted lines (empty for None or empty text)
    """
    polines = []
    if text is None:
        return polines
    lines = text.split("\n")
    if len(lines) > 1 or len(lines[0]) > 71:
        # Start with an empty string line, except when the text is just one
        # line terminated by a newline.
        if len(lines) != 2 or lines[1]:
            polines.append('""')
        # Every line except the last one was followed by a newline in text.
        for line in lines[:-1]:
            #TODO: We should only wrap after escaping
            chunks = wrapline(line)
            for chunk in chunks[:-1]:
                polines.append('"' + escapeforpo(chunk) + '"')
            if chunks and chunks[-1]:
                polines.append('"' + escapeforpo(chunks[-1]) + '\\n"')
            else:
                # Either the line is blank, or wrapline() emptied the last
                # chunk by moving its single leading space to the previous
                # chunk.  The newline must still be written out, otherwise
                # the line break is lost.
                polines.append('"\\n"')
    if lines[-1]:
        polines.extend(['"' + escapeforpo(chunk) + '"'
                        for chunk in wrapline(lines[-1])])
    return polines
108 109
def extractpoline(line):
    """Strip the quotes from a po file line and unescape its content.

    @param line: a quoted line from a po file (msgid or msgstr)
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\',
                                        includeescapes=unescapehandler)
    # Only the first element of the result is the extracted text.
    return result[0]
117 118
def unquotefrompo(postr):
    """Unquote each line of a list of quoted po lines and join the results."""
    return "".join(map(extractpoline, postr))
121 122
def is_null(lst):
    """Tell whether a list of quoted po lines carries no text.

    That is the case for an empty list and for a list holding just the
    empty quoted string.
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
125 126
def extractstr(string):
    """Return the part of string from its first to its last double quote.

    If string holds no double quote at all, both searches yield -1 and the
    result is the final character of string (if any) followed by a double
    quote.
    """
    first = string.find('"')
    last = string.rfind('"')
    if last == -1:
        return string[first:] + '"'
    return string[first:last + 1]
134 135
class pounit(pocommon.pounit):
    """A single entry (unit) of a gettext .po file.

    Each part of the entry is stored as a list of quoted lines; a plural
    msgstr is stored as a dict of such lists keyed by plural index.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand source to the parent constructor.

        @param source: the unescaped source text, if any
        @param encoding: encoding name, normalised with encodingToUse()
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments (only when blankall is true)"""
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the comment lists of this unit, as a list of lists."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build the unescaped multistring for the given quoted lines.

        The plural form is appended whenever L{hasplural} is true, which
        looks at self.msgid_plural and not at the msgid_plural argument.
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote source for storage.

        @param source: a string, a multistring or a list of strings
        @return: the tuple (msgid lines, msgid_plural lines)
        """
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self._rich_source = None
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            # Plural forms: go through the keys in sorted order (as
            # _getmsgpartstr does) instead of relying on dict ordering.
            forms = [unquotefrompo(self.msgstr[key]) for key in sorted(self.msgstr)]
            return multistring(forms, self._encoding)
        return multistring(unquotefrompo(self.msgstr), self._encoding)

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, a multistring, a list or a dict of strings
        @raise ValueError: if the unit has no plural but target is a list
            or dict that does not hold exactly one element
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # multistring is tested before basestring on purpose
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict((index, quoteforpo(item))
                               for index, item in enumerate(target))
        elif isinstance(target, dict):
            self.msgstr = dict((index, quoteforpo(item))
                               for index, item in target.items())
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getalttrans(self):
        """Return a list of alternate units.

        Previous msgid and current msgstr is combined to form a single
        alternative unit."""
        prev_source = self.prev_source
        if prev_source and self.isfuzzy():
            unit = type(self)(prev_source)
            unit.target = self.target
            # Already released versions of Virtaal (0.6.x) only supported XLIFF
            # alternatives, and expect .xmlelement.get().
            # This can be removed soon:
            unit.xmlelement = dict()
            return [unit]
        return []

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @raise ValueError: if origin is not one of the accepted values
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; one comment line is made per line of text
        @param origin: "programmer", "developer" or "source code" selects
            the automatic comments, anything else the other comments
        @param position: "append" or "prepend"; with any other value the
            new lines replace the existing comments
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        autocomments = origin in ["programmer", "developer", "source code"]
        if autocomments:
            commentlist = self.automaticcomments
            linestart = "#. "
        else:
            commentlist = self.othercomments
            linestart = "# "
        newcomments = [linestart + line + "\n" for line in text.split("\n")]
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit deeply, except the attributes named in __shallow__.

        @param memo: the memo dictionary of the running deep copy, if any
        """
        # A fresh dictionary per call: a mutable default would be shared
        # by all direct calls.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that references back to
        # this unit resolve to it instead of being copied again.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the length of the unquoted msgid (plus msgid_plural)."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Return the length of the unquoted msgstr.

        Plural forms are joined with a newline before measuring.
        """
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr) for msgstr in self.msgstr.values()])
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True
        """

        def mergelists(list1, list2, split=False):
            """Add to list1 the lines of list2 it does not hold yet."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if item not in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Tell whether this unit is a header: blank msgid, msgidcomments
        and msgctxt, but a non-blank msgstr."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt))

    def isblank(self):
        """Tell whether msgid, msgstr and msgctxt are all blank.

        Headers and units with msgidcomments are never blank.
        """
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = re.compile("\\b%s\\b" % typecomment)
        for tcline in self.typecomments:
            if pattern.search(tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) == present:
            return
        if present:
            if len(self.typecomments):
                # There is already a comment, so we have to add onto it
                self.typecomments[0] = "%s, %s\n" % (self.typecomments[0][:-1], typecomment)
            else:
                self.typecomments.append("#, %s\n" % typecomment)
        else:
            # this should handle word boundaries properly ...
            stripped = [re.sub("\\b%s\\b[ \t,]*" % typecomment, "", tcline)
                        for tcline in self.typecomments]
            # drop the lines that are left without any flag
            self.typecomments = [tcline for tcline in stripped
                                 if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Tell whether the unit is fuzzy.

        @raise ValueError: if the "fuzzy" type comment and the state of the
            unit disagree
        """
        state_isfuzzy = self.STATE[self.S_FUZZY][0] <= self.get_state_n() < self.STATE[self.S_FUZZY][1]
        if self.hastypecomment('fuzzy') != state_isfuzzy:
            raise ValueError('Inconsistent fuzzy state')
        return super(pounit, self).isfuzzy()

    def markfuzzy(self, present=True):
        """Set the state of the unit: fuzzy if present is true, otherwise
        untranslated or translated depending on whether msgstr is blank."""
        if present:
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif (self.hasplural() and not self._msgstrlen()) or is_null(self.msgstr):
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])

    def _domarkfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def infer_state(self):
        """Derive the state from the obsolete flag and the "fuzzy" type comment."""
        if self.obsolete:
            self.makeobsolete()
        else:
            self.markfuzzy(self.hastypecomment('fuzzy'))

    def isobsolete(self):
        """Return the obsolete flag of this unit."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        # NOTE: msgctxt is recorded as obsolete but, unlike the other
        # parts, it is not blanked.
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # the context goes back into msgctxt (not into msgid)
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the po source text src into this unit with poparser."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Return the text of one part (msgctxt, msgid, msgstr, ...).

        @param partname: the keyword that starts the part
        @param partlines: the quoted lines of the part, or a dict of them
            keyed by plural index
        @param partcomments: quoted comment lines to place inside the part
        """
        if isinstance(partlines, dict):
            # one "partname[index]" part per plural form, in index order
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments)
                            for partkey in sorted(partlines)])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""

        def add_prev_msgid_lines(lines, prefix, header, var):
            """Append the lines of one previous-value part, if it has any."""
            if len(var) > 0:
                lines.append("%s %s %s\n" % (prefix, header, var[0]))
                lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

        def add_prev_msgid_info(lines, prefix):
            """Append the previous msgctxt, msgid and msgid_plural."""
            add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            add_prev_msgid_info(obsoletelines, prefix="#~|")
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            return u"".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return u"".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines, prefix="#|")
        if self.msgctxt:
            lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
        return u"".join(lines)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        # undo the quoting applied by addlocation()
        return [urllib.unquote_plus(location) for location in locations]

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        # locations are separated by whitespace, so a space inside a
        # location has to be quoted
        if location.find(" ") != -1:
            location = urllib.quote_plus(location)
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: the text to extract from; the msgidcomments of this
            unit are used when it is empty or None
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgidcomments by the given comment, or clear them
        if msgidcomment is empty."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def setcontext(self, context):
        """Set msgctxt to the given (unescaped) context."""
        context = data.forceunicode(context)
        self.msgctxt = quoteforpo(context)

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomments:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id
735 736
737 -class pofile(pocommon.pofile):
738 """A .po file containing various units""" 739 UnitClass = pounit 740
741 - def parse(self, input):
742 """Parses the given file or file source string.""" 743 if True: 744 # try: 745 if hasattr(input, 'name'): 746 self.filename = input.name 747 elif not getattr(self, 'filename', ''): 748 self.filename = '' 749 if isinstance(input, str): 750 input = cStringIO.StringIO(input) 751 # clear units to get rid of automatically generated headers before parsing 752 self.units = [] 753 poparser.parse_units(poparser.ParseState(input, pounit), self)
754 # except Exception, e: 755 # raise base.ParseError(e) 756
757 - def removeduplicates(self, duplicatestyle="merge"):
758 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 759 # TODO: can we handle consecutive calls to removeduplicates()? What 760 # about files already containing msgctxt? - test 761 id_dict = {} 762 uniqueunits = [] 763 # TODO: this is using a list as the pos aren't hashable, but this is slow. 764 # probably not used frequently enough to worry about it, though. 765 markedpos = [] 766 767 def addcomment(thepo): 768 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 769 markedpos.append(thepo)
770 for thepo in self.units: 771 id = thepo.getid() 772 if thepo.isheader() and not thepo.getlocations(): 773 # header msgids shouldn't be merged... 774 uniqueunits.append(thepo) 775 elif id in id_dict: 776 if duplicatestyle == "merge": 777 if id: 778 id_dict[id].merge(thepo) 779 else: 780 addcomment(thepo) 781 uniqueunits.append(thepo) 782 elif duplicatestyle == "msgctxt": 783 origpo = id_dict[id] 784 if origpo not in markedpos: 785 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 786 markedpos.append(thepo) 787 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 788 uniqueunits.append(thepo) 789 else: 790 if not id: 791 if duplicatestyle == "merge": 792 addcomment(thepo) 793 else: 794 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 795 id_dict[id] = thepo 796 uniqueunits.append(thepo) 797 self.units = uniqueunits
798
799 - def __str__(self):
800 """Convert to a string. double check that unicode is handled somehow here""" 801 output = self._getoutput() 802 if isinstance(output, unicode): 803 try: 804 return output.encode(getattr(self, "_encoding", "UTF-8")) 805 except UnicodeEncodeError, e: 806 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8") 807 self._encoding = "UTF-8" 808 for unit in self.units: 809 unit._encoding = "UTF-8" 810 return self._getoutput().encode("UTF-8") 811 812 return output
813
814 - def _getoutput(self):
815 """convert the units back to lines""" 816 lines = [] 817 for unit in self.units: 818 unitsrc = unit._getoutput() + u"\n" 819 lines.append(unitsrc) 820 lines = u"".join(lines).rstrip() 821 #After the last pounit we will have \n\n and we only want to end in \n: 822 if lines: 823 lines += u"\n" 824 return lines
825
826 - def encode(self, lines):
827 """encode any unicode strings in lines in self._encoding""" 828 newlines = [] 829 encoding = self._encoding 830 if encoding is None or encoding.lower() == "charset": 831 encoding = 'UTF-8' 832 for line in lines: 833 if isinstance(line, unicode): 834 line = line.encode(encoding) 835 newlines.append(line) 836 return newlines
837
838 - def decode(self, lines):
839 """decode any non-unicode strings in lines with self._encoding""" 840 newlines = [] 841 for line in lines: 842 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 843 try: 844 line = line.decode(self._encoding) 845 except UnicodeError, e: 846 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 847 newlines.append(line) 848 return newlines
849
850 - def unit_iter(self):
851 for unit in self.units: 852 if not (unit.isheader() or unit.isobsolete()): 853 yield unit
854