1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Classes that hold units of .oo files (oounit) or entire files (oofile).
24
25 These are specific .oo files for localisation exported by OpenOffice.org - SDF
26 format (previously known as GSI files). For an overview of the format, see
27 U{http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html}
28
29 The behaviour in terms of escaping is explained in detail in the programming
30 comments.
31 """
32
33
34 import os
35 import re
36 import warnings
37
38 from translate.misc import quote
39 from translate.misc import wStringIO
40
41
42
# Characters that are kept unchanged when a filename/key is normalized.
normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# 256-character table indexed by character code: an allowed character maps to
# itself, every other character maps to "_".  Built with a single join rather
# than repeated string concatenation.
normalizetable = "".join(
    [char if char in normalfilenamechars else "_" for char in map(chr, range(256))]
)
50
51
53
55 self.normalchars = {}
56 for char in normalchars:
57 self.normalchars[ord(char)] = char
58
60 return self.normalchars.get(key, u"_")
61
62 unormalizetable = unormalizechar(normalfilenamechars.decode("ascii"))
63
64
71
72
74 """converts an oo key tuple into a unique identifier
75
76 @param ookey: an oo key
77 @type ookey: tuple
78 @param long_keys: Use long keys
79 @type long_keys: Boolean
80 @rtype: str
81 @return: unique ascii identifier
82 """
83 project, sourcefile, resourcetype, groupid, localid, platform = ookey
84 sourcefile = sourcefile.replace('\\', '/')
85 if long_keys:
86 sourcebase = os.path.join(project, sourcefile)
87 else:
88 sourceparts = sourcefile.split('/')
89 sourcebase = "".join(sourceparts[-1:])
90 if len(groupid) == 0 or len(localid) == 0:
91 fullid = groupid + localid
92 else:
93 fullid = groupid + "." + localid
94 if resourcetype:
95 fullid = fullid + "." + resourcetype
96 key = "%s#%s" % (sourcebase, fullid)
97 return normalizefilename(key)
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def escape_text(text):
    """Escape newline, tab and carriage-return characters in text.

    Each of these characters is replaced by its two-character backslash
    form (a backslash followed by n, t or r).  Backslashes already present
    in the text are left untouched.
    """
    for raw, escaped in (("\n", "\\n"), ("\t", "\\t"), ("\r", "\\r")):
        text = text.replace(raw, escaped)
    return text
123
124
# Escape sequences that unescape_text turns back into control characters.
_UNESCAPE_MAP = {"\\n": "\n", "\\t": "\t", "\\r": "\r"}
# A doubled backslash is matched first so that it shields a following n/t/r
# from being treated as an escape sequence.
_UNESCAPE_RE = re.compile(r"\\\\|\\[ntr]")


def unescape_text(text):
    """Unescapes SDF text to be suitable for unit consumption.

    The two-character sequences backslash-n, backslash-t and backslash-r
    become newline, tab and carriage return.  A doubled backslash is kept
    exactly as it is (still doubled) and protects whatever follows it.

    The substitution is done in one left-to-right pass, so no placeholder
    character is needed and text that already contains a BEL character is
    returned intact.
    """
    def _replace(match):
        # Doubled backslashes are not in the map and are returned as matched.
        sequence = match.group(0)
        return _UNESCAPE_MAP.get(sequence, sequence)

    return _UNESCAPE_RE.sub(_replace, text)
129
# Matches one lowercase tag: "<", an optional "/", a name made of a-z, "_" or
# "-", optionally a single name="value" attribute, optional spaces, an
# optional "/" and ">".  Written as a raw string so that "\-" reaches the
# regex engine without being an invalid string escape.
helptagre = re.compile(r'''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''')
131
132
134 """Escapes the help text as it would be in an SDF file.
135
136 <, >, " are only escaped in <[[:lower:]]> tags. Some HTML tags make it in in
137 lowercase so those are dealt with. Some OpenOffice.org help tags are not
138 escaped.
139 """
140 text = text.replace("\\", "\\\\")
141 for tag in helptagre.findall(text):
142 escapethistag = False
143 for escape_tag in ["ahelp", "link", "item", "emph", "defaultinline", "switchinline", "caseinline", "variable", "bookmark_value", "image", "embedvar", "alt"]:
144 if tag.startswith("<%s" % escape_tag) or tag == "</%s>" % escape_tag:
145 escapethistag = True
146 if tag in ["<br/>", "<help-id-missing/>"]:
147 escapethistag = True
148 if escapethistag:
149 escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"')
150 text = text.replace(tag, escaped_tag)
151 return text
152
153
155 """Unescapes help text read from an SDF file: removes the backslash escaping of <, > and " and collapses doubled backslashes."""
156 return text.replace(r"\<", "<").replace(r"\>", ">").replace(r'\"', '"').replace(r"\\", "\\")
157
158
160 """Encode a Unicode string to UTF-8; any other value is returned unchanged"""
161 if isinstance(text, unicode):
162 return text.encode('UTF-8')
163 return text
164
165
167 """this represents one line, one translation in an .oo file"""
168
170 """construct an ooline from its parts"""
171 if parts is None:
172 self.project, self.sourcefile, self.dummy, self.resourcetype, \
173 self.groupid, self.localid, self.helpid, self.platform, \
174 self.width, self.languageid, self.text, self.helptext, \
175 self.quickhelptext, self.title, self.timestamp = [""] * 15
176 else:
177 self.setparts(parts)
178
180 """create a line from its tab-delimited parts"""
181 if len(parts) != 15:
182 warnings.warn("oo line contains %d parts, it should contain 15: %r" % \
183 (len(parts), parts))
184 newparts = list(parts)
185 if len(newparts) < 15:
186 newparts = newparts + [""] * (15-len(newparts))
187 else:
188 newparts = newparts[:15]
189 parts = tuple(newparts)
190 self.project, self.sourcefile, self.dummy, self.resourcetype, \
191 self.groupid, self.localid, self.helpid, self.platform, \
192 self.width, self.languageid, self._text, self.helptext, \
193 self.quickhelptext, self.title, self.timestamp = parts
194
196 """return a list of parts in this line"""
197 return (self.project, self.sourcefile, self.dummy, self.resourcetype,
198 self.groupid, self.localid, self.helpid, self.platform,
199 self.width, self.languageid, self._text, self.helptext,
200 self.quickhelptext, self.title, self.timestamp)
201
203 """Obtains the text column and handle escaping."""
204 if self.sourcefile.endswith(".xhp"):
205 return unescape_help_text(self._text)
206 else:
207 return unescape_text(self._text)
208
def settext(self, text):
    """Store text in the text column, escaping it first.

    Lines whose source file ends in ".xhp" are escaped with
    escape_help_text; all other lines are escaped with escape_text.
    """
    escaper = escape_help_text if self.sourcefile.endswith(".xhp") else escape_text
    self._text = escaper(text)
215 text = property(gettext, settext)
216
220
222 """return a line in tab-delimited form"""
223 parts = self.getparts()
224 return "\t".join(parts)
225
227 """get the key that identifies the resource"""
228 return (self.project, self.sourcefile, self.resourcetype, self.groupid,
229 self.localid, self.platform)
230
231
233 """this represents a number of translations of a resource"""
234
236 """construct the oounit"""
237 self.languages = {}
238 self.lines = []
239
241 """add a line to the oounit"""
242 self.languages[line.languageid] = line
243 self.lines.append(line)
244
248
250 """return the lines in tab-delimited form"""
251 return "\r\n".join([str(line) for line in self.lines])
252
253
255 """this represents an entire .oo file"""
256 UnitClass = oounit
257
259 """constructs the oofile"""
260 self.oolines = []
261 self.units = []
262 self.ookeys = {}
263 self.filename = ""
264 self.languages = []
265 if input is not None:
266 self.parse(input)
267
269 """adds a parsed line to the file"""
270 key = thisline.getkey()
271 element = self.ookeys.get(key, None)
272 if element is None:
273 element = self.UnitClass()
274 self.units.append(element)
275 self.ookeys[key] = element
276 element.addline(thisline)
277 self.oolines.append(thisline)
278 if thisline.languageid not in self.languages:
279 self.languages.append(thisline.languageid)
280
282 """parses lines and adds them to the file"""
283 if not self.filename:
284 self.filename = getattr(input, 'name', '')
285 if hasattr(input, "read"):
286 src = input.read()
287 input.close()
288 else:
289 src = input
290 for line in src.split("\n"):
291 line = quote.rstripeol(line)
292 if not line:
293 continue
294 parts = line.split("\t")
295 thisline = ooline(parts)
296 self.addline(thisline)
297
301
303 """converts all the lines back to tab-delimited form"""
304 lines = []
305 for oe in self.units:
306 if len(oe.lines) > 2:
307 warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages))
308 oekeys = [line.getkey() for line in oe.lines]
309 warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys))
310 oeline = str(oe) + "\r\n"
311 lines.append(oeline)
312 return "".join(lines)
313
314
316 """this takes a huge GSI file and represents it as multiple smaller files..."""
317
def __init__(self, filename, mode=None, multifilestyle="single"):
    """Open filename as the multifile behind this object.

    When mode is None it defaults to 'r' if filename already exists and to
    'w' otherwise.  In 'r' mode the subfile index is built straight away.
    """
    self.filename = filename
    if mode is None:
        # Read an existing file, otherwise start writing a new one.
        mode = 'r' if os.path.exists(filename) else 'w'
    self.mode = mode
    self.multifilestyle = multifilestyle
    # The filename without its extension.
    self.multifilename = os.path.splitext(filename)[0]
    self.multifile = open(filename, mode)
    # Maps each subfile name to the list of line numbers belonging to it.
    self.subfilelines = {}
    if mode == "r":
        self.createsubfileindex()
333
335 """reads in all the lines and works out the subfiles"""
336 linenum = 0
337 for line in self.multifile:
338 subfile = self.getsubfilename(line)
339 if not subfile in self.subfilelines:
340 self.subfilelines[subfile] = []
341 self.subfilelines[subfile].append(linenum)
342 linenum += 1
343
345 """looks up the subfile name for the line"""
346 if line.count("\t") < 2:
347 raise ValueError("invalid tab-delimited line: %r" % line)
348 lineparts = line.split("\t", 2)
349 module, filename = lineparts[0], lineparts[1]
350 if self.multifilestyle == "onefile":
351 ooname = self.multifilename
352 elif self.multifilestyle == "toplevel":
353 ooname = module
354 else:
355 filename = filename.replace("\\", "/")
356 fileparts = [module] + filename.split("/")
357 ooname = os.path.join(*fileparts[:-1])
358 return ooname + os.extsep + "oo"
359
361 """returns a list of subfiles in the file"""
362 return self.subfilelines.keys()
363
365 """iterates through the subfile names"""
366 for subfile in self.listsubfiles():
367 yield subfile
368
370 """checks if this pathname is a valid subfile"""
371 return pathname in self.subfilelines
372
374 """returns the list of lines matching the subfile"""
375 lines = []
376 requiredlines = dict.fromkeys(self.subfilelines[subfile])
377 linenum = 0
378 self.multifile.seek(0)
379 for line in self.multifile:
380 if linenum in requiredlines:
381 lines.append(line)
382 linenum += 1
383 return "".join(lines)
384
391
393 """returns a pseudo-file object for the given subfile"""
394
395 def onclose(contents):
396 self.multifile.write(contents)
397 self.multifile.flush()
398 outputfile = wStringIO.CatchStringOutput(onclose)
399 outputfile.filename = subfile
400 return outputfile
401
403 """returns an oofile built up from the given subfile's lines"""
404 subfilesrc = self.getsubfilesrc(subfile)
405 oosubfile = oofile()
406 oosubfile.filename = subfile
407 oosubfile.parse(subfilesrc)
408 return oosubfile
409