Package gavo :: Package utils :: Module texttricks
Source Code for Module gavo.utils.texttricks

  1  """ 
  2  Formatting, text manipulation, string constants, and such. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  import datetime 
 12  import math 
 13  import os 
 14  import random 
 15  import re 
 16  import string 
 17  import time 
 18  from email import utils as emailutils 
 19   
 20  from gavo.utils import codetricks 
 21  from gavo.utils import misctricks 
 22  from gavo.utils.excs import Error, SourceParseError 
 23   
 24  floatRE = r"[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?" 
 25  dateRE = re.compile("\d\d\d\d-\d\d-\d\d$") 
 26  datetimeRE = re.compile("\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ?$") 
 27  identifierPattern = re.compile("[A-Za-z_][A-Za-z0-9_]*$") 
 28  isoTimestampFmt = "%Y-%m-%dT%H:%M:%SZ" 
 29  isoTimestampFmtNoTZ = "%Y-%m-%dT%H:%M:%S" 
 30  entityrefPat = re.compile("&([^;])+;") 
 31  looksLikeURLPat = re.compile("[a-z]{2,5}://") 
 32   
 33   
 34  # file names that don't cause headaches in URLs and are otherwise reasonable 
 35  # benign (so, let's disallow shell metachars while we're at it). 
 36  _SAFE_FILENAME = re.compile("[,-:=@-Z_a-z{}~-]+$") 
 37   
 38  xmlEntities = { 
 39                  'lt': '<', 
 40                  'gt': '>', 
 41                  'amp': '&', 
 42                  'apos': "'", 
 43                  'quot': '"', 
 44  } 
 45   
 46   
 47 -def formatSize(val, sf=1): 
 48          """returns a human-friendly representation of a file size. 
 49          """ 
 50          if val<1e3: 
 51                  return "%d Bytes"%int(val) 
 52          elif val<1e6: 
 53                  return "%.*fkiB"%(sf, val/1024.) 
 54          elif val<1e9: 
 55                  return "%.*fMiB"%(sf, val/1024./1024.) 
 56          else: 
 57                  return "%.*fGiB"%(sf, val/1024./1024./1024) 
 58   
 59   
 60 -def makeEllipsis(aStr, maxLen=60): 
 61          """returns aStr cropped to maxLen if necessary. 
 62   
 63          Cropped strings are returned with an ellipsis marker. 
 64          """ 
 65          if len(aStr)>maxLen: 
 66                  return aStr[:maxLen-3]+"..." 
 67          return aStr 
 68   
 69   
 70 -def makeLeftEllipsis(aStr, maxLen=60): 
 71          """returns aStr shortened to maxLen by dropping prefixes if necessary. 
 72   
 73          Cropped strings are returned with an ellipsis marker. 
 74          >>> makeLeftEllipsis("0123456789"*2, 11) 
 75          '...23456789' 
 76          """ 
 77          if len(aStr)>maxLen: 
 78                  return "..."+aStr[-maxLen+3:] 
 79          return aStr 
 80   
 81   
 82 -def makeSourceEllipsis(sourceToken): 
 83          """returns a string hopefully representative for a source token. 
 84   
 85          These are, in particular, passed around withing rsc.makeData.  Usually, 
 86          these are (potentially long) strings, but now and then they can be 
 87          other things with appallingly long reprs.  When DaCHS messages 
 88          need to refer to such sources, this function is used to come up 
 89          with representative strings. 
 90          """ 
 91          if isinstance(sourceToken, basestring): 
 92                  return makeLeftEllipsis(sourceToken) 
 93          else: 
 94                  return makeEllipsis(repr(sourceToken), maxLen=160) 
 95   
 96   
 97  @codetricks.document 
 98 -def getFileStem(fPath): 
 99          """returns the file stem of a file path. 
100   
101          The base name is what remains if you take the base name and split off 
102          extensions.  The extension here starts with the last dot in the file name, 
103          except up to one of some common compression extensions (.gz, .xz, .bz2,  
104          .Z, .z) is stripped off the end if present before determining the extension. 
105   
106          >>> getFileStem("/foo/bar/baz.x.y") 
107          'baz.x' 
108          >>> getFileStem("/foo/bar/baz.x.gz") 
109          'baz' 
110          >>> getFileStem("/foo/bar/baz") 
111          'baz' 
112          """ 
113          for ext in [".gz", ".xz", ".bz2", ".Z", ".z"]: 
114                  if fPath.endswith(ext): 
115                          fPath = fPath[:-len(ext)] 
116                          break 
117          return os.path.splitext(os.path.basename(fPath))[0] 
118           
119   
def formatSimpleTable(data, stringify=True, titles=None):
    """returns a string containing a text representation of tabular data.

    All columns of data are simply stringified, then the longest member
    determines the width of the text column.  The behaviour if data
    does not contain rows of equal length is unspecified.  For empty
    data, an empty string is returned (even if titles are given).

    If you have serialised the values in data yourself, pass stringify=False.

    If you pass titles, it must be a sequence of strings; they are then
    used as table headers; the shorter of data[0] and titles will determine
    the number of columns displayed.
    """
    if stringify:
        data = [[str(v) for v in row] for row in data]

    if not data:
        return ""

    colWidthes = [max(len(row[colInd]) for row in data)
        for colInd in range(len(data[0]))]
    if titles is not None:
        # zip cuts to the shorter of titles and data columns
        colWidthes = [max(len(t), l) for t, l in zip(titles, colWidthes)]
    nCols = len(colWidthes)

    fmtStr = "  ".join("%%%ds"%w for w in colWidthes)
    # rows and titles are cut to nCols; %-formatting would raise a TypeError
    # on surplus items otherwise.
    table = "\n".join(fmtStr%tuple(row)[:nCols] for row in data)
    if titles is not None:
        table = fmtStr%tuple(titles)[:nCols]+"\n\n"+table
    return table
150   
151   
152  @codetricks.document 
def getRelativePath(fullPath, rootPath, liberalChars=True):
    """returns rest if fullPath has the form rootPath/rest and raises an
    exception otherwise.

    A ValueError is raised if fullPath is not below rootPath; sharing a
    name prefix (as in ``/a/bc`` versus ``/a/b``) is not enough.

    Pass ``liberalChars=False`` to make this raise a ValueError when
    URL-dangerous characters (blanks, ampersands, pluses, non-ASCII, and
    similar) are present in the result.  This is mainly for products.
    """
    if not fullPath.startswith(rootPath):
        raise ValueError(
            "Full path %s does not start with resource root %s"%(fullPath, rootPath))
    rest = fullPath[len(rootPath):]
    # a plain string prefix match also succeeds for siblings of rootPath
    # (/a/bc "starts with" /a/b); make sure rootPath ends at a path
    # separator within fullPath.
    if (rootPath and not rootPath.endswith("/")
            and rest and not rest.startswith("/")):
        raise ValueError(
            "Full path %s does not start with resource root %s"%(fullPath, rootPath))

    res = rest.lstrip("/")
    if not liberalChars and not _SAFE_FILENAME.match(res):
        raise ValueError("File path '%s' contains characters known to"
            " the DaCHS authors to be hazardous in URLs.  Please defuse the name"
            " before using it for published names (or see howDoI)."%res)
    return res
170   
171   
def resolvePath(rootPath, relPath):
    """joins relPath to rootPath and makes sure the result really is
    in rootPath.

    The result has symbolic links and ``..`` segments resolved (via
    os.path.realpath).  A ValueError is raised if the resolved path is not
    rootPath itself or below it, or if it does not exist.

    rootPath is compared as given, i.e., it is not resolved itself.
    """
    relPath = relPath.lstrip("/")
    fullPath = os.path.realpath(os.path.join(rootPath, relPath))
    # compare with a trailing separator; a plain prefix match would let
    # ../<rootname>-something escape from rootPath.
    bareRoot = rootPath.rstrip("/")
    if fullPath!=bareRoot and not fullPath.startswith(bareRoot+"/"):
        raise ValueError(
            "Full path %s does not start with resource root %s"%(fullPath, rootPath))
    if not os.path.exists(fullPath):
        raise ValueError(
            "Invalid path %s. This should not happend."%(fullPath))
    return fullPath
185   
186   
def fixIndentation(code, newIndent, governingLine=0):
    """returns code with all whitespace from governingLine removed from
    every line and newIndent prepended to every line.

    governingLine lets you select a line different from the first one
    for the determination of the leading white space.  Lines before that
    line (and empty lines directly following it) just have their leading
    whitespace replaced by newIndent.

    An Error is raised when a non-empty line does not start with the
    whitespace of the governing line.

    >>> fixIndentation("  foo\\n  bar", "")
    'foo\\nbar'
    >>> fixIndentation("  foo\\n   bar", " ")
    ' foo\\n  bar'
    >>> fixIndentation("  foo\\n   bar\\n    baz", "", 1)
    'foo\\nbar\\n baz'
    >>> fixIndentation("  foo\\nbar", "")
    Traceback (most recent call last):
    Error: Bad indent in line 'bar'
    """
    allLines = code.split("\n")
    leading, body = allLines[:governingLine], allLines[governingLine:]

    # whitespace-only lines cannot govern anything; hand them over to
    # the lines that are only stripped.
    while body and not body[0].strip():
        leading.append(body.pop(0))

    reindented = []
    if body:
        refIndent = re.match(r"^\s*", body[0]).group()
        nCut = len(refIndent)
        for line in body:
            if not line.strip():
                reindented.append(newIndent)
            elif line.startswith(refIndent):
                reindented.append(newIndent+line[nCut:])
            else:
                raise Error("Bad indent in line %s"%repr(line))

    stripped = [newIndent+l.lstrip() for l in leading]
    return "\n".join(stripped+reindented)
226   
227   
228  @codetricks.memoized 
229 -def _getREForPercentExpression(format): 
230          """helps parsePercentExpression. 
231          """ 
232          parts = re.split(r"(%\w)", format) 
233          newReParts = [] 
234          for ind, p in enumerate(parts): 
235                  if p.startswith("%"): 
236                          # the time-parsing hack explained in the docstring: 
237                          if ind+2<len(parts) and parts[ind+1]=="": 
238                                  if p[1] in "HMS": 
239                                          newReParts.append("(?P<%s>..)"%p[1]) 
240                                  else: 
241                                          raise ValueError( 
242                                                  "At %s: conversions with no intervening literal not supported."% p) 
243                          else: 
244                                  newReParts.append("(?P<%s>.*?)"%p[1]) 
245                  else: 
246                          newReParts.append(re.escape(p)) 
247          return re.compile("".join(newReParts)+"$") 
248   
249   
def parsePercentExpression(literal, format):
    """returns a dictionary of parts in the %-template format.

    format is a template with %<conv> conversions, no modifiers are
    allowed.  Each conversion is allowed to contain zero or more characters
    matched stingily.  Successive conversions without intervening literals
    aren't really supported.  There's a hack for strptime-type times, though:
    H, M, and S just eat two characters each if there's no separator.

    This is really only meant as a quick hack to support times like 25:33.

    A ValueError is raised if literal does not match format.

    >>> r=parsePercentExpression("12,xy:33,","%a:%b,%c"); r["a"], r["b"], r["c"]
    ('12,xy', '33', '')
    >>> sorted(parsePercentExpression("2357-x", "%H%M-%u").items())
    [('H', '23'), ('M', '57'), ('u', 'x')]
    >>> r = parsePercentExpression("12,13,14", "%a:%b,%c")
    Traceback (most recent call last):
    ValueError: '12,13,14' cannot be parsed using format '%a:%b,%c'
    """
    pattern = _getREForPercentExpression(format)
    mat = pattern.match(literal)
    if mat:
        return mat.groupdict()
    raise ValueError("'%s' cannot be parsed using format '%s'"%(
        literal, format))
274   
275   
def parseAssignments(assignments):
    """returns a name mapping dictionary from a list of assignments.

    This is the preferred form of communicating a mapping from external names
    to field names in records to macros -- in a string that contains
    ":"-separated pairs separated by whitespace, like "a:b  b:c", where
    the incoming names are leading, the desired names are trailing.

    If you need defaults to kick in when the incoming data is None, try
    _parseDestWithDefault in the client function.

    The result is a dictionary mapping original names to desired names.
    Items not containing exactly one colon raise a ValueError.

    >>> parseAssignments("a:b  b:c")
    {'a': 'b', 'b': 'c'}
    """
    mapping = {}
    for litPair in assignments.split():
        lead, trail = litPair.split(":")
        mapping[lead] = trail
    return mapping
294   
295   
296  @codetricks.document 
def hmsToDeg(hms, sepChar=None):
    """returns the time angle (h m s.decimals) as a float in degrees.

    sepChar is what separates hours, minutes, and seconds; None means
    whitespace, an empty string means fixed two-character fields for hours
    and minutes.  Seconds may be left out.  Anything not parseable
    raises a ValueError.

    >>> "%3.8f"%hmsToDeg("22 23 23.3")
    '335.84708333'
    >>> "%3.8f"%hmsToDeg("22:23:23.3", ":")
    '335.84708333'
    >>> "%3.8f"%hmsToDeg("222323.3", "")
    '335.84708333'
    >>> hmsToDeg("junk")
    Traceback (most recent call last):
    ValueError: Invalid time with sepChar None: 'junk'
    """
    hms = hms.strip()
    try:
        if sepChar=="":
            fields = [hms[:2], hms[2:4], hms[4:]]
        else:
            fields = hms.split(sepChar)

        if len(fields)==2:
            fields = list(fields)+[0]
        elif len(fields)!=3:
            raise ValueError("Too many parts")
        hours, minutes, seconds = fields

        # seconds may be an empty string in fixed-field notation
        timeSeconds = int(hours)*3600+float(minutes)*60+float(seconds or "0")
    except ValueError:
        raise ValueError("Invalid time with sepChar %s: %s"%(
            repr(sepChar), repr(hms)))
    return timeSeconds/3600/24*360
328   
329   
330  @codetricks.document 
def dmsToDeg(dmsAngle, sepChar=None):
    """returns the degree minutes seconds-specified dmsAngle as a
    float in degrees.

    sepChar is what separates degrees, minutes, and seconds; None means
    whitespace, an empty string means fixed two-character fields for
    degrees and minutes.  Seconds may be left out, and a leading sign
    applies to the whole angle.  Anything not parseable raises a ValueError.

    >>> "%3.8f"%dmsToDeg("45 30.6")
    '45.51000000'
    >>> "%3.8f"%dmsToDeg("45:30.6", ":")
    '45.51000000'
    >>> "%3.8f"%dmsToDeg("-45 30 7.6")
    '-45.50211111'
    >>> dmsToDeg("junk")
    Traceback (most recent call last):
    ValueError: Invalid dms value with sepChar None: 'junk'
    """
    dmsAngle = dmsAngle.strip()
    sign = 1
    # the sign is split off first so it is not lost on "-0 30"
    if dmsAngle[:1] in ("+", "-"):
        if dmsAngle[0]=="-":
            sign = -1
        dmsAngle = dmsAngle[1:].strip()

    try:
        if sepChar=="":
            fields = [dmsAngle[:2], dmsAngle[2:4], dmsAngle[4:]]
        else:
            fields = dmsAngle.split(sepChar)

        if len(fields)==2:
            fields = list(fields)+[0]
        elif len(fields)!=3:
            raise ValueError("Invalid # of parts")
        degPart, minPart, secPart = fields

        arcSecs = sign*(int(degPart)*3600+float(minPart)*60+float(secPart or 0))
    except ValueError:
        raise misctricks.logOldExc(
            ValueError("Invalid dms value with sepChar %s: %s"%(
                repr(sepChar), repr(dmsAngle))))
    return arcSecs/3600
369   
370   
def fracHoursToDeg(fracHours):
    """returns the time angle fracHours given in decimal hours in degrees.

    fracHours can be anything float() accepts.
    """
    hours = float(fracHours)
    return hours*360./24.
375   
376   
def degToHms(deg, sepChar=" ", secondFracs=3):
    """converts a float angle in degrees to a time angle (hh:mm:ss.mmm).

    sepChar is put between hours, minutes, and seconds, secondFracs is
    the number of decimal places given for the seconds (anything less
    than one yields integral seconds).

    The angle is rounded to the requested precision before it is split into
    fields, so seconds and minutes never show up as 60.

    >>> degToHms(0)
    '00 00 00.000'
    >>> degToHms(122.056, secondFracs=1)
    '08 08 13.4'
    >>> degToHms(-0.056, secondFracs=0)
    '-00 00 13'
    >>> degToHms(-1.056, secondFracs=0)
    '-00 04 13'
    >>> degToHms(359.2222, secondFracs=4, sepChar=":")
    '23:56:53.3280'
    >>> degToHms(14.999999999)
    '01 00 00.000'
    >>> "%.4f"%hmsToDeg(degToHms(256.25, secondFracs=9))
    '256.2500'
    """
    sign = ""
    if deg<0:
        sign, deg = "-", -deg

    nFracs = max(int(secondFracs), 0)
    unitsPerSecond = 10**nFracs
    # work in integral multiples of the smallest displayed unit; rounding
    # only when formatting the seconds would let 59.9996 come out as 60.000.
    totalUnits = int(round(deg/360.*24*3600*unitsPerSecond))
    totalSeconds, fracUnits = divmod(totalUnits, unitsPerSecond)
    totalMinutes, seconds = divmod(totalSeconds, 60)
    hours, minutes = divmod(totalMinutes, 60)

    secLiteral = "%02d"%seconds
    if nFracs:
        secLiteral += ".%0*d"%(nFracs, fracUnits)
    return sign+sepChar.join(["%02d"%hours, "%02d"%minutes, secLiteral])
403   
404   
def degToDms(deg, sepChar=" ", secondFracs=2):
    """converts a float angle in degrees to a sexagesimal string.

    sepChar is put between degrees, minutes, and seconds, secondFracs is
    the number of decimal places given for the seconds (anything less
    than one yields integral seconds).  The result always carries a sign.

    The angle is rounded to the requested precision before it is split into
    fields, so seconds and minutes never show up as 60.

    >>> degToDms(0)
    '+0 00 00.00'
    >>> degToDms(-0.25)
    '-0 15 00.00'
    >>> degToDms(-23.50, secondFracs=4)
    '-23 30 00.0000'
    >>> degToDms(0.999999999)
    '+1 00 00.00'
    >>> "%.4f"%dmsToDeg(degToDms(-25.6835, sepChar=":"), sepChar=":")
    '-25.6835'
    """
    sign = "+"
    if deg<0:
        sign, deg = "-", -deg

    nFracs = max(int(secondFracs), 0)
    unitsPerSecond = 10**nFracs
    # work in integral multiples of the smallest displayed unit; rounding
    # only when formatting the seconds would let 59.996 come out as 60.00.
    totalUnits = int(round(deg*3600*unitsPerSecond))
    totalSeconds, fracUnits = divmod(totalUnits, unitsPerSecond)
    totalMinutes, seconds = divmod(totalSeconds, 60)
    degs, minutes = divmod(totalMinutes, 60)

    secLiteral = "%02d"%seconds
    if nFracs:
        secLiteral += ".%0*d"%(nFracs, fracUnits)
    return sepChar.join(["%s%d"%(sign, degs), "%02d"%minutes, secLiteral])
427   
428   
def datetimeToRFC2616(dt):
    """returns a UTC datetime object in the format required by http.

    This may crap when you fuzz with the locale (day and month names
    come from strftime).  In general, when handling "real" times within
    the DC, prefer unix timestamps over datetimes and use the other
    *RFC2616 functions.
    """
    httpDateFormat = '%a, %d %b %Y %H:%M:%S GMT'
    return dt.strftime(httpDateFormat)
437   
438   
def parseRFC2616Date(s):
    """returns seconds since unix epoch representing UTC from the HTTP-compatible
    time specification s.

    Parsing is left to email.utils.parsedate_tz, the conversion to a
    timestamp to email.utils.mktime_tz.
    """
    return emailutils.mktime_tz(emailutils.parsedate_tz(s))
445   
446   
# The following timegm implementation is due to Fredrik Lundh
def _d(y, m, d, days=(0,31,59,90,120,151,181,212,243,273,304,334,365)):
    """returns a running day number for the gregorian date y-m-d.

    Only differences between such numbers are meaningful.  The year part
    counts one leap day every four years starting from 1901 with no
    century exception.

    days contains the number of days before each month in a non-leap year.
    """
    # the leap day of the current year only counts from March on
    leapDayPassed = m>2 and y%4==0 and (y%100!=0 or y%400==0)
    # floor division: the day number has to remain integral even when
    # / is true division.
    return ((y-1901)*1461)//4 + days[m-1] + d + (1 if leapDayPassed else 0)


def timegm(tm, epoch=_d(1970,1,1)):
    """returns seconds since the unix epoch for a UTC time tuple tm.

    Only the first six items (year, month, day, hour, minute, second) of
    tm are used; epoch is the day number (as by _d) of the epoch.
    """
    year, month, day, h, m, s = tm[:6]
    return (_d(year, month, day) - epoch)*86400 + h*3600 + m*60 + s
455   
456   
def formatRFC2616Date(secs=None):
    """returns an RFC2616 date string for UTC seconds since unix epoch.

    Without secs, the current time is formatted.
    """
    timestamp = time.time() if secs is None else secs
    return emailutils.formatdate(timestamp, localtime=False, usegmt=True)
463   
464   
465  _isoDTRE = re.compile(r"(?P<year>\d\d\d\d)-?(?P<month>\d\d)-?(?P<day>\d\d)" 
466                  r"(?:[T ](?P<hour>\d\d):?(?P<minute>\d\d):?" 
467                  r"(?P<seconds>\d\d)(?P<secFracs>\.\d*)?Z?(\+00:00)?)?$") 
468   
469   
470  @codetricks.document 
def parseISODT(literal):
    """returns a datetime object for a ISO time literal.

    There's no real timezone support yet, but we accept and ignore various
    ways of specifying UTC.

    Fractional seconds are evaluated to microseconds, further digits
    are dropped.  Literals not understood raise a ValueError.

    >>> parseISODT("1998-12-14")
    datetime.datetime(1998, 12, 14, 0, 0)
    >>> parseISODT("1998-12-14T13:30:12")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("1998-12-14T13:30:12Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("1998-12-14T13:30:12.224Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12, 224000)
    >>> parseISODT("19981214T133012Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("19981214T133012+00:00")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("junk")
    Traceback (most recent call last):
    ValueError: Bad ISO datetime literal: junk (required format: yyyy-mm-ddThh:mm:ssZ)
    """
    literal = literal.rstrip("Z")
    mat = _isoDTRE.match(literal.strip())
    if not mat:
        raise ValueError("Bad ISO datetime literal: %s"
            " (required format: yyyy-mm-ddThh:mm:ssZ)"%literal)

    parts = mat.groupdict()
    if parts["hour"] is None:
        hour = minute = seconds = 0
    else:
        hour, minute, seconds = (
            int(parts["hour"]), int(parts["minute"]), int(parts["seconds"]))

    # microseconds are taken from the digits directly (padded or cut to
    # six places); going through float multiplication and truncation can
    # come out one microsecond low.
    fracDigits = (parts["secFracs"] or ".")[1:]
    microseconds = int((fracDigits+"000000")[:6])

    return datetime.datetime(int(parts["year"]), int(parts["month"]),
        int(parts["day"]), hour, minute, seconds, microseconds)
508   
509   
# strptime formats tried in turn by parseDefaultDatetime
_SUPPORTED_DT_FORMATS =[
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d',]


def parseDefaultDatetime(literal):
    """returns a datetime for literal.

    None and datetime instances are returned unchanged.  From strings,
    one trailing Z and everything from the first dot on (i.e., fractional
    seconds) are removed, and the result is tried against
    _SUPPORTED_DT_FORMATS; if none of them fits, parseISODT gets to decide.
    """
    if literal is None or isinstance(literal, datetime.datetime):
        return literal

    if literal.endswith("Z"):
        literal = literal[:-1]
    # just nuke fractional seconds, they're trouble with strptime.
    literal = literal.partition(".")[0]

    for fmt in _SUPPORTED_DT_FORMATS:
        try:
            return datetime.datetime(*time.strptime(literal, fmt)[:6])
        except ValueError:
            continue
    return parseISODT(literal)
529   
530   
def parseDefaultDate(literal):
    """returns a date for a literal of the form yyyy-mm-dd.

    None and datetime.date instances are returned unchanged; other
    strings raise a ValueError.
    """
    if literal is None or isinstance(literal, datetime.date):
        return literal
    parsed = time.strptime(literal, '%Y-%m-%d')
    return datetime.date(parsed[0], parsed[1], parsed[2])
535   
536   
def parseDefaultTime(literal):
    """returns a time for a literal of the form hh:mm:ss.

    None and datetime.time instances are returned unchanged; other
    strings raise a ValueError.
    """
    if literal is None or isinstance(literal, datetime.time):
        return literal
    # as long as we're building on top of time, we can't do fractional seconds
    parsed = time.strptime(literal, '%H:%M:%S')
    return datetime.time(parsed[3], parsed[4], parsed[5])
542   
543   
def roundToSeconds(dt):
    """returns a datetime instance rounded to whole seconds.

    More than half a second is rounded up, everything else down.

    This also recklessly clears any time zone marker.  So, don't pass
    in anything with a meaningful time zone.
    """
    wholeSeconds = dt.replace(microsecond=0, tzinfo=None)
    if dt.microsecond>500000:
        return wholeSeconds+datetime.timedelta(seconds=1)
    return wholeSeconds
555   
def formatISODT(dt):
    """returns some ISO8601 representation of a datetime instance.

    The reason for preferring this function over a simple str is that
    datetime's default representation is too difficult for some other
    code (e.g., itself); hence, this code rounds to whole seconds
    and always adds a Z (where strftime works, utils.isoTimestampFmt produces
    an identical string).

    The behaviour of this function for timezone-aware datetimes is undefined.

    For convenience, None is returned as None

    >>> formatISODT(datetime.datetime(2015, 10, 20, 12, 34, 22, 250))
    '2015-10-20T12:34:22Z'
    >>> formatISODT(datetime.datetime(1815, 10, 20, 12, 34, 22, 250))
    '1815-10-20T12:34:22Z'
    >>> formatISODT(datetime.datetime(2018, 9, 21, 23, 59, 59, 640000))
    '2018-09-22T00:00:00Z'
    """
    if dt is not None:
        rounded = roundToSeconds(dt)
        return rounded.isoformat()+"Z"
    return None
579   
580   
class NameMap(object):
    """is a name mapper fed from a simple text file.

    The text file format simply is:

    <target-id> "TAB" <src-id>{whitespace <src-id>}

    src-ids have to be encoded quoted-printable when they contain whitespace
    or other "bad" characters ("="!).  You can have #-comments and empty
    lines.

    src-ids are lowercased when read; the mapping from src-ids to target-ids
    ends up in the namesDict attribute.  With missingOk=True, a source file
    that cannot be opened yields an empty mapping rather than an IOError.
    """
    def __init__(self, src, missingOk=False):
        self._parseSrc(src, missingOk)

    def __contains__(self, name):
        # NOTE(review): unlike resolve, this does not lowercase name, so
        # "Foo" in m can be False while m.resolve("Foo") succeeds --
        # confirm whether that is intended.
        return name in self.namesDict

    def _parseSrc(self, src, missingOk):
        """fills namesDict from the file named src.

        Lines not of the form described in the class docstring raise
        a ValueError.
        """
        self.namesDict = {}
        try:
            f = open(src)
        except IOError:
            if not missingOk:
                raise
            return

        # try/finally: the file has to be closed on syntax errors, too.
        try:
            for ln in f:
                if ln.startswith("#") or not ln.strip():
                    continue
                try:
                    ob, names = re.split("\t+", ln)
                    for name in names.lower().split():
                        self.namesDict[name.decode("quoted-printable")] = ob
                except ValueError:
                    raise misctricks.logOldExc(ValueError(
                        "Syntax error in %s: Line %s not understood."%(src, repr(ln))))
        finally:
            f.close()

    def resolve(self, name):
        """returns the target-id for name, ignoring the case of name.

        Unknown names raise a KeyError.
        """
        return self.namesDict[name.lower()]
621   
622   
623  _STANDARD_ENTITIES = { 
624                  'lt': '<', 
625                  'gt': '>', 
626                  'amp': '&', 
627                  'apos': "'", 
628                  'quot': '"', 
629  } 
630   
631   
632 -def _decodeEntityref(matob): 
633          entRef = matob.group(1) 
634          if entRef in _STANDARD_ENTITIES: 
635                  return _STANDARD_ENTITIES[entRef] 
636          elif entRef.startswith("#x"): 
637                  return unichr(int(entRef[2:], 16)) 
638          elif entRef.startswith("#"): 
639                  return unichr(int(entRef[1:])) 
640          else: 
641                  raise ValueError("Unknown entity reference: &%s;"%entRef) 
642   
643   
def replaceXMLEntityRefs(unicodeString):
    """returns unicodeString with every match of entityrefPat replaced by
    what _decodeEntityref makes of it.

    Entity references _decodeEntityref does not know raise a ValueError.
    """
    decoded = entityrefPat.sub(_decodeEntityref, unicodeString)
    return decoded
646   
647   
def ensureOneSlash(s):
    """returns s with exactly one trailing slash.

    Any number of slashes already at the end of s is replaced by
    a single one.
    """
    withoutSlashes = s.rstrip("/")
    return withoutSlashes+"/"
652   
653   
654 -def _iterSimpleTextNoContinuation(f): 
655          """helps iterSimpleText. 
656          """ 
657          for (lineNumber, curLine) in enumerate(f): 
658                  curLine = curLine.strip() 
659                  if curLine and not curLine.startswith("#"): 
660                          yield (lineNumber+1), curLine 
661   
662   
663  @codetricks.document 
def iterSimpleText(f):
    """iterates over ``(physLineNumber, line)`` in f with some usual
    conventions for simple data files.

    You should use this function to read from simple configuration and/or
    table files that don't warrant a full-blown grammar/rowmaker combo.
    The intended use is somewhat like this::

        with open(rd.getAbsPath("res/mymeta")) as f:
            for lineNumber, content in iterSimpleText(f):
                try:
                    ...
                except Exception as exc:
                    sys.stderr.write("Bad input line %s: %s"%(lineNumber, exc))

    The grammar rules are, specifically:

    * leading and trailing whitespace is stripped
    * empty lines are ignored
    * lines beginning with a hash are ignored
    * lines ending with a backslash are joined with the following line;
      to have intervening whitespace, have a blank in front of the backslash.

    For joined lines, the line number reported is the one of the last
    physical line.  A SourceParseError is raised if the last line ends
    with a backslash.
    """
    lines = _iterSimpleTextNoContinuation(f)
    for lineNumber, curLine in lines:
        # pull continuation lines from the same iterator the for loop
        # runs on, so they are not delivered a second time.
        while curLine.endswith("\\"):
            try:
                lineNumber, newStuff = next(lines)
            except StopIteration:
                raise SourceParseError("File ends with a backslash",
                    location="line %d"%lineNumber)
            curLine = curLine[:-1]+newStuff

        yield lineNumber, curLine
703   
704   
# ascii_letters rather than letters: the latter depends on the locale
# and can thus contain characters that are anything but harmless.
_RANDOM_STRING_OK_CHARS = string.ascii_letters+string.digits+"_.,"


def getRandomString(length):
    """returns a random string of harmless printable characters.

    The result has length characters from ASCII letters, digits,
    underscore, dot, and comma.  This uses the random module and thus
    must not be used where unpredictability matters.
    """
    return "".join(
        random.choice(_RANDOM_STRING_OK_CHARS) for _ in range(length))
712   
713   
def safe_str(val):
    """returns a byte string for val.

    str instances are returned unchanged, unicode instances are
    encoded to ASCII with all non-ASCII characters dropped, and everything
    else is passed through str.
    """
    if isinstance(val, str):
        return val
    if isinstance(val, unicode):
        return val.encode("ascii", "ignore")
    return str(val)
721   
722   
def parseAccept(aString):
    """parses an RFC 2616 accept header and returns a dict mapping media
    type patterns to their (unparsed) parameters.

    If aString is None, an empty dict is returned

    If we ever want to do fancy things with http content negotiation, this
    will be further wrapped to provide something implementing the complex
    RFC 2616 rules; this primitive interface really is intended for telling
    apart browsers (which accept text/html) from other clients (which
    hopefully do not) at this point.

    >>> sorted(parseAccept("text/html, text/*; q=0.2; level=3").items())
    [('text/*', 'q=0.2; level=3'), ('text/html', '')]
    >>> parseAccept(None)
    {}
    """
    res = {}
    if aString is None:
        return res

    for item in aString.split(","):
        # everything behind the first semicolon is parameters
        key, _, params = item.partition(";")
        res[key.strip()] = params.strip()
    return res
750   
751   
def _test():
    """runs the doctests embedded in this module.
    """
    import doctest
    import texttricks
    doctest.testmod(texttricks)


if __name__=="__main__":
    _test()
759