Package gavo :: Package utils :: Module texttricks
[frames] | no frames]

Source Code for Module gavo.utils.texttricks

  1  """ 
  2  Formatting, text manipulation, string constants, and such. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  import datetime 
 12  import math 
 13  import os 
 14  import random 
 15  import re 
 16  import string 
 17  import time 
 18  from email import utils as emailutils 
 19   
 20  from gavo.utils import codetricks 
 21  from gavo.utils import misctricks 
 22  from gavo.utils.excs import Error, SourceParseError 
 23   
 24  floatRE = r"[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?" 
 25  dateRE = re.compile("\d\d\d\d-\d\d-\d\d$") 
 26  datetimeRE = re.compile("\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ?$") 
 27  identifierPattern = re.compile("[A-Za-z_][A-Za-z0-9_]*$") 
 28  isoTimestampFmt = "%Y-%m-%dT%H:%M:%SZ" 
 29  isoTimestampFmtNoTZ = "%Y-%m-%dT%H:%M:%S" 
 30  entityrefPat = re.compile("&([^;])+;") 
 31  looksLikeURLPat = re.compile("[a-z]{2,5}://") 
 32   
 33   
 34  # file names that don't cause headaches in URLs and are otherwise reasonable 
 35  # benign (so, let's disallow shell metachars while we're at it). 
 36  _SAFE_FILENAME = re.compile("[,-:=@-Z_a-z{}~-]+$") 
 37   
 38  xmlEntities = { 
 39                  'lt': '<', 
 40                  'gt': '>', 
 41                  'amp': '&', 
 42                  'apos': "'", 
 43                  'quot': '"', 
 44  } 
def formatSize(val, sf=1):
    """returns a human-friendly representation of a file size.

    val is the size in bytes, sf the number of decimal places used for
    anything larger than plain bytes.  The unit is selected by comparing
    val against powers of 1000, while the scaling itself is done in powers
    of 1024.
    """
    if val<1e3:
        return "%d Bytes"%int(val)

    kibis = val/1024.
    if val<1e6:
        return "%.*fkiB"%(sf, kibis)

    mebis = kibis/1024.
    if val<1e9:
        return "%.*fMiB"%(sf, mebis)

    return "%.*fGiB"%(sf, mebis/1024)
58
def makeEllipsis(aStr, maxLen=60):
    """returns aStr cropped to maxLen if necessary.

    Strings that had to be cropped lose their tail and end with "..."
    instead; strings of at most maxLen characters come back unchanged.
    """
    if len(aStr)<=maxLen:
        return aStr
    return aStr[:maxLen-3]+"..."
68
def makeLeftEllipsis(aStr, maxLen=60):
    """returns aStr shortened to maxLen by dropping leading characters
    if necessary.

    Strings that had to be cropped start with "..."; strings of at most
    maxLen characters come back unchanged.

    >>> makeLeftEllipsis("0123456789"*2, 11)
    '...23456789'
    """
    if len(aStr)<=maxLen:
        return aStr
    return "..."+aStr[-maxLen+3:]
80
def makeSourceEllipsis(sourceToken):
    """returns a string hopefully representative for a source token.

    These are, in particular, passed around within rsc.makeData.  Usually,
    these are (potentially long) strings, but now and then they can be
    other things with appallingly long reprs.  When DaCHS messages
    need to refer to such sources, this function is used to come up
    with representative strings.

    Strings are shortened from the left with makeLeftEllipsis' default
    length; for anything else, the repr is cropped on the right to
    160 characters.
    """
    if not isinstance(sourceToken, basestring):
        return makeEllipsis(repr(sourceToken), maxLen=160)
    return makeLeftEllipsis(sourceToken)
95
@codetricks.document
def getFileStem(fPath):
    """returns the file stem of a file path.

    The stem is what remains if you take the base name and split off the
    extension.  The extension here starts with the last dot in the file
    name, except that up to one of some common compression extensions
    (.gz, .xz, .bz2, .Z, .z) is stripped off the end if present before
    determining the extension.

    >>> getFileStem("/foo/bar/baz.x.y")
    'baz.x'
    >>> getFileStem("/foo/bar/baz.x.gz")
    'baz'
    >>> getFileStem("/foo/bar/baz")
    'baz'
    """
    presentExts = [ext for ext in (".gz", ".xz", ".bz2", ".Z", ".z")
        if fPath.endswith(ext)]
    if presentExts:
        # at most one compression extension is removed
        fPath = fPath[:-len(presentExts[0])]

    stem, _ = os.path.splitext(os.path.basename(fPath))
    return stem
118
def formatSimpleTable(data, stringify=True, titles=None):
    """returns a string containing a text representation of tabular data.

    All columns of data are simply stringified, then the longest member
    determines the width of the text column.  The behaviour if data
    does not contain rows of equal length is unspecified.  For empty
    data, an empty string is returned.

    If you have serialised the values in data yourself, pass stringify=False.

    If you pass titles, it must be a sequence of strings; they are then
    used as table headers; the shorter of data[0] and titles will determine
    the number of columns displayed.
    """
    if stringify:
        data = [[str(v) for v in row] for row in data]

    if not data:
        return ""

    colWidthes = [max(len(row[colInd]) for row in data)
        for colInd in range(len(data[0]))]
    if titles is not None:
        # zip crops to the shorter of titles and data[0]
        colWidthes = [max(len(t), l) for t, l in zip(titles, colWidthes)]

    # Rows and titles must be cropped to the number of columns actually
    # formatted; otherwise, the %-formatting below raises a TypeError
    # whenever titles and data[0] differ in length.
    nCols = len(colWidthes)
    fmtStr = " ".join("%%%ds"%w for w in colWidthes)
    table = "\n".join(fmtStr%tuple(row[:nCols]) for row in data)
    if titles is not None:
        table = fmtStr%tuple(titles[:nCols])+"\n\n"+table
    return table
150
@codetricks.document
def getRelativePath(fullPath, rootPath, liberalChars=True):
    """returns rest if fullPath has the form rootPath/rest and raises an
    exception otherwise.

    A ValueError is raised if fullPath is not below rootPath; this includes
    the case that rootPath only matches a part of a path component
    (as in /foo/barbaz vs. /foo/bar).

    Pass ``liberalChars=False`` to make this raise a ValueError when
    URL-dangerous characters (blanks, ampersands, pluses, non-ASCII, and
    similar) are present in the result.  This is mainly for products.
    """
    rest = fullPath[len(rootPath):]
    inRoot = fullPath.startswith(rootPath)
    if inRoot and rootPath and rest and not rootPath.endswith("/"):
        # a plain prefix match is not enough: rootPath has to end at
        # a path component boundary.
        inRoot = rest.startswith("/")
    if not inRoot:
        raise ValueError(
            "Full path %s does not start with resource root %s"%(
                fullPath, rootPath))

    res = rest.lstrip("/")
    if not liberalChars and not _SAFE_FILENAME.match(res):
        raise ValueError("File path '%s' contains characters known to"
            " the DaCHS authors to be hazardous in URLs.  Please defuse the name"
            " before using it for published names (or see howDoI)."%res)
    return res
170
def resolvePath(rootPath, relPath):
    """joins relPath to rootPath and makes sure the result really is
    in rootPath.

    Leading slashes in relPath are ignored.  The joined path is passed
    through os.path.realpath before it is compared against rootPath.

    A ValueError is raised if the resulting path is not below rootPath
    (this includes siblings of rootPath merely sharing its name as a
    prefix, as in /data/rootevil vs. /data/root) or if it does not exist.
    """
    relPath = relPath.lstrip("/")
    fullPath = os.path.realpath(os.path.join(rootPath, relPath))

    rest = fullPath[len(rootPath):]
    inRoot = fullPath.startswith(rootPath)
    if inRoot and rootPath and rest and not rootPath.endswith("/"):
        # a plain prefix match is not enough: rootPath has to end at
        # a path component boundary, or ../<root>evil would get through.
        inRoot = rest.startswith("/")
    if not inRoot:
        raise ValueError(
            "Full path %s does not start with resource root %s"%(
                fullPath, rootPath))

    if not os.path.exists(fullPath):
        raise ValueError(
            "Invalid path %s. This should not happend."%(fullPath))
    return fullPath
185
def fixIndentation(code, newIndent, governingLine=0):
    """returns code with all whitespace from governingLine removed from
    every line and newIndent prepended to every line.

    governingLine lets you select a line different from the first one
    for the determination of the leading white space.  Lines before that
    line (and whitespace-only lines directly following it) just have their
    leading whitespace replaced by newIndent.

    An Error is raised if a non-empty line does not start with the
    indentation of the governing line.

    >>> fixIndentation("  foo\\n  bar", "")
    'foo\\nbar'
    >>> fixIndentation("  foo\\n   bar", " ")
    ' foo\\n  bar'
    >>> fixIndentation("  foo\\n   bar\\n    baz", "", 1)
    'foo\\nbar\\n baz'
    >>> fixIndentation("  foo\\nbar", "")
    Traceback (most recent call last):
    Error: Bad indent in line 'bar'
    """
    allLines = code.split("\n")
    head, body = allLines[:governingLine], allLines[governingLine:]

    # whitespace-only lines cannot govern the indentation; move them over
    # to the lines that are not checked.
    firstReal = 0
    while firstReal<len(body) and not body[firstReal].strip():
        firstReal += 1
    head.extend(body[:firstReal])
    body = body[firstReal:]

    reindented = []
    if body:
        oldIndent = re.match(r"^\s*", body[0]).group()
        for line in body:
            if not line.strip():
                reindented.append(newIndent)
            elif line.startswith(oldIndent):
                reindented.append(newIndent+line[len(oldIndent):])
            else:
                raise Error("Bad indent in line %s"%repr(line))

    head = [newIndent+l.lstrip() for l in head]
    return "\n".join(head+reindented)
226
@codetricks.memoized
def _getREForPercentExpression(format):
    """helps parsePercentExpression.

    This returns a compiled regular expression for format, with one
    named group per conversion (the name being the conversion character).
    Literal parts are matched verbatim, conversions match non-greedily.
    A conversion immediately followed by another one must be one of
    H, M, or S and then matches exactly two characters; for anything else
    in that position, a ValueError is raised.
    """
    tokens = re.split(r"(%\w)", format)
    patterns = []

    for pos, token in enumerate(tokens):
        if not token.startswith("%"):
            patterns.append(re.escape(token))
            continue

        convChar = token[1]
        # an empty literal between this and a following token means two
        # conversions are adjacent -- that's the time-parsing hack explained
        # in parsePercentExpression's docstring.
        if pos+2<len(tokens) and tokens[pos+1]=="":
            if convChar not in "HMS":
                raise ValueError(
                    "At %s: conversions with no intervening literal not supported."% token)
            patterns.append("(?P<%s>..)"%convChar)
        else:
            patterns.append("(?P<%s>.*?)"%convChar)

    return re.compile("".join(patterns)+"$")
248
def parsePercentExpression(literal, format):
    """returns a dictionary of parts in the %-template format.

    format is a template with %<conv> conversions, no modifiers are
    allowed.  Each conversion is allowed to contain zero or more characters
    matched non-greedily.  Successive conversions without intervening
    literals aren't really supported.  There's a hack for strptime-type
    times, though: H, M, and S just eat two characters each if there's no
    separator.

    This is really only meant as a quick hack to support times like 25:33.

    A ValueError is raised if literal does not match format.

    >>> r=parsePercentExpression("12,xy:33,","%a:%b,%c"); r["a"], r["b"], r["c"]
    ('12,xy', '33', '')
    >>> sorted(parsePercentExpression("2357-x", "%H%M-%u").items())
    [('H', '23'), ('M', '57'), ('u', 'x')]
    >>> r = parsePercentExpression("12,13,14", "%a:%b,%c")
    Traceback (most recent call last):
    ValueError: '12,13,14' cannot be parsed using format '%a:%b,%c'
    """
    pattern = _getREForPercentExpression(format)
    mat = pattern.match(literal)
    if mat:
        return mat.groupdict()
    raise ValueError("'%s' cannot be parsed using format '%s'"%(
        literal, format))
274
def parseAssignments(assignments):
    """returns a name mapping dictionary from a list of assignments.

    This is the preferred form of communicating a mapping from external names
    to field names in records to macros -- in a string that contains
    ":"-separated pairs separated by whitespace, like "a:b b:c", where
    the incoming names are leading, the desired names are trailing.

    If you need defaults to kick in when the incoming data is None, try
    _parseDestWithDefault in the client function.

    The result is a dictionary mapping original names to desired names.
    Items that do not consist of exactly two ":"-separated parts
    result in a ValueError.

    >>> parseAssignments("a:b b:c")
    {'a': 'b', 'b': 'c'}
    """
    mapping = {}
    for litPair in assignments.split():
        lead, trail = litPair.split(":")
        mapping[lead] = trail
    return mapping
294
@codetricks.document
def hmsToDeg(hms, sepChar=None):
    """returns the time angle (h m s.decimals) as a float in degrees.

    sepChar is what separates hours, minutes, and seconds; None means
    whitespace, an empty string means the fields are given without
    separators, with two characters each for hours and minutes.  The
    seconds may be left out when there is a separator.

    A ValueError is raised for input that cannot be interpreted like that.

    >>> "%3.8f"%hmsToDeg("22 23 23.3")
    '335.84708333'
    >>> "%3.8f"%hmsToDeg("22:23:23.3", ":")
    '335.84708333'
    >>> "%3.8f"%hmsToDeg("222323.3", "")
    '335.84708333'
    >>> hmsToDeg("junk")
    Traceback (most recent call last):
    ValueError: Invalid time with sepChar None: 'junk'
    """
    hms = hms.strip()
    try:
        if sepChar=="":
            fields = [hms[:2], hms[2:4], hms[4:]]
        else:
            fields = hms.split(sepChar)

        if len(fields)==2:
            # no seconds given
            fields.append(0)
        elif len(fields)!=3:
            raise ValueError("Too many parts")

        hours, minutes, seconds = fields
        timeSeconds = int(hours)*3600+float(minutes)*60+float(seconds or "0")
    except ValueError:
        raise ValueError("Invalid time with sepChar %s: %s"%(
            repr(sepChar), repr(hms)))

    return timeSeconds/3600/24*360
328
@codetricks.document
def dmsToDeg(dmsAngle, sepChar=None):
    """returns the degree minutes seconds-specified dmsAngle as a
    float in degrees.

    sepChar is what separates degrees, minutes, and seconds; None means
    whitespace, an empty string means the fields are given without
    separators, with two characters each for degrees and minutes.  The
    seconds may be left out when there is a separator.  A leading sign
    is allowed.

    A ValueError is raised for input that cannot be interpreted like that.

    >>> "%3.8f"%dmsToDeg("45 30.6")
    '45.51000000'
    >>> "%3.8f"%dmsToDeg("45:30.6", ":")
    '45.51000000'
    >>> "%3.8f"%dmsToDeg("-45 30 7.6")
    '-45.50211111'
    >>> dmsToDeg("junk")
    Traceback (most recent call last):
    ValueError: Invalid dms value with sepChar None: 'junk'
    """
    dmsAngle = dmsAngle.strip()

    # the sign is taken off up front so it also applies when the degrees
    # are zero.
    sign = 1
    if dmsAngle[:1] in ("+", "-"):
        if dmsAngle[0]=="-":
            sign = -1
        dmsAngle = dmsAngle[1:].strip()

    try:
        if sepChar=="":
            fields = [dmsAngle[:2], dmsAngle[2:4], dmsAngle[4:]]
        else:
            fields = dmsAngle.split(sepChar)

        if len(fields)==2:
            # no seconds given
            fields.append(0)
        elif len(fields)!=3:
            raise ValueError("Invalid # of parts")

        deg, min, sec = fields
        arcSecs = sign*(int(deg)*3600+float(min)*60+float(sec or 0))
    except ValueError:
        raise misctricks.logOldExc(
            ValueError("Invalid dms value with sepChar %s: %s"%(
                repr(sepChar), repr(dmsAngle))))

    return arcSecs/3600
369
def fracHoursToDeg(fracHours):
    """returns the time angle fracHours given in decimal hours in degrees.

    fracHours can be anything float() accepts.
    """
    hours = float(fracHours)
    return hours*360./24.
375
def degToHms(deg, sepChar=" ", secondFracs=3):
    """converts a float angle in degrees to a time angle (hh:mm:ss.mmm).

    sepChar is put between hours, minutes, and seconds, secondFracs
    gives the number of decimal places for the seconds.  Negative angles
    yield a leading minus sign.

    >>> degToHms(0)
    '00 00 00.000'
    >>> degToHms(122.056, secondFracs=1)
    '08 08 13.4'
    >>> degToHms(-0.056, secondFracs=0)
    '-00 00 13'
    >>> degToHms(-1.056, secondFracs=0)
    '-00 04 13'
    >>> degToHms(359.2222, secondFracs=4, sepChar=":")
    '23:56:53.3280'
    >>> "%.4f"%hmsToDeg(degToHms(256.25, secondFracs=9))
    '256.2500'
    """
    sign = ""
    if deg<0:
        sign, deg = "-", -deg

    fracHours, hours = math.modf(deg/360.*24)
    fracMinutes, minutes = math.modf(fracHours*60)

    if secondFracs<1:
        # with -1, the field width computed below comes out as two, i.e.,
        # there is no room reserved for a decimal point.
        secondFracs = -1

    fields = [
        "%02d"%int(hours),
        "%02d"%abs(int(minutes)),
        "%0*.*f"%(secondFracs+3, secondFracs, abs(fracMinutes*60))]
    return sign+sepChar.join(fields)
403
def degToDms(deg, sepChar=" ", secondFracs=2):
    """converts a float angle in degrees to a sexagesimal string.

    sepChar is put between degrees, minutes, and seconds, secondFracs
    gives the number of decimal places for the seconds.  The result
    always starts with a sign.

    >>> degToDms(0)
    '+0 00 00.00'
    >>> degToDms(-0.25)
    '-0 15 00.00'
    >>> degToDms(-23.50, secondFracs=4)
    '-23 30 00.0000'
    >>> "%.4f"%dmsToDeg(degToDms(-25.6835, sepChar=":"), sepChar=":")
    '-25.6835'
    """
    sign = "+"
    if deg<0:
        sign, deg = "-", -deg

    fracDegs, degs = math.modf(deg)
    fracMinutes, minutes = math.modf(fracDegs*60)

    if secondFracs==0:
        # with -1, the field width computed below comes out as two, i.e.,
        # there is no room reserved for a decimal point.
        secondFracs = -1

    fields = [
        "%s%d"%(sign, int(degs)),
        "%02d"%abs(int(minutes)),
        "%0*.*f"%(secondFracs+3, secondFracs, abs(fracMinutes*60))]
    return sepChar.join(fields)
427
def datetimeToRFC2616(dt):
    """returns a UTC datetime object in the format required by http.

    This goes through strftime, so the result may be wrong when you fuzz
    with the locale.  In general, when handling "real" times within the DC,
    prefer unix timestamps over datetimes and use the other *RFC2616
    functions.
    """
    httpDateFormat = '%a, %d %b %Y %H:%M:%S GMT'
    return dt.strftime(httpDateFormat)
437
def parseRFC2616Date(s):
    """returns seconds since unix epoch representing UTC from the
    HTTP-compatible time specification s.

    Parsing is left to the email.utils functions parsedate_tz and mktime_tz.
    """
    return emailutils.mktime_tz(emailutils.parsedate_tz(s))
445
# The following timegm implementation is due to Frederik Lundh
def _d(y, m, d, days=(0,31,59,90,120,151,181,212,243,273,304,334,365)):
    """returns a running day number for year y, month m, and day d.

    days gives the number of days before the start of each month in a
    non-leap year.  The years are accounted for by assuming a leap year
    every fourth year, counted from 1901.
    """
    isLeapYear = not y % 4 and (y % 100 or not y % 400)
    leapDay = 1 if (m > 2 and isLeapYear) else 0
    # floor division is used explicitly so the result stays integral no
    # matter what the division semantics for "/" are.
    return ((y - 1901)*1461)//4 + days[m-1] + d + leapDay


def timegm(tm, epoch=_d(1970,1,1)):
    """returns seconds since the unix epoch for a UTC time tuple tm.

    Only the first six items of tm (year, month, day, hours, minutes,
    seconds) are evaluated.  epoch is the day number (as computed by _d)
    that the result is counted from.
    """
    year, month, day, h, m, s = tm[:6]
    return (_d(year, month, day) - epoch)*86400 + h*3600 + m*60 + s

def formatRFC2616Date(secs=None):
    """returns an RFC2616 date string for UTC seconds since unix epoch.

    Without secs (or with secs=None), the current time is formatted.
    """
    timestamp = time.time() if secs is None else secs
    return emailutils.formatdate(timestamp, localtime=False, usegmt=True)

# Matches the date and datetime literals parseISODT accepts: a date
# with or without dashes, optionally followed by a "T" or a blank and a
# time with or without colons.  The time may have fractional seconds
# (secFracs includes the leading dot) and may be followed by Z and/or
# +00:00.
_isoDTRE = re.compile(r"(?P<year>\d\d\d\d)-?(?P<month>\d\d)-?(?P<day>\d\d)"
    r"(?:[T ](?P<hour>\d\d):?(?P<minute>\d\d):?"
    r"(?P<seconds>\d\d)(?P<secFracs>\.\d*)?Z?(\+00:00)?)?$")

@codetricks.document
def parseISODT(literal):
    """returns a datetime object for an ISO time literal.

    There's no real timezone support yet, but we accept and ignore various
    ways of specifying UTC.

    Fractional seconds are honoured down to microseconds; any digits
    beyond that are discarded.

    A ValueError is raised for literals that are not understood.

    >>> parseISODT("1998-12-14")
    datetime.datetime(1998, 12, 14, 0, 0)
    >>> parseISODT("1998-12-14T13:30:12")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("1998-12-14T13:30:12Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("1998-12-14T13:30:12.224Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12, 224000)
    >>> parseISODT("19981214T133012Z")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("19981214T133012+00:00")
    datetime.datetime(1998, 12, 14, 13, 30, 12)
    >>> parseISODT("junk")
    Traceback (most recent call last):
    ValueError: Bad ISO datetime literal: junk (required format: yyyy-mm-ddThh:mm:ssZ)
    """
    literal = literal.rstrip("Z")
    mat = _isoDTRE.match(literal.strip())
    if not mat:
        raise ValueError("Bad ISO datetime literal: %s"
            " (required format: yyyy-mm-ddThh:mm:ssZ)"%literal)

    parts = mat.groupdict()
    if parts["hour"] is None:
        # date-only literal
        parts["hour"] = parts["minute"] = parts["seconds"] = 0

    # secFracs is a dot followed by any number of digits (or None).  The
    # digits are padded or cropped to exactly six and converted as an
    # integer; going through float multiplication can lose a microsecond
    # to rounding errors.
    fracDigits = (parts["secFracs"] or ".")[1:]
    microseconds = int((fracDigits+"000000")[:6])

    return datetime.datetime(int(parts["year"]), int(parts["month"]),
        int(parts["day"]), int(parts["hour"]), int(parts["minute"]),
        int(parts["seconds"]), microseconds)

# strptime formats parseDefaultDatetime tries, in this sequence, before
# falling back to parseISODT.
_SUPPORTED_DT_FORMATS =[
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d',]


def parseDefaultDatetime(literal):
    """returns a datetime for literal.

    None and datetime instances are handed through unchanged.  Otherwise,
    literal must be a string.  A trailing Z and anything from the first
    dot on (i.e., fractional seconds) are discarded, and what is left
    is parsed with the formats in _SUPPORTED_DT_FORMATS.  If none of these
    matches, parseISODT is tried.
    """
    if literal is None or isinstance(literal, datetime.datetime):
        return literal

    if literal.endswith("Z"):
        literal = literal[:-1]
    # just nuke fractional seconds, they're trouble with strptime.
    literal = literal.split(".")[0]

    for fmt in _SUPPORTED_DT_FORMATS:
        try:
            timeTuple = time.strptime(literal, fmt)
            return datetime.datetime(*timeTuple[:6])
        except ValueError:
            continue

    return parseISODT(literal)
529
def parseDefaultDate(literal):
    """returns a date for a literal of the form yyyy-mm-dd.

    None and instances of datetime.date are handed through unchanged.
    time.strptime raises a ValueError for literals it cannot parse.
    """
    if literal is None or isinstance(literal, datetime.date):
        return literal

    year, month, day = time.strptime(literal, '%Y-%m-%d')[:3]
    return datetime.date(year, month, day)
535
def parseDefaultTime(literal):
    """returns a time for a literal of the form hh:mm:ss.

    None and instances of datetime.time are handed through unchanged.
    time.strptime raises a ValueError for literals it cannot parse.
    """
    if literal is None or isinstance(literal, datetime.time):
        return literal

    # as long as we're building on top of time, we can't do fractional seconds
    hours, minutes, seconds = time.strptime(literal, '%H:%M:%S')[3:6]
    return datetime.time(hours, minutes, seconds)
542
def roundToSeconds(dt):
    """returns a datetime instance rounded to whole seconds.

    The result is rounded up if there are more than 500000 microseconds
    and rounded down otherwise.

    This also recklessly clears any time zone marker.  So, don't pass
    in anything with a meaningful time zone.
    """
    truncated = dt.replace(microsecond=0, tzinfo=None)
    if dt.microsecond>500000:
        return truncated+datetime.timedelta(seconds=1)
    return truncated
555
def formatISODT(dt):
    """returns some ISO8601 representation of a datetime instance.

    The reason for preferring this function over a simple str is that
    datetime's default representation is too difficult for some other
    code (e.g., itself); hence, this code rounds to whole seconds (so there
    is no microsecond part) and always adds a Z (where strftime works,
    utils.isoTimestampFmt produces an identical string).

    The behaviour of this function for timezone-aware datetimes is undefined.

    For convenience, None is returned as None.

    >>> formatISODT(datetime.datetime(2015, 10, 20, 12, 34, 22, 250))
    '2015-10-20T12:34:22Z'
    >>> formatISODT(datetime.datetime(1815, 10, 20, 12, 34, 22, 250))
    '1815-10-20T12:34:22Z'
    >>> formatISODT(datetime.datetime(2018, 9, 21, 23, 59, 59, 640000))
    '2018-09-22T00:00:00Z'
    """
    if dt is not None:
        return "%sZ"%roundToSeconds(dt).isoformat()
    return None
579
class NameMap(object):
    """is a name mapper fed from a simple text file.

    The text file format simply is:

    <target-id> "TAB" <src-id>{whitespace <src-id>}

    src-ids have to be encoded quoted-printable when they contain whitespace
    or other "bad" characters ("="!).  You can have #-comments and empty
    lines.

    src-ids are lowercased when the file is read, and resolve lowercases
    the names it looks up.  Membership tests with ``in`` use the name
    as passed in.
    """
    def __init__(self, src, missingOk=False):
        """src is the path of the mapping file; with missingOk, a file
        that cannot be opened results in an empty mapping rather than
        an IOError.
        """
        self._parseSrc(src, missingOk)

    def __contains__(self, name):
        return name in self.namesDict

    def _parseSrc(self, src, missingOk):
        """fills self.namesDict from the file src.

        Lines that cannot be parsed lead to a ValueError.
        """
        self.namesDict = {}
        try:
            f = open(src)
        except IOError:
            if not missingOk:
                raise
            return

        try:
            for ln in f:
                if ln.startswith("#") or not ln.strip():
                    continue
                # this raises a ValueError unless there are exactly two
                # tab-separated fields
                ob, names = re.split("\t+", ln)
                for name in names.lower().split():
                    self.namesDict[name.decode("quoted-printable")] = ob
        except ValueError:
            raise misctricks.logOldExc(ValueError(
                "Syntax error in %s: Line %s not understood."%(src, repr(ln))))
        finally:
            # make sure the file is closed even when parsing fails
            f.close()

    def resolve(self, name):
        """returns the target id for the source id name.

        A KeyError is raised for unknown names.
        """
        return self.namesDict[name.lower()]

# replacement texts for the named entities _decodeEntityref knows
_STANDARD_ENTITIES = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'apos': "'",
    'quot': '"',
}


def _decodeEntityref(matob):
    """returns the replacement text for an entity reference.

    matob is a match object with the entity name (i.e., what is between
    the ampersand and the semicolon) in its first group.  Next to the
    names in _STANDARD_ENTITIES, decimal (#nnn) and hexadecimal (#xnnn)
    character references are understood.  Anything else raises
    a ValueError.
    """
    entRef = matob.group(1)
    try:
        return _STANDARD_ENTITIES[entRef]
    except KeyError:
        pass

    if entRef.startswith("#x"):
        return unichr(int(entRef[2:], 16))
    if entRef.startswith("#"):
        return unichr(int(entRef[1:]))
    raise ValueError("Unknown entity reference: &%s;"%entRef)
642
def replaceXMLEntityRefs(unicodeString):
    """returns unicodeString with everything matched by entityrefPat
    replaced by what _decodeEntityref returns for it.

    A ValueError from _decodeEntityref (unknown entity) is propagated.
    """
    return re.sub(entityrefPat, _decodeEntityref, unicodeString)
646
def ensureOneSlash(s):
    """returns s with exactly one trailing slash.

    Any slashes already at the end of s are removed before the single
    slash is appended.
    """
    withoutSlashes = s.rstrip("/")
    return withoutSlashes+"/"
652
def _iterSimpleTextNoContinuation(f):
    """helps iterSimpleText.

    This yields pairs of (1-based line number, stripped line) for the
    lines in f, leaving out lines that are empty or start with a hash
    after stripping.
    """
    for lineNumber, rawLine in enumerate(f, 1):
        content = rawLine.strip()
        if not content or content.startswith("#"):
            continue
        yield lineNumber, content
661
@codetricks.document
def iterSimpleText(f):
    """iterates over ``(physLineNumber, line)`` in f with some usual
    conventions for simple data files.

    You should use this function to read from simple configuration and/or
    table files that don't warrant a full-blown grammar/rowmaker combo.
    The intended use is somewhat like this::

        with open(rd.getAbsPath("res/mymeta")) as f:
            for lineNumber, content in iterSimpleText(f):
                try:
                    ...
                except Exception as exc:
                    sys.stderr.write("Bad input line %s: %s"%(lineNumber, exc))

    The grammar rules are, specifically:

    * leading and trailing whitespace is stripped
    * empty lines are ignored
    * lines beginning with a hash are ignored
    * lines ending with a backslash are joined with the following line;
      to have intervening whitespace, have a blank in front of the backslash.

    For joined lines, the line number returned is the one of the last
    line joined.  A SourceParseError is raised if the last line ends
    with a backslash.
    """
    lines = _iterSimpleTextNoContinuation(f)

    for lineNumber, curLine in lines:
        # pull in more lines from the same iterator while there's
        # a continuation marker
        while curLine.endswith("\\"):
            try:
                lineNumber, newStuff = next(lines)
            except StopIteration:
                raise SourceParseError("File ends with a backslash",
                    location="line %d"%lineNumber)
            curLine = curLine[:-1]+newStuff

        yield lineNumber, curLine

# string.ascii_letters is used rather than string.letters since the latter
# depends on the locale and hence may contain non-ASCII characters.
_RANDOM_STRING_OK_CHARS = string.ascii_letters+string.digits+"_.,"


def getRandomString(length):
    """returns a random string of harmless printable characters.

    The result has length characters drawn from ASCII letters, digits,
    and "_.,".  The characters are chosen using the random module.
    """
    return "".join(
        random.choice(_RANDOM_STRING_OK_CHARS) for _ in range(length))
712
def safe_str(val):
    """returns val as a byte string.

    str instances are returned unchanged, unicode instances are encoded
    to ASCII with all characters that cannot be encoded dropped, and
    everything else is passed through str().
    """
    if isinstance(val, str):
        return val
    if isinstance(val, unicode):
        return val.encode("ascii", "ignore")
    return str(val)
721
def parseAccept(aString):
    """parses an RFC 2616 accept header and returns a dict mapping media
    type patterns to their (unparsed) parameters.

    If aString is None, an empty dict is returned.

    If we ever want to do fancy things with http content negotiation, this
    will be further wrapped to provide something implementing the complex
    RFC 2616 rules; this primitive interface really is intended for telling
    apart browsers (which accept text/html) from other clients (which
    hopefully do not) at this point.

    >>> sorted(parseAccept("text/html, text/*; q=0.2; level=3").items())
    [('text/*', 'q=0.2; level=3'), ('text/html', '')]
    >>> parseAccept(None)
    {}
    """
    if aString is None:
        return {}

    res = {}
    for item in aString.split(","):
        # everything after the first semicolon is parameters
        key, _, params = item.partition(";")
        res[key.strip()] = params.strip()
    return res
750
def _test():
    """runs the doctests embedded in this module.
    """
    import doctest
    import texttricks
    doctest.testmod(texttricks)


if __name__=="__main__":
    _test()