1 """
2 A parser for ADQL.
3
4 The grammar follows the official BNF grammar quite closely, except where
5 pyparsing makes a different approach desirable; the names should mostly
6 match except for the obvious underscore to camel case map.
7
8 The grammar given in the spec has some nasty rules when you're parsing
9 without backtracking and by recursive descent (which is what pyparsing
10 does). I need some reformulations. The more interesting of those
11 include:
12
13 TableReference
14 --------------
15
16 Trouble is that table_reference is left-recursive in the following rules::
17
18 <table_reference> ::=
19 <table_name> [ <correlation_specification> ]
20 | <derived_table> <correlation_specification>
21 | <joined_table>
22
23 <joined_table> ::=
24 <qualified_join>
25 | <left_paren> <joined_table> <right_paren>
26
27 <qualified_join> ::=
28 <table_reference> [ NATURAL ] [ <join_type> ] JOIN
29 <table_reference> [ <join_specification> ]
30
31 We fix this by adding rules::
32
33 <sub_join> ::= '(' <joinedTable> ')'
34 <join_opener> ::=
35 <table_name> [ <correlation_specification> ]
36 | <derived_table> <correlation_specification>
37 | <sub_join>
38
39 and then writing::
40
41 <qualified_join> ::=
42 <join_opener> [ NATURAL ] [ <join_type> ] JOIN
43 <table_reference> [ <join_specification> ]
44
45
46
47 statement
48 ---------
49
50 I can't have StringEnd appended to querySpecification since it's used
51 in subqueries, but I need to have it to keep pyparsing from just matching
52 parts of the input. Thus, the top-level production is for "statement".
53
54
55 trig_function, math_function, system_defined_function
56 -----------------------------------------------------
57
58 I think it's a bit funny to have the arity of functions in the syntax, but
59 there you go. Anyway, I don't want to have the function names in separate
60 symbols since they are expensive but go for a Regex (trig1ArgFunctionName).
61 The only exception is ATAN since it has a different arity from the rest of the
62 lot.
63
64 Similarly, for math_function I group symbols by arity.
65
66 The system defined functions are also regrouped to keep the number of
67 symbols reasonable.
68
69 column_reference and below
70 --------------------------
71
72 Here the lack of backtracking hurts badly, since once, say, schema name
73 is matched with a dot that's it, even if the dot should really have separated
74 schema and table.
75
76 Hence, we don't assign semantic labels in the grammar but leave that to
77 whatever interprets the tokens.
78
79 The important rules here are::
80
81 <column_name> ::= <identifier>
82 <correlation_name> ::= <identifier>
83 <catalog_name> ::= <identifier>
84 <unqualified_schema name> ::= <identifier>
85 <schema_name> ::= [ <catalog_name> <period> ] <unqualified_schema name>
86 <table_name> ::= [ <schema_name> <period> ] <identifier>
87 <qualifier> ::= <table_name> | <correlation_name>
88 <column_reference> ::= [ <qualifier> <period> ] <column_name>
89
90 By substitution, one has::
91
92 <schema_name> ::= [ <identifier> <period> ] <identifier>
93
94 hence::
95
96 <table_name> ::= [[ <identifier> <period> ] <identifier> <period> ]
97 <identifier>
98
99 hence::
100
101 <qualifier> ::= [[ <identifier> <period> ] <identifier> <period> ]
102 <identifier>
103
104 (which matches both table_name and correlation_name) and thus::
105
106 <column_reference> ::= [[[ <identifier> <period> ] <identifier> <period> ]
107 <identifier> <period> ] <identifier>
108
109 We need the table_name, qualifier, and column_reference productions.
110
111
112 generalLiterals in unsignedLiterals
113 ------------------------------------
114
115 One point where I deviate from the published grammar is that I disallow
116 generalLiterals in unsignedLiterals. Allowing them would let pyparsing
117 match a string literal as a numericValueLiteral, which messes up
118 string expressions. I'm not sure why generalLiterals are allowed
119 in there anyway. If this bites at some point, we'll face a major rewrite
120 of the grammar (or we need to dump pyparsing).
121
122 To make the whole thing work, I added the generalLiteral to the
123 characterPrimary production.
124 """
125
126
127
128
129
130
131
132 from __future__ import print_function
133
134 from pyparsing import (
135 Word, Literal, Optional, alphas, CaselessKeyword,
136 ZeroOrMore, OneOrMore, StringEnd,
137 Forward, nums,
138 CaselessLiteral, ParseException, Regex, sglQuotedString, alphanums,
139 ParserElement, White,
140 ParseExpression,
141 Suppress)
142 from pyparsing import ParseSyntaxException
143
144 from gavo import utils
145 from gavo import stc
146
147 __docformat__ = "restructuredtext en"
148
149 import sys
150
151
152
# Words reserved by ADQL itself (function names, geometry constructors,
# TOP/OFFSET, bitwise operators).  All entries are upper case; identifiers
# are upper-cased before being looked up here.
adqlReservedWords = set([
    "ABS", "ACOS", "AREA", "ASIN", "ATAN", "ATAN2",
    "CEILING", "CENTROID", "CIRCLE", "CONTAINS", "COS", "DEGREES", "DISTANCE",
    "EXP", "FLOOR", "INTERSECTS", "LATITUDE", "LOG", "LOG10", "COORD1",
    "COORD2", "COORDSYS", "BOX",
    "MOD", "PI", "POINT", "POLYGON", "POWER", "RADIANS", "REGION",
    "RAND", "ROUND", "SIN", "SQUARE", "SQRT", "TOP", "TAN", "TRUNCATE",
    "OFFSET", "BITWISE_AND", "BITWISE_OR", "BITWISE_XOR", "BITWISE_NOT"])

# Words reserved by the underlying SQL grammar.
sqlReservedWords = set([
    "ABSOLUTE", "ACTION", "ADD", "ALL", "ALLOCATE", "ALTER", "AND", "ANY",
    "ARE", "AS", "ASC", "ASSERTION", "AT", "AUTHORIZATION", "AVG", "BEGIN",
    "BETWEEN", "BIT", "BIT_LENGTH", "BOTH", "BY", "CASCADE", "CASCADED",
    "CASE", "CAST", "CATALOG", "CHAR", "CHARACTER", "CHAR_LENGTH",
    "CHARACTER_LENGTH", "CHECK", "CLOSE", "COALESCE", "COLLATE",
    "COLLATION", "COLUMN", "COMMIT", "CONNECT", "CONNECTION", "CONSTRAINT",
    "CONSTRAINTS", "CONTINUE", "CONVERT", "CORRESPONDING", "COUNT",
    "CREATE", "CROSS", "CURRENT", "CURRENT_DATE", "CURRENT_TIME",
    "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATE", "DAY",
    "DEALLOCATE", "DECIMAL", "DECLARE", "DEFAULT", "DEFERRABLE", "DEFERRED",
    "DELETE", "DESC", "DESCRIBE", "DESCRIPTOR", "DIAGNOSTICS", "DISCONNECT",
    "DISTINCT", "DOMAIN", "DOUBLE", "DROP", "ELSE", "END", "END-EXEC",
    "ESCAPE", "EXCEPT", "EXCEPTION", "EXEC", "EXECUTE", "EXISTS",
    "EXTERNAL", "EXTRACT", "FALSE", "FETCH", "FIRST", "FLOAT", "FOR",
    "FOREIGN", "FOUND", "FROM", "FULL", "GET", "GLOBAL", "GO", "GOTO",
    "GRANT", "GROUP", "HAVING", "HOUR", "IDENTITY", "IMMEDIATE", "IN",
    "INDICATOR", "INITIALLY", "INNER", "INPUT", "INSENSITIVE", "INSERT",
    "INT", "INTEGER", "INTERSECT", "INTERVAL", "INTO", "IS", "ISOLATION",
    "JOIN", "KEY", "LANGUAGE", "LAST", "LEADING", "LEFT", "LEVEL", "LIKE",
    "LOCAL", "LOWER", "MATCH", "MAX", "MIN", "MINUTE", "MODULE", "MONTH",
    "NAMES", "NATIONAL", "NATURAL", "NCHAR", "NEXT", "NO", "NOT", "NULL",
    "NULLIF", "NUMERIC", "OCTET_LENGTH", "OF", "ON", "ONLY", "OPEN",
    "OPTION", "OR", "ORDER", "OUTER", "OUTPUT", "OVERLAPS", "PAD",
    "PARTIAL", "POSITION", "PRECISION", "PREPARE", "PRESERVE", "PRIMARY",
    "PRIOR", "PRIVILEGES", "PROCEDURE", "PUBLIC", "READ", "REAL",
    "REFERENCES", "RELATIVE", "RESTRICT", "REVOKE", "RIGHT", "ROLLBACK",
    "ROWS", "SCHEMA", "SCROLL", "SECOND", "SECTION", "SELECT", "SESSION",
    "SESSION_USER", "SET", "SIZE", "SMALLINT", "SOME", "SPACE", "SQL",
    "SQLCODE", "SQLERROR", "SQLSTATE", "SUBSTRING", "SUM", "SYSTEM_USER",
    "TABLE", "TEMPORARY", "THEN", "TIME", "TIMESTAMP", "TIMEZONE_HOUR",
    "TIMEZONE_MINUTE", "TO", "TRAILING", "TRANSACTION", "TRANSLATE",
    "TRANSLATION", "TRIM", "TRUE", "UNION", "UNIQUE", "UNKNOWN", "UPDATE",
    "UPPER", "USAGE", "USER", "USING", "VALUE", "VALUES", "VARCHAR",
    "VARYING", "VIEW", "WHEN", "WHENEVER", "WHERE", "WITH", "WORK", "WRITE",
    "YEAR", "ZONE"])

# Everything a regular (unquoted) identifier must not be.
allReservedWords = adqlReservedWords | sqlReservedWords


# Regex alternation of the prefixes accepted for user defined function
# names; the grammar appends "_[A-Za-z_]+" to it and matches the result
# case-insensitively.
userFunctionPrefix = "(gavo|ivo)"
203
204
209
210
def Args(pyparseSymbol):
    """wraps pyparseSymbol such that matches get added to an args list
    on the parent node.

    This is done by giving the symbol the results name "args" with
    listAllMatches enabled, so that several Args-wrapped symbols within one
    production accumulate rather than overwrite each other.

    :param pyparseSymbol: an object offering pyparsing's ``setResultsName``.
    :returns: whatever ``setResultsName`` returns for that symbol.
    """
    return pyparseSymbol.setResultsName("args", listAllMatches=True)
216
217
219 return " ".join(toks)
220
221
223 """regular identifiers are all C-style identifiers except reserved
224 words.
225
226 Filtering these in the parse action doesn't always work properly for
227 all versions of pyparsing, thus this special class.
228
229 reservedWords are assumed to be in upper case, but matching
230 case-insensitively.
231 """
235
def parseImpl(self, instring, loc, doActions=True):
    """matches like the parent class, but rejects reserved words.

    The actual matching is delegated to the superclass; the matched text is
    then upper-cased and looked up in self.reservedWords, which makes the
    check case-insensitive as long as reservedWords holds upper-case entries.

    :param instring: the string being parsed.
    :param loc: location in instring at which to attempt the match.
    :param doActions: passed through to the superclass implementation.
    :returns: the pair (new location, matched text) from the superclass.
    :raises ParseException: reported at loc when the match is a reserved
        word (the superclass may raise on its own as well).
    """
    locNew, match = super(RegularIdentifier, self).parseImpl(
        instring, loc, doActions)
    if match.upper() in self.reservedWords:
        raise ParseException(instring, loc, "Reserved word not allowed here")
    return locNew, match
242
243
245 """pyparsing's Or, except that ParseFatalExceptions are still propagated.
246 """
def __init__(self, exprs, savelist=False):
    """sets up the alternatives.

    :param exprs: the alternative expressions, handed to the superclass.
    :param savelist: handed to the superclass unchanged.
    """
    super(LongestMatch, self).__init__(exprs, savelist)
    # The alternation can match the empty string as soon as any one of its
    # alternatives can.
    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
254
def parseImpl(self, instring, loc, doActions=True):
    """parses with the alternative consuming the most input.

    Every expression in self.exprs is first tried at loc with parse actions
    disabled.  The one ending furthest to the right wins (the first one on
    ties) and is then parsed again, this time honouring doActions.

    Only ParseException and IndexError are caught during the trial runs;
    any other exception raised by an alternative propagates to the caller.

    :param instring: the string being parsed.
    :param loc: location in instring at which to attempt the match.
    :param doActions: whether parse actions run for the winning alternative.
    :returns: the result of the winning alternative's ``_parse``.
    :raises ParseException: if no alternative matches; this is the failure
        that occurred furthest into the input, or a generic error when no
        alternative reported one (e.g., when there are no alternatives).
    """
    maxExcLoc = -1
    maxMatchLoc = -1
    maxException = None
    maxMatchExp = None

    for e in self.exprs:
        try:
            # Trial run: we only want the end location here.
            loc2 = e._parse(instring, loc, doActions=False)[0]
        except ParseException as err:
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            # Treat an IndexError as a failure at the end of the input.
            if len(instring) > maxExcLoc:
                maxException = ParseException(
                    instring, len(instring), e.errmsg, self)
                maxExcLoc = len(instring)
        else:
            # Strict comparison: earlier alternatives win ties.
            if loc2 > maxMatchLoc:
                maxMatchLoc = loc2
                maxMatchExp = e

    if maxMatchLoc < 0:
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc,
            "no defined alternatives to match", self)

    return maxMatchExp._parse(instring, loc, doActions)
284
286 if hasattr(self,"name"):
287 return self.name
288
289 if self.strRepr is None:
290 self.strRepr = "{" + " ^ ".join( [ str(e) for e in self.exprs ] ) + "}"
291
292 return self.strRepr
293
295 subRecCheckList = parseElementList[:] + [ self ]
296 for e in self.exprs:
297 e.checkRecursion( subRecCheckList )
298
299
301 """returns a pair symbols, selectSymbol for a grammar parsing ADQL.
302
303 You should only use this if you actually require a fresh copy
304 of the ADQL grammar. Otherwise, use getADQLGrammar or a wrapper
305 function defined by a client module.
306 """
307 with utils.pyparsingWhitechars("\n\t\r "):
308
309
310
311
312
313 sqlComment = Regex("--[^\n\r]*")
314 whitespace = Word(" \t\n")
315
316
317 unsignedInteger = Word(nums)
318 unsignedInteger.setName("unsigned integer")
319 _exactNumericRE = r"\d+(\.(\d+)?)?|\.\d+"
320 exactNumericLiteral = Regex(_exactNumericRE)
321 approximateNumericLiteral = Regex(r"(?i)(%s)E[+-]?\d+"%_exactNumericRE)
322 unsignedHexadecimal = Regex("0[xX][0-9A-Fa-f]+"
323 ).addParseAction(lambda s,p,t: str(int(t[0], 16))
324 ).setName("numeric literal")
325 unsignedNumericLiteral = (
326 unsignedHexadecimal
327 | approximateNumericLiteral
328 | exactNumericLiteral )
329 characterStringLiteral = sglQuotedString + ZeroOrMore( sglQuotedString )
330 generalLiteral = characterStringLiteral.copy()
331 unsignedLiteral = unsignedNumericLiteral
332 sign = Literal("+") | "-"
333 signedInteger = Optional( sign ) + unsignedInteger
334 signedInteger.setName("signed integer")
335 multOperator = Literal("*") | Literal("/")
336 addOperator = Literal("+") | Literal("-")
337 notKeyword = CaselessKeyword("NOT")
338 nullLiteral = CaselessKeyword("NULL")
339
340 regularIdentifier = RegularIdentifier(allReservedWords)
341 regularIdentifier.setName("identifier")
342
343
344
345
346
347
348 delimitedIdentifier = Regex('("[^"]*")+').addParseAction(
349 _makeQuotedName)
350 identifier = regularIdentifier | delimitedIdentifier
351
352
353 compOp = Regex("=|!=|<=|>=|<|>")
354 compOp.setName("comparison operator")
355
356
357 columnName = identifier.copy().setName("column name")
358 correlationName = identifier.copy()
359 qualifier = (identifier
360 + Optional( "." + identifier )
361 + Optional( "." + identifier ))
362 tableName = qualifier("tableName")
363 columnReferenceByUCD = (
364 CaselessKeyword("UCDCOL")
365 + '(' + characterStringLiteral + ')')
366 columnReference = (
367 columnReferenceByUCD
368 | identifier
369 + Optional( "." + identifier )
370 + Optional( "." + identifier )
371 + Optional( "." + identifier )).setName("column reference")
372 asClause = Optional(CaselessKeyword("AS")) + columnName("alias")
373
374 valueExpression = Forward().setName("value expression")
375
376
377 setFunctionType = Regex("(?i)AVG|MAX|MIN|SUM|COUNT")
378 setQuantifier = Regex("(?i)DISTINCT|ALL")
379 generalSetFunction = (setFunctionType("fName")
380 + '(' + Optional( setQuantifier ) + Args(valueExpression) + ')')
381 countAll = (CaselessLiteral("COUNT")("fName")
382 + '(' + Args(Literal('*')) + ')')
383 setFunctionSpecification = (countAll | generalSetFunction)
384
385
386 castOperand = (
387 valueExpression
388 | nullLiteral)
389 length = unsignedInteger
390 castTarget = (
391 (CaselessKeyword("CHAR")
392 + Optional(Literal("(") - length - Literal(")")))
393 | (CaselessKeyword("NATIONAL") - CaselessKeyword("CHAR")
394 + Optional(Literal("(") - length - Literal(")")))
395 | CaselessKeyword("INTEGER")
396 | CaselessKeyword("BIGINT")
397 | CaselessKeyword("SMALLINT")
398 | CaselessKeyword("REAL")
399 | CaselessKeyword("DOUBLE") + CaselessKeyword("PRECISION")
400 | CaselessKeyword("TIMESTAMP")).addParseAction(
401 lambda s,p,t: " ".join(t).upper())
402 castSpecification = (
403 CaselessLiteral("CAST")
404 - "(" + castOperand.setResultsName("value")
405 - CaselessLiteral("AS") + castTarget.setResultsName("newType") - ')')
406
407
408 arrayReference = (
409 columnReference
410 + Literal('[')
411 + valueExpression
412 + Literal(']'))
413 valueExpressionPrimary = (
414 CaselessKeyword("NULL")
415 | unsignedLiteral
416 | arrayReference
417 | columnReference
418 | setFunctionSpecification
419 | '(' + valueExpression + ')'
420 | castSpecification).setName("value expression")
421
422
423 characterPrimary = Forward()
424 characterFactor = characterPrimary
425 characterValueExpression = ( characterFactor +
426 ZeroOrMore( "||" + characterFactor ))
427 stringValueExpression = characterValueExpression
428
429
430 numericValueExpression = Forward()
431 numericValueFunction = Forward()
432 numericExpressionPrimary = (
433 unsignedLiteral | columnReference
434 | setFunctionSpecification | '(' + valueExpression + ')')
435 numericPrimary = ( numericValueFunction | valueExpressionPrimary
436 ).setName("numeric expression")
437 factor = Optional( sign ) + numericPrimary
438 term = (factor + ZeroOrMore( multOperator + factor ))
439 numericValueExpression << (
440 term + ZeroOrMore( addOperator + term )
441 ).setName("numeric value expression")
442 nvArgs = Args(numericValueExpression)
443
444
445 userDefinedFunction = Forward()
446 geometryValueExpression = Forward()
447 tapCoordLiteral = Regex("(?i)'(?P<sys>%s)'"%"|".join(stc.TAP_SYSTEMS)
448 ).addParseAction(lambda s,p,t: t["sys"].upper())
449 tapCoordLiteral.setName("coordinate system literal (ICRS, GALACTIC,...)")
450 coordSys = (tapCoordLiteral
451 | nullLiteral.copy().addParseAction(lambda s,p,t: "UNKNOWN")
452 ).setResultsName("coordSys"
453 ).setName("coordinate system literal (ICRS, GALACTIC,...)")
454 coordinates = (nvArgs + ',' + nvArgs)
455 box = (CaselessKeyword("BOX")("fName")
456 - '(' + Optional( coordSys + ',' )
457 + coordinates + ','
458 + coordinates + ')')
459 point = (CaselessKeyword("POINT")("fName")
460 - '(' + Optional( coordSys - ',' )
461 + coordinates + ')')
462 coordValue = userDefinedFunction | point | columnReference
463
464 circleSplitCooArgs = (coordinates + ',' - nvArgs)
465 circlePointCooArgs = (
466 Args(coordValue) + ','
467 - nvArgs)
468 circle = (CaselessKeyword("CIRCLE")("fName")
469 - '(' + Optional( coordSys + ',' )
470 + ( circleSplitCooArgs | circlePointCooArgs )
471 + ')')
472
473 polygonSplitCooArgs = (coordinates
474 + ',' + coordinates
475 + OneOrMore( ',' + coordinates ))
476 polygonPointCooArgs = (Args(coordValue)
477 - ',' - Args(coordValue)
478 - OneOrMore(',' - Args(coordValue)))
479 polygon = (CaselessKeyword("POLYGON")("fName")
480 - '(' + Optional( coordSys + ',' )
481 + Args(polygonSplitCooArgs | polygonPointCooArgs)
482 + ')')
483
484 region = (CaselessKeyword("REGION")("fName")
485 + '('
486 + Args(stringValueExpression) + ')')
487 geometryExpression = box | point | circle | polygon | region
488 geometryValue = columnReference.copy()
489 centroid = (CaselessKeyword("CENTROID")("fName")
490 + '(' + Args(geometryValueExpression) + ')')
491 geometryValueExpression << (geometryExpression
492 | userDefinedFunction
493 | geometryValue
494 | centroid).setName("geometry value expression")
495
496
497 distanceSplitCooArgs = coordinates + ',' + coordinates
498 distanceSplitCooArgs.setName("Numeric coordinates")
499 distancePointCooArgs = Args(coordValue) + ',' + Args(coordValue)
500 distancePointCooArgs.setName("Geometry argument")
501 distanceFunction = (CaselessKeyword("DISTANCE")("fName")
502 - '(' + (distanceSplitCooArgs | distancePointCooArgs) + ')')
503 pointFunction = (Regex("(?i)COORD[12]|COORDSYS")("fName") + '(' +
504 Args(coordValue) + ')')
505 area = (CaselessKeyword("AREA")("fName")
506 + '(' + Args(geometryValueExpression) + ')')
507 nonPredicateGeometryFunction = (
508 distanceFunction
509 | pointFunction
510 | area )
511 predicateGeoFunctionName = Regex("(?i)CONTAINS|INTERSECTS")
512 predicateGeometryFunction = (predicateGeoFunctionName("fName")
513 + '(' + Args(geometryValueExpression)
514 + ',' + Args(geometryValueExpression) + ')')
515 numericGeometryFunction = (predicateGeometryFunction
516 | nonPredicateGeometryFunction)
517
518
519 misc5ArgFunctionName = CaselessKeyword("CROSSMATCH")
520 miscFunction = (
521 misc5ArgFunctionName("fName")
522 + '('
523 + nvArgs
524 + ( ',' + nvArgs ) * 4
525 + ')')
526
527
528 trig1ArgFunctionName = Regex("(?i)ACOS|ASIN|ATAN|COS|COT|SIN|TAN")
529
530
531
532 trig1ArgFunctionName.setName("numeric expression")
533 trigFunction = (
534 trig1ArgFunctionName("fName") + '('
535 + nvArgs + ')'
536 | CaselessKeyword("ATAN2")("fName") + '(' +
537 nvArgs + ','
538 + nvArgs + ')')
539 math0ArgFunctionName = Regex("(?i)PI")
540 optIntFunctionName = Regex("(?i)RAND")
541 math1ArgFunctionName = Regex("(?i)ABS|CEILING|DEGREES|EXP|FLOOR|LOG10|"
542 "LOG|RADIANS|SQUARE|SQRT|BITWISE_NOT")
543 optPrecArgFunctionName = Regex("(?i)ROUND|TRUNCATE")
544 math2ArgFunctionName = Regex("(?i)POWER|MOD|BITWISE_AND|BITWISE_OR"
545 "|BITWISE_XOR")
546 mathFunction = (
547 math0ArgFunctionName("fName") + '(' + ')'
548 | optIntFunctionName("fName") + '('
549 + Optional( Args(unsignedInteger) ) + ')'
550 | math1ArgFunctionName("fName") + '('
551 + nvArgs + ')'
552 | optPrecArgFunctionName("fName") + '('
553 + nvArgs +
554 Optional( ',' + Args(signedInteger) ) + ')'
555 | math2ArgFunctionName("fName") + '('
556 + nvArgs
557 + ',' + nvArgs + ')')
558 inUnitFunction = (
559 CaselessKeyword("IN_UNIT")
560 - '('
561 - numericValueExpression
562 - ','
563 - characterStringLiteral
564 - ')')
565 userDefinedFunctionParam = valueExpression
566 userDefinedFunctionName = Regex("(?i)"+userFunctionPrefix+"_[A-Za-z_]+")
567 userDefinedFunctionName.setName("Name of locally defined function")
568 userDefinedFunction << ( userDefinedFunctionName("fName") + '(' +
569 Args(userDefinedFunctionParam)
570 + ZeroOrMore( "," + Args(userDefinedFunctionParam) )
571 + ')')
572 numericValueFunction << (trigFunction
573 | mathFunction
574 | miscFunction
575 | inUnitFunction
576 | userDefinedFunction
577 | numericGeometryFunction )
578
579 foldFunction = (
580 ( CaselessKeyword("UPPER") | CaselessKeyword("LOWER") )("fName")
581 - '(' + Args(characterValueExpression) + ')' )
582 stringValueFunction = foldFunction
583
584 characterPrimary << (
585 stringValueFunction
586 | generalLiteral
587 | valueExpressionPrimary
588 | userDefinedFunction)
589
590 timestampFunction = (
591 CaselessKeyword('TIMESTAMP')("fName") - '('
592 + Args(stringValueExpression) - ')')
593 dateValueExpression = (timestampFunction)
594
595
596 valueExpression << (
597 LongestMatch([
598 numericValueExpression,
599 stringValueExpression,
600 dateValueExpression,
601 geometryValueExpression]) ).setName("value expression")
602 derivedColumn = valueExpression("expr") + Optional( asClause )
603
604
605 setQuantifier = (CaselessKeyword( "DISTINCT" )
606 | CaselessKeyword( "ALL" ))("setQuantifier")
607 setLimit = CaselessKeyword( "TOP" ) - unsignedInteger("setLimit")
608 offsetSpec = CaselessKeyword( "OFFSET" )- unsignedInteger("offset")
609 qualifiedStar = qualifier + "." + "*"
610 selectSublist = (qualifiedStar | derivedColumn
611 ).setResultsName("fieldSel", listAllMatches=True
612 ).setName("select list item")
613 selectList = (Literal("*")("starSel")
614 | selectSublist + ZeroOrMore( "," - selectSublist ))
615 selectList.setName("select list")
616
617
618 subquery = Forward()
619 searchCondition = Forward()
620 comparisonPredicate = valueExpression + compOp + valueExpression
621 betweenPredicate = (valueExpression + Optional( notKeyword ) +
622 CaselessKeyword("BETWEEN") - valueExpression +
623 CaselessKeyword("AND") - valueExpression)
624 inValueList = valueExpression + ZeroOrMore( ',' + valueExpression )
625 inPredicateValue = subquery | ( "(" + inValueList + ")" )
626 inPredicate = (valueExpression + Optional( notKeyword ) +
627 CaselessKeyword("IN") + inPredicateValue)
628 existsPredicate = CaselessKeyword("EXISTS") - subquery
629 likePredicate = (
630 characterValueExpression
631 + Optional( notKeyword )
632 + (CaselessKeyword("LIKE") | CaselessKeyword("ILIKE"))
633 + characterValueExpression)
634 nullPredicate = (columnReference + CaselessKeyword("IS") +
635 Optional( notKeyword ) - nullLiteral)
636 predicate = (comparisonPredicate | betweenPredicate | inPredicate |
637 likePredicate | nullPredicate | existsPredicate)
638 booleanPrimaryOpener = Literal('(')
639 booleanPrimaryOpener.setName("boolean expression")
640 booleanPrimary = (
641 booleanPrimaryOpener + searchCondition + ')' | predicate
642 ).setName("boolean expression")
643 booleanFactor = Optional( notKeyword ) + booleanPrimary
644 booleanTerm = ( booleanFactor +
645 ZeroOrMore( CaselessKeyword("AND") - booleanFactor ))
646
647
648 searchCondition << ( booleanTerm +
649 ZeroOrMore( CaselessKeyword("OR") - booleanTerm ))
650 searchCondition.setName("search condition")
651 whereClause = (CaselessKeyword("WHERE") - searchCondition)("whereClause")
652
653
654 queryExpression = Forward()
655 correlationSpecification = ((
656 Optional( CaselessKeyword("AS") ) | whitespace
657 ) + correlationName("alias"))
658 subqueryOpener = Literal('(')
659 subqueryOpener.setName("subquery")
660 subquery << (subqueryOpener + queryExpression + ')')
661 derivedTable = subquery.copy() + correlationSpecification
662 possiblyAliasedTable = tableName + Optional(correlationSpecification)
663 joinedTable = Forward()
664 subJoin = '(' + joinedTable + ')'
665 joinOperand = (possiblyAliasedTable
666 | derivedTable
667 | subJoin)
668 tableReference = (joinedTable
669 | possiblyAliasedTable
670 | derivedTable).setName("table reference")
671
672
673 columnNameList = (columnName + ZeroOrMore( "," + columnName)
674 ).setName("column name list")
675 namedColumnsJoin = (CaselessKeyword("USING") + '(' +
676 columnNameList("columnNames") + ')')
677 joinCondition = CaselessKeyword("ON") - searchCondition
678 joinSpecification = joinCondition | namedColumnsJoin
679 outerJoinType = (CaselessKeyword("LEFT")
680 | CaselessKeyword("RIGHT")
681 | CaselessKeyword("FULL"))
682 joinType = (CaselessKeyword("INNER")
683 | (outerJoinType + CaselessKeyword("OUTER"))
684 | CaselessKeyword("CROSS"))
685 joinOperator = (Optional( CaselessKeyword("NATURAL") )
686 + Optional( joinType )
687 + CaselessKeyword( "JOIN" )
688 )
689 joinedTable << (joinOperand
690 + ZeroOrMore( joinOperator
691 + joinOperand
692 + Optional( joinSpecification ) ))
693
694
695 groupByClause = (CaselessKeyword( "GROUP" ) + CaselessKeyword( "BY" )
696 + columnReference
697 + ZeroOrMore( ',' + columnReference ))("groupby")
698 havingClause = (CaselessKeyword( "HAVING" )
699 + searchCondition)("having")
700 orderingSpecification = (CaselessKeyword( "ASC")
701 | CaselessKeyword("DESC"))
702 sortKey = columnName | unsignedInteger
703 sortSpecification = sortKey + Optional( orderingSpecification )
704 orderByClause = (CaselessKeyword("ORDER")
705 + CaselessKeyword("BY") + sortSpecification
706 + ZeroOrMore( ',' + sortSpecification ))("orderBy")
707
708
709 fromClause = ( Suppress(CaselessKeyword("FROM"))
710 + tableReference
711 + ZeroOrMore( Suppress(',') - tableReference ))("fromClause")
712 tableExpression = (fromClause
713 + Optional( whereClause )
714 + Optional( groupByClause )
715 + Optional( havingClause )
716 + Optional( orderByClause ))
717
718
719 selectQuery = Forward()
720 withQuery = ( regularIdentifier("queryName")
721 + CaselessKeyword("AS")
722 + '(' + selectQuery + ')' )
723 withSpecification = ( CaselessLiteral("WITH")
724 + withQuery + ZeroOrMore(',' + withQuery) )
725
726
727 queryExpression << ( selectQuery | joinedTable )
728
729 optionalAll = Optional( CaselessKeyword("ALL") )
730 intersectOperator = (
731 CaselessKeyword("INTERSECT")
732 + optionalAll )
733 additiveSetOperator = ((
734 CaselessKeyword("UNION")
735 | CaselessKeyword("EXCEPT") )
736 + optionalAll )
737
738 selectNoParens = ( CaselessKeyword("SELECT")
739 + Optional( setQuantifier )
740 + Optional( setLimit )
741 + selectList + tableExpression )
742 setTerm = (
743 '(' + selectQuery + ')'
744 | selectNoParens
745 + ZeroOrMore(
746 intersectOperator
747 + selectNoParens) )
748 firstSetTerm = (
749 selectNoParens
750 + ZeroOrMore(
751 intersectOperator
752 + selectNoParens) )
753
754 selectQuery << (
755 firstSetTerm
756 + ZeroOrMore(
757 additiveSetOperator
758 + setTerm )
759 + Optional( offsetSpec ))
760
761 querySpecification = Optional(withSpecification) + selectQuery
762
763 statement = querySpecification + Optional( White() ) + StringEnd()
764
765
766 statement.ignore(sqlComment)
767 return dict((k, v) for k, v in locals().iteritems()
768 if isinstance(v, ParserElement)), statement
769
770
771 _grammarCache = None
772
774 if not debugNames:
775 debugNames = syms
776 for name in debugNames:
777 ob = syms[name]
778 if not ob.debug:
779 ob.setDebug(True)
780 ob.setName(name)
781 if "sqlComment" in syms:
782 syms["sqlComment"].setDebug(False)
783
785 def makeAction(name):
786 def action(s, pos, toks):
787 return [name, toks]
788 return action
789 for name in syms:
790 ob = syms[name]
791 if not ob.debug:
792 ob.setDebug(True)
793 ob.setName(name)
794 ob.addParseAction(makeAction(name))
795
796
808
809
810 if __name__=="__main__":
812 print("---------------Tokens:", toks)
813 import pprint
814 syms, grammar = getADQLGrammar()
815 enableTree(syms)
816 res = syms["geometryValueExpression"].parseString(
817 """
818 CENTROID(3)
819 """
820 , parseAll=True)
821 pprint.pprint(res.asList(), stream=sys.stderr)
822