sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
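
# A minimal sketch of what the helpers above produce; `parse_one` is sqlglot's
# public entry point, and the SQL snippets and read dialects are illustrative
# (dialects register these helpers under dialect-specific function names):
#
#   from sqlglot import parse_one
#
#   # parse_var_map turns a flat [k1, v1, k2, v2] argument list into
#   # exp.VarMap(keys=exp.Array(...), values=exp.Array(...))
#   parse_one("SELECT MAP('a', 1, 'b', 2)", read="hive")
#
#   # parse_like handles the function-call form of LIKE, swapping the argument
#   # order so the haystack becomes `this` and the pattern the `expression`
#   parse_one("SELECT LIKE(x, 'a%')")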


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
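
    # A hypothetical customization sketch: dialect parsers subclass Parser and
    # derive their token sets from the base sets above, typically by set
    # difference or union (the class name and removed tokens below are
    # illustrative, not an actual sqlglot dialect):
    #
    #   class MyDialectParser(Parser):
    #       TABLE_ALIAS_TOKENS = Parser.TABLE_ALIAS_TOKENS - {TokenType.ROWS}
    #       ID_VAR_TOKENS = Parser.ID_VAR_TOKENS - {TokenType.INTERVAL}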

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
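
    # Sketch of what the ARROW entry above produces (SQL and read dialect are
    # illustrative): for "x -> x + 1", the lambda body is parsed as a
    # conjunction and any column named like a parameter is replaced with a
    # plain identifier, yielding exp.Lambda(this=exp.Add(...), expressions=[<x>]).
    #
    #   from sqlglot import parse_one
    #   parse_one("SELECT LIST_TRANSFORM([1, 2], x -> x + 1)", read="duckdb")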

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
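
    # A hypothetical extension sketch: subclasses add statements by extending
    # this mapping from leading token to handler (the token choice and the
    # handler name below are illustrative):
    #
    #   class MyParser(Parser):
    #       STATEMENT_PARSERS = {
    #           **Parser.STATEMENT_PARSERS,
    #           TokenType.SHOW: lambda self: self._parse_my_show(),
    #       }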

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
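
    # Sketch: the binary_range_parser entries above consume the right-hand
    # side with _parse_bitwise and then check for a trailing ESCAPE clause via
    # _parse_escape. The SIMILAR_TO entry, for instance, is equivalent to:
    #
    #   lambda self, this: self._parse_escape(
    #       self.expression(exp.SimilarTo, this=this, expression=self._parse_bitwise())
    #   )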

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
889 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 890 "TRANSACTION": lambda self: self._parse_set_transaction(), 891 } 892 893 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 894 895 TYPE_LITERAL_PARSERS = { 896 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 897 } 898 899 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 900 901 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 902 903 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 904 905 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 906 TRANSACTION_CHARACTERISTICS = { 907 "ISOLATION LEVEL REPEATABLE READ", 908 "ISOLATION LEVEL READ COMMITTED", 909 "ISOLATION LEVEL READ UNCOMMITTED", 910 "ISOLATION LEVEL SERIALIZABLE", 911 "READ WRITE", 912 "READ ONLY", 913 } 914 915 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 916 917 CLONE_KEYWORDS = {"CLONE", "COPY"} 918 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 919 920 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 921 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 922 923 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 924 925 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 926 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 927 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 928 929 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 930 931 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 932 933 DISTINCT_TOKENS = {TokenType.DISTINCT} 934 935 NULL_TOKENS = {TokenType.NULL} 936 937 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 938 939 STRICT_CAST = True 940 941 PREFIXED_PIVOT_COLUMNS = False 942 IDENTIFY_PIVOT_STRINGS = False 943 944 LOG_DEFAULTS_TO_LN = False 945 946 # Whether or not ADD is present for each column added by ALTER TABLE 947 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 948 949 # Whether or not the table sample clause expects CSV syntax 950 TABLESAMPLE_CSV = False 951 952 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 953 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 954 955 # Whether the TRIM function expects the characters to trim as its first argument 956 TRIM_PATTERN_FIRST = False 957 958 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 959 MODIFIERS_ATTACHED_TO_UNION = True 960 UNION_MODIFIERS = {"order", "limit", "offset"} 961 962 __slots__ = ( 963 "error_level", 964 "error_message_context", 965 "max_errors", 966 "dialect", 967 "sql", 968 "errors", 969 "_tokens", 970 "_index", 971 "_curr", 972 "_next", 973 "_prev", 974 "_prev_comments", 975 ) 976 977 # Autofilled 978 SHOW_TRIE: t.Dict = {} 979 SET_TRIE: t.Dict = {} 980 981 def __init__( 982 self, 983 error_level: t.Optional[ErrorLevel] = None, 984 error_message_context: int = 100, 985 max_errors: int = 3, 986 dialect: DialectType = None, 987 ): 988 from sqlglot.dialects import Dialect 989 990 self.error_level = error_level or ErrorLevel.IMMEDIATE 991 self.error_message_context = error_message_context 992 self.max_errors = max_errors 993 self.dialect = Dialect.get_or_raise(dialect) 994 self.reset() 995 996 def reset(self): 997 self.sql = "" 998 self.errors = [] 999 self._tokens = [] 1000 self._index = 0 1001 self._curr = None 1002 self._next = None 1003 self._prev = None 1004 self._prev_comments = None 1005 1006 def parse( 1007 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1008 ) -> t.List[t.Optional[exp.Expression]]: 1009 """ 1010 Parses a list of tokens and returns a list of syntax trees, one tree 1011 per parsed SQL statement. 1012 1013 Args: 1014 raw_tokens: The list of tokens. 1015 sql: The original SQL string, used to produce helpful debug messages. 1016 1017 Returns: 1018 The list of the produced syntax trees. 1019 """ 1020 return self._parse( 1021 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1022 ) 1023 1024 def parse_into( 1025 self, 1026 expression_types: exp.IntoType, 1027 raw_tokens: t.List[Token], 1028 sql: t.Optional[str] = None, 1029 ) -> t.List[t.Optional[exp.Expression]]: 1030 """ 1031 Parses a list of tokens into a given Expression type. If a collection of Expression 1032 types is given instead, this method will try to parse the token list into each one 1033 of them, stopping at the first for which the parsing succeeds. 1034 1035 Args: 1036 expression_types: The expression type(s) to try and parse the token list into. 1037 raw_tokens: The list of tokens. 1038 sql: The original SQL string, used to produce helpful debug messages. 1039 1040 Returns: 1041 The target Expression. 
1042 """ 1043 errors = [] 1044 for expression_type in ensure_list(expression_types): 1045 parser = self.EXPRESSION_PARSERS.get(expression_type) 1046 if not parser: 1047 raise TypeError(f"No parser registered for {expression_type}") 1048 1049 try: 1050 return self._parse(parser, raw_tokens, sql) 1051 except ParseError as e: 1052 e.errors[0]["into_expression"] = expression_type 1053 errors.append(e) 1054 1055 raise ParseError( 1056 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1057 errors=merge_errors(errors), 1058 ) from errors[-1] 1059 1060 def _parse( 1061 self, 1062 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1063 raw_tokens: t.List[Token], 1064 sql: t.Optional[str] = None, 1065 ) -> t.List[t.Optional[exp.Expression]]: 1066 self.reset() 1067 self.sql = sql or "" 1068 1069 total = len(raw_tokens) 1070 chunks: t.List[t.List[Token]] = [[]] 1071 1072 for i, token in enumerate(raw_tokens): 1073 if token.token_type == TokenType.SEMICOLON: 1074 if i < total - 1: 1075 chunks.append([]) 1076 else: 1077 chunks[-1].append(token) 1078 1079 expressions = [] 1080 1081 for tokens in chunks: 1082 self._index = -1 1083 self._tokens = tokens 1084 self._advance() 1085 1086 expressions.append(parse_method(self)) 1087 1088 if self._index < len(self._tokens): 1089 self.raise_error("Invalid expression / Unexpected token") 1090 1091 self.check_errors() 1092 1093 return expressions 1094 1095 def check_errors(self) -> None: 1096 """Logs or raises any found errors, depending on the chosen error level setting.""" 1097 if self.error_level == ErrorLevel.WARN: 1098 for error in self.errors: 1099 logger.error(str(error)) 1100 elif self.error_level == ErrorLevel.RAISE and self.errors: 1101 raise ParseError( 1102 concat_messages(self.errors, self.max_errors), 1103 errors=merge_errors(self.errors), 1104 ) 1105 1106 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1107 """ 1108 Appends an error in the list of recorded errors or raises it, depending on the chosen 1109 error level setting. 1110 """ 1111 token = token or self._curr or self._prev or Token.string("") 1112 start = token.start 1113 end = token.end + 1 1114 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1115 highlight = self.sql[start:end] 1116 end_context = self.sql[end : end + self.error_message_context] 1117 1118 error = ParseError.new( 1119 f"{message}. Line {token.line}, Col: {token.col}.\n" 1120 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1121 description=message, 1122 line=token.line, 1123 col=token.col, 1124 start_context=start_context, 1125 highlight=highlight, 1126 end_context=end_context, 1127 ) 1128 1129 if self.error_level == ErrorLevel.IMMEDIATE: 1130 raise error 1131 1132 self.errors.append(error) 1133 1134 def expression( 1135 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1136 ) -> E: 1137 """ 1138 Creates a new, validated Expression. 1139 1140 Args: 1141 exp_class: The expression class to instantiate. 1142 comments: An optional list of comments to attach to the expression. 1143 kwargs: The arguments to set for the expression along with their respective values. 1144 1145 Returns: 1146 The target expression. 
1147 """ 1148 instance = exp_class(**kwargs) 1149 instance.add_comments(comments) if comments else self._add_comments(instance) 1150 return self.validate_expression(instance) 1151 1152 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1153 if expression and self._prev_comments: 1154 expression.add_comments(self._prev_comments) 1155 self._prev_comments = None 1156 1157 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1158 """ 1159 Validates an Expression, making sure that all its mandatory arguments are set. 1160 1161 Args: 1162 expression: The expression to validate. 1163 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1164 1165 Returns: 1166 The validated expression. 1167 """ 1168 if self.error_level != ErrorLevel.IGNORE: 1169 for error_message in expression.error_messages(args): 1170 self.raise_error(error_message) 1171 1172 return expression 1173 1174 def _find_sql(self, start: Token, end: Token) -> str: 1175 return self.sql[start.start : end.end + 1] 1176 1177 def _is_connected(self) -> bool: 1178 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1179 1180 def _advance(self, times: int = 1) -> None: 1181 self._index += times 1182 self._curr = seq_get(self._tokens, self._index) 1183 self._next = seq_get(self._tokens, self._index + 1) 1184 1185 if self._index > 0: 1186 self._prev = self._tokens[self._index - 1] 1187 self._prev_comments = self._prev.comments 1188 else: 1189 self._prev = None 1190 self._prev_comments = None 1191 1192 def _retreat(self, index: int) -> None: 1193 if index != self._index: 1194 self._advance(index - self._index) 1195 1196 def _parse_command(self) -> exp.Command: 1197 return self.expression( 1198 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1199 ) 1200 1201 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1202 start = self._prev 1203 exists = self._parse_exists() if allow_exists else None 1204 1205 self._match(TokenType.ON) 1206 1207 kind = self._match_set(self.CREATABLES) and self._prev 1208 if not kind: 1209 return self._parse_as_command(start) 1210 1211 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1212 this = self._parse_user_defined_function(kind=kind.token_type) 1213 elif kind.token_type == TokenType.TABLE: 1214 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1215 elif kind.token_type == TokenType.COLUMN: 1216 this = self._parse_column() 1217 else: 1218 this = self._parse_id_var() 1219 1220 self._match(TokenType.IS) 1221 1222 return self.expression( 1223 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1224 ) 1225 1226 def _parse_to_table( 1227 self, 1228 ) -> exp.ToTableProperty: 1229 table = self._parse_table_parts(schema=True) 1230 return self.expression(exp.ToTableProperty, this=table) 1231 1232 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1233 def _parse_ttl(self) -> exp.Expression: 1234 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1235 this = self._parse_bitwise() 1236 1237 if self._match_text_seq("DELETE"): 1238 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1239 if self._match_text_seq("RECOMPRESS"): 1240 return self.expression( 1241 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1242 ) 1243 if self._match_text_seq("TO", "DISK"): 1244 return self.expression( 1245 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
"MAXIMUM")), 1459 } 1460 1461 if self._match_texts(self.PROPERTY_PARSERS): 1462 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1463 try: 1464 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1465 except TypeError: 1466 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1467 1468 return None 1469 1470 def _parse_property(self) -> t.Optional[exp.Expression]: 1471 if self._match_texts(self.PROPERTY_PARSERS): 1472 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1473 1474 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1475 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1476 1477 if self._match_text_seq("COMPOUND", "SORTKEY"): 1478 return self._parse_sortkey(compound=True) 1479 1480 if self._match_text_seq("SQL", "SECURITY"): 1481 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1482 1483 index = self._index 1484 key = self._parse_column() 1485 1486 if not self._match(TokenType.EQ): 1487 self._retreat(index) 1488 return None 1489 1490 return self.expression( 1491 exp.Property, 1492 this=key.to_dot() if isinstance(key, exp.Column) else key, 1493 value=self._parse_column() or self._parse_var(any_token=True), 1494 ) 1495 1496 def _parse_stored(self) -> exp.FileFormatProperty: 1497 self._match(TokenType.ALIAS) 1498 1499 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1500 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1501 1502 return self.expression( 1503 exp.FileFormatProperty, 1504 this=self.expression( 1505 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1506 ) 1507 if input_format or output_format 1508 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1509 ) 1510 1511 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1512 self._match(TokenType.EQ) 1513 self._match(TokenType.ALIAS) 1514 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1515 1516 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1517 properties = [] 1518 while True: 1519 if before: 1520 prop = self._parse_property_before() 1521 else: 1522 prop = self._parse_property() 1523 1524 if not prop: 1525 break 1526 for p in ensure_list(prop): 1527 properties.append(p) 1528 1529 if properties: 1530 return self.expression(exp.Properties, expressions=properties) 1531 1532 return None 1533 1534 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1535 return self.expression( 1536 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1537 ) 1538 1539 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1540 if self._index >= 2: 1541 pre_volatile_token = self._tokens[self._index - 2] 1542 else: 1543 pre_volatile_token = None 1544 1545 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1546 return exp.VolatileProperty() 1547 1548 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1549 1550 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1551 self._match_pair(TokenType.EQ, TokenType.ON) 1552 1553 prop = self.expression(exp.WithSystemVersioningProperty) 1554 if self._match(TokenType.L_PAREN): 1555 self._match_text_seq("HISTORY_TABLE", "=") 1556 prop.set("this", self._parse_table_parts()) 1557 1558 if 

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
self._match_text_seq("MAXVALUE"): 1791 return exp.var("MAXVALUE") 1792 return self._parse_bitwise() 1793 1794 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1795 expression = None 1796 from_expressions = None 1797 to_expressions = None 1798 1799 if self._match(TokenType.IN): 1800 this = self._parse_wrapped_csv(self._parse_bitwise) 1801 elif self._match(TokenType.FROM): 1802 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1803 self._match_text_seq("TO") 1804 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1805 elif self._match_text_seq("WITH", "(", "MODULUS"): 1806 this = self._parse_number() 1807 self._match_text_seq(",", "REMAINDER") 1808 expression = self._parse_number() 1809 self._match_r_paren() 1810 else: 1811 self.raise_error("Failed to parse partition bound spec.") 1812 1813 return self.expression( 1814 exp.PartitionBoundSpec, 1815 this=this, 1816 expression=expression, 1817 from_expressions=from_expressions, 1818 to_expressions=to_expressions, 1819 ) 1820 1821 # https://www.postgresql.org/docs/current/sql-createtable.html 1822 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1823 if not self._match_text_seq("OF"): 1824 self._retreat(self._index - 1) 1825 return None 1826 1827 this = self._parse_table(schema=True) 1828 1829 if self._match(TokenType.DEFAULT): 1830 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1831 elif self._match_text_seq("FOR", "VALUES"): 1832 expression = self._parse_partition_bound_spec() 1833 else: 1834 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1835 1836 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1837 1838 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1839 self._match(TokenType.EQ) 1840 return self.expression( 1841 exp.PartitionedByProperty, 1842 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1843 ) 1844 1845 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1846 if self._match_text_seq("AND", "STATISTICS"): 1847 statistics = True 1848 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1849 statistics = False 1850 else: 1851 statistics = None 1852 1853 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1854 1855 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1856 if self._match_text_seq("PRIMARY", "INDEX"): 1857 return exp.NoPrimaryIndexProperty() 1858 return None 1859 1860 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1861 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1862 return exp.OnCommitProperty() 1863 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1864 return exp.OnCommitProperty(delete=True) 1865 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1866 1867 def _parse_distkey(self) -> exp.DistKeyProperty: 1868 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1869 1870 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1871 table = self._parse_table(schema=True) 1872 1873 options = [] 1874 while self._match_texts(("INCLUDING", "EXCLUDING")): 1875 this = self._prev.text.upper() 1876 1877 id_var = self._parse_id_var() 1878 if not id_var: 1879 return None 1880 1881 options.append( 1882 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1883 ) 1884 1885 return self.expression(exp.LikeProperty, this=table, expressions=options) 1886 1887 def 
    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

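    # Example (illustrative sketch, not from the original source): INSERT
    # variants funnel through _parse_insert; for instance SQLite's
    #
    #   import sqlglot
    #   sqlglot.parse_one("INSERT OR REPLACE INTO t VALUES (1)", read="sqlite")
    #
    # should populate the Insert expression's `alternative` arg with "REPLACE"
    # via the INSERT_ALTERNATIVES branch above.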
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

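    # Example (illustrative sketch, not from the original source): both the
    # Postgres and MySQL upsert spellings land in _parse_on_conflict above:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = 2",
    #       read="postgres",
    #   )
    #   sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON DUPLICATE KEY UPDATE a = 2",
    #       read="mysql",
    #   )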
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

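    # Example (illustrative sketch, not from the original source): the
    # multiple-table DELETE syntax referenced in _parse_delete above parses
    # into a Delete expression with a populated `tables` arg:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "DELETE t1, t2 FROM t1 INNER JOIN t2 ON t1.id = t2.id", read="mysql"
    #   )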
    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

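    # Examples (illustrative sketches, not from the original source) for the
    # two dialect quirks called out in the comments above:
    #
    #   import sqlglot
    #   # DuckDB's leading FROM becomes SELECT * FROM tbl
    #   sqlglot.parse_one("FROM tbl", read="duckdb")
    #   # Presto's VALUES 1, 2 yields one column and two rows
    #   sqlglot.parse_one("VALUES 1, 2", read="presto")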
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

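    # Example (illustrative sketch, not from the original source): _parse_with
    # and _parse_cte above handle common table expressions, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    #
    # while _parse_hint consumes optimizer hints such as the Spark/Hive-style
    # SELECT /*+ BROADCAST(t) */ ... when the dialect tokenizes them as hints.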
    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

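    # Example (illustrative sketch, not from the original source): the first
    # branch of _parse_join above normalizes a bare comma cross product into a
    # Join node, and OUTER APPLY is rewritten as a LEFT-side join:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM a, b")  # second table wrapped in exp.Join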
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

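    # Example (illustrative sketch, not from the original source): dotted
    # references are split right-to-left into table, db and catalog parts by
    # _parse_table_parts above:
    #
    #   import sqlglot
    #   tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #   # tbl.name == "t", tbl.db == "d", tbl.catalog == "c"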
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

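    # Example (illustrative sketch, not from the original source): BigQuery's
    # UNNEST ... WITH OFFSET is captured by the `offset` handling in
    # _parse_unnest above:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT x, pos FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos",
    #       read="bigquery",
    #   )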
    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

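    # Example (illustrative sketch, not from the original source): the
    # simplified pivot form documented at the DuckDB link above looks like
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "PIVOT cities ON year USING SUM(population)", read="duckdb"
    #   )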
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

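    # Example (illustrative sketch, not from the original source): _parse_group
    # above accumulates plain expressions, GROUPING SETS, ROLLUP and CUBE into
    # a single Group node:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a), (a, b))")
    #   sqlglot.parse_one("SELECT a, SUM(c) FROM t GROUP BY ROLLUP (a)")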
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

3175 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3176 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3177 }, 3178 ) 3179 else: 3180 with_fill = None 3181 3182 return self.expression( 3183 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3184 ) 3185 3186 def _parse_limit( 3187 self, this: t.Optional[exp.Expression] = None, top: bool = False 3188 ) -> t.Optional[exp.Expression]: 3189 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3190 comments = self._prev_comments 3191 if top: 3192 limit_paren = self._match(TokenType.L_PAREN) 3193 expression = self._parse_term() if limit_paren else self._parse_number() 3194 3195 if limit_paren: 3196 self._match_r_paren() 3197 else: 3198 expression = self._parse_term() 3199 3200 if self._match(TokenType.COMMA): 3201 offset = expression 3202 expression = self._parse_term() 3203 else: 3204 offset = None 3205 3206 limit_exp = self.expression( 3207 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3208 ) 3209 3210 return limit_exp 3211 3212 if self._match(TokenType.FETCH): 3213 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3214 direction = self._prev.text.upper() if direction else "FIRST" 3215 3216 count = self._parse_field(tokens=self.FETCH_TOKENS) 3217 percent = self._match(TokenType.PERCENT) 3218 3219 self._match_set((TokenType.ROW, TokenType.ROWS)) 3220 3221 only = self._match_text_seq("ONLY") 3222 with_ties = self._match_text_seq("WITH", "TIES") 3223 3224 if only and with_ties: 3225 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3226 3227 return self.expression( 3228 exp.Fetch, 3229 direction=direction, 3230 count=count, 3231 percent=percent, 3232 with_ties=with_ties, 3233 ) 3234 3235 return this 3236 3237 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3238 if not self._match(TokenType.OFFSET): 3239 return this 3240 3241 count = self._parse_term() 3242 self._match_set((TokenType.ROW, TokenType.ROWS)) 3243 return self.expression(exp.Offset, this=this, expression=count) 3244 3245 def _parse_locks(self) -> t.List[exp.Lock]: 3246 locks = [] 3247 while True: 3248 if self._match_text_seq("FOR", "UPDATE"): 3249 update = True 3250 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3251 "LOCK", "IN", "SHARE", "MODE" 3252 ): 3253 update = False 3254 else: 3255 break 3256 3257 expressions = None 3258 if self._match_text_seq("OF"): 3259 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3260 3261 wait: t.Optional[bool | exp.Expression] = None 3262 if self._match_text_seq("NOWAIT"): 3263 wait = True 3264 elif self._match_text_seq("WAIT"): 3265 wait = self._parse_primary() 3266 elif self._match_text_seq("SKIP", "LOCKED"): 3267 wait = False 3268 3269 locks.append( 3270 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3271 ) 3272 3273 return locks 3274 3275 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3276 while this and self._match_set(self.SET_OPERATIONS): 3277 token_type = self._prev.token_type 3278 3279 if token_type == TokenType.UNION: 3280 operation = exp.Union 3281 elif token_type == TokenType.EXCEPT: 3282 operation = exp.Except 3283 else: 3284 operation = exp.Intersect 3285 3286 comments = self._prev.comments 3287 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3288 by_name = self._match_text_seq("BY", "NAME") 3289 expression = 
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

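    # Example (illustrative sketch, not from the original source): _parse_in
    # above distinguishes a subquery from a plain expression list:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM t WHERE a IN (SELECT b FROM s)")  # query arg
    #   sqlglot.parse_one("SELECT * FROM t WHERE a IN (1, 2)")  # expressions arg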
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True, upper=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

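    # Example (illustrative sketch, not from the original source): as the
    # comment in _parse_interval above explains, single-string intervals are
    # canonicalized into a literal plus a unit:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
    #   # -> "SELECT INTERVAL '5' DAY"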
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

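    # Example (illustrative sketch, not from the original source): a type
    # keyword followed by a string literal, e.g. TIMESTAMP '2020-01-01', goes
    # through the literal branch of _parse_type above, typically yielding a
    # Cast unless a TYPE_LITERAL_PARSERS entry overrides it:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT TIMESTAMP '2020-01-01'")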
allow_identifiers=allow_identifiers 3598 ) 3599 ) 3600 3601 if not self._match(TokenType.GT): 3602 self.raise_error("Expecting >") 3603 3604 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3605 values = self._parse_csv(self._parse_conjunction) 3606 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3607 3608 if type_token in self.TIMESTAMPS: 3609 if self._match_text_seq("WITH", "TIME", "ZONE"): 3610 maybe_func = False 3611 tz_type = ( 3612 exp.DataType.Type.TIMETZ 3613 if type_token in self.TIMES 3614 else exp.DataType.Type.TIMESTAMPTZ 3615 ) 3616 this = exp.DataType(this=tz_type, expressions=expressions) 3617 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3618 maybe_func = False 3619 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3620 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3621 maybe_func = False 3622 elif type_token == TokenType.INTERVAL: 3623 unit = self._parse_var() 3624 3625 if self._match_text_seq("TO"): 3626 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3627 else: 3628 span = None 3629 3630 if span or not unit: 3631 this = self.expression( 3632 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3633 ) 3634 else: 3635 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3636 3637 if maybe_func and check_func: 3638 index2 = self._index 3639 peek = self._parse_string() 3640 3641 if not peek: 3642 self._retreat(index) 3643 return None 3644 3645 self._retreat(index2) 3646 3647 if not this: 3648 if self._match_text_seq("UNSIGNED"): 3649 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3650 if not unsigned_type_token: 3651 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3652 3653 type_token = unsigned_type_token or type_token 3654 3655 this = exp.DataType( 3656 this=exp.DataType.Type[type_token.value], 3657 expressions=expressions, 3658 nested=nested, 3659 values=values, 3660 prefix=prefix, 3661 ) 3662 3663 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3664 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3665 3666 return this 3667 3668 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3669 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3670 self._match(TokenType.COLON) 3671 return self._parse_column_def(this) 3672 3673 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3674 if not self._match_text_seq("AT", "TIME", "ZONE"): 3675 return this 3676 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3677 3678 def _parse_column(self) -> t.Optional[exp.Expression]: 3679 this = self._parse_field() 3680 if isinstance(this, exp.Identifier): 3681 this = self.expression(exp.Column, this=this) 3682 elif not this: 3683 return self._parse_bracket(this) 3684 return self._parse_column_ops(this) 3685 3686 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3687 this = self._parse_bracket(this) 3688 3689 while self._match_set(self.COLUMN_OPERATORS): 3690 op_token = self._prev.token_type 3691 op = self.COLUMN_OPERATORS.get(op_token) 3692 3693 if op_token == TokenType.DCOLON: 3694 field = self._parse_types() 3695 if not field: 3696 self.raise_error("Expected type") 3697 elif op and self._curr: 3698 self._advance() 3699 value = self._prev.text 3700 field = ( 3701 exp.Literal.number(value) 3702 if self._prev.token_type == 
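# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_types above handles parameterised, nested and struct types through
# the L_PAREN / LT branches. The public exp.DataType.build helper goes through
# this same parsing logic, so type strings should round-trip:

from sqlglot import exp

print(exp.DataType.build("DECIMAL(10, 2)").sql())            # DECIMAL(10, 2)
print(exp.DataType.build("MAP<VARCHAR, ARRAY<INT>>").sql())  # MAP<VARCHAR, ARRAY<INT>>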
TokenType.NUMBER 3703 else exp.Literal.string(value) 3704 ) 3705 else: 3706 field = self._parse_field(anonymous_func=True, any_token=True) 3707 3708 if isinstance(field, exp.Func): 3709 # bigquery allows function calls like x.y.count(...) 3710 # SAFE.SUBSTR(...) 3711 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3712 this = self._replace_columns_with_dots(this) 3713 3714 if op: 3715 this = op(self, this, field) 3716 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3717 this = self.expression( 3718 exp.Column, 3719 this=field, 3720 table=this.this, 3721 db=this.args.get("table"), 3722 catalog=this.args.get("db"), 3723 ) 3724 else: 3725 this = self.expression(exp.Dot, this=this, expression=field) 3726 this = self._parse_bracket(this) 3727 return this 3728 3729 def _parse_primary(self) -> t.Optional[exp.Expression]: 3730 if self._match_set(self.PRIMARY_PARSERS): 3731 token_type = self._prev.token_type 3732 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3733 3734 if token_type == TokenType.STRING: 3735 expressions = [primary] 3736 while self._match(TokenType.STRING): 3737 expressions.append(exp.Literal.string(self._prev.text)) 3738 3739 if len(expressions) > 1: 3740 return self.expression(exp.Concat, expressions=expressions) 3741 3742 return primary 3743 3744 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3745 return exp.Literal.number(f"0.{self._prev.text}") 3746 3747 if self._match(TokenType.L_PAREN): 3748 comments = self._prev_comments 3749 query = self._parse_select() 3750 3751 if query: 3752 expressions = [query] 3753 else: 3754 expressions = self._parse_expressions() 3755 3756 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3757 3758 if isinstance(this, exp.Subqueryable): 3759 this = self._parse_set_operations( 3760 self._parse_subquery(this=this, parse_alias=False) 3761 ) 3762 elif len(expressions) > 1: 3763 this = self.expression(exp.Tuple, expressions=expressions) 3764 else: 3765 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3766 3767 if this: 3768 this.add_comments(comments) 3769 3770 self._match_r_paren(expression=this) 3771 return this 3772 3773 return None 3774 3775 def _parse_field( 3776 self, 3777 any_token: bool = False, 3778 tokens: t.Optional[t.Collection[TokenType]] = None, 3779 anonymous_func: bool = False, 3780 ) -> t.Optional[exp.Expression]: 3781 return ( 3782 self._parse_primary() 3783 or self._parse_function(anonymous=anonymous_func) 3784 or self._parse_id_var(any_token=any_token, tokens=tokens) 3785 ) 3786 3787 def _parse_function( 3788 self, 3789 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3790 anonymous: bool = False, 3791 optional_parens: bool = True, 3792 ) -> t.Optional[exp.Expression]: 3793 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3794 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3795 fn_syntax = False 3796 if ( 3797 self._match(TokenType.L_BRACE, advance=False) 3798 and self._next 3799 and self._next.text.upper() == "FN" 3800 ): 3801 self._advance(2) 3802 fn_syntax = True 3803 3804 func = self._parse_function_call( 3805 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3806 ) 3807 3808 if fn_syntax: 3809 self._match(TokenType.R_BRACE) 3810 3811 return func 3812 3813 def _parse_function_call( 3814 self, 3815 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3816 anonymous: bool = False, 3817 optional_parens: bool = 
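# --- Illustrative sketch (not sqlglot source) -------------------------------
# The {fn <function>} ODBC escape handled at the top of _parse_function above
# is accepted and simply unwrapped; the braces should not survive into the
# tree (behaviour assumed here for the default dialect):

import sqlglot

print(sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").sql())
# expected, roughly: SELECT CONCAT('a', 'b')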
True, 3818 ) -> t.Optional[exp.Expression]: 3819 if not self._curr: 3820 return None 3821 3822 comments = self._curr.comments 3823 token_type = self._curr.token_type 3824 this = self._curr.text 3825 upper = this.upper() 3826 3827 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3828 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3829 self._advance() 3830 return parser(self) 3831 3832 if not self._next or self._next.token_type != TokenType.L_PAREN: 3833 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3834 self._advance() 3835 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3836 3837 return None 3838 3839 if token_type not in self.FUNC_TOKENS: 3840 return None 3841 3842 self._advance(2) 3843 3844 parser = self.FUNCTION_PARSERS.get(upper) 3845 if parser and not anonymous: 3846 this = parser(self) 3847 else: 3848 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3849 3850 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3851 this = self.expression(subquery_predicate, this=self._parse_select()) 3852 self._match_r_paren() 3853 return this 3854 3855 if functions is None: 3856 functions = self.FUNCTIONS 3857 3858 function = functions.get(upper) 3859 3860 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3861 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3862 3863 if function and not anonymous: 3864 if "dialect" in function.__code__.co_varnames: 3865 func = function(args, dialect=self.dialect) 3866 else: 3867 func = function(args) 3868 3869 func = self.validate_expression(func, args) 3870 if not self.dialect.NORMALIZE_FUNCTIONS: 3871 func.meta["name"] = this 3872 3873 this = func 3874 else: 3875 this = self.expression(exp.Anonymous, this=this, expressions=args) 3876 3877 if isinstance(this, exp.Expression): 3878 this.add_comments(comments) 3879 3880 self._match_r_paren(this) 3881 return self._parse_window(this) 3882 3883 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3884 return self._parse_column_def(self._parse_id_var()) 3885 3886 def _parse_user_defined_function( 3887 self, kind: t.Optional[TokenType] = None 3888 ) -> t.Optional[exp.Expression]: 3889 this = self._parse_id_var() 3890 3891 while self._match(TokenType.DOT): 3892 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3893 3894 if not self._match(TokenType.L_PAREN): 3895 return this 3896 3897 expressions = self._parse_csv(self._parse_function_parameter) 3898 self._match_r_paren() 3899 return self.expression( 3900 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3901 ) 3902 3903 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3904 literal = self._parse_primary() 3905 if literal: 3906 return self.expression(exp.Introducer, this=token.text, expression=literal) 3907 3908 return self.expression(exp.Identifier, this=token.text) 3909 3910 def _parse_session_parameter(self) -> exp.SessionParameter: 3911 kind = None 3912 this = self._parse_id_var() or self._parse_primary() 3913 3914 if this and self._match(TokenType.DOT): 3915 kind = this.name 3916 this = self._parse_var() or self._parse_primary() 3917 3918 return self.expression(exp.SessionParameter, this=this, kind=kind) 3919 3920 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3921 index = self._index 3922 3923 if self._match(TokenType.L_PAREN): 3924 expressions = t.cast( 3925 t.List[t.Optional[exp.Expression]], 
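# --- Illustrative sketch (not sqlglot source) -------------------------------
# Function names with no registered builder fall through to the exp.Anonymous
# branch of _parse_function_call above, so unknown UDFs still parse and
# regenerate verbatim (MY_UDF is a made-up name):

import sqlglot
from sqlglot import exp

udf = sqlglot.parse_one("SELECT MY_UDF(1, x)").find(exp.Anonymous)
print(udf.this)   # MY_UDF
print(udf.sql())  # MY_UDF(1, x)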
self._parse_csv(self._parse_id_var) 3926 ) 3927 3928 if not self._match(TokenType.R_PAREN): 3929 self._retreat(index) 3930 else: 3931 expressions = [self._parse_id_var()] 3932 3933 if self._match_set(self.LAMBDAS): 3934 return self.LAMBDAS[self._prev.token_type](self, expressions) 3935 3936 self._retreat(index) 3937 3938 this: t.Optional[exp.Expression] 3939 3940 if self._match(TokenType.DISTINCT): 3941 this = self.expression( 3942 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3943 ) 3944 else: 3945 this = self._parse_select_or_expression(alias=alias) 3946 3947 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3948 3949 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3950 index = self._index 3951 3952 if not self.errors: 3953 try: 3954 if self._parse_select(nested=True): 3955 return this 3956 except ParseError: 3957 pass 3958 finally: 3959 self.errors.clear() 3960 self._retreat(index) 3961 3962 if not self._match(TokenType.L_PAREN): 3963 return this 3964 3965 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3966 3967 self._match_r_paren() 3968 return self.expression(exp.Schema, this=this, expressions=args) 3969 3970 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3971 return self._parse_column_def(self._parse_field(any_token=True)) 3972 3973 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3974 # column defs are not really columns, they're identifiers 3975 if isinstance(this, exp.Column): 3976 this = this.this 3977 3978 kind = self._parse_types(schema=True) 3979 3980 if self._match_text_seq("FOR", "ORDINALITY"): 3981 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3982 3983 constraints: t.List[exp.Expression] = [] 3984 3985 if not kind and self._match(TokenType.ALIAS): 3986 constraints.append( 3987 self.expression( 3988 exp.ComputedColumnConstraint, 3989 this=self._parse_conjunction(), 3990 persisted=self._match_text_seq("PERSISTED"), 3991 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3992 ) 3993 ) 3994 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 3995 self._match(TokenType.ALIAS) 3996 constraints.append( 3997 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 3998 ) 3999 4000 while True: 4001 constraint = self._parse_column_constraint() 4002 if not constraint: 4003 break 4004 constraints.append(constraint) 4005 4006 if not kind and not constraints: 4007 return this 4008 4009 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4010 4011 def _parse_auto_increment( 4012 self, 4013 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4014 start = None 4015 increment = None 4016 4017 if self._match(TokenType.L_PAREN, advance=False): 4018 args = self._parse_wrapped_csv(self._parse_bitwise) 4019 start = seq_get(args, 0) 4020 increment = seq_get(args, 1) 4021 elif self._match_text_seq("START"): 4022 start = self._parse_bitwise() 4023 self._match_text_seq("INCREMENT") 4024 increment = self._parse_bitwise() 4025 4026 if start and increment: 4027 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4028 4029 return exp.AutoIncrementColumnConstraint() 4030 4031 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4032 if not self._match_text_seq("REFRESH"): 4033 self._retreat(self._index - 1) 4034 return None 4035 
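# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_lambda above first tries a (parenthesised or bare) parameter list
# followed by a LAMBDAS token such as ->, and retreats to a plain expression
# if that fails. Higher-order function arguments therefore parse as expected
# (SOME_HOF is a made-up function name):

import sqlglot
from sqlglot import exp

lam = sqlglot.parse_one("SELECT SOME_HOF(arr, (acc, x) -> acc + x)").find(exp.Lambda)
print(lam.sql())  # (acc, x) -> acc + x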
return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4036 4037 def _parse_compress(self) -> exp.CompressColumnConstraint: 4038 if self._match(TokenType.L_PAREN, advance=False): 4039 return self.expression( 4040 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4041 ) 4042 4043 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4044 4045 def _parse_generated_as_identity( 4046 self, 4047 ) -> ( 4048 exp.GeneratedAsIdentityColumnConstraint 4049 | exp.ComputedColumnConstraint 4050 | exp.GeneratedAsRowColumnConstraint 4051 ): 4052 if self._match_text_seq("BY", "DEFAULT"): 4053 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4054 this = self.expression( 4055 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4056 ) 4057 else: 4058 self._match_text_seq("ALWAYS") 4059 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4060 4061 self._match(TokenType.ALIAS) 4062 4063 if self._match_text_seq("ROW"): 4064 start = self._match_text_seq("START") 4065 if not start: 4066 self._match(TokenType.END) 4067 hidden = self._match_text_seq("HIDDEN") 4068 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4069 4070 identity = self._match_text_seq("IDENTITY") 4071 4072 if self._match(TokenType.L_PAREN): 4073 if self._match(TokenType.START_WITH): 4074 this.set("start", self._parse_bitwise()) 4075 if self._match_text_seq("INCREMENT", "BY"): 4076 this.set("increment", self._parse_bitwise()) 4077 if self._match_text_seq("MINVALUE"): 4078 this.set("minvalue", self._parse_bitwise()) 4079 if self._match_text_seq("MAXVALUE"): 4080 this.set("maxvalue", self._parse_bitwise()) 4081 4082 if self._match_text_seq("CYCLE"): 4083 this.set("cycle", True) 4084 elif self._match_text_seq("NO", "CYCLE"): 4085 this.set("cycle", False) 4086 4087 if not identity: 4088 this.set("expression", self._parse_bitwise()) 4089 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4090 args = self._parse_csv(self._parse_bitwise) 4091 this.set("start", seq_get(args, 0)) 4092 this.set("increment", seq_get(args, 1)) 4093 4094 self._match_r_paren() 4095 4096 return this 4097 4098 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4099 self._match_text_seq("LENGTH") 4100 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4101 4102 def _parse_not_constraint( 4103 self, 4104 ) -> t.Optional[exp.Expression]: 4105 if self._match_text_seq("NULL"): 4106 return self.expression(exp.NotNullColumnConstraint) 4107 if self._match_text_seq("CASESPECIFIC"): 4108 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4109 if self._match_text_seq("FOR", "REPLICATION"): 4110 return self.expression(exp.NotForReplicationColumnConstraint) 4111 return None 4112 4113 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4114 if self._match(TokenType.CONSTRAINT): 4115 this = self._parse_id_var() 4116 else: 4117 this = None 4118 4119 if self._match_texts(self.CONSTRAINT_PARSERS): 4120 return self.expression( 4121 exp.ColumnConstraint, 4122 this=this, 4123 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4124 ) 4125 4126 return this 4127 4128 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4129 if not self._match(TokenType.CONSTRAINT): 4130 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4131 4132 this = self._parse_id_var() 4133 expressions = [] 4134 
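# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_generated_as_identity above accepts both the ALWAYS and BY DEFAULT
# forms plus the parenthesised sequence options; DDL like this should
# round-trip (exact output can vary slightly by dialect):

import sqlglot

ddl = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
print(sqlglot.parse_one(ddl, read="postgres").sql("postgres"))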
4135 while True: 4136 constraint = self._parse_unnamed_constraint() or self._parse_function() 4137 if not constraint: 4138 break 4139 expressions.append(constraint) 4140 4141 return self.expression(exp.Constraint, this=this, expressions=expressions) 4142 4143 def _parse_unnamed_constraint( 4144 self, constraints: t.Optional[t.Collection[str]] = None 4145 ) -> t.Optional[exp.Expression]: 4146 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4147 constraints or self.CONSTRAINT_PARSERS 4148 ): 4149 return None 4150 4151 constraint = self._prev.text.upper() 4152 if constraint not in self.CONSTRAINT_PARSERS: 4153 self.raise_error(f"No parser found for schema constraint {constraint}.") 4154 4155 return self.CONSTRAINT_PARSERS[constraint](self) 4156 4157 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4158 self._match_text_seq("KEY") 4159 return self.expression( 4160 exp.UniqueColumnConstraint, 4161 this=self._parse_schema(self._parse_id_var(any_token=False)), 4162 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4163 ) 4164 4165 def _parse_key_constraint_options(self) -> t.List[str]: 4166 options = [] 4167 while True: 4168 if not self._curr: 4169 break 4170 4171 if self._match(TokenType.ON): 4172 action = None 4173 on = self._advance_any() and self._prev.text 4174 4175 if self._match_text_seq("NO", "ACTION"): 4176 action = "NO ACTION" 4177 elif self._match_text_seq("CASCADE"): 4178 action = "CASCADE" 4179 elif self._match_text_seq("RESTRICT"): 4180 action = "RESTRICT" 4181 elif self._match_pair(TokenType.SET, TokenType.NULL): 4182 action = "SET NULL" 4183 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4184 action = "SET DEFAULT" 4185 else: 4186 self.raise_error("Invalid key constraint") 4187 4188 options.append(f"ON {on} {action}") 4189 elif self._match_text_seq("NOT", "ENFORCED"): 4190 options.append("NOT ENFORCED") 4191 elif self._match_text_seq("DEFERRABLE"): 4192 options.append("DEFERRABLE") 4193 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4194 options.append("INITIALLY DEFERRED") 4195 elif self._match_text_seq("NORELY"): 4196 options.append("NORELY") 4197 elif self._match_text_seq("MATCH", "FULL"): 4198 options.append("MATCH FULL") 4199 else: 4200 break 4201 4202 return options 4203 4204 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4205 if match and not self._match(TokenType.REFERENCES): 4206 return None 4207 4208 expressions = None 4209 this = self._parse_table(schema=True) 4210 options = self._parse_key_constraint_options() 4211 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4212 4213 def _parse_foreign_key(self) -> exp.ForeignKey: 4214 expressions = self._parse_wrapped_id_vars() 4215 reference = self._parse_references() 4216 options = {} 4217 4218 while self._match(TokenType.ON): 4219 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4220 self.raise_error("Expected DELETE or UPDATE") 4221 4222 kind = self._prev.text.lower() 4223 4224 if self._match_text_seq("NO", "ACTION"): 4225 action = "NO ACTION" 4226 elif self._match(TokenType.SET): 4227 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4228 action = "SET " + self._prev.text.upper() 4229 else: 4230 self._advance() 4231 action = self._prev.text.upper() 4232 4233 options[kind] = action 4234 4235 return self.expression( 4236 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4237 ) 4238 4239 def 
_parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4240 return self._parse_field() 4241 4242 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4243 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4244 4245 id_vars = self._parse_wrapped_id_vars() 4246 return self.expression( 4247 exp.PeriodForSystemTimeConstraint, 4248 this=seq_get(id_vars, 0), 4249 expression=seq_get(id_vars, 1), 4250 ) 4251 4252 def _parse_primary_key( 4253 self, wrapped_optional: bool = False, in_props: bool = False 4254 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4255 desc = ( 4256 self._match_set((TokenType.ASC, TokenType.DESC)) 4257 and self._prev.token_type == TokenType.DESC 4258 ) 4259 4260 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4261 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4262 4263 expressions = self._parse_wrapped_csv( 4264 self._parse_primary_key_part, optional=wrapped_optional 4265 ) 4266 options = self._parse_key_constraint_options() 4267 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4268 4269 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4270 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4271 4272 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4273 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4274 return this 4275 4276 bracket_kind = self._prev.token_type 4277 expressions = self._parse_csv( 4278 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4279 ) 4280 4281 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4282 self.raise_error("Expected ]") 4283 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4284 self.raise_error("Expected }") 4285 4286 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4287 if bracket_kind == TokenType.L_BRACE: 4288 this = self.expression(exp.Struct, expressions=expressions) 4289 elif not this or this.name.upper() == "ARRAY": 4290 this = self.expression(exp.Array, expressions=expressions) 4291 else: 4292 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4293 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4294 4295 self._add_comments(this) 4296 return self._parse_bracket(this) 4297 4298 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4299 if self._match(TokenType.COLON): 4300 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4301 return this 4302 4303 def _parse_case(self) -> t.Optional[exp.Expression]: 4304 ifs = [] 4305 default = None 4306 4307 comments = self._prev_comments 4308 expression = self._parse_conjunction() 4309 4310 while self._match(TokenType.WHEN): 4311 this = self._parse_conjunction() 4312 self._match(TokenType.THEN) 4313 then = self._parse_conjunction() 4314 ifs.append(self.expression(exp.If, this=this, true=then)) 4315 4316 if self._match(TokenType.ELSE): 4317 default = self._parse_conjunction() 4318 4319 if not self._match(TokenType.END): 4320 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4321 default = exp.column("interval") 4322 else: 4323 self.raise_error("Expected END after CASE", self._prev) 4324 4325 return self._parse_window( 4326 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 
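# --- Illustrative sketch (not sqlglot source) -------------------------------
# Per the DuckDB branch of _parse_bracket above, a braced literal is parsed as
# an exp.Struct rather than as an array index:

import sqlglot
from sqlglot import exp

struct = sqlglot.parse_one("SELECT {'a': 1, 'b': 2}", read="duckdb").find(exp.Struct)
print(struct.sql("duckdb"))  # {'a': 1, 'b': 2}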
4327 ) 4328 4329 def _parse_if(self) -> t.Optional[exp.Expression]: 4330 if self._match(TokenType.L_PAREN): 4331 args = self._parse_csv(self._parse_conjunction) 4332 this = self.validate_expression(exp.If.from_arg_list(args), args) 4333 self._match_r_paren() 4334 else: 4335 index = self._index - 1 4336 condition = self._parse_conjunction() 4337 4338 if not condition: 4339 self._retreat(index) 4340 return None 4341 4342 self._match(TokenType.THEN) 4343 true = self._parse_conjunction() 4344 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4345 self._match(TokenType.END) 4346 this = self.expression(exp.If, this=condition, true=true, false=false) 4347 4348 return self._parse_window(this) 4349 4350 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4351 if not self._match_text_seq("VALUE", "FOR"): 4352 self._retreat(self._index - 1) 4353 return None 4354 4355 return self.expression( 4356 exp.NextValueFor, 4357 this=self._parse_column(), 4358 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4359 ) 4360 4361 def _parse_extract(self) -> exp.Extract: 4362 this = self._parse_function() or self._parse_var() or self._parse_type() 4363 4364 if self._match(TokenType.FROM): 4365 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4366 4367 if not self._match(TokenType.COMMA): 4368 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4369 4370 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4371 4372 def _parse_any_value(self) -> exp.AnyValue: 4373 this = self._parse_lambda() 4374 is_max = None 4375 having = None 4376 4377 if self._match(TokenType.HAVING): 4378 self._match_texts(("MAX", "MIN")) 4379 is_max = self._prev.text == "MAX" 4380 having = self._parse_column() 4381 4382 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4383 4384 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4385 this = self._parse_conjunction() 4386 4387 if not self._match(TokenType.ALIAS): 4388 if self._match(TokenType.COMMA): 4389 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4390 4391 self.raise_error("Expected AS after CAST") 4392 4393 fmt = None 4394 to = self._parse_types() 4395 4396 if self._match(TokenType.FORMAT): 4397 fmt_string = self._parse_string() 4398 fmt = self._parse_at_time_zone(fmt_string) 4399 4400 if not to: 4401 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4402 if to.this in exp.DataType.TEMPORAL_TYPES: 4403 this = self.expression( 4404 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4405 this=this, 4406 format=exp.Literal.string( 4407 format_time( 4408 fmt_string.this if fmt_string else "", 4409 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4410 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4411 ) 4412 ), 4413 ) 4414 4415 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4416 this.set("zone", fmt.args["zone"]) 4417 return this 4418 elif not to: 4419 self.raise_error("Expected TYPE after CAST") 4420 elif isinstance(to, exp.Identifier): 4421 to = exp.DataType.build(to.name, udt=True) 4422 elif to.this == exp.DataType.Type.CHAR: 4423 if self._match(TokenType.CHARACTER_SET): 4424 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4425 4426 return self.expression( 4427 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4428 ) 4429 4430 def 
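# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_extract above accepts both EXTRACT(unit FROM expr) and the
# comma-separated variant, normalising them into the same exp.Extract node:

import sqlglot
from sqlglot import exp

ex = sqlglot.parse_one("SELECT EXTRACT(YEAR FROM created_at)").find(exp.Extract)
print(ex.this.sql(), "|", ex.expression.sql())  # YEAR | created_at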
_parse_string_agg(self) -> exp.Expression: 4431 if self._match(TokenType.DISTINCT): 4432 args: t.List[t.Optional[exp.Expression]] = [ 4433 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4434 ] 4435 if self._match(TokenType.COMMA): 4436 args.extend(self._parse_csv(self._parse_conjunction)) 4437 else: 4438 args = self._parse_csv(self._parse_conjunction) # type: ignore 4439 4440 index = self._index 4441 if not self._match(TokenType.R_PAREN) and args: 4442 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4443 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4444 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4445 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4446 4447 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4448 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4449 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4450 if not self._match_text_seq("WITHIN", "GROUP"): 4451 self._retreat(index) 4452 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4453 4454 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4455 order = self._parse_order(this=seq_get(args, 0)) 4456 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4457 4458 def _parse_convert( 4459 self, strict: bool, safe: t.Optional[bool] = None 4460 ) -> t.Optional[exp.Expression]: 4461 this = self._parse_bitwise() 4462 4463 if self._match(TokenType.USING): 4464 to: t.Optional[exp.Expression] = self.expression( 4465 exp.CharacterSet, this=self._parse_var() 4466 ) 4467 elif self._match(TokenType.COMMA): 4468 to = self._parse_types() 4469 else: 4470 to = None 4471 4472 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4473 4474 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4475 """ 4476 There are generally two variants of the DECODE function: 4477 4478 - DECODE(bin, charset) 4479 - DECODE(expression, search, result [, search, result] ... [, default]) 4480 4481 The second variant will always be parsed into a CASE expression. Note that NULL 4482 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4483 instead of relying on pattern matching. 
4484 """ 4485 args = self._parse_csv(self._parse_conjunction) 4486 4487 if len(args) < 3: 4488 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4489 4490 expression, *expressions = args 4491 if not expression: 4492 return None 4493 4494 ifs = [] 4495 for search, result in zip(expressions[::2], expressions[1::2]): 4496 if not search or not result: 4497 return None 4498 4499 if isinstance(search, exp.Literal): 4500 ifs.append( 4501 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4502 ) 4503 elif isinstance(search, exp.Null): 4504 ifs.append( 4505 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4506 ) 4507 else: 4508 cond = exp.or_( 4509 exp.EQ(this=expression.copy(), expression=search), 4510 exp.and_( 4511 exp.Is(this=expression.copy(), expression=exp.Null()), 4512 exp.Is(this=search.copy(), expression=exp.Null()), 4513 copy=False, 4514 ), 4515 copy=False, 4516 ) 4517 ifs.append(exp.If(this=cond, true=result)) 4518 4519 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4520 4521 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4522 self._match_text_seq("KEY") 4523 key = self._parse_column() 4524 self._match_set((TokenType.COLON, TokenType.COMMA)) 4525 self._match_text_seq("VALUE") 4526 value = self._parse_bitwise() 4527 4528 if not key and not value: 4529 return None 4530 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4531 4532 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4533 if not this or not self._match_text_seq("FORMAT", "JSON"): 4534 return this 4535 4536 return self.expression(exp.FormatJson, this=this) 4537 4538 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4539 # Parses the "X ON Y" syntax, i.e. 
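# --- Illustrative sketch (not sqlglot source) -------------------------------
# As the docstring of _parse_decode explains, the search/result variant is
# parsed straight into a CASE expression:

import sqlglot

print(sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 2, 'two', 'other')", read="oracle").sql())
# roughly: SELECT CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END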
NULL ON NULL (Oracle, T-SQL) 4540 for value in values: 4541 if self._match_text_seq(value, "ON", on): 4542 return f"{value} ON {on}" 4543 4544 return None 4545 4546 def _parse_json_object(self) -> exp.JSONObject: 4547 star = self._parse_star() 4548 expressions = ( 4549 [star] 4550 if star 4551 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4552 ) 4553 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4554 4555 unique_keys = None 4556 if self._match_text_seq("WITH", "UNIQUE"): 4557 unique_keys = True 4558 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4559 unique_keys = False 4560 4561 self._match_text_seq("KEYS") 4562 4563 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4564 self._parse_type() 4565 ) 4566 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4567 4568 return self.expression( 4569 exp.JSONObject, 4570 expressions=expressions, 4571 null_handling=null_handling, 4572 unique_keys=unique_keys, 4573 return_type=return_type, 4574 encoding=encoding, 4575 ) 4576 4577 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4578 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4579 if not self._match_text_seq("NESTED"): 4580 this = self._parse_id_var() 4581 kind = self._parse_types(allow_identifiers=False) 4582 nested = None 4583 else: 4584 this = None 4585 kind = None 4586 nested = True 4587 4588 path = self._match_text_seq("PATH") and self._parse_string() 4589 nested_schema = nested and self._parse_json_schema() 4590 4591 return self.expression( 4592 exp.JSONColumnDef, 4593 this=this, 4594 kind=kind, 4595 path=path, 4596 nested_schema=nested_schema, 4597 ) 4598 4599 def _parse_json_schema(self) -> exp.JSONSchema: 4600 self._match_text_seq("COLUMNS") 4601 return self.expression( 4602 exp.JSONSchema, 4603 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4604 ) 4605 4606 def _parse_json_table(self) -> exp.JSONTable: 4607 this = self._parse_format_json(self._parse_bitwise()) 4608 path = self._match(TokenType.COMMA) and self._parse_string() 4609 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4610 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4611 schema = self._parse_json_schema() 4612 4613 return exp.JSONTable( 4614 this=this, 4615 schema=schema, 4616 path=path, 4617 error_handling=error_handling, 4618 empty_handling=empty_handling, 4619 ) 4620 4621 def _parse_match_against(self) -> exp.MatchAgainst: 4622 expressions = self._parse_csv(self._parse_column) 4623 4624 self._match_text_seq(")", "AGAINST", "(") 4625 4626 this = self._parse_string() 4627 4628 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4629 modifier = "IN NATURAL LANGUAGE MODE" 4630 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4631 modifier = f"{modifier} WITH QUERY EXPANSION" 4632 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4633 modifier = "IN BOOLEAN MODE" 4634 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4635 modifier = "WITH QUERY EXPANSION" 4636 else: 4637 modifier = None 4638 4639 return self.expression( 4640 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4641 ) 4642 4643 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4644 def _parse_open_json(self) -> exp.OpenJSON: 4645 this = self._parse_bitwise() 4646 path = self._match(TokenType.COMMA) and self._parse_string() 4647 4648 def 
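# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_match_against above backs MySQL full-text predicates; the modifier
# string is captured verbatim on the node:

import sqlglot
from sqlglot import exp

sql = "SELECT * FROM t WHERE MATCH(a, b) AGAINST('q' IN BOOLEAN MODE)"
ma = sqlglot.parse_one(sql, read="mysql").find(exp.MatchAgainst)
print(ma.args["modifier"])  # IN BOOLEAN MODE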
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4649 this = self._parse_field(any_token=True) 4650 kind = self._parse_types() 4651 path = self._parse_string() 4652 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4653 4654 return self.expression( 4655 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4656 ) 4657 4658 expressions = None 4659 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4660 self._match_l_paren() 4661 expressions = self._parse_csv(_parse_open_json_column_def) 4662 4663 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4664 4665 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4666 args = self._parse_csv(self._parse_bitwise) 4667 4668 if self._match(TokenType.IN): 4669 return self.expression( 4670 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4671 ) 4672 4673 if haystack_first: 4674 haystack = seq_get(args, 0) 4675 needle = seq_get(args, 1) 4676 else: 4677 needle = seq_get(args, 0) 4678 haystack = seq_get(args, 1) 4679 4680 return self.expression( 4681 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4682 ) 4683 4684 def _parse_predict(self) -> exp.Predict: 4685 self._match_text_seq("MODEL") 4686 this = self._parse_table() 4687 4688 self._match(TokenType.COMMA) 4689 self._match_text_seq("TABLE") 4690 4691 return self.expression( 4692 exp.Predict, 4693 this=this, 4694 expression=self._parse_table(), 4695 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4696 ) 4697 4698 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4699 args = self._parse_csv(self._parse_table) 4700 return exp.JoinHint(this=func_name.upper(), expressions=args) 4701 4702 def _parse_substring(self) -> exp.Substring: 4703 # Postgres supports the form: substring(string [from int] [for int]) 4704 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4705 4706 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4707 4708 if self._match(TokenType.FROM): 4709 args.append(self._parse_bitwise()) 4710 if self._match(TokenType.FOR): 4711 args.append(self._parse_bitwise()) 4712 4713 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4714 4715 def _parse_trim(self) -> exp.Trim: 4716 # https://www.w3resource.com/sql/character-functions/trim.php 4717 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4718 4719 position = None 4720 collation = None 4721 expression = None 4722 4723 if self._match_texts(self.TRIM_TYPES): 4724 position = self._prev.text.upper() 4725 4726 this = self._parse_bitwise() 4727 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4728 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4729 expression = self._parse_bitwise() 4730 4731 if invert_order: 4732 this, expression = expression, this 4733 4734 if self._match(TokenType.COLLATE): 4735 collation = self._parse_bitwise() 4736 4737 return self.expression( 4738 exp.Trim, this=this, position=position, expression=expression, collation=collation 4739 ) 4740 4741 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4742 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4743 4744 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4745 return self._parse_window(self._parse_id_var(), alias=True) 4746 4747 def _parse_respect_or_ignore_nulls( 4748 self, this: t.Optional[exp.Expression] 4749 ) 
-> t.Optional[exp.Expression]: 4750 if self._match_text_seq("IGNORE", "NULLS"): 4751 return self.expression(exp.IgnoreNulls, this=this) 4752 if self._match_text_seq("RESPECT", "NULLS"): 4753 return self.expression(exp.RespectNulls, this=this) 4754 return this 4755 4756 def _parse_window( 4757 self, this: t.Optional[exp.Expression], alias: bool = False 4758 ) -> t.Optional[exp.Expression]: 4759 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4760 self._match(TokenType.WHERE) 4761 this = self.expression( 4762 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4763 ) 4764 self._match_r_paren() 4765 4766 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4767 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4768 if self._match_text_seq("WITHIN", "GROUP"): 4769 order = self._parse_wrapped(self._parse_order) 4770 this = self.expression(exp.WithinGroup, this=this, expression=order) 4771 4772 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4773 # Some dialects choose to implement and some do not. 4774 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4775 4776 # There is some code above in _parse_lambda that handles 4777 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4778 4779 # The below changes handle 4780 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4781 4782 # Oracle allows both formats 4783 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4784 # and Snowflake chose to do the same for familiarity 4785 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4786 this = self._parse_respect_or_ignore_nulls(this) 4787 4788 # bigquery select from window x AS (partition by ...) 4789 if alias: 4790 over = None 4791 self._match(TokenType.ALIAS) 4792 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4793 return this 4794 else: 4795 over = self._prev.text.upper() 4796 4797 if not self._match(TokenType.L_PAREN): 4798 return self.expression( 4799 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4800 ) 4801 4802 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4803 4804 first = self._match(TokenType.FIRST) 4805 if self._match_text_seq("LAST"): 4806 first = False 4807 4808 partition, order = self._parse_partition_and_order() 4809 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4810 4811 if kind: 4812 self._match(TokenType.BETWEEN) 4813 start = self._parse_window_spec() 4814 self._match(TokenType.AND) 4815 end = self._parse_window_spec() 4816 4817 spec = self.expression( 4818 exp.WindowSpec, 4819 kind=kind, 4820 start=start["value"], 4821 start_side=start["side"], 4822 end=end["value"], 4823 end_side=end["side"], 4824 ) 4825 else: 4826 spec = None 4827 4828 self._match_r_paren() 4829 4830 window = self.expression( 4831 exp.Window, 4832 this=this, 4833 partition_by=partition, 4834 order=order, 4835 spec=spec, 4836 alias=window_alias, 4837 over=over, 4838 first=first, 4839 ) 4840 4841 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
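# --- Illustrative sketch (not sqlglot source) -------------------------------
# The frame handling in _parse_window above stores each bound as the
# value/side pair returned by _parse_window_spec (defined below):

import sqlglot
from sqlglot import exp

w = sqlglot.parse_one(
    "SELECT SUM(x) OVER (ORDER BY d ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
).find(exp.Window)
spec = w.args["spec"]
print(spec.args["start"], spec.args["start_side"])  # UNBOUNDED PRECEDING
print(spec.args["end"], spec.args["end_side"])      # CURRENT ROW None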
4842 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4843 return self._parse_window(window, alias=alias) 4844 4845 return window 4846 4847 def _parse_partition_and_order( 4848 self, 4849 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4850 return self._parse_partition_by(), self._parse_order() 4851 4852 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4853 self._match(TokenType.BETWEEN) 4854 4855 return { 4856 "value": ( 4857 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4858 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4859 or self._parse_bitwise() 4860 ), 4861 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4862 } 4863 4864 def _parse_alias( 4865 self, this: t.Optional[exp.Expression], explicit: bool = False 4866 ) -> t.Optional[exp.Expression]: 4867 any_token = self._match(TokenType.ALIAS) 4868 comments = self._prev_comments 4869 4870 if explicit and not any_token: 4871 return this 4872 4873 if self._match(TokenType.L_PAREN): 4874 aliases = self.expression( 4875 exp.Aliases, 4876 comments=comments, 4877 this=this, 4878 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4879 ) 4880 self._match_r_paren(aliases) 4881 return aliases 4882 4883 alias = self._parse_id_var(any_token) 4884 4885 if alias: 4886 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4887 4888 return this 4889 4890 def _parse_id_var( 4891 self, 4892 any_token: bool = True, 4893 tokens: t.Optional[t.Collection[TokenType]] = None, 4894 ) -> t.Optional[exp.Expression]: 4895 identifier = self._parse_identifier() 4896 4897 if identifier: 4898 return identifier 4899 4900 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4901 quoted = self._prev.token_type == TokenType.STRING 4902 return exp.Identifier(this=self._prev.text, quoted=quoted) 4903 4904 return None 4905 4906 def _parse_string(self) -> t.Optional[exp.Expression]: 4907 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4908 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4909 return self._parse_placeholder() 4910 4911 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4912 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4913 4914 def _parse_number(self) -> t.Optional[exp.Expression]: 4915 if self._match(TokenType.NUMBER): 4916 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4917 return self._parse_placeholder() 4918 4919 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4920 if self._match(TokenType.IDENTIFIER): 4921 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4922 return self._parse_placeholder() 4923 4924 def _parse_var( 4925 self, 4926 any_token: bool = False, 4927 tokens: t.Optional[t.Collection[TokenType]] = None, 4928 upper: bool = False, 4929 ) -> t.Optional[exp.Expression]: 4930 if ( 4931 (any_token and self._advance_any()) 4932 or self._match(TokenType.VAR) 4933 or (self._match_set(tokens) if tokens else False) 4934 ): 4935 return self.expression( 4936 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 4937 ) 4938 return self._parse_placeholder() 4939 4940 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 4941 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 4942 self._advance() 4943 return self._prev 4944 return None 4945 4946 def 
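# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_identifier above only fires for quoted names, which is how quoting
# survives on the resulting exp.Identifier:

import sqlglot
from sqlglot import exp

col = sqlglot.parse_one('SELECT "My Col" FROM t').find(exp.Column)
print(col.this.this, col.this.quoted)  # My Col True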
_parse_var_or_string(self) -> t.Optional[exp.Expression]: 4947 return self._parse_var() or self._parse_string() 4948 4949 def _parse_null(self) -> t.Optional[exp.Expression]: 4950 if self._match_set(self.NULL_TOKENS): 4951 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4952 return self._parse_placeholder() 4953 4954 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4955 if self._match(TokenType.TRUE): 4956 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4957 if self._match(TokenType.FALSE): 4958 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4959 return self._parse_placeholder() 4960 4961 def _parse_star(self) -> t.Optional[exp.Expression]: 4962 if self._match(TokenType.STAR): 4963 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4964 return self._parse_placeholder() 4965 4966 def _parse_parameter(self) -> exp.Parameter: 4967 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4968 return ( 4969 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4970 ) 4971 4972 self._match(TokenType.L_BRACE) 4973 this = _parse_parameter_part() 4974 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4975 self._match(TokenType.R_BRACE) 4976 4977 return self.expression(exp.Parameter, this=this, expression=expression) 4978 4979 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4980 if self._match_set(self.PLACEHOLDER_PARSERS): 4981 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4982 if placeholder: 4983 return placeholder 4984 self._advance(-1) 4985 return None 4986 4987 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4988 if not self._match(TokenType.EXCEPT): 4989 return None 4990 if self._match(TokenType.L_PAREN, advance=False): 4991 return self._parse_wrapped_csv(self._parse_column) 4992 4993 except_column = self._parse_column() 4994 return [except_column] if except_column else None 4995 4996 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4997 if not self._match(TokenType.REPLACE): 4998 return None 4999 if self._match(TokenType.L_PAREN, advance=False): 5000 return self._parse_wrapped_csv(self._parse_expression) 5001 5002 replace_expression = self._parse_expression() 5003 return [replace_expression] if replace_expression else None 5004 5005 def _parse_csv( 5006 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5007 ) -> t.List[exp.Expression]: 5008 parse_result = parse_method() 5009 items = [parse_result] if parse_result is not None else [] 5010 5011 while self._match(sep): 5012 self._add_comments(parse_result) 5013 parse_result = parse_method() 5014 if parse_result is not None: 5015 items.append(parse_result) 5016 5017 return items 5018 5019 def _parse_tokens( 5020 self, parse_method: t.Callable, expressions: t.Dict 5021 ) -> t.Optional[exp.Expression]: 5022 this = parse_method() 5023 5024 while self._match_set(expressions): 5025 this = self.expression( 5026 expressions[self._prev.token_type], 5027 this=this, 5028 comments=self._prev_comments, 5029 expression=parse_method(), 5030 ) 5031 5032 return this 5033 5034 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5035 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5036 5037 def _parse_wrapped_csv( 5038 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5039 ) -> t.List[exp.Expression]: 5040 return self._parse_wrapped( 5041 lambda: self._parse_csv(parse_method, 
sep=sep), optional=optional 5042 ) 5043 5044 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5045 wrapped = self._match(TokenType.L_PAREN) 5046 if not wrapped and not optional: 5047 self.raise_error("Expecting (") 5048 parse_result = parse_method() 5049 if wrapped: 5050 self._match_r_paren() 5051 return parse_result 5052 5053 def _parse_expressions(self) -> t.List[exp.Expression]: 5054 return self._parse_csv(self._parse_expression) 5055 5056 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5057 return self._parse_select() or self._parse_set_operations( 5058 self._parse_expression() if alias else self._parse_conjunction() 5059 ) 5060 5061 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5062 return self._parse_query_modifiers( 5063 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5064 ) 5065 5066 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5067 this = None 5068 if self._match_texts(self.TRANSACTION_KIND): 5069 this = self._prev.text 5070 5071 self._match_texts(("TRANSACTION", "WORK")) 5072 5073 modes = [] 5074 while True: 5075 mode = [] 5076 while self._match(TokenType.VAR): 5077 mode.append(self._prev.text) 5078 5079 if mode: 5080 modes.append(" ".join(mode)) 5081 if not self._match(TokenType.COMMA): 5082 break 5083 5084 return self.expression(exp.Transaction, this=this, modes=modes) 5085 5086 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5087 chain = None 5088 savepoint = None 5089 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5090 5091 self._match_texts(("TRANSACTION", "WORK")) 5092 5093 if self._match_text_seq("TO"): 5094 self._match_text_seq("SAVEPOINT") 5095 savepoint = self._parse_id_var() 5096 5097 if self._match(TokenType.AND): 5098 chain = not self._match_text_seq("NO") 5099 self._match_text_seq("CHAIN") 5100 5101 if is_rollback: 5102 return self.expression(exp.Rollback, savepoint=savepoint) 5103 5104 return self.expression(exp.Commit, chain=chain) 5105 5106 def _parse_refresh(self) -> exp.Refresh: 5107 self._match(TokenType.TABLE) 5108 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5109 5110 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5111 if not self._match_text_seq("ADD"): 5112 return None 5113 5114 self._match(TokenType.COLUMN) 5115 exists_column = self._parse_exists(not_=True) 5116 expression = self._parse_field_def() 5117 5118 if expression: 5119 expression.set("exists", exists_column) 5120 5121 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5122 if self._match_texts(("FIRST", "AFTER")): 5123 position = self._prev.text 5124 column_position = self.expression( 5125 exp.ColumnPosition, this=self._parse_column(), position=position 5126 ) 5127 expression.set("position", column_position) 5128 5129 return expression 5130 5131 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5132 drop = self._match(TokenType.DROP) and self._parse_drop() 5133 if drop and not isinstance(drop, exp.Command): 5134 drop.set("kind", drop.args.get("kind", "COLUMN")) 5135 return drop 5136 5137 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5138 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5139 return self.expression( 5140 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5141 ) 5142 5143 def 
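# --- Illustrative sketch (not sqlglot source) -------------------------------
# _parse_add_column above feeds the ALTER TABLE machinery; each parsed action
# ends up in the actions list of the resulting exp.AlterTable:

import sqlglot

alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
print([action.sql() for action in alter.args["actions"]])  # ['c INT']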
_parse_add_constraint(self) -> exp.AddConstraint: 5144 this = None 5145 kind = self._prev.token_type 5146 5147 if kind == TokenType.CONSTRAINT: 5148 this = self._parse_id_var() 5149 5150 if self._match_text_seq("CHECK"): 5151 expression = self._parse_wrapped(self._parse_conjunction) 5152 enforced = self._match_text_seq("ENFORCED") 5153 5154 return self.expression( 5155 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5156 ) 5157 5158 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5159 expression = self._parse_foreign_key() 5160 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5161 expression = self._parse_primary_key() 5162 else: 5163 expression = None 5164 5165 return self.expression(exp.AddConstraint, this=this, expression=expression) 5166 5167 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5168 index = self._index - 1 5169 5170 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5171 return self._parse_csv(self._parse_add_constraint) 5172 5173 self._retreat(index) 5174 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5175 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5176 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5177 5178 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5179 self._match(TokenType.COLUMN) 5180 column = self._parse_field(any_token=True) 5181 5182 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5183 return self.expression(exp.AlterColumn, this=column, drop=True) 5184 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5185 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5186 5187 self._match_text_seq("SET", "DATA") 5188 return self.expression( 5189 exp.AlterColumn, 5190 this=column, 5191 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5192 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5193 using=self._match(TokenType.USING) and self._parse_conjunction(), 5194 ) 5195 5196 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5197 index = self._index - 1 5198 5199 partition_exists = self._parse_exists() 5200 if self._match(TokenType.PARTITION, advance=False): 5201 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5202 5203 self._retreat(index) 5204 return self._parse_csv(self._parse_drop_column) 5205 5206 def _parse_alter_table_rename(self) -> exp.RenameTable: 5207 self._match_text_seq("TO") 5208 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5209 5210 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5211 start = self._prev 5212 5213 if not self._match(TokenType.TABLE): 5214 return self._parse_as_command(start) 5215 5216 exists = self._parse_exists() 5217 only = self._match_text_seq("ONLY") 5218 this = self._parse_table(schema=True) 5219 5220 if self._next: 5221 self._advance() 5222 5223 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5224 if parser: 5225 actions = ensure_list(parser(self)) 5226 5227 if not self._curr: 5228 return self.expression( 5229 exp.AlterTable, 5230 this=this, 5231 exists=exists, 5232 actions=actions, 5233 only=only, 5234 ) 5235 5236 return self._parse_as_command(start) 5237 5238 def _parse_merge(self) -> exp.Merge: 5239 self._match(TokenType.INTO) 5240 target = self._parse_table() 5241 5242 if target and self._match(TokenType.ALIAS, advance=False): 5243 target.set("alias", 
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
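    # Illustrative sketch of the tree _parse_merge and _parse_when_matched build
    # (default dialect; the table names t/s are made up for the example):
    #
    #   import sqlglot
    #   merge = sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )
    #   # merge.expressions holds one exp.When per WHEN branch, carrying the
    #   # matched/source/condition flags plus the Insert, Update, or DELETE action.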
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
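    # Sketch of the trie walk in _find_parser, using a hypothetical multi-word
    # key (real keys are registered in dialect-level SHOW_PARSERS/SET_PARSERS,
    # and the tries are built by the _Parser metaclass via new_trie):
    #
    #   SHOW_PARSERS = {"BINARY LOGS": some_callable}
    #
    # Scanning "SHOW BINARY LOGS", _parse_show consumes SHOW, then _find_parser
    # reads "BINARY" (TrieResult.PREFIX, keep walking) and "LOGS"
    # (TrieResult.EXISTS), returning parsers["BINARY LOGS"]. On TrieResult.FAILED
    # it retreats to the saved token index and returns None.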
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
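# A minimal end-to-end sketch (not part of the library source) of driving this
# class directly; in practice the top-level sqlglot.parse / sqlglot.parse_one
# helpers wrap this plumbing:
#
#   from sqlglot.parser import Parser
#   from sqlglot.tokens import Tokenizer
#
#   sql = "SELECT a FROM t; UPDATE t SET a = 1"
#   expressions = Parser().parse(Tokenizer().tokenize(sql), sql)
#   # -> one syntax tree per semicolon-separated statement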
this=self._parse_number(), 1669 percent=self._match(TokenType.PERCENT), 1670 ) 1671 1672 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1673 1674 def _parse_datablocksize( 1675 self, 1676 default: t.Optional[bool] = None, 1677 minimum: t.Optional[bool] = None, 1678 maximum: t.Optional[bool] = None, 1679 ) -> exp.DataBlocksizeProperty: 1680 self._match(TokenType.EQ) 1681 size = self._parse_number() 1682 1683 units = None 1684 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1685 units = self._prev.text 1686 1687 return self.expression( 1688 exp.DataBlocksizeProperty, 1689 size=size, 1690 units=units, 1691 default=default, 1692 minimum=minimum, 1693 maximum=maximum, 1694 ) 1695 1696 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1697 self._match(TokenType.EQ) 1698 always = self._match_text_seq("ALWAYS") 1699 manual = self._match_text_seq("MANUAL") 1700 never = self._match_text_seq("NEVER") 1701 default = self._match_text_seq("DEFAULT") 1702 1703 autotemp = None 1704 if self._match_text_seq("AUTOTEMP"): 1705 autotemp = self._parse_schema() 1706 1707 return self.expression( 1708 exp.BlockCompressionProperty, 1709 always=always, 1710 manual=manual, 1711 never=never, 1712 default=default, 1713 autotemp=autotemp, 1714 ) 1715 1716 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1717 no = self._match_text_seq("NO") 1718 concurrent = self._match_text_seq("CONCURRENT") 1719 self._match_text_seq("ISOLATED", "LOADING") 1720 for_all = self._match_text_seq("FOR", "ALL") 1721 for_insert = self._match_text_seq("FOR", "INSERT") 1722 for_none = self._match_text_seq("FOR", "NONE") 1723 return self.expression( 1724 exp.IsolatedLoadingProperty, 1725 no=no, 1726 concurrent=concurrent, 1727 for_all=for_all, 1728 for_insert=for_insert, 1729 for_none=for_none, 1730 ) 1731 1732 def _parse_locking(self) -> exp.LockingProperty: 1733 if self._match(TokenType.TABLE): 1734 kind = "TABLE" 1735 elif self._match(TokenType.VIEW): 1736 kind = "VIEW" 1737 elif self._match(TokenType.ROW): 1738 kind = "ROW" 1739 elif self._match_text_seq("DATABASE"): 1740 kind = "DATABASE" 1741 else: 1742 kind = None 1743 1744 if kind in ("DATABASE", "TABLE", "VIEW"): 1745 this = self._parse_table_parts() 1746 else: 1747 this = None 1748 1749 if self._match(TokenType.FOR): 1750 for_or_in = "FOR" 1751 elif self._match(TokenType.IN): 1752 for_or_in = "IN" 1753 else: 1754 for_or_in = None 1755 1756 if self._match_text_seq("ACCESS"): 1757 lock_type = "ACCESS" 1758 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1759 lock_type = "EXCLUSIVE" 1760 elif self._match_text_seq("SHARE"): 1761 lock_type = "SHARE" 1762 elif self._match_text_seq("READ"): 1763 lock_type = "READ" 1764 elif self._match_text_seq("WRITE"): 1765 lock_type = "WRITE" 1766 elif self._match_text_seq("CHECKSUM"): 1767 lock_type = "CHECKSUM" 1768 else: 1769 lock_type = None 1770 1771 override = self._match_text_seq("OVERRIDE") 1772 1773 return self.expression( 1774 exp.LockingProperty, 1775 this=this, 1776 kind=kind, 1777 for_or_in=for_or_in, 1778 lock_type=lock_type, 1779 override=override, 1780 ) 1781 1782 def _parse_partition_by(self) -> t.List[exp.Expression]: 1783 if self._match(TokenType.PARTITION_BY): 1784 return self._parse_csv(self._parse_conjunction) 1785 return [] 1786 1787 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1788 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1789 if self._match_text_seq("MINVALUE"): 1790 return exp.var("MINVALUE") 1791 if 
self._match_text_seq("MAXVALUE"): 1792 return exp.var("MAXVALUE") 1793 return self._parse_bitwise() 1794 1795 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1796 expression = None 1797 from_expressions = None 1798 to_expressions = None 1799 1800 if self._match(TokenType.IN): 1801 this = self._parse_wrapped_csv(self._parse_bitwise) 1802 elif self._match(TokenType.FROM): 1803 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1804 self._match_text_seq("TO") 1805 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1806 elif self._match_text_seq("WITH", "(", "MODULUS"): 1807 this = self._parse_number() 1808 self._match_text_seq(",", "REMAINDER") 1809 expression = self._parse_number() 1810 self._match_r_paren() 1811 else: 1812 self.raise_error("Failed to parse partition bound spec.") 1813 1814 return self.expression( 1815 exp.PartitionBoundSpec, 1816 this=this, 1817 expression=expression, 1818 from_expressions=from_expressions, 1819 to_expressions=to_expressions, 1820 ) 1821 1822 # https://www.postgresql.org/docs/current/sql-createtable.html 1823 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1824 if not self._match_text_seq("OF"): 1825 self._retreat(self._index - 1) 1826 return None 1827 1828 this = self._parse_table(schema=True) 1829 1830 if self._match(TokenType.DEFAULT): 1831 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1832 elif self._match_text_seq("FOR", "VALUES"): 1833 expression = self._parse_partition_bound_spec() 1834 else: 1835 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1836 1837 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1838 1839 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1840 self._match(TokenType.EQ) 1841 return self.expression( 1842 exp.PartitionedByProperty, 1843 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1844 ) 1845 1846 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1847 if self._match_text_seq("AND", "STATISTICS"): 1848 statistics = True 1849 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1850 statistics = False 1851 else: 1852 statistics = None 1853 1854 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1855 1856 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1857 if self._match_text_seq("PRIMARY", "INDEX"): 1858 return exp.NoPrimaryIndexProperty() 1859 return None 1860 1861 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1862 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1863 return exp.OnCommitProperty() 1864 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1865 return exp.OnCommitProperty(delete=True) 1866 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1867 1868 def _parse_distkey(self) -> exp.DistKeyProperty: 1869 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1870 1871 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1872 table = self._parse_table(schema=True) 1873 1874 options = [] 1875 while self._match_texts(("INCLUDING", "EXCLUDING")): 1876 this = self._prev.text.upper() 1877 1878 id_var = self._parse_id_var() 1879 if not id_var: 1880 return None 1881 1882 options.append( 1883 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1884 ) 1885 1886 return self.expression(exp.LikeProperty, this=table, expressions=options) 1887 1888 def 
_parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1889 return self.expression( 1890 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1891 ) 1892 1893 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1894 self._match(TokenType.EQ) 1895 return self.expression( 1896 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1897 ) 1898 1899 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1900 self._match_text_seq("WITH", "CONNECTION") 1901 return self.expression( 1902 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1903 ) 1904 1905 def _parse_returns(self) -> exp.ReturnsProperty: 1906 value: t.Optional[exp.Expression] 1907 is_table = self._match(TokenType.TABLE) 1908 1909 if is_table: 1910 if self._match(TokenType.LT): 1911 value = self.expression( 1912 exp.Schema, 1913 this="TABLE", 1914 expressions=self._parse_csv(self._parse_struct_types), 1915 ) 1916 if not self._match(TokenType.GT): 1917 self.raise_error("Expecting >") 1918 else: 1919 value = self._parse_schema(exp.var("TABLE")) 1920 else: 1921 value = self._parse_types() 1922 1923 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1924 1925 def _parse_describe(self) -> exp.Describe: 1926 kind = self._match_set(self.CREATABLES) and self._prev.text 1927 this = self._parse_table(schema=True) 1928 properties = self._parse_properties() 1929 expressions = properties.expressions if properties else None 1930 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1931 1932 def _parse_insert(self) -> exp.Insert: 1933 comments = ensure_list(self._prev_comments) 1934 overwrite = self._match(TokenType.OVERWRITE) 1935 ignore = self._match(TokenType.IGNORE) 1936 local = self._match_text_seq("LOCAL") 1937 alternative = None 1938 1939 if self._match_text_seq("DIRECTORY"): 1940 this: t.Optional[exp.Expression] = self.expression( 1941 exp.Directory, 1942 this=self._parse_var_or_string(), 1943 local=local, 1944 row_format=self._parse_row_format(match_row=True), 1945 ) 1946 else: 1947 if self._match(TokenType.OR): 1948 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1949 1950 self._match(TokenType.INTO) 1951 comments += ensure_list(self._prev_comments) 1952 self._match(TokenType.TABLE) 1953 this = self._parse_table(schema=True) 1954 1955 returning = self._parse_returning() 1956 1957 return self.expression( 1958 exp.Insert, 1959 comments=comments, 1960 this=this, 1961 by_name=self._match_text_seq("BY", "NAME"), 1962 exists=self._parse_exists(), 1963 partition=self._parse_partition(), 1964 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1965 and self._parse_conjunction(), 1966 expression=self._parse_ddl_select(), 1967 conflict=self._parse_on_conflict(), 1968 returning=returning or self._parse_returning(), 1969 overwrite=overwrite, 1970 alternative=alternative, 1971 ignore=ignore, 1972 ) 1973 1974 def _parse_kill(self) -> exp.Kill: 1975 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1976 1977 return self.expression( 1978 exp.Kill, 1979 this=self._parse_primary(), 1980 kind=kind, 1981 ) 1982 1983 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1984 conflict = self._match_text_seq("ON", "CONFLICT") 1985 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1986 1987 if not conflict and not duplicate: 1988 return None 1989 1990 nothing = None 1991 expressions = 
None 1992 key = None 1993 constraint = None 1994 1995 if conflict: 1996 if self._match_text_seq("ON", "CONSTRAINT"): 1997 constraint = self._parse_id_var() 1998 else: 1999 key = self._parse_csv(self._parse_value) 2000 2001 self._match_text_seq("DO") 2002 if self._match_text_seq("NOTHING"): 2003 nothing = True 2004 else: 2005 self._match(TokenType.UPDATE) 2006 self._match(TokenType.SET) 2007 expressions = self._parse_csv(self._parse_equality) 2008 2009 return self.expression( 2010 exp.OnConflict, 2011 duplicate=duplicate, 2012 expressions=expressions, 2013 nothing=nothing, 2014 key=key, 2015 constraint=constraint, 2016 ) 2017 2018 def _parse_returning(self) -> t.Optional[exp.Returning]: 2019 if not self._match(TokenType.RETURNING): 2020 return None 2021 return self.expression( 2022 exp.Returning, 2023 expressions=self._parse_csv(self._parse_expression), 2024 into=self._match(TokenType.INTO) and self._parse_table_part(), 2025 ) 2026 2027 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2028 if not self._match(TokenType.FORMAT): 2029 return None 2030 return self._parse_row_format() 2031 2032 def _parse_row_format( 2033 self, match_row: bool = False 2034 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2035 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2036 return None 2037 2038 if self._match_text_seq("SERDE"): 2039 this = self._parse_string() 2040 2041 serde_properties = None 2042 if self._match(TokenType.SERDE_PROPERTIES): 2043 serde_properties = self.expression( 2044 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2045 ) 2046 2047 return self.expression( 2048 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2049 ) 2050 2051 self._match_text_seq("DELIMITED") 2052 2053 kwargs = {} 2054 2055 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2056 kwargs["fields"] = self._parse_string() 2057 if self._match_text_seq("ESCAPED", "BY"): 2058 kwargs["escaped"] = self._parse_string() 2059 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2060 kwargs["collection_items"] = self._parse_string() 2061 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2062 kwargs["map_keys"] = self._parse_string() 2063 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2064 kwargs["lines"] = self._parse_string() 2065 if self._match_text_seq("NULL", "DEFINED", "AS"): 2066 kwargs["null"] = self._parse_string() 2067 2068 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2069 2070 def _parse_load(self) -> exp.LoadData | exp.Command: 2071 if self._match_text_seq("DATA"): 2072 local = self._match_text_seq("LOCAL") 2073 self._match_text_seq("INPATH") 2074 inpath = self._parse_string() 2075 overwrite = self._match(TokenType.OVERWRITE) 2076 self._match_pair(TokenType.INTO, TokenType.TABLE) 2077 2078 return self.expression( 2079 exp.LoadData, 2080 this=self._parse_table(schema=True), 2081 local=local, 2082 overwrite=overwrite, 2083 inpath=inpath, 2084 partition=self._parse_partition(), 2085 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2086 serde=self._match_text_seq("SERDE") and self._parse_string(), 2087 ) 2088 return self._parse_as_command(self._prev) 2089 2090 def _parse_delete(self) -> exp.Delete: 2091 # This handles MySQL's "Multiple-Table Syntax" 2092 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2093 tables = None 2094 comments = self._prev_comments 2095 if not 
self._match(TokenType.FROM, advance=False): 2096 tables = self._parse_csv(self._parse_table) or None 2097 2098 returning = self._parse_returning() 2099 2100 return self.expression( 2101 exp.Delete, 2102 comments=comments, 2103 tables=tables, 2104 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2105 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2106 where=self._parse_where(), 2107 returning=returning or self._parse_returning(), 2108 limit=self._parse_limit(), 2109 ) 2110 2111 def _parse_update(self) -> exp.Update: 2112 comments = self._prev_comments 2113 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2114 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2115 returning = self._parse_returning() 2116 return self.expression( 2117 exp.Update, 2118 comments=comments, 2119 **{ # type: ignore 2120 "this": this, 2121 "expressions": expressions, 2122 "from": self._parse_from(joins=True), 2123 "where": self._parse_where(), 2124 "returning": returning or self._parse_returning(), 2125 "order": self._parse_order(), 2126 "limit": self._parse_limit(), 2127 }, 2128 ) 2129 2130 def _parse_uncache(self) -> exp.Uncache: 2131 if not self._match(TokenType.TABLE): 2132 self.raise_error("Expecting TABLE after UNCACHE") 2133 2134 return self.expression( 2135 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2136 ) 2137 2138 def _parse_cache(self) -> exp.Cache: 2139 lazy = self._match_text_seq("LAZY") 2140 self._match(TokenType.TABLE) 2141 table = self._parse_table(schema=True) 2142 2143 options = [] 2144 if self._match_text_seq("OPTIONS"): 2145 self._match_l_paren() 2146 k = self._parse_string() 2147 self._match(TokenType.EQ) 2148 v = self._parse_string() 2149 options = [k, v] 2150 self._match_r_paren() 2151 2152 self._match(TokenType.ALIAS) 2153 return self.expression( 2154 exp.Cache, 2155 this=table, 2156 lazy=lazy, 2157 options=options, 2158 expression=self._parse_select(nested=True), 2159 ) 2160 2161 def _parse_partition(self) -> t.Optional[exp.Partition]: 2162 if not self._match(TokenType.PARTITION): 2163 return None 2164 2165 return self.expression( 2166 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2167 ) 2168 2169 def _parse_value(self) -> exp.Tuple: 2170 if self._match(TokenType.L_PAREN): 2171 expressions = self._parse_csv(self._parse_expression) 2172 self._match_r_paren() 2173 return self.expression(exp.Tuple, expressions=expressions) 2174 2175 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
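# e.g. "VALUES 1, 2" yields two single-column rows, whereas "VALUES (1, 2)"
# yields a single two-column row, which is why a bare expression is wrapped
# in a one-element Tuple here.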
2176 # https://prestodb.io/docs/current/sql/values.html 2177 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2178 2179 def _parse_projections(self) -> t.List[exp.Expression]: 2180 return self._parse_expressions() 2181 2182 def _parse_select( 2183 self, 2184 nested: bool = False, 2185 table: bool = False, 2186 parse_subquery_alias: bool = True, 2187 parse_set_operation: bool = True, 2188 ) -> t.Optional[exp.Expression]: 2189 cte = self._parse_with() 2190 2191 if cte: 2192 this = self._parse_statement() 2193 2194 if not this: 2195 self.raise_error("Failed to parse any statement following CTE") 2196 return cte 2197 2198 if "with" in this.arg_types: 2199 this.set("with", cte) 2200 else: 2201 self.raise_error(f"{this.key} does not support CTE") 2202 this = cte 2203 2204 return this 2205 2206 # duckdb supports leading with FROM x 2207 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2208 2209 if self._match(TokenType.SELECT): 2210 comments = self._prev_comments 2211 2212 hint = self._parse_hint() 2213 all_ = self._match(TokenType.ALL) 2214 distinct = self._match_set(self.DISTINCT_TOKENS) 2215 2216 kind = ( 2217 self._match(TokenType.ALIAS) 2218 and self._match_texts(("STRUCT", "VALUE")) 2219 and self._prev.text.upper() 2220 ) 2221 2222 if distinct: 2223 distinct = self.expression( 2224 exp.Distinct, 2225 on=self._parse_value() if self._match(TokenType.ON) else None, 2226 ) 2227 2228 if all_ and distinct: 2229 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2230 2231 limit = self._parse_limit(top=True) 2232 projections = self._parse_projections() 2233 2234 this = self.expression( 2235 exp.Select, 2236 kind=kind, 2237 hint=hint, 2238 distinct=distinct, 2239 expressions=projections, 2240 limit=limit, 2241 ) 2242 this.comments = comments 2243 2244 into = self._parse_into() 2245 if into: 2246 this.set("into", into) 2247 2248 if not from_: 2249 from_ = self._parse_from() 2250 2251 if from_: 2252 this.set("from", from_) 2253 2254 this = self._parse_query_modifiers(this) 2255 elif (table or nested) and self._match(TokenType.L_PAREN): 2256 if self._match(TokenType.PIVOT): 2257 this = self._parse_simplified_pivot() 2258 elif self._match(TokenType.FROM): 2259 this = exp.select("*").from_( 2260 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2261 ) 2262 else: 2263 this = ( 2264 self._parse_table() 2265 if table 2266 else self._parse_select(nested=True, parse_set_operation=False) 2267 ) 2268 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2269 2270 self._match_r_paren() 2271 2272 # We return early here so that the UNION isn't attached to the subquery by the 2273 # following call to _parse_set_operations, but instead becomes the parent node 2274 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2275 elif self._match(TokenType.VALUES): 2276 this = self.expression( 2277 exp.Values, 2278 expressions=self._parse_csv(self._parse_value), 2279 alias=self._parse_table_alias(), 2280 ) 2281 elif from_: 2282 this = exp.select("*").from_(from_.this, copy=False) 2283 else: 2284 this = None 2285 2286 if parse_set_operation: 2287 return self._parse_set_operations(this) 2288 return this 2289 2290 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2291 if not skip_with_token and not self._match(TokenType.WITH): 2292 return None 2293 2294 comments = self._prev_comments 2295 recursive = self._match(TokenType.RECURSIVE) 2296 2297 expressions = [] 2298 while True: 2299 
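# Each pass through this loop parses one CTE; a comma (or, leniently, a
# repeated WITH keyword) continues the list, e.g.:
#     WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b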
expressions.append(self._parse_cte()) 2300 2301 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2302 break 2303 else: 2304 self._match(TokenType.WITH) 2305 2306 return self.expression( 2307 exp.With, comments=comments, expressions=expressions, recursive=recursive 2308 ) 2309 2310 def _parse_cte(self) -> exp.CTE: 2311 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2312 if not alias or not alias.this: 2313 self.raise_error("Expected CTE to have alias") 2314 2315 self._match(TokenType.ALIAS) 2316 return self.expression( 2317 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2318 ) 2319 2320 def _parse_table_alias( 2321 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2322 ) -> t.Optional[exp.TableAlias]: 2323 any_token = self._match(TokenType.ALIAS) 2324 alias = ( 2325 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2326 or self._parse_string_as_identifier() 2327 ) 2328 2329 index = self._index 2330 if self._match(TokenType.L_PAREN): 2331 columns = self._parse_csv(self._parse_function_parameter) 2332 self._match_r_paren() if columns else self._retreat(index) 2333 else: 2334 columns = None 2335 2336 if not alias and not columns: 2337 return None 2338 2339 return self.expression(exp.TableAlias, this=alias, columns=columns) 2340 2341 def _parse_subquery( 2342 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2343 ) -> t.Optional[exp.Subquery]: 2344 if not this: 2345 return None 2346 2347 return self.expression( 2348 exp.Subquery, 2349 this=this, 2350 pivots=self._parse_pivots(), 2351 alias=self._parse_table_alias() if parse_alias else None, 2352 ) 2353 2354 def _parse_query_modifiers( 2355 self, this: t.Optional[exp.Expression] 2356 ) -> t.Optional[exp.Expression]: 2357 if isinstance(this, self.MODIFIABLES): 2358 for join in iter(self._parse_join, None): 2359 this.append("joins", join) 2360 for lateral in iter(self._parse_lateral, None): 2361 this.append("laterals", lateral) 2362 2363 while True: 2364 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2365 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2366 key, expression = parser(self) 2367 2368 if expression: 2369 this.set(key, expression) 2370 if key == "limit": 2371 offset = expression.args.pop("offset", None) 2372 if offset: 2373 this.set("offset", exp.Offset(expression=offset)) 2374 continue 2375 break 2376 return this 2377 2378 def _parse_hint(self) -> t.Optional[exp.Hint]: 2379 if self._match(TokenType.HINT): 2380 hints = [] 2381 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2382 hints.extend(hint) 2383 2384 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2385 self.raise_error("Expected */ after HINT") 2386 2387 return self.expression(exp.Hint, expressions=hints) 2388 2389 return None 2390 2391 def _parse_into(self) -> t.Optional[exp.Into]: 2392 if not self._match(TokenType.INTO): 2393 return None 2394 2395 temp = self._match(TokenType.TEMPORARY) 2396 unlogged = self._match_text_seq("UNLOGGED") 2397 self._match(TokenType.TABLE) 2398 2399 return self.expression( 2400 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2401 ) 2402 2403 def _parse_from( 2404 self, joins: bool = False, skip_from_token: bool = False 2405 ) -> t.Optional[exp.From]: 2406 if not skip_from_token and not self._match(TokenType.FROM): 2407 return None 2408 2409 return self.expression( 2410 exp.From, comments=self._prev_comments, 
this=self._parse_table(joins=joins) 2411 ) 2412 2413 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2414 if not self._match(TokenType.MATCH_RECOGNIZE): 2415 return None 2416 2417 self._match_l_paren() 2418 2419 partition = self._parse_partition_by() 2420 order = self._parse_order() 2421 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2422 2423 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2424 rows = exp.var("ONE ROW PER MATCH") 2425 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2426 text = "ALL ROWS PER MATCH" 2427 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2428 text += f" SHOW EMPTY MATCHES" 2429 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2430 text += f" OMIT EMPTY MATCHES" 2431 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2432 text += f" WITH UNMATCHED ROWS" 2433 rows = exp.var(text) 2434 else: 2435 rows = None 2436 2437 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2438 text = "AFTER MATCH SKIP" 2439 if self._match_text_seq("PAST", "LAST", "ROW"): 2440 text += f" PAST LAST ROW" 2441 elif self._match_text_seq("TO", "NEXT", "ROW"): 2442 text += f" TO NEXT ROW" 2443 elif self._match_text_seq("TO", "FIRST"): 2444 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2445 elif self._match_text_seq("TO", "LAST"): 2446 text += f" TO LAST {self._advance_any().text}" # type: ignore 2447 after = exp.var(text) 2448 else: 2449 after = None 2450 2451 if self._match_text_seq("PATTERN"): 2452 self._match_l_paren() 2453 2454 if not self._curr: 2455 self.raise_error("Expecting )", self._curr) 2456 2457 paren = 1 2458 start = self._curr 2459 2460 while self._curr and paren > 0: 2461 if self._curr.token_type == TokenType.L_PAREN: 2462 paren += 1 2463 if self._curr.token_type == TokenType.R_PAREN: 2464 paren -= 1 2465 2466 end = self._prev 2467 self._advance() 2468 2469 if paren > 0: 2470 self.raise_error("Expecting )", self._curr) 2471 2472 pattern = exp.var(self._find_sql(start, end)) 2473 else: 2474 pattern = None 2475 2476 define = ( 2477 self._parse_csv(self._parse_name_as_expression) 2478 if self._match_text_seq("DEFINE") 2479 else None 2480 ) 2481 2482 self._match_r_paren() 2483 2484 return self.expression( 2485 exp.MatchRecognize, 2486 partition_by=partition, 2487 order=order, 2488 measures=measures, 2489 rows=rows, 2490 after=after, 2491 pattern=pattern, 2492 define=define, 2493 alias=self._parse_table_alias(), 2494 ) 2495 2496 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2497 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2498 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2499 2500 if outer_apply or cross_apply: 2501 this = self._parse_select(table=True) 2502 view = None 2503 outer = not cross_apply 2504 elif self._match(TokenType.LATERAL): 2505 this = self._parse_select(table=True) 2506 view = self._match(TokenType.VIEW) 2507 outer = self._match(TokenType.OUTER) 2508 else: 2509 return None 2510 2511 if not this: 2512 this = ( 2513 self._parse_unnest() 2514 or self._parse_function() 2515 or self._parse_id_var(any_token=False) 2516 ) 2517 2518 while self._match(TokenType.DOT): 2519 this = exp.Dot( 2520 this=this, 2521 expression=self._parse_function() or self._parse_id_var(any_token=False), 2522 ) 2523 2524 if view: 2525 table = self._parse_id_var(any_token=False) 2526 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2527 table_alias: t.Optional[exp.TableAlias] = self.expression( 2528 
exp.TableAlias, this=table, columns=columns 2529 ) 2530 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2531 # We move the alias from the lateral's child node to the lateral itself 2532 table_alias = this.args["alias"].pop() 2533 else: 2534 table_alias = self._parse_table_alias() 2535 2536 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2537 2538 def _parse_join_parts( 2539 self, 2540 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2541 return ( 2542 self._match_set(self.JOIN_METHODS) and self._prev, 2543 self._match_set(self.JOIN_SIDES) and self._prev, 2544 self._match_set(self.JOIN_KINDS) and self._prev, 2545 ) 2546 2547 def _parse_join( 2548 self, skip_join_token: bool = False, parse_bracket: bool = False 2549 ) -> t.Optional[exp.Join]: 2550 if self._match(TokenType.COMMA): 2551 return self.expression(exp.Join, this=self._parse_table()) 2552 2553 index = self._index 2554 method, side, kind = self._parse_join_parts() 2555 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2556 join = self._match(TokenType.JOIN) 2557 2558 if not skip_join_token and not join: 2559 self._retreat(index) 2560 kind = None 2561 method = None 2562 side = None 2563 2564 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2565 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2566 2567 if not skip_join_token and not join and not outer_apply and not cross_apply: 2568 return None 2569 2570 if outer_apply: 2571 side = Token(TokenType.LEFT, "LEFT") 2572 2573 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2574 2575 if method: 2576 kwargs["method"] = method.text 2577 if side: 2578 kwargs["side"] = side.text 2579 if kind: 2580 kwargs["kind"] = kind.text 2581 if hint: 2582 kwargs["hint"] = hint 2583 2584 if self._match(TokenType.ON): 2585 kwargs["on"] = self._parse_conjunction() 2586 elif self._match(TokenType.USING): 2587 kwargs["using"] = self._parse_wrapped_id_vars() 2588 elif not (kind and kind.token_type == TokenType.CROSS): 2589 index = self._index 2590 join = self._parse_join() 2591 2592 if join and self._match(TokenType.ON): 2593 kwargs["on"] = self._parse_conjunction() 2594 elif join and self._match(TokenType.USING): 2595 kwargs["using"] = self._parse_wrapped_id_vars() 2596 else: 2597 join = None 2598 self._retreat(index) 2599 2600 kwargs["this"].set("joins", [join] if join else None) 2601 2602 comments = [c for token in (method, side, kind) if token for c in token.comments] 2603 return self.expression(exp.Join, comments=comments, **kwargs) 2604 2605 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2606 this = self._parse_conjunction() 2607 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2608 return this 2609 2610 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2611 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2612 2613 return this 2614 2615 def _parse_index( 2616 self, 2617 index: t.Optional[exp.Expression] = None, 2618 ) -> t.Optional[exp.Index]: 2619 if index: 2620 unique = None 2621 primary = None 2622 amp = None 2623 2624 self._match(TokenType.ON) 2625 self._match(TokenType.TABLE) # hive 2626 table = self._parse_table_parts(schema=True) 2627 else: 2628 unique = self._match(TokenType.UNIQUE) 2629 primary = self._match_text_seq("PRIMARY") 2630 amp = self._match_text_seq("AMP") 2631 2632 if not self._match(TokenType.INDEX): 2633 return None 2634 
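# Inline index definition with no leading name, e.g. a Teradata-style
# "UNIQUE PRIMARY INDEX idx (a, b)" following a CREATE TABLE body: any
# index name comes after the INDEX keyword and there is no ON <table>
# part, so table stays None.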
2635 index = self._parse_id_var() 2636 table = None 2637 2638 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2639 2640 if self._match(TokenType.L_PAREN, advance=False): 2641 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2642 else: 2643 columns = None 2644 2645 return self.expression( 2646 exp.Index, 2647 this=index, 2648 table=table, 2649 using=using, 2650 columns=columns, 2651 unique=unique, 2652 primary=primary, 2653 amp=amp, 2654 partition_by=self._parse_partition_by(), 2655 where=self._parse_where(), 2656 ) 2657 2658 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2659 hints: t.List[exp.Expression] = [] 2660 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2661 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2662 hints.append( 2663 self.expression( 2664 exp.WithTableHint, 2665 expressions=self._parse_csv( 2666 lambda: self._parse_function() or self._parse_var(any_token=True) 2667 ), 2668 ) 2669 ) 2670 self._match_r_paren() 2671 else: 2672 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2673 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2674 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2675 2676 self._match_texts(("INDEX", "KEY")) 2677 if self._match(TokenType.FOR): 2678 hint.set("target", self._advance_any() and self._prev.text.upper()) 2679 2680 hint.set("expressions", self._parse_wrapped_id_vars()) 2681 hints.append(hint) 2682 2683 return hints or None 2684 2685 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2686 return ( 2687 (not schema and self._parse_function(optional_parens=False)) 2688 or self._parse_id_var(any_token=False) 2689 or self._parse_string_as_identifier() 2690 or self._parse_placeholder() 2691 ) 2692 2693 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2694 catalog = None 2695 db = None 2696 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2697 2698 while self._match(TokenType.DOT): 2699 if catalog: 2700 # This allows nesting the table in arbitrarily many dot expressions if needed 2701 table = self.expression( 2702 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2703 ) 2704 else: 2705 catalog = db 2706 db = table 2707 table = self._parse_table_part(schema=schema) or "" 2708 2709 if not table: 2710 self.raise_error(f"Expected table name but got {self._curr}") 2711 2712 return self.expression( 2713 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2714 ) 2715 2716 def _parse_table( 2717 self, 2718 schema: bool = False, 2719 joins: bool = False, 2720 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2721 parse_bracket: bool = False, 2722 ) -> t.Optional[exp.Expression]: 2723 lateral = self._parse_lateral() 2724 if lateral: 2725 return lateral 2726 2727 unnest = self._parse_unnest() 2728 if unnest: 2729 return unnest 2730 2731 values = self._parse_derived_table_values() 2732 if values: 2733 return values 2734 2735 subquery = self._parse_select(table=True) 2736 if subquery: 2737 if not subquery.args.get("pivots"): 2738 subquery.set("pivots", self._parse_pivots()) 2739 return subquery 2740 2741 bracket = parse_bracket and self._parse_bracket(None) 2742 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2743 this = t.cast( 2744 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2745 ) 2746 2747 if schema: 
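# In DDL position a column definition list may follow the table name,
# e.g. CREATE TABLE t (a INT, b TEXT); _parse_schema wraps the table and
# any such definitions in an exp.Schema node (and is a no-op otherwise).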
2748 return self._parse_schema(this=this) 2749 2750 version = self._parse_version() 2751 2752 if version: 2753 this.set("version", version) 2754 2755 if self.dialect.ALIAS_POST_TABLESAMPLE: 2756 table_sample = self._parse_table_sample() 2757 2758 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2759 if alias: 2760 this.set("alias", alias) 2761 2762 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2763 return self.expression( 2764 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2765 ) 2766 2767 this.set("hints", self._parse_table_hints()) 2768 2769 if not this.args.get("pivots"): 2770 this.set("pivots", self._parse_pivots()) 2771 2772 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2773 table_sample = self._parse_table_sample() 2774 2775 if table_sample: 2776 table_sample.set("this", this) 2777 this = table_sample 2778 2779 if joins: 2780 for join in iter(self._parse_join, None): 2781 this.append("joins", join) 2782 2783 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2784 this.set("ordinality", True) 2785 this.set("alias", self._parse_table_alias()) 2786 2787 return this 2788 2789 def _parse_version(self) -> t.Optional[exp.Version]: 2790 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2791 this = "TIMESTAMP" 2792 elif self._match(TokenType.VERSION_SNAPSHOT): 2793 this = "VERSION" 2794 else: 2795 return None 2796 2797 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2798 kind = self._prev.text.upper() 2799 start = self._parse_bitwise() 2800 self._match_texts(("TO", "AND")) 2801 end = self._parse_bitwise() 2802 expression: t.Optional[exp.Expression] = self.expression( 2803 exp.Tuple, expressions=[start, end] 2804 ) 2805 elif self._match_text_seq("CONTAINED", "IN"): 2806 kind = "CONTAINED IN" 2807 expression = self.expression( 2808 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2809 ) 2810 elif self._match(TokenType.ALL): 2811 kind = "ALL" 2812 expression = None 2813 else: 2814 self._match_text_seq("AS", "OF") 2815 kind = "AS OF" 2816 expression = self._parse_type() 2817 2818 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2819 2820 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2821 if not self._match(TokenType.UNNEST): 2822 return None 2823 2824 expressions = self._parse_wrapped_csv(self._parse_equality) 2825 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2826 2827 alias = self._parse_table_alias() if with_alias else None 2828 2829 if alias: 2830 if self.dialect.UNNEST_COLUMN_ONLY: 2831 if alias.args.get("columns"): 2832 self.raise_error("Unexpected extra column alias in unnest.") 2833 2834 alias.set("columns", [alias.this]) 2835 alias.set("this", None) 2836 2837 columns = alias.args.get("columns") or [] 2838 if offset and len(expressions) < len(columns): 2839 offset = columns.pop() 2840 2841 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2842 self._match(TokenType.ALIAS) 2843 offset = self._parse_id_var( 2844 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2845 ) or exp.to_identifier("offset") 2846 2847 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2848 2849 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2850 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2851 if not is_derived and not self._match(TokenType.VALUES): 2852 return None 2853 2854 expressions = self._parse_csv(self._parse_value) 
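# e.g. (VALUES (1, 'a'), (2, 'b')) AS t(x, y): each _parse_value call
# above produced one row tuple; the alias may appear either inside or
# after the closing paren, hence the second _parse_table_alias fallback
# in the return below.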
2855 alias = self._parse_table_alias() 2856 2857 if is_derived: 2858 self._match_r_paren() 2859 2860 return self.expression( 2861 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2862 ) 2863 2864 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2865 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2866 as_modifier and self._match_text_seq("USING", "SAMPLE") 2867 ): 2868 return None 2869 2870 bucket_numerator = None 2871 bucket_denominator = None 2872 bucket_field = None 2873 percent = None 2874 size = None 2875 seed = None 2876 2877 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2878 matched_l_paren = self._match(TokenType.L_PAREN) 2879 2880 if self.TABLESAMPLE_CSV: 2881 num = None 2882 expressions = self._parse_csv(self._parse_primary) 2883 else: 2884 expressions = None 2885 num = ( 2886 self._parse_factor() 2887 if self._match(TokenType.NUMBER, advance=False) 2888 else self._parse_primary() or self._parse_placeholder() 2889 ) 2890 2891 if self._match_text_seq("BUCKET"): 2892 bucket_numerator = self._parse_number() 2893 self._match_text_seq("OUT", "OF") 2894 bucket_denominator = self._parse_number() 2895 self._match(TokenType.ON) 2896 bucket_field = self._parse_field() 2897 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2898 percent = num 2899 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2900 size = num 2901 else: 2902 percent = num 2903 2904 if matched_l_paren: 2905 self._match_r_paren() 2906 2907 if self._match(TokenType.L_PAREN): 2908 method = self._parse_var(upper=True) 2909 seed = self._match(TokenType.COMMA) and self._parse_number() 2910 self._match_r_paren() 2911 elif self._match_texts(("SEED", "REPEATABLE")): 2912 seed = self._parse_wrapped(self._parse_number) 2913 2914 return self.expression( 2915 exp.TableSample, 2916 expressions=expressions, 2917 method=method, 2918 bucket_numerator=bucket_numerator, 2919 bucket_denominator=bucket_denominator, 2920 bucket_field=bucket_field, 2921 percent=percent, 2922 size=size, 2923 seed=seed, 2924 ) 2925 2926 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2927 return list(iter(self._parse_pivot, None)) or None 2928 2929 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2930 return list(iter(self._parse_join, None)) or None 2931 2932 # https://duckdb.org/docs/sql/statements/pivot 2933 def _parse_simplified_pivot(self) -> exp.Pivot: 2934 def _parse_on() -> t.Optional[exp.Expression]: 2935 this = self._parse_bitwise() 2936 return self._parse_in(this) if self._match(TokenType.IN) else this 2937 2938 this = self._parse_table() 2939 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2940 using = self._match(TokenType.USING) and self._parse_csv( 2941 lambda: self._parse_alias(self._parse_function()) 2942 ) 2943 group = self._parse_group() 2944 return self.expression( 2945 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2946 ) 2947 2948 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2949 index = self._index 2950 include_nulls = None 2951 2952 if self._match(TokenType.PIVOT): 2953 unpivot = False 2954 elif self._match(TokenType.UNPIVOT): 2955 unpivot = True 2956 2957 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2958 if self._match_text_seq("INCLUDE", "NULLS"): 2959 include_nulls = True 2960 elif self._match_text_seq("EXCLUDE", "NULLS"): 2961 include_nulls = False 2962 else: 2963
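# Neither PIVOT nor UNPIVOT matched, so this is not a pivot clause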
return None 2964 2965 expressions = [] 2966 field = None 2967 2968 if not self._match(TokenType.L_PAREN): 2969 self._retreat(index) 2970 return None 2971 2972 if unpivot: 2973 expressions = self._parse_csv(self._parse_column) 2974 else: 2975 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2976 2977 if not expressions: 2978 self.raise_error("Failed to parse PIVOT's aggregation list") 2979 2980 if not self._match(TokenType.FOR): 2981 self.raise_error("Expecting FOR") 2982 2983 value = self._parse_column() 2984 2985 if not self._match(TokenType.IN): 2986 self.raise_error("Expecting IN") 2987 2988 field = self._parse_in(value, alias=True) 2989 2990 self._match_r_paren() 2991 2992 pivot = self.expression( 2993 exp.Pivot, 2994 expressions=expressions, 2995 field=field, 2996 unpivot=unpivot, 2997 include_nulls=include_nulls, 2998 ) 2999 3000 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3001 pivot.set("alias", self._parse_table_alias()) 3002 3003 if not unpivot: 3004 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3005 3006 columns: t.List[exp.Expression] = [] 3007 for fld in pivot.args["field"].expressions: 3008 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3009 for name in names: 3010 if self.PREFIXED_PIVOT_COLUMNS: 3011 name = f"{name}_{field_name}" if name else field_name 3012 else: 3013 name = f"{field_name}_{name}" if name else field_name 3014 3015 columns.append(exp.to_identifier(name)) 3016 3017 pivot.set("columns", columns) 3018 3019 return pivot 3020 3021 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3022 return [agg.alias for agg in aggregations] 3023 3024 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3025 if not skip_where_token and not self._match(TokenType.WHERE): 3026 return None 3027 3028 return self.expression( 3029 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3030 ) 3031 3032 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3033 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3034 return None 3035 3036 elements = defaultdict(list) 3037 3038 if self._match(TokenType.ALL): 3039 return self.expression(exp.Group, all=True) 3040 3041 while True: 3042 expressions = self._parse_csv(self._parse_conjunction) 3043 if expressions: 3044 elements["expressions"].extend(expressions) 3045 3046 grouping_sets = self._parse_grouping_sets() 3047 if grouping_sets: 3048 elements["grouping_sets"].extend(grouping_sets) 3049 3050 rollup = None 3051 cube = None 3052 totals = None 3053 3054 index = self._index 3055 with_ = self._match(TokenType.WITH) 3056 if self._match(TokenType.ROLLUP): 3057 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3058 elements["rollup"].extend(ensure_list(rollup)) 3059 3060 if self._match(TokenType.CUBE): 3061 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3062 elements["cube"].extend(ensure_list(cube)) 3063 3064 if self._match_text_seq("TOTALS"): 3065 totals = True 3066 elements["totals"] = True # type: ignore 3067 3068 if not (grouping_sets or rollup or cube or totals): 3069 if with_: 3070 self._retreat(index) 3071 break 3072 3073 return self.expression(exp.Group, **elements) # type: ignore 3074 3075 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3076 if not self._match(TokenType.GROUPING_SETS): 3077 return None 3078 3079 return 
self._parse_wrapped_csv(self._parse_grouping_set) 3080 3081 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3082 if self._match(TokenType.L_PAREN): 3083 grouping_set = self._parse_csv(self._parse_column) 3084 self._match_r_paren() 3085 return self.expression(exp.Tuple, expressions=grouping_set) 3086 3087 return self._parse_column() 3088 3089 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3090 if not skip_having_token and not self._match(TokenType.HAVING): 3091 return None 3092 return self.expression(exp.Having, this=self._parse_conjunction()) 3093 3094 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3095 if not self._match(TokenType.QUALIFY): 3096 return None 3097 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3098 3099 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3100 if skip_start_token: 3101 start = None 3102 elif self._match(TokenType.START_WITH): 3103 start = self._parse_conjunction() 3104 else: 3105 return None 3106 3107 self._match(TokenType.CONNECT_BY) 3108 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3109 exp.Prior, this=self._parse_bitwise() 3110 ) 3111 connect = self._parse_conjunction() 3112 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3113 3114 if not start and self._match(TokenType.START_WITH): 3115 start = self._parse_conjunction() 3116 3117 return self.expression(exp.Connect, start=start, connect=connect) 3118 3119 def _parse_name_as_expression(self) -> exp.Alias: 3120 return self.expression( 3121 exp.Alias, 3122 alias=self._parse_id_var(any_token=True), 3123 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3124 ) 3125 3126 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3127 if self._match_text_seq("INTERPOLATE"): 3128 return self._parse_wrapped_csv(self._parse_name_as_expression) 3129 return None 3130 3131 def _parse_order( 3132 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3133 ) -> t.Optional[exp.Expression]: 3134 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3135 return this 3136 3137 return self.expression( 3138 exp.Order, 3139 this=this, 3140 expressions=self._parse_csv(self._parse_ordered), 3141 interpolate=self._parse_interpolate(), 3142 ) 3143 3144 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3145 if not self._match(token): 3146 return None 3147 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3148 3149 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3150 this = parse_method() if parse_method else self._parse_conjunction() 3151 3152 asc = self._match(TokenType.ASC) 3153 desc = self._match(TokenType.DESC) or (asc and False) 3154 3155 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3156 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3157 3158 nulls_first = is_nulls_first or False 3159 explicitly_null_ordered = is_nulls_first or is_nulls_last 3160 3161 if ( 3162 not explicitly_null_ordered 3163 and ( 3164 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3165 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3166 ) 3167 and self.dialect.NULL_ORDERING != "nulls_are_last" 3168 ): 3169 nulls_first = True 3170 3171 if self._match_text_seq("WITH", "FILL"): 3172 with_fill = self.expression( 3173 exp.WithFill, 3174 **{ # type: ignore 3175 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 
3176 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3177 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3178 }, 3179 ) 3180 else: 3181 with_fill = None 3182 3183 return self.expression( 3184 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3185 ) 3186 3187 def _parse_limit( 3188 self, this: t.Optional[exp.Expression] = None, top: bool = False 3189 ) -> t.Optional[exp.Expression]: 3190 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3191 comments = self._prev_comments 3192 if top: 3193 limit_paren = self._match(TokenType.L_PAREN) 3194 expression = self._parse_term() if limit_paren else self._parse_number() 3195 3196 if limit_paren: 3197 self._match_r_paren() 3198 else: 3199 expression = self._parse_term() 3200 3201 if self._match(TokenType.COMMA): 3202 offset = expression 3203 expression = self._parse_term() 3204 else: 3205 offset = None 3206 3207 limit_exp = self.expression( 3208 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3209 ) 3210 3211 return limit_exp 3212 3213 if self._match(TokenType.FETCH): 3214 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3215 direction = self._prev.text.upper() if direction else "FIRST" 3216 3217 count = self._parse_field(tokens=self.FETCH_TOKENS) 3218 percent = self._match(TokenType.PERCENT) 3219 3220 self._match_set((TokenType.ROW, TokenType.ROWS)) 3221 3222 only = self._match_text_seq("ONLY") 3223 with_ties = self._match_text_seq("WITH", "TIES") 3224 3225 if only and with_ties: 3226 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3227 3228 return self.expression( 3229 exp.Fetch, 3230 direction=direction, 3231 count=count, 3232 percent=percent, 3233 with_ties=with_ties, 3234 ) 3235 3236 return this 3237 3238 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3239 if not self._match(TokenType.OFFSET): 3240 return this 3241 3242 count = self._parse_term() 3243 self._match_set((TokenType.ROW, TokenType.ROWS)) 3244 return self.expression(exp.Offset, this=this, expression=count) 3245 3246 def _parse_locks(self) -> t.List[exp.Lock]: 3247 locks = [] 3248 while True: 3249 if self._match_text_seq("FOR", "UPDATE"): 3250 update = True 3251 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3252 "LOCK", "IN", "SHARE", "MODE" 3253 ): 3254 update = False 3255 else: 3256 break 3257 3258 expressions = None 3259 if self._match_text_seq("OF"): 3260 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3261 3262 wait: t.Optional[bool | exp.Expression] = None 3263 if self._match_text_seq("NOWAIT"): 3264 wait = True 3265 elif self._match_text_seq("WAIT"): 3266 wait = self._parse_primary() 3267 elif self._match_text_seq("SKIP", "LOCKED"): 3268 wait = False 3269 3270 locks.append( 3271 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3272 ) 3273 3274 return locks 3275 3276 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3277 while this and self._match_set(self.SET_OPERATIONS): 3278 token_type = self._prev.token_type 3279 3280 if token_type == TokenType.UNION: 3281 operation = exp.Union 3282 elif token_type == TokenType.EXCEPT: 3283 operation = exp.Except 3284 else: 3285 operation = exp.Intersect 3286 3287 comments = self._prev.comments 3288 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3289 by_name = self._match_text_seq("BY", "NAME") 3290 expression = 

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
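
    # Illustrative example (not part of the upstream source): since ALL is consumed
    # when present and DISTINCT is otherwise implied,
    #
    #   "SELECT a FROM t UNION ALL SELECT a FROM u"
    #
    # parses into exp.Union(distinct=False, ...), while a bare UNION sets
    # distinct=True; a following BY NAME (DuckDB's UNION BY NAME) is captured on the
    # same node via `by_name`.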

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True, upper=True)

        # Most dialects support the form INTERVAL '5' DAY, so we parse each INTERVAL
        # expression into this canonical form to make it easier to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)
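
    # Illustrative examples (not part of the upstream source) of the canonicalization
    # above -- all of these end up as the same exp.Interval shape:
    #
    #   INTERVAL 5 DAY    -> Interval(this=Literal(string='5'), unit=Var(DAY))
    #   INTERVAL '5' DAY  -> Interval(this=Literal(string='5'), unit=Var(DAY))
    #   INTERVAL '5 day'  -> Interval(this=Literal(string='5'), unit=Var(DAY))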

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor
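
    # Illustrative note (not part of the upstream source): the flags stamped onto
    # exp.Div above record the source dialect's division semantics so they can be
    # transpiled faithfully, e.g.
    #
    #   1 / 2 -> 0    where TYPED_DIVISION is True (integer operands, T-SQL style)
    #   1 / 2 -> 0.5  where TYPED_DIVISION is False
    #   x / 0 -> NULL rather than an error where SAFE_DIVISION is True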

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
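
    # Illustrative example (not part of the upstream source): the ODBC/JDBC escape
    # form handled above is simply unwrapped, so
    #
    #   SELECT {fn CONCAT('a', 'b')}
    #
    # parses to the same tree as SELECT CONCAT('a', 'b').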

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
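
    # Illustrative example (not part of the upstream source): names that are not in
    # the FUNCTIONS registry (and have no dedicated parser) fall back to
    # exp.Anonymous, so the call round-trips verbatim:
    #
    #   sqlglot.parse_one("SELECT MY_UDF(x, 1)")
    #   -> ... Anonymous(this="MY_UDF", expressions=[Column(x), Literal(1)])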

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
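
    # Illustrative example (not part of the upstream source): higher-order function
    # arguments such as
    #
    #   TRANSFORM(arr, x -> x + 1)      -- or (x, i) -> ... for several parameters
    #
    # are handled by the LAMBDAS branch above; anything that is not a lambda falls
    # through to a regular (possibly DISTINCT-prefixed) expression argument.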

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
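
    # Illustrative examples (not part of the upstream source) of inputs the method
    # above accepts:
    #
    #   GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 5)
    #   GENERATED ALWAYS AS IDENTITY(1, 1)    -- bare start/increment number pair
    #   GENERATED ALWAYS AS (price * qty)     -- non-identity computed expression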

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this
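
    # Illustrative examples (not part of the upstream source) of the three bracket
    # shapes distinguished above:
    #
    #   {'a': 1, 'b': 2}      -> exp.Struct   (DuckDB struct literal)
    #   ARRAY[1, 2] or [1, 2] -> exp.Array
    #   col[1]                -> exp.Bracket  (subscript, index-offset adjusted)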

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
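
    # Illustrative example (not part of the upstream source) of the FORMAT rewrite
    # above: a cast with a temporal target type and a format string, such as
    #
    #   CAST('01-2024' AS DATE FORMAT 'MM-YYYY')
    #
    # is parsed as exp.StrToDate rather than a plain exp.Cast, with the format
    # string translated through the dialect's FORMAT_MAPPING.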

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
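
    # Illustrative example (not part of the upstream source): both of these parse
    # into the same exp.GroupConcat shape, which is what makes transpilation between
    # the two styles straightforward:
    #
    #   STRING_AGG(x, ',' ORDER BY y)                 -- Postgres / BigQuery inline
    #   STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y)  -- T-SQL / Oracle style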
4485 """ 4486 args = self._parse_csv(self._parse_conjunction) 4487 4488 if len(args) < 3: 4489 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4490 4491 expression, *expressions = args 4492 if not expression: 4493 return None 4494 4495 ifs = [] 4496 for search, result in zip(expressions[::2], expressions[1::2]): 4497 if not search or not result: 4498 return None 4499 4500 if isinstance(search, exp.Literal): 4501 ifs.append( 4502 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4503 ) 4504 elif isinstance(search, exp.Null): 4505 ifs.append( 4506 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4507 ) 4508 else: 4509 cond = exp.or_( 4510 exp.EQ(this=expression.copy(), expression=search), 4511 exp.and_( 4512 exp.Is(this=expression.copy(), expression=exp.Null()), 4513 exp.Is(this=search.copy(), expression=exp.Null()), 4514 copy=False, 4515 ), 4516 copy=False, 4517 ) 4518 ifs.append(exp.If(this=cond, true=result)) 4519 4520 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4521 4522 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4523 self._match_text_seq("KEY") 4524 key = self._parse_column() 4525 self._match_set((TokenType.COLON, TokenType.COMMA)) 4526 self._match_text_seq("VALUE") 4527 value = self._parse_bitwise() 4528 4529 if not key and not value: 4530 return None 4531 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4532 4533 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4534 if not this or not self._match_text_seq("FORMAT", "JSON"): 4535 return this 4536 4537 return self.expression(exp.FormatJson, this=this) 4538 4539 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4540 # Parses the "X ON Y" syntax, i.e. 

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
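
    # Illustrative example (not part of the upstream source) of the argument swap
    # above: in TRIM(BOTH 'x' FROM y) the pattern precedes FROM, so after the swap
    # `this` holds the string being trimmed (y) and `expression` the trim pattern
    # ('x'), matching the comma form TRIM(y, 'x') used by other dialects.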

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
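
    # Illustrative example (not part of the upstream source): for
    #
    #   SUM(x) OVER (PARTITION BY g ORDER BY d ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)
    #
    # the code above builds exp.Window(this=Sum(x), partition_by=[g], order=...,
    # spec=WindowSpec(kind="ROWS", start="1", start_side="PRECEDING",
    # end="CURRENT ROW")).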

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None
    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
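# Sketch (hypothetical helper, not sqlglot code): _parse_csv and _parse_tokens
# are small combinators. _parse_tokens folds a left-associative operator chain
# "a OP b OP c" into nested nodes ((a OP b) OP c), mirroring the loop above:
def fold_left_associative(parse_operand, match_operator, make_node):
    this = parse_operand()
    op = match_operator()  # returns an operator token, or None when the chain ends
    while op:
        this = make_node(op, this, parse_operand())
        op = match_operator()
    return this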
    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
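# Example (illustrative, not part of the original source): _parse_alter routes
# "ADD" through _parse_add_column; the expected shapes are noted in comments.
import sqlglot

alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT")
print(type(alter).__name__)       # expected: AlterTable
print(alter.args.get("actions"))  # expected: a ColumnDef for column c, with "exists" set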
    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )
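# Example (illustrative, not part of the original source): _parse_merge plus
# _parse_when_matched below should yield an exp.Merge whose "expressions" arg
# is a list of exp.When nodes, one per WHEN clause.
import sqlglot

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
print(merge.args.get("expressions"))  # expected: [When(...), When(...)]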
    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
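# Example (illustrative, not part of the original source): a clean assignment
# parses into exp.Set; had any tokens remained unconsumed, _parse_set would
# retreat and fall back to a raw exp.Command instead.
import sqlglot

node = sqlglot.parse_one("SET x = 1")
print(type(node).__name__)  # expected: Set
print(node.expressions)     # expected: [SetItem(this=EQ(this=x, expression=1))]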
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
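# Sketch (illustrative keyword set): how _find_parser walks tokens against a
# trie of multi-word keys, using sqlglot's own trie helpers directly.
from sqlglot.trie import TrieResult, in_trie, new_trie

trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
result, node = in_trie(trie, ["SHOW"])    # TrieResult.PREFIX: keep consuming tokens
result, node = in_trie(node, ["TABLES"])  # TrieResult.EXISTS: "SHOW TABLES" matched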
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
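# Example (illustrative, not part of the original source): after
# _replace_lambda runs, a lambda parameter referenced in the body is a plain
# identifier rather than a column. DuckDB's lambda syntax is used here; the
# exact tree shape may vary by version.
import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT list_transform(xs, x -> x + 1) FROM t", read="duckdb")
lam = tree.find(exp.Lambda)
print(type(lam.this.this))  # the "x" in "x + 1": expected exp.Identifier, not exp.Column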
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
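A minimal usage sketch of this low-level API (most callers go through sqlglot.parse or sqlglot.parse_one, which wrap it):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
parser = Parser(error_level=ErrorLevel.RAISE)
trees = parser.parse(Tokenizer().tokenize(sql), sql=sql)  # one tree per statement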
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
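For example, a fragment that is not a full statement can be parsed directly into a specific node type; a hedged sketch, assuming exp.Join is among the registered EXPRESSION_PARSERS keys:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "LEFT JOIN foo ON a = b"
join = Parser().parse_into(exp.Join, Tokenizer().tokenize(sql), sql=sql)[0]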
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
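A sketch of how the error levels differ on the same malformed input (exact messages vary; WARN assumes the usual logging setup):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"
Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql=sql)  # logs the errors
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    print(e.errors)  # structured error dicts; at most max_errors messages are combined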
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
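A sketch of validation against an expression's mandatory arguments (exp.Not requires "this"; under ErrorLevel.IGNORE the check is skipped):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

node = Parser(error_level=ErrorLevel.IGNORE).expression(exp.Not)  # no validation performed
print(exp.Not().error_messages())  # expected: a "Required keyword: 'this' ..." message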