Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
smartnoise-sql / snsql / sql / parser / SqlSmall.g4
Size: Mime:
grammar SqlSmall;

batch :
    unifiedQuery (';' unifiedQuery)* (';')? EOF
    ;

basicQuery :
      selectClause
      fromClause?
      whereClause?
      aggregationClause?
      havingClause?
      orderClause?
      limitClause?
    ;

unifiedQuery:
    basicQuery queryOperations*
    | '(' basicQuery ')' queryOperations*
    ;

queryOperations
    : (UNION ALL? | INTERSECT ALL? | EXCEPT ALL?) basicQuery
    | (UNION ALL? | INTERSECT ALL? | EXCEPT ALL?) '(' basicQuery ')'
    ;

selectClause
    : (WITH namedSubquerySeq) ? SELECT (setQuantifier)? namedExpressionSeq
    ;

fromClause : FROM relation (',' relation)*;

whereClause
    : WHERE booleanExpression
    ;

aggregationClause
    : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)*
    ;

havingClause
    : HAVING booleanExpression
    ;

orderClause
    : (ORDER BY order+=sortItem (',' order+=sortItem)*)
    ;

limitClause : LIMIT n=number (OFFSET offset=number)?;

topClause : TOP ('(')? n=number (')')?;

joinRelation
    : (joinType) JOIN right=relationPrimary joinCriteria?
    ;

joinType
    : INNER?
    | CROSS
    | LEFT OUTER?
    | LEFT? SEMI
    | RIGHT OUTER?
    | FULL OUTER?
    | LEFT? ANTI
    ;

joinCriteria
    : ON booleanExpression                          #booleanJoin
    | USING '(' identifier (',' identifier)* ')'    #usingJoin
    ;

sortItem
    : expression ordering=(ASC | DESC)?
    ;

setQuantifier
    : DISTINCT
    | ALL
    | (topClause)
    ;

relation
    : relationPrimary joinRelation*
    ;

relationPrimary
    : qualifiedTableName (AS? alias=identifier)? tablesample? #table
    | '(' unifiedQuery ')' (AS? alias=identifier)?            #aliasedQuery
    | '(' relation ')' (AS alias=identifier)?     #aliasedRelation
    | unnestOperator #unnest
    ;

tablesample: TABLESAMPLE (BERNOULLI|SYSTEM) '(' (p=number|n=number ROWS) ')' (REPEATABLE '(' seed=number ')')?;

unnestOperator: UNNEST '(' qualifiedTableName ')' (AS? alias=identifier)?;

caseExpression
    : CASE baseCaseExpr=expression (whenBaseExpression)+ (ELSE elseExpr=expression)? END #caseBaseExpr
    | CASE (whenExpression)+ (ELSE elseExpr=expression)? END #caseWhenExpr
    ;

namedExpression
    : expression (AS name=identifier)?
    ;

namedExpressionSeq
    : namedExpression (',' namedExpression)*
    ;

namedSubquery
    : name=identifier ('(' identifier (',' identifier)* ')' )?  AS '(' unifiedQuery ')'
    ;

namedSubquerySeq
    : namedSubquery (',' namedSubquery)*;

whenExpression : (WHEN baseBoolExpr=booleanExpression THEN thenExpr=expression);
whenBaseExpression : (WHEN baseWhenExpr=expression THEN thenExpr=expression);

expression
    : name=qualifiedColumnName                              #columnName
    | left=expression op=ASTERISK right=expression          #multiply
    | left=expression op=SLASH right=expression             #divide
    | left=expression op='%' right=expression               #modulo
    | left=expression op=PLUS right=expression              #add
    | left=expression op=MINUS right=expression             #subtract
    | caseExpression                                        #caseExpr
    | castExpression                                        #castExpr
    | allExpression                                         #allExpr
    | literal                                               #literalExpr
    | rankingFunction                                       #rankFunction
    | functionExpression                                    #functionExpr
    | '(' unifiedQuery ')'                                  #subqueryExpr
    | '(' expression ')'                                    #nestedExpr
    | stringFunction                                        #stringFunc
    | dateTimeFunction                                      #dateTimeFunc
    ;


predicate
    : NOT? kind=BETWEEN lower=expression AND upper=expression #betweenCondition
    | NOT? kind=IN '(' expression (',' expression)* ')' #inCondition
    | IS NOT? kind=(NULL | TRUE | FALSE) #isCondition
    ;

functionExpression
    : bareFunction              #bareFunc
    | roundFunction             #roundFunc
    | powerFunction             #powerFunc
    | truncFunction             #truncFunc
    | function=aggregateFunctionName '(' setQuantifier? expression ')' #aggFunc
    | function=mathFunctionName '(' expression ')' #mathFunc
    | IIF '(' test=booleanExpression ',' yes=expression ',' no=expression ')' #iifFunc
    | CHOOSE '(' index=expression (',' literal)+ ')' # chooseFunc
    | leadFunction             #leadFunc
    | greatestFunction         #greatestFunc
    | leastFunction             #leastFunc
    | function=identifier '(' userDefinedArgument (',' userDefinedArgument)* ')'    #userDefFunc
    | NOT '(' booleanExpression ')'   #notFunc
    ;

userDefinedArgument
    : expression  #exprArg
    | datePart    #datePartArg
    | dbType      #typeArg
    ;


booleanExpression
    : NOT booleanExpression   #logicalNot
    | left=expression op=comparisonOperator right=expression  #comparison
    | left=booleanExpression AND right=booleanExpression #conjunction
    | left=booleanExpression OR right=booleanExpression #disjunction
    | '(' booleanExpression ')' #nestedBoolean
    | expression predicate #predicated
    | name=qualifiedColumnName #boolColumn
    | value=expression LIKE pattern=expression # likeCondition
    | value=expression ILIKE pattern=expression # iLikeCondition
    | regexpContainsFunction # bigqueryRegexpContainsCondition
    ;

castExpression: CAST '(' fromExpr=expression AS dbType ')';

dbType
    : INTEGER
    | FLOAT
    | NUMERIC
    | BOOLEAN
    | TIMESTAMP
    | DATE
    | TIME
    | variableString
    | fixedString
    | TEXT
    | bigqueryString
    | ARRAY
    | variableBinary
    ;

bigqueryString: STRING;
variableString: (VARCHAR | NVARCHAR) ( '(' varCharLength=(INTEGER_VALUE | MAX) ')')?;
fixedString: (CHAR | NCHAR) ( '(' fixedCharLength=INTEGER_VALUE ')')?;
variableBinary: VARBINARY ( '(' varBinLength=(INTEGER_VALUE | MAX) ')')?;

bareFunction : function=bareFunctionName '(' ')';

rankingFunction: function=rankingFunctionName  '(' ')' overClause;

leadFunction
    : LEAD '(' sourceExpr=expression (',' offset=expression (',' default=expression)? )? ')' overClause
    ;

comparisonOperator
    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
    ;

booleanValue
    : TRUE | FALSE
    ;

allExpression
    : ASTERISK
    | identifier '.' ASTERISK
    | QN2 '.' ASTERISK
    ;

literal
    : QUOTED_STRING    #stringLiteral
    | RAW_STRING    #rawStringLiteral
    | number    #numberLiteral
    | TRUE      #trueLiteral
    | FALSE     #falseLiteral
    | NULL      #nullLiteral
    ;

// date and time functions
dateTimeFunction
    : getTime                                               #currentTimeFunc
    | extractFunction                                       #extractFunc
    | dayNameFunction                                       #dayNameFunc
    | fromUnixTimeFunction                                  #fromUnixTimeFunc
    | unixTimeStampFunction                                 #unixTimeStampFunc
    | dateFormatFunction                                    #dateFormatFunc
    | formatDateFunction                                    #formatDateFunc
    | quarterFunction                                       #quarterFunc
    | dowFunction                                           #dowFunc
    | datetimeDiffFunction                                  #datetimeDiffFunc
    | datePartFunction                                      #datePartFunc
    | dateFunction                                          #dateFunc
    ;


extractFunction: EXTRACT '(' datePart FROM sourceExpr=expression ')';
dayNameFunction: DAYNAME '(' expr=expression ')';
fromUnixTimeFunction: FROM_UNIXTIME '(' unix_timestamp=expression (',' formatDate=expression)? ')';
unixTimeStampFunction: UNIX_TIMESTAMP '(' (expr=expression)? ')';
dateFormatFunction: DATE_FORMAT '(' expr=expression ',' formatDate=expression ')';
formatDateFunction: FORMAT_DATE '('formatDate=expression ',' expr=expression ')';
quarterFunction: QUARTER '(' expr=expression ')';
dowFunction: (DOW | DAYOFWEEK) '(' expr=expression ')';
dayOfWeekFunction: DAYOFWEEK '(' expr=expression ')';
datetimeDiffFunction: DATETIME_DIFF '(' order_date=expression ',' ship_date=expression ',' datePart ')';
datePartFunction: DATEPART '('  datePart ',' interval=expression ')';
dateFunction: DATE '(' expr=expression ')';

datePart
    :
    | NANOSECOND
    | MICROSECOND
    | MILLISECOND
    | SECOND
    | MINUTE
    | HOUR
    | DAY
    | ISOWEEK
    | WEEKDAY
    | WEEK
    | DAYOFWEEK
    | MONTH
    | QUARTER
    | YEAR
    | ISOYEAR
    | EPOCH
    ;

getTime
    : CURRENT_DATE
    | CURRENT_TIME
    | CURRENT_TIMESTAMP
    ;

// string functions
stringFunction
    : stringUpper
    | stringLower
    | stringConcat
    | coalesceFunction
    | trimFunction
    | substringFunction
    | positionFunction
    | charLengthFunction
    | substringBigqueryFunction
    | regexpContainsFunction
    | regexpExctractFunction
    | regexpReplaceFunction
    | encodeFunction
    | decodeFunction
    | unhexFunction
    | hexFunction
    | regexpSplitToTableFunction
    ;

stringUpper: UPPER '(' sourceExpr=expression ')';
stringLower: LOWER '(' sourceExpr=expression ')';
stringConcat: CONCAT '(' expression (',' expression)* ')';
coalesceFunction: COALESCE '(' expression (',' expression)* ')';
trimFunction: TRIM '(' sourceExpr=expression ')';
substringFunction: SUBSTRING '(' sourceExpr=expression (FROM | ',') startIdx=expression ((FOR | ',') length=expression)? ')'; // postgres
positionFunction: POSITION '(' searchString=expression IN sourceString=expression ')';
charLengthFunction: (CHAR_LENGTH | LENGTH) '(' sourceString=expression ')';
encodeFunction: ENCODE '(' sourceString=expression ',' formatString=expression ')';
decodeFunction: DECODE '(' sourceString=expression ',' formatString=expression ')';
unhexFunction: (UNHEX | FROM_HEX) '(' sourceString=expression ')';
hexFunction: (HEX | TO_HEX) '(' sourceString=expression ')';
regexpSplitToTableFunction: REGEXP_SPLIT_TO_TABLE '(' sourceString=expression ',' pattern=expression (',' flag=QUOTED_STRING)? ')';


// bigquery functions
substringBigqueryFunction: SUBSTR '(' sourceExpr=expression ',' position=expression ( ',' length=expression)? ')';
regexpContainsFunction: REGEXP_CONTAINS '(' sourceExpr=expression ',' regExpr=expression ')';
regexpExctractFunction: (REGEXP_EXTRACT | REGEXP_SUBSTR) '(' sourceExpr=expression ',' regExpr=expression (',' position=expression (',' occurrence=expression)? )? ')';
regexpReplaceFunction: (REGEXP_REPLACE) '(' sourceExpr=expression ',' regExpr=expression ',' replacementExpr=expression ')';

// numeric functions
truncFunction: (TRUNC | TRUNCATE) '(' sourceExpr=expression (',' digits=number)? ')';
roundFunction : ROUND '(' expression (',' digits=number)? ')';
powerFunction : POWER '(' expExpr=expression ',' powerExpr=expression ')';
mathFunctionName : ABS | CEIL | CEILING | FLOOR | SIGN | SQRT | SQUARE | EXP | LN | LOG | LOG10 | LOG2 | SIN | COS | TAN | ASIN | ACOS | ATAN | ATANH  | DEGREES;
bareFunctionName : PI | RANDOM | RAND | NEWID;

greatestFunction: GREATEST '(' expression (',' expression)* ')';
leastFunction: LEAST '(' expression (',' expression)* ')';

rankingFunctionName : ROW_NUMBER | RANK | DENSE_RANK;

aggregateFunctionName : COUNT | SUM | AVG | VAR | VARIANCE | STD | STDDEV | STDEV | MIN | MAX | PERCENTILE_DISC | PERCENTILE_CONT;
partitionByClause : PARTITION BY expression (',' expression)*;
overClause : OVER '(' (partitionByClause)? (orderClause)? ')';

aliasedSubquery : '(' unifiedQuery ')' (AS alias=identifier)?;

aliasedTableOrSubquerySeq : (aliasedTableName | aliasedSubquery) (',' (aliasedTableName | aliasedSubquery))*;

aliasedTableSeq : aliasedTableName (',' aliasedTableName)*;

aliasedTableName : qualifiedTableName (AS alias=identifier)?;

qualifiedTableName : QNn | QN2 | IDENT;

qualifiedColumnName
    : QNn
    | QN2
    | IDENT
    | '"' QUOTED_STRING '"'
    ;

identifier: IDENT;

number
    : MINUS? DECIMAL_VALUE            #decimalLiteral
    | MINUS? INTEGER_VALUE            #integerLiteral
    ;

ABS: A B S;
ACOS: A C O S;
ALL: A L L;
AND: A N D;
ANTI: A N T I;
AS: A S;
ASC: A S C;
ASIN: A S I N;
ATAN: A T A N;
ATANH: A T A N H;
ARRAY: A R R A Y;
AVG: A V G;
BETWEEN: B E T W E E N;
BERNOULLI: B E R N O U L L I;
BOOLEAN: B O O L E A N;
BY: B Y;
CASE: C A S E;
CAST: C A S T;
CHAR: C H A R;
CHAR_LENGTH: C H A R UNDERSCORE L E N G T H;
CEIL: C E I L;
CEILING: C E I L I N G;
CHOOSE: C H O O S E;
COALESCE: C O A L E S C E;
CONCAT: C O N C A T;
COS: C O S;
COT: C O T;
COUNT: C O U N T;
CROSS: C R O S S;
CURRENT_DATE: C U R R E N T UNDERSCORE D A T E;
CURRENT_TIME: C U R R E N T UNDERSCORE T I M E;
CURRENT_TIMESTAMP: C U R R E N T UNDERSCORE T I M E S T A M P;
DATE: D A T E;
DATE_FORMAT: D A T E '_' F O R M A T;
DATEPART: D A T E P A R T;
DATETIME_DIFF: D A T E T I M E '_' D I F F;
DAY: D A Y;
DAYNAME: D A Y N A M E;
DAYOFWEEK: D A Y O F W E E K;
DECODE: D E C O D E;
DEGREES: D E G R E E S;
DENSE_RANK: D E N S E '_' R A N K;
DESC: D E S C;
DISTINCT: D I S T I N C T;
DIV: D I V;
DOW: D O W;
ELSE: E L S E;
ENCODE: E N C O D E;
END: E N D;
EPOCH: E P O C H;
EXP: E X P;
EXCEPT: E X C E P T;
EXTRACT: E X T R A C T;
FALSE: F A L S E;
FLOAT: F L O A T;
FLOOR: F L O O R;
FOR: F O R;
FORMAT_DATE: F O R M A T '_' D A T E;
FROM: F R O M;
FROM_HEX: F R O M '_' H E X;
FROM_UNIXTIME: F R O M '_' U N I X T I M E;
FULL: F U L L;
GREATEST: G R E A T E S T;
GROUP: G R O U P;
HAVING: H A V I N G;
HEX: H E X;
HOUR: H O U R;
IF: I F;
IIF: I I F;
ILIKE: I L I K E;
IN: I N;
INNER: I N N E R;
INTEGER: I N T E G E R;
INTERSECT: I N T E R S E C T;
IS: I S;
ISOWEEK: I S O W E E K;
ISOYEAR: I S O Y E A R;
JOIN: J O I N;
LEAD: L E A D;
LEAST: L E A S T;
LEFT: L E F T;
LENGTH: L E N G T H;
LIMIT: L I M I T;
LIKE: L I K E;
LN: L N;
LOG: L O G;
LOG10: L O G '1' '0';
LOG2: L O G '2';
LOWER: L O W E R;
MAX: M A X;
MICROSECOND: M I C R O S E C O N D;
MILLISECOND: M I L L I S E C O N D;
MIN: M I N;
MINUTE: M I N U T E;
MONTH: M O N T H;
NANOSECOND: N A N O S E C O N D;
NCHAR: N C H A R;
NEWID: N E W I D;
NOT: N O T;
NULL: N U L L;
NUMERIC: N U M E R I C;
OFFSET: O F F S E T;
ON: O N;
OR: O R;
ORDER: O R D E R;
OUTER: O U T E R;
OVER: O V E R;
PARTITION: P A R T I T I O N;
PERCENTILE_CONT: P E R C E N T I L E '_' C O N T;
PERCENTILE_DISC: P E R C E N T I L E '_' D I S C;
PI: P I;
POSITION: P O S I T I O N;
POWER: P O W E R;
QUARTER: Q U A R T E R;
RAND: R A N D;
RANDOM: R A N D O M;
RANK: R A N K;
REGEXP_CONTAINS: R E G E X P '_' C O N T A I N S;
REGEXP_EXTRACT: R E G E X P '_' E X T R A C T;
REGEXP_SUBSTR: R E G E X P '_' S U B S T R;
REGEXP_REPLACE: R E G E X P '_' R E P L A C E;
REGEXP_SPLIT_TO_TABLE: R E G E X P '_' S P L I T '_' T O '_' T A B L E;
REPEATABLE: R E P E  A T A B L E;
RIGHT: R I G H T;
ROUND: R O U N D;
ROW_NUMBER: R O W '_' N U M B E R;
ROWNUM: R O W N U M;
ROWS: R O W S;
SECOND: S E C O N D;
SELECT: S E L E C T;
SEMI: S E M I;
SIGN: S I G N;
SIN: S I N;
SORT: S O R T;
SQL: S Q L;  // reserved
SQRT: S Q R T;
SQUARE: S Q U A R E;
STD: S T D;
STDDEV: S T D D E V;
STDEV: S T D E V;
STRING: S T R I N G;
SUBSTR: S U B S T R;
SUBSTRING: S U B S T R I N G;
SUM: S U M;
SYSTEM: S Y S T E M;
TAN: T A N;
TABLESAMPLE: T A B L E S A M P L E;
TEXT: T E X T;
THEN: T H E N;
TIME: T I M E;
TIMESTAMP: T I M E S T A M P;
TO_HEX: T O '_' H E X;
TOP: T O P;
TRIM: T R I M;
TRUE: T R U E;
TRUNC: T R U N C;
TRUNCATE: T R U N C A T E;
TZOFFSET: T Z O F F S E T;
UNNEST: U N N E S T;
UNHEX: U N H E X;
UNION: U N I O N; // reserved
UNIX_TIMESTAMP: U N I X '_' T I M E S T A M P;
UPPER: U P P E R;
USING: U S I N G;
VAR: V A R;
VARBINARY: V A R B I N A R Y;
VARCHAR: V A R C H A R;
NVARCHAR: N V A R C H A R;
VARIANCE: V A R I A N C E;
WEEKDAY: W E E K D A Y;
WEEK: W E E K;
WHEN: W H E N;
WHERE: W H E R E;
WITH: W I T H;
YEAR: Y E A R;

EQ  : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LT  : '<';
LTE : '<=' | '!>';
GT  : '>';
GTE : '>=' | '!<';

PLUS: '+';
MINUS: '-';
ASTERISK: '*';
SLASH: '/';
PERCENT: '%';
TILDE: '~';
AMPERSAND: '&';
PIPE: '|';
CONCAT_PIPE: '||';
HAT: '^';
UNDERSCORE: '_';


/*
    Standard Lexer stuff
*/
QUOTED_STRING
    : '\'' ( ~('\''|'\\')
    | ('\\' .) )* '\''
    ;
//    | '"' ( ~('"'|'\\') | ('\\' .) )* '"';
RAW_STRING
    : 'r'+QUOTED_STRING
    | 'R'+QUOTED_STRING
    ;

INTEGER_VALUE
    : DIGIT+
    ;

DECIMAL_VALUE
    : DIGIT+ EXPONENT
    | DECIMAL_DIGITS EXPONENT?
    ;


QN2 : IDENT '.' IDENT;

QNn : IDENT '.' IDENT ('.' IDENT)*;

IDENT: IDENTIFIER | ESCAPED_IDENTIFIER;

IDENTIFIER_UNICODE : [a-zA-Z_\u00A1-\uFFFF][a-zA-Z_\u00A1-\uFFFF0-9$]*;

IDENTIFIER
    : LETTER+ (LETTER | DIGIT | '_')*
    | DOUBLEQ_STRING_LITERAL
    | IDENTIFIER_UNICODE
    ;

ESCAPED_IDENTIFIER
    : '[' (LETTER | DIGIT | '_' | ' ' | '-')*? ']'
    | '"' (LETTER | DIGIT | '_' | ' ' | '-')*? '"'
    | '`' (LETTER | DIGIT | '_' | ' ' | '-')*? '`'
    ;

LETTER : (UCASE | LCASE);
DOUBLEQ_STRING_LITERAL : DQUOTA_STRING;

fragment DQUOTA_STRING : '"' ( '\\'. | '""' | ~('"' | '\\') )* '"';
fragment DECIMAL_DIGITS
    : DIGIT+ '.' DIGIT*
    ;

fragment EXPONENT
    : E [+-]? DIGIT+
    ;

fragment DIGIT
    : [0-9]
    ;

fragment UCASE: [A-Z];
fragment LCASE: [a-z];

fragment A : [aA];
fragment B : [bB];
fragment C : [cC];
fragment D : [dD];
fragment E : [eE];
fragment F : [fF];
fragment G : [gG];
fragment H : [hH];
fragment I : [iI];
fragment J : [jJ];
fragment K : [kK];
fragment L : [lL];
fragment M : [mM];
fragment N : [nN];
fragment O : [oO];
fragment P : [pP];
fragment Q : [qQ];
fragment R : [rR];
fragment S : [sS];
fragment T : [tT];
fragment U : [uU];
fragment V : [vV];
fragment W : [wW];
fragment X : [xX];
fragment Y : [yY];
fragment Z : [zZ];

SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> skip
    ;

BRACKETED_EMPTY_COMMENT
    : '/**/' -> skip
    ;

BRACKETED_COMMENT
    : '/*' ~[+] .*? '*/' -> skip
    ;


WS : [ \t\r\n]+ -> skip ;

SPACE : [ \t]+ -> skip ;