1

我正在开发一个 sql 解析器,以允许我们的程序员创建一个类似 mysql 的查询,然后根据您的需要自动转换为 mssql 或 mysql 查询。这允许在 mysql 和 mssql 数据库之间灵活转换。

目前我想要的功能基本都已经实现了,只剩下一个问题。

如果我运行这样的查询:

SELECT * FROM `x`.`y` WHERE `y`.`b` = 'hello'

字符串 'hello' 没有被识别为字符串,而是被当成了标识符 llo。

这是我的语法

grammar Query;

// Entry rule: exactly one statement of any supported kind, an optional
// trailing ';', and then end of input (EOF is matched explicitly).
query
    : (select_stmt|update_stmt|delete_stmt|insert_stmt|upsert_stmt) ';'? EOF
    ;

// SELECT statement: the column list, a mandatory FROM source, then the
// optional clauses in this fixed order: WHERE, GROUP BY, ORDER BY, LIMIT.
select_stmt
    : select 'FROM' select_table_exp where? groupby? orderby? limit?
    ;

// UPDATE statement: target table, SET assignments, optional WHERE filter.
update_stmt
    : 'UPDATE' table_exp set where?
    ;

// DELETE statement: target table followed by a WHERE clause. The WHERE clause
// is mandatory in this rule (update_stmt, by contrast, makes it optional).
//
// The keywords are two separate literals. The previous single literal
// 'DELETE FROM ' carried a trailing blank inside the token text, so the
// statement only lexed when exactly one space followed FROM; a tab, a newline
// or a doubled space broke it. 'FROM' is the same literal select_stmt uses.
delete_stmt
    : 'DELETE' 'FROM' table_exp where
    ;

// INSERT statement: target table, parenthesised column list, VALUES list.
// 'INSERT INTO' is one literal token, so the input must contain the two words
// separated by exactly one space.
insert_stmt
    : 'INSERT INTO' table_exp fields values
    ;

// UPSERT statement: target table with an optional SET clause.
// NOTE(review): UPSERT is not MySQL syntax; presumably the translator expands
// it per target database -- confirm against the code that consumes the parse.
upsert_stmt
    : 'UPSERT' table_exp set ?  
    ;

// VALUES clause of an INSERT: a parenthesised, comma-separated value list.
//
// The keyword and the opening parenthesis are separate literals. The previous
// single literal 'VALUES (' only matched when exactly one space separated the
// two, so the common spelling "VALUES(...)" was rejected. '(' is the same
// literal already used by the other parenthesised constructs in this grammar.
values
    : 'VALUES' '(' valuelist ')'
    ;

// One or more values separated by commas (right-recursive list).
//
// A stray trailing `where` reference was removed: it forced a WHERE clause
// after every value inside the VALUES parentheses, so no ordinary
// "VALUES (a, b, c)" list could be parsed.
valuelist
    : value (',' valuelist)?
    ;

// Parenthesised column list of an INSERT. It reuses groupbylist, which is a
// plain comma-separated list of identifiers.
fields
    : '(' groupbylist ')'
    ;

// SET clause shared by UPDATE and UPSERT.
set
    : 'SET' (setlist)
    ;

// One or more assignments separated by commas (right-recursive list).
setlist
    : setters (',' setlist)?
    ;

// The SELECT keyword followed by the projected columns.
select
    : 'SELECT' fieldlist
    ;

// WHERE clause: the keyword followed by a condition chain.
where
    : 'WHERE' where_stmt 
    ;

// LIMIT clause with one or two arguments; each argument is an integer literal
// or an @parameter.
limit
    : 'LIMIT' (INT|param) (',' (INT|param))?
    ;

// GROUP BY clause. 'GROUP BY' is one literal token, so the two words must be
// separated by exactly one space in the input.
groupby
    : 'GROUP BY' groupbylist    
    ;

// ORDER BY clause. 'ORDER BY' is one literal token, so the two words must be
// separated by exactly one space in the input.
orderby
    : 'ORDER BY' orderbylist
    ;

// Source of a SELECT: either a table with an optional chain of joins, or a
// parenthesised sub-select, which must carry an alias. The sub-select
// alternative does not accept joins.
select_table_exp
    : table_exp (join_stmt)?    
    | '(' select_stmt ')' as
    ;

// A table reference: an (optionally qualified) identifier with an optional alias.
table_exp
    : identifier as?
    ;

// Comma-separated projection list (right-recursive). Each item is one of:
//   - an aggregate/string function call, alias required
//   - a column identifier, alias optional
//   - '*' on its own or behind a table prefix
//   - a parenthesised sub-select, alias required
fieldlist
    : 
      (
          function as
        | (identifier as?)
        | identifier_prefix? '*' // bare * or prefixed star such as `t`.* / t.*
        | '(' select_stmt ')' as
      ) (',' fieldlist)?
    ;

// Supported function calls. Each function name and its opening parenthesis
// form one literal token (e.g. 'COUNT('), so no whitespace may appear between
// the name and '('.
//   COUNT      takes an identifier or '*'
//   MAX / MIN / AVERAGE take a single identifier
//   CONCAT     takes a comma-separated identifier list
//   CONCAT_WS  takes a CHAR token (one single-quoted character) as separator,
//              then a comma-separated identifier list
function
    : 'COUNT(' (identifier|'*') ')' 
    | 'MAX(' identifier ')'
    | 'MIN(' identifier ')'
    | 'AVERAGE(' identifier ')'
    | 'CONCAT(' groupbylist ')'
    | 'CONCAT_WS(' CHAR ',' groupbylist ')' 
    ;

// Comma-separated sort keys (right-recursive); each key may be followed by an
// explicit ASC or DESC direction.
orderbylist
    : identifier ('ASC'|'DESC')? (',' orderbylist)? 
    ;

// Comma-separated identifiers (right-recursive). Also reused for the INSERT
// column list and the CONCAT / CONCAT_WS arguments.
groupbylist
    : identifier (',' groupbylist)? 
    ;

// Alias: the AS keyword followed by a bare or backtick-quoted name.
// An alias cannot carry a table prefix.
as
    : 'AS' (ID | '`' ID '`')    
    ;

// Chain of one or more joins: a LEFT, RIGHT or plain JOIN, optionally followed
// by further joins (right recursion).
join_stmt
    : ( leftjoin
      | rightjoin
      | join
      ) join_stmt?
    ;

// LEFT JOIN against a table or an aliased sub-select, then its ON condition.
leftjoin
    : 'LEFT JOIN'
      ( table_exp
      | '(' select_stmt ')' as
      )
      on_stmt
    ;

// RIGHT JOIN against a table or an aliased sub-select, then its ON condition.
rightjoin
    : 'RIGHT JOIN'
      ( table_exp
      | '(' select_stmt ')' as
      )
      on_stmt
    ;

// Plain JOIN against a table or an aliased sub-select, then its ON condition.
join
    : 'JOIN'
      ( table_exp
      | '(' select_stmt ')' as
      )
      on_stmt
    ;

// Join condition; uses the same condition chain as a WHERE clause.
on_stmt
    : 'ON' where_stmt
    ;

// Condition chain: single conditions joined by AND / OR (right-recursive).
// The rule gives AND and OR no relative precedence and offers no
// parenthesised grouping of conditions.
where_stmt
    : where_exp ((BOOLEANAND|BOOLEANOR) where_stmt)?
    ;

// A single condition, in one of three shapes:
//   - identifier on the left (see where_exp_identifier)
//   - literal/parameter on the left, identifier on the right
//   - parenthesised sub-select on the left, identifier on the right
where_exp
    : where_exp_identifier
    | value (WHEREOPERATORS|EQUALITYOPERATOR) identifier
    | '(' select_stmt ')' (WHEREOPERATORS|EQUALITYOPERATOR) identifier
    ;

// Conditions that start with an identifier: a comparison, a numeric BETWEEN
// range (its bounds are separated by the AND token), or a LIKE match against
// a string_value.
where_exp_identifier
    : identifier (WHEREOPERATORS|EQUALITYOPERATOR) where_exp_identifier_operator_right
    | identifier 'BETWEEN' (numeric_value BOOLEANAND numeric_value)
    | identifier 'LIKE' string_value
    ;

// Right-hand side of an identifier comparison: a literal/parameter, another
// identifier, or a parenthesised sub-select.
where_exp_identifier_operator_right
    : value
    | identifier
    | '(' select_stmt ')'
    ;

// A single assignment in a SET clause. The identifier may appear on either
// side of '='; the other side is a literal/parameter or a parenthesised
// sub-select.
setters
    : identifier EQUALITYOPERATOR value
    | value EQUALITYOPERATOR identifier
    | identifier EQUALITYOPERATOR '(' select_stmt ')'
    | '(' select_stmt ')' EQUALITYOPERATOR identifier
    ;

// Either a literal/parameter or an identifier.
// NOTE(review): no other rule shown in this grammar references this rule.
identifier_value
    : value
    | identifier
    ;

// A literal (string or number) or an @parameter.
value
    : string_value
    | numeric_value
    | param
    ;

// Textual literal: a double-quoted STRING or a single-quoted CHAR
// (CHAR holds exactly one character or one escape sequence).
string_value
    : STRING
    | CHAR
    ;

// Numeric literal: integer or floating point.
numeric_value
    : INT
    | FLOAT
    ;

// Qualifier in front of a name: a bare name and a dot, or a backtick-quoted
// name and a dot. In the quoted form the closing backtick and the dot are one
// literal token ('`.'), so nothing may separate them.
identifier_prefix
    : ID '.'
    | '`' ID '`.'   
    ;

// A bare or backtick-quoted name with at most one optional qualifier in front.
identifier
    : identifier_prefix? ID 
    | identifier_prefix? '`' ID '`'
    ;

// Named query parameter: '@' followed by a name.
param
    : '@' ID
    ;

// Logical AND keyword (also separates the two bounds of BETWEEN).
BOOLEANAND
    : 'AND'
    ;

// Logical OR keyword.
BOOLEANOR
    : 'OR'
    ;

// '=' -- used both as a comparison operator and as the assignment sign in SET.
EQUALITYOPERATOR
    : '='
    ;

// All comparison operators other than '='.
WHEREOPERATORS
    : ('!='|'<>'|'<'|'>'|'<='|'>=')
    ;

// Name: an ASCII letter or underscore followed by any number of ASCII letters,
// digits or underscores.
ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
    ;

// Unsigned integer literal: one or more decimal digits.
INT :   '0'..'9'+
    ;

// Unsigned floating-point literal in one of three forms:
//   digits '.' optional-digits optional-exponent   (e.g. 1.  1.5  1.5e3)
//   '.' digits optional-exponent                   (e.g. .5  .5e-3)
//   digits exponent                                (e.g. 1e10)
FLOAT
    :   ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
    |   '.' ('0'..'9')+ EXPONENT?
    |   ('0'..'9')+ EXPONENT
    ;

// Comments, sent to the hidden channel so the parser never sees them:
//   - line comment: '//' up to and including the terminating newline
//     (the rule requires that newline to be present)
//   - block comment: '/*' ... '*/', matched non-greedily so it ends at the
//     first '*/'
COMMENT
    :   '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
    |   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// Whitespace (space, tab, CR, LF), one character per token, sent to the
// hidden channel.
WS  :   ( ' '
        | '\t'
        | '\r'
        | '\n'
        ) {$channel=HIDDEN;}
    ;

// Double-quoted string literal: any number of escape sequences or characters
// other than backslash and double quote.
STRING
    :  '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
    ;

// Single-quoted character literal: EXACTLY ONE escape sequence or one
// character other than single quote and backslash. The subrule has no '*' or
// '+', so multi-character text in single quotes does not match this rule.
CHAR:  '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
    ;

// Fragment: exponent part of a FLOAT -- 'e' or 'E', optional sign, digits.
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

// Fragment: one hexadecimal digit (either case).
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;

// Fragment: escape sequence inside STRING / CHAR -- a backslash followed by
// one of b t n f r " ' \, or a unicode escape, or an octal escape.
fragment
ESC_SEQ
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   UNICODE_ESC
    |   OCTAL_ESC
    ;

// Fragment: octal escape with one to three octal digits; the three-digit form
// restricts its first digit to 0..3.
fragment
OCTAL_ESC
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Fragment: unicode escape -- backslash, 'u', exactly four hex digits.
fragment
UNICODE_ESC
    :   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;

我远远算不上 ANTLR 专家。为了让语法在那个位置正确接受一个字符串,我已经折腾了 3 个小时,最后决定向大家求助。也许这里的 ANTLR 高手能帮我解决这个问题。

非常感谢

4

1 回答 1

0

您的输入 'hello' 没有被标记为字符串,因为您的语法中 STRING 定义的是用双引号括起来的字符串;用单引号括起来的是 CHAR,而 CHAR 只能匹配恰好一个字符(或一个转义序列),例如 'h'。因此 'hello' 既不是 STRING,也不是 CHAR:

// Double-quoted: any number of characters between the quotes.
STRING
    :  '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
    ;

// Single-quoted: the subrule has no '*', so exactly ONE character (or one
// escape sequence) is allowed between the quotes.
CHAR:  '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
    ;

这就是错误的原因。要解决这个问题,可以在查询中改用双引号("hello"),或者修改词法规则,让单引号括起来的内容也能包含任意多个字符(例如给 CHAR 规则中的子规则加上 `*`)。

于 2012-08-11T19:53:25.193 回答