4

我目前正在构建一个 Java 反编译器。

为了辅助模式识别,我正在通过ANTLR构造一个简单的语法,并使用ANTLRWorks解释器进行调试。

以下是到目前为止的初步语法。在沿着这条路线前进时,我假设我能够将某些 JVM 字节码简化为下面的语法可以检测到的表达式。

您认为这种方法存在哪些问题?

更新:已于 6 月 29 日 2:36(GMT+1)根据 Ira 的评论更新了语法。

    grammar JVM;

options {k=3;}

// ---------------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------------

// Blanks, carriage returns, newlines and tabs are sent to the hidden channel,
// so the parser rules never have to mention whitespace.
WS  :   (' '|'\r'|'\n'|'\t')+ {$channel=HIDDEN;}
    ;

// Unsigned decimal operand (used after mnemonics such as 'goto' or 'getfield').
INT :   ('0'..'9')+ ;

// "_<digits>" suffix of the short-form mnemonics (aload_0, dload_1, ...).
// A token rule has to consume at least one character: the previous definition
// ('_' INT)? could match the empty string. The suffix is now mandatory here
// and made optional at each lexer use site below (UINT?), so LOADREFERENCE,
// DLOAD, DCONST and ICONST accept exactly the same text as before.
UINT    :   '_' INT;

// Integer compare-with-zero branches, one token per mnemonic.
IFEQ    :   'ifeq';
IFGE    :   'ifge';
IFGT    :   'ifgt';
IFLE    :   'ifle';
IFLT    :   'iflt';
IFNE    :   'ifne';

// Reference comparison branch. The JVM instruction set defines only
// if_acmpeq and if_acmpne, so the lt/ge/gt/le variants were dropped.
IFACMP_CONDTYPE :   'if_acmp' ('eq'|'ne');

// THIS :   'aload_0';

// Constant-pool loads; the longer spellings are listed first.
LDC :   'ldc2_w'|'ldc_w'|'ldc';

// Reference load: 'aload' with an optional "_<n>" suffix.
LOADREFERENCE
//  :   THIS
    : 'aload' UINT?;
//  | 'aload_2'
//  | 'aload_3';

// Double load: 'dload' with an optional "_<n>" suffix.
DLOAD   :   'dload' UINT?;

// Int load: only the four short forms iload_0 .. iload_3 are recognised.
LOADINT :   'iload_0'
    |   'iload_1'
    | 'iload_2'
    | 'iload_3'
    ;

// Double / int constant pushes with an optional "_<n>" suffix.
DCONST  :   'dconst' UINT?;
ICONST  :   'iconst' UINT?;

// Start rule: one or more statements as defined by jvmStatement2.
goal
    : jvmStatement2+
    ;

// Earlier statement rule, kept commented out for reference:
//fragment
//jvmStatement1
//  :   returnStatement
//  | newArrayStatement
//  | storeStatement
//  | assignmentStatement
//  | assertStatement
//  | invokeStatement
//  | ifStatement
//  | gotoStatement
//  ;

// Statement rule currently used by goal (variant written to test assert).
// The trailing numbers are the author's own alternative labels.
fragment
jvmStatement2
    : returnStatement       // 2
    | newArrayStatement     // 3
    | storeStatement        // 4
    | invokeStatement       // 5
    | assignmentStatement   // 6
    | assertStatement       // 7
    | ifStatement           // 8
    | gotoStatement
    ;

// Sequence: ifStatement, integer constant, goto, integer constant, putstatic.
fragment setAssertionStatus
    : ifStatement
      pushIntegerConstant
      gotoStatement
      pushIntegerConstant
      setStaticFieldInClass
    ;

// A LOADREFERENCE token followed by 'getfield' and its INT operand.
fragment fetchFieldFromObject
    : LOADREFERENCE 'getfield' INT
    ;

// Local-variable loads, one rule per operand type.
fragment loadDoubleFromLocalVariable
    : DLOAD
    ;

fragment loadFloatFromLocalVariable
    : 'fload' UINT
    ;

fragment loadIntFromLocalVariable
    : LOADINT
    ;

fragment loadLongFromLocalVariable
    : 'lload' UINT
    ;

// Load of a reference from a local variable.
// The lexer already recognises the whole mnemonic as the LOADREFERENCE token
// ('aload' plus its suffix). Spelling the rule as 'aload' UINT added a second,
// competing 'aload' literal token to the lexer, and the parser then waited for
// a token pair the lexer does not produce for input such as "aload_0".
// Using the token directly matches how the double and int load rules use
// DLOAD and LOADINT, and how constantExpr already refers to LOADREFERENCE.
fragment
loadReferenceFromLocalVariable
    :   LOADREFERENCE;

// Load of a reference from an array: the 'aaload' mnemonic.
fragment
loadReferenceFromArray
    :   'aaload';

// Store of a reference value.
// This used to delegate to storeIntoByteOrBooleanArray ('bastore'), which
// stores a byte/boolean array element, not a reference. Its only user,
// newByteArrayWithNull, is documented with "byte[] c = null;" and
// "String s = null;", i.e. a null push followed by a store into a local
// variable, so the rule now delegates to storeReferenceIntoLocalVariable.
// NOTE(review): "aconst_null astore_<n>" is also matched by the first
// alternative of assignmentStatement; check the ANTLR ambiguity report.
fragment
storeReference
    : storeReferenceIntoLocalVariable;

// 'astore' followed by a UINT suffix token.
fragment
storeReferenceIntoLocalVariable
    :   'astore' UINT;

// Store of a double into a local variable.
// Accepts both the explicit-operand form ("dstore 4") and the short form
// ("dstore_1"). The rule previously allowed only INT, so the short form was
// rejected although the float/long/int store rules accept a UINT suffix and
// preDecrementDouble relies on this rule.
fragment
storeDoubleIntoLocalVariable
    :   'dstore' (INT|UINT);

// ---- stores into local variables ----

fragment storeFloatIntoLocalVariable
    : 'fstore' UINT
    ;

// Accepts either an INT operand or a UINT suffix after 'istore'.
fragment storeIntIntoLocalVariable
    : 'istore' (INT|UINT)
    ;

fragment storeLongIntoLocalVariable
    : 'lstore' UINT
    ;

// ---- stores into arrays ----

fragment storeIntoByteOrBooleanArray
    : 'bastore'
    ;

fragment storeIntoReferenceArray
    : 'aastore'
    ;

// ---- constant pushes ----

fragment pushNull
    : 'aconst_null'
    ;

fragment pushByte
    : 'bipush' INT
    ;

fragment pushIntegerConstant
    : ICONST
    ;

fragment pushDoubleConstant
    : DCONST
    ;

fragment pushLongConstant
    : 'lconst' UINT
    ;

fragment pushFloatConstant
    : 'fconst' UINT
    ;

// An LDC token (ldc, ldc_w or ldc2_w) followed by its INT operand.
fragment pushItemFromRuntimeConstantPool
    : LDC INT
    ;


// A single call argument: a constant expression or an anonymous-class creation.
fragment
invokeStatementArgument
    : constantExpr
    | createAnonymousClass
    ;

// Sequence: createNewObject, dup, thisInstance.
fragment
createAnonymousClass
    : createNewObject dup thisInstance
    ;

// Zero or more arguments.
fragment
invokeStatementArguments
    : invokeStatementArgument*
    ;

// Optional 'getstatic', then the arguments, then the invoke instruction.
fragment
invokeStatement
    : getStaticField? invokeStatementArguments invokeMethod
    ;

// Any of the three supported invoke instructions.
fragment
invokeMethod
    : invokeInstanceMethod
    | invokeVirtualMethod
    | invokeStaticMethod
    ;

// Each invoke mnemonic is followed by an INT operand.
fragment
invokeInstanceMethod
    : 'invokespecial' INT
    ;

fragment
invokeVirtualMethod
    : 'invokevirtual' INT
    ;

fragment
invokeStaticMethod
    : 'invokestatic' INT
    ;

// 'newarray' followed by a primitive type name.
fragment newArrayStatement
    : 'newarray' simpleType
    ;

// Field writes: mnemonic followed by an INT operand.
fragment setFieldInObject
    : 'putfield' INT
    ;

fragment setStaticFieldInClass
    : 'putstatic' INT
    ;

// Primitive type names accepted after 'newarray'.
fragment simpleType
    : 'boolean'
    | 'byte'
    | 'char'
    | 'double'
    | 'float'
    | 'int'
    | 'long'
    | 'short'
    ;

// ---- return instructions ----

fragment returnVoid
    : 'return'
    ;

// Any value-returning return instruction.
fragment returnSimpleType
    : returnReference
    | returnDouble
    | returnFloat
    | returnInteger
    | returnLong
    ;

fragment returnReference
    : 'areturn'
    ;

fragment returnDouble
    : 'dreturn'
    ;

fragment returnFloat
    : 'freturn'
    ;

fragment returnInteger
    : 'ireturn'
    ;

fragment returnLong
    : 'lreturn'
    ;

// Either a bare 'return', or a constant expression followed by a typed return.
fragment returnStatement
    : returnVoid
    | constantExpr returnSimpleType
    ;

// ---- stack duplication ----

fragment dupX1
    : 'dup_x1'
    ;

fragment dup
    : 'dup'
    ;

// Store forms accepted as a statement: reference/int local stores,
// 'putstatic', 'aastore' and 'putfield'.
fragment storeStatement
    : storeReferenceIntoLocalVariable
    | storeIntIntoLocalVariable
    | setStaticFieldInClass
    | storeIntoReferenceArray
    | setFieldInObject
    ;

// ---- conversions from double ----

fragment convertDouble
    : convertDoubleToFloat
    | convertDoubleToInt
    | convertDoubleToLong
    ;

fragment convertDoubleToFloat : 'd2f' ;

fragment convertDoubleToInt   : 'd2i' ;

fragment convertDoubleToLong  : 'd2l' ;

// ---- conversions from float ----

fragment convertFloat
    : convertFloatToDouble
    | convertFloatToInt
    | convertFloatToLong
    ;

fragment convertFloatToDouble : 'f2d' ;

fragment convertFloatToInt    : 'f2i' ;

fragment convertFloatToLong   : 'f2l' ;

// ---- conversions from int ----

fragment convertInt
    : convertIntToByte
    | convertIntToChar
    | convertIntToDouble
    | convertIntToFloat
    | convertIntToLong
    | convertIntToShort
    ;

fragment convertIntToByte   : 'i2b' ;

fragment convertIntToChar   : 'i2c' ;

fragment convertIntToDouble : 'i2d' ;

fragment convertIntToFloat  : 'i2f' ;

fragment convertIntToLong   : 'i2l' ;

fragment convertIntToShort  : 'i2s' ;

// Any conditional branch: reference comparison, int comparison,
// int comparison with zero, or a null / non-null test.
fragment branchComparison
    : branchIfReferenceComparison
    | branchIfIntComparison
    | branchIfIntComparisonWithZero
    | branchIfReferenceNotNull
    | branchIfReferenceNull
    ;

// Reference comparison branch (if_acmp<cond>).
// The lexer defines IFACMP_CONDTYPE for the complete mnemonic. Spelling the
// rule as 'if_acmp' condType added a separate 'if_acmp' literal token that
// competes with IFACMP_CONDTYPE on the same input, so the token is used
// directly. As before, no branch-target operand is consumed by this rule.
fragment
branchIfReferenceComparison
    :   IFACMP_CONDTYPE;

// 'if_icmp', a condition suffix, then the INT operand.
fragment branchIfIntComparison
    : 'if_icmp' condType INT
    ;

// One of the six compare-with-zero branch tokens, then the INT operand.
fragment branchIfIntComparisonWithZero
    : (IFEQ|IFGE|IFGT|IFLE|IFLT|IFNE) INT
    ;

fragment gotoStatement
    : 'goto' INT
    ;

// Only IFEQ and IFNE (each with its INT operand) terminate an if statement.
fragment ifStatementCompare
    : IFEQ INT
    | IFNE INT
    ;

// A boolean expression followed by the IFEQ/IFNE branch.
fragment ifStatement
    : booleanExpression ifStatementCompare
    ;

// The six compare-with-zero mnemonics written as literals.
fragment ifType
    : 'ifeq'
    | 'ifne'
    | 'iflt'
    | 'ifge'
    | 'ifgt'
    | 'ifle'
    ;

fragment branchIfReferenceNotNull
    : 'ifnonnull'
    ;

fragment branchIfReferenceNull
    : 'ifnull'
    ;

// Condition suffixes used after 'if_icmp'.
fragment condType
    : 'eq'
    | 'ne'
    | 'lt'
    | 'ge'
    | 'gt'
    | 'le'
    ;

fragment checkCast
    : 'checkcast' INT
    ;

// A constant expression, then 'anewarray' and its INT operand.
fragment createNewArrayOfReference
    : constantExpr 'anewarray' INT
    ;

// 'new' followed by an INT operand.
fragment createNewObject
    : 'new' INT
    ;

// Assignment-like statements: one or more constant expressions followed by a
// store, an inherited-constructor call, or an expression statement.
fragment assignmentStatement
//  : pushItemFromRuntimeConstantPool storeStatement
    : constantExpr+ storeStatement
    | invokeInheritedConstructor
    | expressionStatement
//  | setAssertionStatus
    ;

// A reference load followed by 'invokespecial'.
fragment invokeInheritedConstructor
    : loadReferenceFromLocalVariable invokeInstanceMethod
    ;

fragment throwExceptionOrError
    : 'athrow'
    ;

fragment getStaticField
    : 'getstatic' INT
    ;

// Same shape as createNewObject: 'new' followed by an INT operand.
fragment newInstance
    : 'new' INT
    ;

// Expressions accepted as a condition.
// This needs to be extended to recognize more patterns.
fragment
booleanExpression
    : integerComparison
    | loadIntFromLocalVariable
    | invokeMethod
    ;

// Two int loads followed by an 'if_icmp<cond>' branch.
fragment
integerComparison
    : loadIntFromLocalVariable loadIntFromLocalVariable branchIfIntComparison
    ;

// ---- assert pattern, built from the four pieces below ----

// 'getstatic' followed by a compare-with-zero branch.
fragment
assertIfAssertEnabled
    : getStaticField branchIfIntComparisonWithZero
    ;

// The asserted condition followed by a compare-with-zero branch.
fragment
assertCondition
    : booleanExpression branchIfIntComparisonWithZero
    ;

// Object creation, dup, the message, then 'athrow'.
fragment
assertThrow
    : createNewObject dup assertMessage throwExceptionOrError
    ;

// A constant-pool load followed by an invoke instruction.
fragment
assertMessage
    : pushItemFromRuntimeConstantPool invokeMethod
    ;

fragment
assertStatement
    : assertIfAssertEnabled assertCondition assertThrow
    ;

// Constant-pool load and invoke, a reference load, then three more invokes.
fragment
stringPlusNumber
    : pushItemFromRuntimeConstantPool invokeMethod
      loadReferenceFromLocalVariable invokeMethod invokeMethod invokeMethod
    ;

// Thin wrapper around statementExpression.
fragment
expressionStatement
    : statementExpression
    ;

// Expression patterns that may stand alone as a statement.
fragment
statementExpression
    : preIncrementExpression
    | preDecrementExpression
//  | postIncrementExpression
//  | postDecrementExpression
    | newByteArray
    | ternaryExpression
    | createAndStoreObject      // assignment expression
    | createNewArrayStatement
    | fetchFieldFromObject
    ;

// Reference-array creation followed by one or more element initialisers
// (array creation with elements).
fragment
createNewArrayStatement
    : createNewArrayOfReference createNewArrayInitElement+
    ;

// One element initialiser: dup, constant expression, 'getstatic', store.
// Declared without the 'fragment' modifier, as in the original grammar.
createNewArrayInitElement
    : dup constantExpr getStaticField storeStatement
    ;

// Object creation, dup, an invoke statement, then a store.
fragment
createAndStoreObject
    : createNewObject dup invokeStatement storeStatement
    ;

// Ternary pattern on int locals; doesn't cover all situations yet.
fragment
ternaryExpression
    : loadIntFromLocalVariable ifStatementCompare
      loadIntFromLocalVariable gotoStatement
      loadIntFromLocalVariable storeStatement
    ;

// Only the integer pre-increment pattern is covered.
fragment
preIncrementExpression
    : preIncrementInteger
    ;

// Pre-decrement is covered for float, long and double operands.
fragment
preDecrementExpression
    : preDecrementFloat
    | preDecrementLong
    | preDecrementDouble
    ;

// Typed aliases for the constant-push rules.
fragment
doubleExpression
    : pushDoubleConstant
    ;

fragment
integerExpression
    : pushIntegerConstant
    ;

fragment
longExpression
    : pushLongConstant
    ;

fragment
floatExpression
    : pushFloatConstant
    ;

// Reference load, dup, field fetch, integer constant, 'iadd',
// optional 'dup_x1', then 'putfield'.
fragment
preIncrementInteger
    : loadReferenceFromLocalVariable dup fetchFieldFromObject integerExpression
      iAdd dupX1? setFieldInObject
    ;

// Load, constant, subtract, store - one rule per operand type.
fragment
preDecrementDouble
    : loadDoubleFromLocalVariable doubleExpression dSub storeDoubleIntoLocalVariable
    ;

fragment
preDecrementLong
    : loadLongFromLocalVariable longExpression lSub storeLongIntoLocalVariable
    ;

fragment
preDecrementFloat
    : loadFloatFromLocalVariable floatExpression fSub storeFloatIntoLocalVariable
    ;

// Byte-array creation, either assigned null or initialised with data.
fragment
newByteArray
    : newByteArrayWithNull
    | newByteArrayWithData
    ;

// Example source: byte[] b = {'c', 'h', 'u', 'a'};
fragment
newByteArrayWithData
    : constantExpr newArrayStatement byteArrayElements
    ;

// Two constant expressions followed by 'bastore'.
fragment
byteArrayElements
    : constantExpr constantExpr storeIntoByteOrBooleanArray
    ;

// Anything treated as a simple operand: loads from locals, constant pushes,
// constant-pool loads, null, and field fetches.
fragment
constantExpr
    : //loadReferenceFromLocalVariable
      LOADREFERENCE
    | loadDoubleFromLocalVariable
    | loadFloatFromLocalVariable
    | loadIntFromLocalVariable
    | loadLongFromLocalVariable
    | pushByte
    | pushDoubleConstant
    | pushFloatConstant
    | pushIntegerConstant
    | pushItemFromRuntimeConstantPool
    | pushLongConstant
    | pushNull
    | fetchFieldFromObject
    ;

// Example sources:
//   byte[] c = null;
//   String s = null;
fragment
newByteArrayWithNull
    : pushNull checkCast? storeReference
    ;

// A LOADREFERENCE token followed by an invoke instruction.
fragment
thisInstance
    : LOADREFERENCE invokeMethod
    ;

// IFEQ/IFNE branch, integer constant, goto, integer constant, 'putstatic'.
fragment
ternaryOperator
    : ifStatementCompare pushIntegerConstant gotoStatement
      pushIntegerConstant setStaticFieldInClass
    ;

// Two constant expressions followed by dMul.
// NOTE(review): the rule is named "float" but ends in 'dmul' - confirm
// whether 'fmul' was intended.
fragment
floatMultiply
    : constantExpr constantExpr dMul
    ;

// ---- arithmetic mnemonics ----
fragment iAdd : 'iadd' ;
fragment dSub : 'dsub' ;
fragment fSub : 'fsub' ;
fragment lSub : 'lsub' ;
fragment lAdd : 'ladd' ;
fragment dMul : 'dmul' ;

例如,当前的语法(上述语法的进一步演进版本)可以将下面的字节码

getstatic 25
ifne 25
iload_1
iload_2
if_icmpgt 25
new 25
dup
invokespecial 44
athrow
return

转换为如下所示的解析树:

在此处输入图像描述

4

2 回答 2

2

如果您只想识别单条 JVM 指令,那么用语法来做也许可行,但您可能要花不少时间反复调整语法才能把细节弄对。这很可能纯属小题大做。用字节操作码驱动的有限状态自动机(FSA),以一个巨大的 case 语句来实现,可能会更容易;毕竟,JVM 指令本来就被设计成易于解码,这样速度尚可的解释器才能执行它们。

凭模糊的记忆,类文件中还有其他部分(各种表,例如常量表)。您大概也可以用解析器来识别它们,但这同样可能是小题大做。

识别出指令和表信息之后,您还会遇到第二个问题:解析器生成器通常是帮助您构建某种 AST 的。而这些指令并不是 AST;它们至少是一条线性链,如果把跳转目标也算进去,它们就构成了一个引用这些表的图。所以我猜您最终会费很大力气,才能让语义动作按您想要的方式收集数据。

您真正想要捕获的很可能正是这个图。如果这个图具有某种层次结构(源自结构化编程语言),您多半会希望把这种层次结构找出来。在这一点上,解析器方法帮不上任何忙。

于 2012-06-20T13:17:32.710 回答
1

这种方法在识别嵌套的调用参数时存在问题。

例如,给定声明,

int func1(int x, int y, int z) {
    return 0;
}

int func0() {
    return 0;
}

和以下调用

Object[] x = new Object[func1(2, 3, 4)];
x = new Object[func0()];
x = new Object[func1(func1(func1(0, 1, 2), 3, 4), 5, 6)];

它生成以下字节码:

Offset  Instruction       Comments (Method: none)
0       aload_0           (cheewee.helloworld.test000031_applet this)
1       iconst_2
2       iconst_3
3       iconst_4
4       invokevirtual 79  (cheewee.helloworld.test000031_applet.func1)
7       anewarray 81      (java.lang.Object)
10      astore_1          (java.lang.Object[] x)
11      aload_0           (cheewee.helloworld.test000031_applet this)
12      invokevirtual 83  (cheewee.helloworld.test000031_applet.func0)
15      anewarray 81      (java.lang.Object)
18      astore_1          (java.lang.Object[] x)
19      aload_0           (cheewee.helloworld.test000031_applet this)
20      aload_0           (cheewee.helloworld.test000031_applet this)
21      aload_0           (cheewee.helloworld.test000031_applet this)
22      iconst_0
23      iconst_1
24      iconst_2
25      invokevirtual 79  (cheewee.helloworld.test000031_applet.func1)
28      iconst_3
29      iconst_4
30      invokevirtual 79  (cheewee.helloworld.test000031_applet.func1)
33      iconst_5
34      bipush 6
36      invokevirtual 79  (cheewee.helloworld.test000031_applet.func1)
39      anewarray 81      (java.lang.Object)
42      astore_1          (java.lang.Object[] x)
43      return

它无法检测到涉及嵌套。我不确定这是否是 ANTLR 的限制,或者这是否是我学习如何编写 ANTLR 语法的限制。

下一步将使用混合方法,首先将字节码组简化为标记(以便将它们识别为更简单的模式),然后将其传递给解析器以检测更高级别的模式。

于 2012-06-28T10:14:16.767 回答