我想解析一个 ASCIIMath 表达式并将其转换为 MathML。但是,结果不用于在网页上显示,所以我不能使用 MathJax 或原生解析器(它们是用 JS 写的)。我使用的语言是 PHP。有一个 ASCIIMathMLPHP,但它已经过时了,也不完全适合我的需要;例如,它不支持绝对值符号。
我在解析(以及 RegEx)方面完全没有经验,所以我可能一开始就走错了路。我的方法是使用一个 RegEx 来提取表达式的标记,遍历它们,并继续用同样的方法(RegEx)分解它们,直到无法再分解为止,最后将这些最小的标记附加到一个 DOMNode,最终附加到一个 DOMDocument 并输出它。我希望到目前为止听起来不错。
问题
- 概念:如果我使用由 get_regex_for( SIMPLE_EXPRESSION ) 和 get_regex_for( INTERMEDIATE_EXPRESSION ) 产生的 RegEx,我会在编译时得到 "recursive call could loop indefinitely" 错误,但使用 get_regex_for( EXPRESSION ) 时却不会。这是为什么?
- 实用:如果我使用由 get_regex_for( EXPRESSION ) 生成的 RegEx,它几乎可以匹配所有内容。但是,我似乎无法修改表达式,使捕获组正确捕获我想要的所有标记。有办法吗?
- 实用(已在上文解决):到目前为止我发现,为了不匹配没有内容的分组括号而在 RegEx 中添加 `?+`、`.`、`?` 或 `,.`,似乎会导致灾难性的回溯。我知道原子组,但不确定是否应该在这里使用它们。有什么建议吗?结论:我不应该在 E 后面添加 `?`,因为我不想在其他任何地方匹配任何零长度内容——它总是会成功匹配零长度,这将导致无限递归。相反,我应该把 lEr 改为 lE?r。更改现在已反映在下面的代码中。
- 概念性:Q1 的延续:如果我
把 `(?&S)|` 添加到 get_regex_for( EXPRESSION ) 生成的 RegEx 前面,我不会收到任何错误。那么为什么会出现 Q1 中的错误呢?此外,如果我在前面添加 `(?&I)(\/(?&I))?|`,标记似乎可以匹配,但这看起来不是多余的吗?因为如果你看看下面的语法,这基本上就是 E 所代表的内容。
- 实用:现在解析不平衡的括号很慢。有没有办法避免这种缓慢?
- 实用:有什么比弄清楚这些事情更好的方法可以节省大量时间?
下面是我的完整类代码,供参考。它远未完成(尤其是符号列表和回调),但让我们只关注方法(也欢迎其他意见!):
<?php
// MathML element tags
const IDENTIFIER  = 'mi';
const NUMBER      = 'mn';
const OPERATOR    = 'mo';
const SQUARE_ROOT = 'msqrt';
const TEXT        = 'mtext';
const STYLE       = 'mstyle';
const SPACE       = 'mspace';

// MathML expression tags
const FRACTION        = 'mfrac';
const ROOT            = 'mroot';
const SUBSCRIPT       = 'msub';
const SUPERSCRIPT     = 'msup';
const SUB_SUPERSCRIPT = 'msubsup';

// MathML format tags
const OVER       = 'mover';
const UNDER      = 'munder';
const UNDER_OVER = 'munderover';
const FENCED     = 'mfenced';

// Parameter specifiers used inside the 'args' lists of the symbol tables
const HEX       = 0;
const AS_IS     = 1;
const HIDDEN    = 2;
const ATTRIBUTE = 3;
const SYMBOL    = 4;
const OUTER     = 5;

// Argument marker used by the 'transform' callback entry ('rarr')
const TO = 0;

// Grammar symbol types: the $type accepted by get_regex_for() and the index
// into its $defined tracker. Values intentionally overlap the specifiers above.
const CONSTANT                = 0;
const UNARY                   = 1;
const BINARY                  = 2;
const LEFT                    = 3;
const RIGHT                   = 4;
const SIMPLE_EXPRESSION       = 5;
const INTERMEDIATE_EXPRESSION = 6;
const EXPRESSION              = 7;
class Simple_Ascii_Math_Parser {
private $mathml;
private $math;
/**
 * Creates the output document with pretty-printing enabled and attaches the
 * root <math> element to it.
 */
private function __construct() {
    $document = new DOMDocument();
    $document->formatOutput = true;

    $this->mathml = $document;
    $this->create_math_element();
}
/**
 * Builds the <math> root element, tags it with the MathML namespace attribute,
 * stores it in $this->math and appends it to the document.
 */
private function create_math_element() {
    $root = $this->mathml->createElement( 'math' );
    $root->setAttribute( 'xmlns', 'http://www.w3.org/1998/Math/MathML' );

    $this->math = $root;
    $this->mathml->appendChild( $root );
}
/**
 * Returns the PCRE fragment for one grammar symbol.
 *
 * The first request for a symbol yields the definition of its named group
 * (V, U, B, L, R, S, I or E); every later request yields a subroutine call
 * such as `(?&S)`. This lets the mutually recursive rules S, I and E refer to
 * each other without expanding forever.
 *
 * The order in which nested symbols are requested decides where each named
 * group ends up being defined, so the argument order of the sprintf() calls
 * below must not be rearranged.
 *
 * @param int   $type    One of CONSTANT, UNARY, BINARY, LEFT, RIGHT,
 *                       SIMPLE_EXPRESSION, INTERMEDIATE_EXPRESSION, EXPRESSION.
 * @param array $defined By-reference tracker, indexed by symbol type, of the
 *                       groups already defined. Pass the same variable to
 *                       every call that contributes to one pattern. Missing
 *                       flags are filled in with false.
 * @return string Regex fragment without delimiters (alternatives are quoted for '/').
 * @throws InvalidArgumentException If $type is not a known grammar symbol.
 */
public static function get_regex_for( $type, &$defined = array() ) {
    // Accept an uninitialised or partially filled tracker without raising
    // undefined-index notices; flags that are already set are left untouched.
    $defined = ( is_array( $defined ) ? $defined : array() ) + array_fill( 0, 8, false );

    switch ( $type ) {
        case CONSTANT:
            if ( $defined[ CONSTANT ] ) {
                return '(?&V)';
            }
            $defined[ CONSTANT ] = true;
            // Fallback alternative: any single character that does not start a bracket or '|'.
            $any_other_char = sprintf(
                '(?!%s|%s|\\|).',
                self::get_regex_for( LEFT, $defined ),
                self::get_regex_for( RIGHT, $defined )
            );
            return sprintf(
                '(?P<V>%s|%s|%s)',
                '(?:[0-9]*+\.)?[0-9]+',
                self::get_regex_of( self::$__CONSTANT ),
                $any_other_char
            );

        case UNARY:
            if ( $defined[ UNARY ] ) {
                return '(?&U)';
            }
            $defined[ UNARY ] = true;
            return sprintf(
                '(?P<U>%s|%s)',
                self::get_regex_of( self::$__UNARY ),
                self::get_regex_of( self::$__SPECIAL_UNARY_FUNC )
            );

        case BINARY:
            if ( $defined[ BINARY ] ) {
                return '(?&B)';
            }
            $defined[ BINARY ] = true;
            return sprintf( '(?P<B>%s)', self::get_regex_of( self::$__BINARY ) );

        case LEFT:
            if ( $defined[ LEFT ] ) {
                return '(?&L)';
            }
            $defined[ LEFT ] = true;
            return sprintf( '(?P<L>%s)', self::get_regex_of( self::$__GROUPING_BRACKETS_LEFT ) );

        case RIGHT:
            if ( $defined[ RIGHT ] ) {
                return '(?&R)';
            }
            $defined[ RIGHT ] = true;
            return sprintf( '(?P<R>%s)', self::get_regex_of( self::$__GROUPING_BRACKETS_RIGHT ) );

        case SIMPLE_EXPRESSION:
            if ( $defined[ SIMPLE_EXPRESSION ] ) {
                return '(?&S)';
            }
            $defined[ SIMPLE_EXPRESSION ] = true;
            // S ::= uS | bSS | lE?r | '|'E?'|' | v   (built in exactly this order)
            $unary_form = sprintf(
                '%s%s',
                self::get_regex_for( UNARY, $defined ),
                self::get_regex_for( SIMPLE_EXPRESSION, $defined )
            );
            $binary_form = sprintf(
                '%s%s{2}',
                self::get_regex_for( BINARY, $defined ),
                self::get_regex_for( SIMPLE_EXPRESSION, $defined )
            );
            $bracket_form = sprintf(
                '%s%s?%s',
                self::get_regex_for( LEFT, $defined ),
                self::get_regex_for( EXPRESSION, $defined ),
                self::get_regex_for( RIGHT, $defined )
            );
            // Absolute value: optional expression between two literal '|' characters.
            $absolute_form = sprintf( '\\|%s?\\|', self::get_regex_for( EXPRESSION, $defined ) );
            $constant_form = self::get_regex_for( CONSTANT, $defined );
            return sprintf(
                '(?P<S>%s|%s|%s|%s|%s)',
                $unary_form,
                $binary_form,
                $bracket_form,
                $absolute_form,
                $constant_form
            );

        case INTERMEDIATE_EXPRESSION:
            if ( $defined[ INTERMEDIATE_EXPRESSION ] ) {
                return '(?&I)';
            }
            $defined[ INTERMEDIATE_EXPRESSION ] = true;
            // I ::= S, optionally followed by _S and/or ^S
            return sprintf(
                '(?P<I>(%s)(?:_(%s))?(?:\\^(%s))?)',
                self::get_regex_for( SIMPLE_EXPRESSION, $defined ),
                self::get_regex_for( SIMPLE_EXPRESSION, $defined ),
                self::get_regex_for( SIMPLE_EXPRESSION, $defined )
            );

        case EXPRESSION:
            if ( $defined[ EXPRESSION ] ) {
                return '(?&E)';
            }
            $defined[ EXPRESSION ] = true;
            // E ::= I, optionally followed by another E or by /I
            return sprintf(
                '(?P<E>%s(?:(?&E)|\\/(?&I))?)',
                self::get_regex_for( INTERMEDIATE_EXPRESSION, $defined )
            );

        default:
            // Previously fell through and returned null, which sprintf() would
            // silently splice into a pattern as an empty string.
            throw new InvalidArgumentException(
                sprintf( 'Unknown grammar symbol type: %s', var_export( $type, true ) )
            );
    }
}
/**
 * Turns the keys of a symbol table into a regex alternation.
 *
 * Keys are ordered longest first so that a longer token (e.g. "+-") is tried
 * before any shorter token that is its prefix (e.g. "+"), and each key is
 * quoted for use inside a '/'-delimited pattern.
 *
 * @param array $array Symbol table; only its keys are used.
 * @return string Alternatives joined by '|', or '' for an empty table.
 */
public static function get_regex_of( $array ) {
    $keys = array_keys( $array );
    // Longer key comes first
    usort( $keys, function( $e1, $e2 ) { return strlen( $e2 ) - strlen( $e1 ); } );

    $alternatives = array();
    foreach ( $keys as $key ) {
        $alternatives[] = preg_quote( $key, '/' );
    }

    // implode() always returns a string; the previous substr( '', 1 ) returned
    // false for an empty table on PHP < 8.
    return implode( '|', $alternatives );
}
/**
 * Constant symbols (grammar rule "v"), keyed by their ASCIIMath token.
 *
 * The keys are what get_regex_of() turns into the alternation for the V group.
 * Each entry carries a 'syntax' string, a 'callback' name and an 'args' list;
 * the callbacks themselves are not defined in this part of the file.
 *
 * Fixes relative to the earlier table: '>=' carried the syntax of '<=', and
 * 'int' / 'oint' carried the code points of 'vvv' / '^^^' instead of the
 * integral (U+222B) and contour integral (U+222E).
 */
private static $__CONSTANT = array(
    // lowercase greek symbols
    'alpha' => array( 'syntax' => 'alpha', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B1' ) ),
    'beta' => array( 'syntax' => 'beta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B2' ) ),
    'gamma' => array( 'syntax' => 'gamma', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B3' ) ),
    'delta' => array( 'syntax' => 'delta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B4' ) ),
    'epsi' => array( 'syntax' => 'epsi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B5' ) ),
    'epsilon' => array( 'syntax' => 'epsilon', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B5' ) ),
    'zeta' => array( 'syntax' => 'zeta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B6' ) ),
    'eta' => array( 'syntax' => 'eta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B7' ) ),
    'theta' => array( 'syntax' => 'theta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B8' ) ),
    'iota' => array( 'syntax' => 'iota', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03B9' ) ),
    'kappa' => array( 'syntax' => 'kappa', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03BA' ) ),
    'lambda' => array( 'syntax' => 'lambda', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03BB' ) ),
    'mu' => array( 'syntax' => 'mu', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03BC' ) ),
    'nu' => array( 'syntax' => 'nu', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03BD' ) ),
    'xi' => array( 'syntax' => 'xi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03BE' ) ),
    // hex = 03BF : omicron is not supported, use letter o instead...
    'pi' => array( 'syntax' => 'pi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C0' ) ),
    'rho' => array( 'syntax' => 'rho', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C1' ) ),
    // hex = 03C2 : final sigma is not supported...
    'sigma' => array( 'syntax' => 'sigma', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C3' ) ),
    'tau' => array( 'syntax' => 'tau', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C4' ) ),
    'upsilon' => array( 'syntax' => 'upsilon', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C5' ) ),
    'phi' => array( 'syntax' => 'phi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C6' ) ),
    'chi' => array( 'syntax' => 'chi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C7' ) ),
    'psi' => array( 'syntax' => 'psi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C8' ) ),
    'omega' => array( 'syntax' => 'omega', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03C9' ) ),
    // and their variations
    'varepsilon' => array( 'syntax' => 'varepsilon', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '025B' ) ),
    'vartheta' => array( 'syntax' => 'vartheta', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03D1' ) ),
    'varphi' => array( 'syntax' => 'varphi', 'callback' => 'output', 'args' => array( IDENTIFIER, HEX, '03D5' ) ),
    // uppercase greek symbols
    // note: uppercases are treated as operators
    'Gamma' => array( 'syntax' => 'Gamma', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '0393' ) ),
    'Delta' => array( 'syntax' => 'Delta', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '0394' ) ),
    'Theta' => array( 'syntax' => 'Theta', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '0398' ) ),
    'Lambda' => array( 'syntax' => 'Lambda', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '039B' ) ),
    'Xi' => array( 'syntax' => 'Xi', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '039E' ) ),
    'Pi' => array( 'syntax' => 'Pi', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '03A0' ) ),
    'Sigma' => array( 'syntax' => 'Sigma', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '03A3' ) ),
    'Phi' => array( 'syntax' => 'Phi', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '03A6' ) ),
    'Psi' => array( 'syntax' => 'Psi', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '03A8' ) ),
    'Omega' => array( 'syntax' => 'Omega', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '03A9' ) ),
    // constants
    // operators
    '+' => array( 'syntax' => '+', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    '-' => array( 'syntax' => '-', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    '+-' => array( 'syntax' => '+-', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '00B1' ) ),
    '*' => array( 'syntax' => '*', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22C5' ) ),
    '**' => array( 'syntax' => '**', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22C6' ) ),
    '//' => array( 'syntax' => '//', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '002F' ) ),
    '\\' => array( 'syntax' => '\\', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '005C' ) ),
    'xx' => array( 'syntax' => 'xx', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '00D7' ) ),
    '-:' => array( 'syntax' => '-:', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '00F7' ) ),
    '@' => array( 'syntax' => '@', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2218' ) ),
    'o+' => array( 'syntax' => 'o+', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2295' ) ),
    'ox' => array( 'syntax' => 'ox', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2297' ) ),
    'o.' => array( 'syntax' => 'o.', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2299' ) ),
    // relation symbols
    '=' => array( 'syntax' => '=', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    '<' => array( 'syntax' => '<', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    '>' => array( 'syntax' => '>', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    '<=' => array( 'syntax' => '<=', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2264' ) ),
    '>=' => array( 'syntax' => '>=', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2265' ) ),
    '!=' => array( 'syntax' => '!=', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2260' ) ),
    '-<' => array( 'syntax' => '-<', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '227A' ) ),
    '>-' => array( 'syntax' => '>-', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '227B' ) ),
    '-=' => array( 'syntax' => '-=', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2261' ) ),
    '~=' => array( 'syntax' => '~=', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2245' ) ),
    '~~' => array( 'syntax' => '~~', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2248' ) ),
    'prop' => array( 'syntax' => 'prop', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '221D' ) ),
    // misc. symbols
    'O/' => array( 'syntax' => 'O/', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2205' ) ),
    'oo' => array( 'syntax' => 'oo', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '221E' ) ),
    'aleph' => array( 'syntax' => 'aleph', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2135' ) ),
    '/_' => array( 'syntax' => '/_', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2220' ) ),
    ':.' => array( 'syntax' => ':.', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2234' ) ),
    'diamond' => array( 'syntax' => 'diamond', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22C4' ) ),
    'square' => array( 'syntax' => 'square', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '25A1' ) ),
    '\\ ' => array( 'syntax' => '\\ ', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '00A0' ) ),
    // dots
    'cdots' => array( 'syntax' => 'cdots', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22EF' ) ),
    'vdots' => array( 'syntax' => 'vdots', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22EE' ) ),
    'ddots' => array( 'syntax' => 'ddots', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22F1' ) ),
    // sets
    'uu' => array( 'syntax' => 'uu', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '222A' ) ),
    'nn' => array( 'syntax' => 'nn', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2229' ) ),
    'vv' => array( 'syntax' => 'vv', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2228' ) ),
    '^^' => array( 'syntax' => '^^', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2227' ) ),
    'in' => array( 'syntax' => 'in', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2208' ) ),
    '!in' => array( 'syntax' => '!in', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2209' ) ),
    'sub' => array( 'syntax' => 'sub', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2282' ) ),
    'sup' => array( 'syntax' => 'sup', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2283' ) ),
    'sube' => array( 'syntax' => 'sube', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2286' ) ),
    'supe' => array( 'syntax' => 'supe', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2287' ) ),
    // brackets
    '||' => array( 'syntax' => '||', 'callback' => 'output', 'args' => array( FENCED, HEX, '2016' ) ),
    '|__' => array( 'syntax' => '|__', 'callback' => 'output', 'args' => array( FENCED, HEX, '230A' ) ),
    '__|' => array( 'syntax' => '__|', 'callback' => 'output', 'args' => array( FENCED, HEX, '230B' ) ),
    '|~' => array( 'syntax' => '|~', 'callback' => 'output', 'args' => array( FENCED, HEX, '2308' ) ),
    '~|' => array( 'syntax' => '~|', 'callback' => 'output', 'args' => array( FENCED, HEX, '2309' ) ),
    // logical symbols
    'not' => array( 'syntax' => 'not', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '00AC' ) ),
    '=>' => array( 'syntax' => '=>', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '21D2' ) ),
    '<=>' => array( 'syntax' => '<=>', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '21D4' ) ),
    'AA' => array( 'syntax' => 'AA', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2200' ) ),
    'EE' => array( 'syntax' => 'EE', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2203' ) ),
    '_|_' => array( 'syntax' => '_|_', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '27C2' ) ),
    'TT' => array( 'syntax' => 'TT', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22A4' ) ),
    '|--' => array( 'syntax' => '|--', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22A2' ) ),
    '|==' => array( 'syntax' => '|==', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '22A8' ) ),
    // arrows
    'uarr' => array( 'syntax' => 'uarr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2191' ) ),
    '->' => array( 'syntax' => '->', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2192' ) ),
    'rarr' => array( 'syntax' => 'rarr', 'callback' => 'transform', 'args' => array( TO, '->' ) ),
    'darr' => array( 'syntax' => 'darr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2193' ) ),
    'larr' => array( 'syntax' => 'larr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2190' ) ),
    'harr' => array( 'syntax' => 'harr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2194' ) ),
    '|->' => array( 'syntax' => '|->', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '21A6' ) ),
    'lArr' => array( 'syntax' => 'lArr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '21D0' ) ),
    'hArr' => array( 'syntax' => 'hArr', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '21D4' ) ),
    // unary symbols
    'sqrt' => array( 'syntax' => 'sqrt', 'callback' => 'special_func', 'args' => array( SQUARE_ROOT ) ),
    'text' => array( 'syntax' => 'text', 'callback' => 'special_func', 'args' => array( TEXT ) ),
    'f' => array( 'syntax' => 'f', 'callback' => 'special_func', 'args' => array( IDENTIFIER ) ),
    'g' => array( 'syntax' => 'g', 'callback' => 'special_func', 'args' => array( IDENTIFIER ) ),
    // array( 'syntax' => '"(0)"', 'callback' => 'func', 'args' => array( TEXT ) ), // hard-code this...
    // calculus
    'int' => array( 'syntax' => 'int', 'callback' => 'func_eater', 'args' => array( SUB_SUPERSCRIPT, HEX, '222B' ) ),
    'oint' => array( 'syntax' => 'oint', 'callback' => 'func_eater', 'args' => array( SUB_SUPERSCRIPT, HEX, '222E' ) ),
    'del' => array( 'syntax' => 'del', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2202' ) ),
    'grad' => array( 'syntax' => 'grad', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2207' ) ),
    'prime' => array( 'syntax' => 'prime', 'callback' => 'output', 'args' => array( OPERATOR, HEX, '2032' ) ),
    'dim' => array( 'syntax' => 'dim', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    'mod' => array( 'syntax' => 'mod', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    'lub' => array( 'syntax' => 'lub', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
    'glb' => array( 'syntax' => 'glb', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
);
/**
 * Large operators and limit-like functions, keyed by their ASCIIMath token.
 * Each entry carries a 'syntax' string, a 'callback' name and an 'args' list.
 *
 * Fix relative to the earlier table: 'nnn' carried U+22C5 (the dot operator
 * already used for '*'); the n-ary intersection is U+22C2.
 */
private static $__UNDER_OVER = array(
    // operators
    'sum' => array( 'syntax' => 'sum', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '2211' ) ),
    'prod' => array( 'syntax' => 'prod', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '220F' ) ),
    'vvv' => array( 'syntax' => 'vvv', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '22C1' ) ),
    '^^^' => array( 'syntax' => '^^^', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '22C0' ) ),
    'uuu' => array( 'syntax' => 'uuu', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '22C3' ) ),
    'nnn' => array( 'syntax' => 'nnn', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, HEX, '22C2' ) ),
    'min' => array( 'syntax' => 'min', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
    'max' => array( 'syntax' => 'max', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
    'Lim' => array( 'syntax' => 'Lim', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, AS_IS ) ),
    'lim' => array( 'syntax' => 'lim', 'callback' => 'func_eater', 'args' => array( OPERATOR, UNDER_OVER, AS_IS ) ),
);
/**
 * Word tokens ('and', 'or', 'if'), keyed by their ASCIIMath token.
 * Each entry carries a 'syntax' string, a 'callback' name and an 'args' list.
 * NOTE(review): the table name suggests these are rendered with surrounding
 * space, but nothing visible here establishes that - confirm in the callbacks.
 */
private static $__SPACE = array(
'and' => array( 'syntax' => 'and', 'callback' => 'output', 'args' => array( TEXT, AS_IS ) ),
'or' => array( 'syntax' => 'or', 'callback' => 'output', 'args' => array( TEXT, AS_IS ) ),
'if' => array( 'syntax' => 'if', 'callback' => 'output', 'args' => array( OPERATOR, AS_IS ) ),
);
/**
 * Standard unary functions, keyed by their ASCIIMath token.
 * Together with $__SPECIAL_UNARY_FUNC, the keys form the alternation of the
 * U group built by get_regex_for( UNARY ).
 * Each entry carries a 'syntax' string, a 'callback' name and an 'args' list.
 * Note that 'f' and 'g' also appear in $__CONSTANT with a different callback.
 */
private static $__UNARY = array(
// standard function
'sin' => array( 'syntax' => 'sin', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'cos' => array( 'syntax' => 'cos', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'tan' => array( 'syntax' => 'tan', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'csc' => array( 'syntax' => 'csc', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'sec' => array( 'syntax' => 'sec', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'cot' => array( 'syntax' => 'cot', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'sinh' => array( 'syntax' => 'sinh', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'cosh' => array( 'syntax' => 'cosh', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'tanh' => array( 'syntax' => 'tanh', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'log' => array( 'syntax' => 'log', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'ln' => array( 'syntax' => 'ln', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'det' => array( 'syntax' => 'det', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'gcd' => array( 'syntax' => 'gcd', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'lcm' => array( 'syntax' => 'lcm', 'callback' => 'func_eater', 'args' => array( OPERATOR, SUB_SUPERSCRIPT, AS_IS ) ),
'f' => array( 'syntax' => 'f', 'callback' => 'func_eater', 'args' => array( IDENTIFIER, AS_IS ) ),
'g' => array( 'syntax' => 'g', 'callback' => 'func_eater', 'args' => array( IDENTIFIER, AS_IS ) ),
);
/**
 * Binary symbols (grammar rule "b"), keyed by their ASCIIMath token.
 * The keys form the alternation of the B group built by
 * get_regex_for( BINARY ).
 * NOTE(review): the (0)/(1) placeholders in 'syntax' appear to give the
 * order in which the two operands are emitted - confirm in the callback.
 */
private static $__BINARY = array(
// binary symbols
'frac' => array( 'syntax' => 'frac(0)(1)', 'callback' => 'non_math_func', 'args' => array( FRACTION ) ),
'root' => array( 'syntax' => 'root(1)(0)', 'callback' => 'non_math_func', 'args' => array( ROOT ) ),
'stackrel' => array( 'syntax' => 'stackrel(1)(0)', 'callback' => 'non_math_func', 'args' => array( OVER ) ),
);
/**
 * Font commands and accents, keyed by their ASCIIMath token.
 * Together with $__UNARY, the keys form the alternation of the U group built
 * by get_regex_for( UNARY ).
 * Font commands pass a 'mathvariant' attribute map after ATTRIBUTE; accents
 * pass a hexadecimal code point after HEX.
 */
private static $__SPECIAL_UNARY_FUNC = array(
// font commands
'bb' => array( 'syntax' => 'bb', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'bold' ) ) ),
'bbb' => array( 'syntax' => 'bbb', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'double-struck' ) ) ),
'cc' => array( 'syntax' => 'cc', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'script' ) ) ),
'tt' => array( 'syntax' => 'tt', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'monospace' ) ) ),
'fr' => array( 'syntax' => 'fr', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'fraktur' ) ) ),
'sf' => array( 'syntax' => 'sf', 'callback' => 'non_math_func', 'args' => array( STYLE, ATTRIBUTE, array( 'mathvariant' => 'sans-serif' ) ) ),
// accents
'hat' => array( 'syntax' => 'hat', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '0302' ) ),
'bar' => array( 'syntax' => 'bar', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '0305' ) ),
'ul' => array( 'syntax' => 'ul', 'callback' => 'non_math_func', 'args' => array( UNDER, HEX, '0332' ) ),
'vec' => array( 'syntax' => 'vec', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '20D7' ) ),
'dot' => array( 'syntax' => 'dot', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '0307' ) ),
'ddot' => array( 'syntax' => 'ddot', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '0308' ) ),
'tilde' => array( 'syntax' => 'tilde', 'callback' => 'non_math_func', 'args' => array( OVER, HEX, '0303' ) ),
);
/**
 * Infix operators ('/', '_', '^'), keyed by the operator character.
 * Each entry names the MathML tag to produce (FRACTION, SUBSCRIPT, SUPERSCRIPT).
 * get_regex_for() hard-codes these three characters in its I and E patterns
 * and does not read this table.
 */
private static $__EXPRESSION = array(
'/' => array( 'syntax' => '(0)/(1)', 'callback' => 'expression', 'args' => array( FRACTION ) ),
'_' => array( 'syntax' => '(0)_(1)', 'callback' => 'expression', 'args' => array( SUBSCRIPT, HIDDEN ) ),
'^' => array( 'syntax' => '(0)^(1)', 'callback' => 'expression', 'args' => array( SUPERSCRIPT, HIDDEN ) ),
//'(0)_(1)^(2)' => array( 'syntax' => '(0)_(1)^(2)', 'callback' => 'expression', 'args' => array( SUB_SUPERSCRIPT ) ),
);
/**
 * Left grouping brackets (grammar rule "l"), keyed by their ASCIIMath token.
 * The keys form the alternation of the L group built by get_regex_for( LEFT ).
 *
 * Fix relative to the earlier table: the '{' entry carried the syntax of the
 * closing brace '}'.
 */
private static $__GROUPING_BRACKETS_LEFT = array(
    '(' => array( 'syntax' => '(', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    '[' => array( 'syntax' => '[', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    '{' => array( 'syntax' => '{', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    '{:' => array( 'syntax' => '{:', 'callback' => 'output', 'args' => array( FENCED, HIDDEN ) ),
    '(:' => array( 'syntax' => '(:', 'callback' => 'output', 'args' => array( FENCED, HEX, '2329' ) ),
    //'|' => array( 'syntax' => '|', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
);
/**
 * Right grouping brackets (grammar rule "r"), keyed by their ASCIIMath token.
 * The keys form the alternation of the R group built by get_regex_for( RIGHT ).
 *
 * Fix relative to the earlier table: the '}' entry carried the syntax of the
 * opening brace '{'.
 */
private static $__GROUPING_BRACKETS_RIGHT = array(
    ')' => array( 'syntax' => ')', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    ']' => array( 'syntax' => ']', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    '}' => array( 'syntax' => '}', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    //'|' => array( 'syntax' => '|', 'callback' => 'output', 'args' => array( FENCED, AS_IS ) ),
    ':}' => array( 'syntax' => ':}', 'callback' => 'output', 'args' => array( FENCED, HIDDEN ) ),
    ':)' => array( 'syntax' => ':)', 'callback' => 'output', 'args' => array( FENCED, HEX, '232A' ) ),
);
?>
ASCIIMath 的作者在这个文件中给出了语法定义,我就是根据它来编写 get_regex_for 方法的:
/**
* Parsing ASCII math expressions with the following grammar
* v ::= [A-Za-z] | greek letters | numbers | other constant symbols
* u ::= sqrt | text | bb | other unary symbols for font commands
* b ::= frac | root | stackrel binary symbols
* l ::= ( | [ | { | (: | {: left brackets
* r ::= ) | ] | } | :) | :} right brackets
* S ::= v | lEr | uS | bSS Simple expression
* I ::= S_S | S^S | S_S^S | S Intermediate expression
* E ::= IE | I/I Expression
*
* Each terminal symbol is translated into a corresponding mathml node.
*/