我正在使用 xmerl_sax_parser:file() 解析一个非常大的文件,并在中途遇到异常。
exception throw: {'EXIT',{undef,[{xmerl_sax_parser_utf8,cf,
[<<"Ä">>,
{xmerl_sax_parser_state}]}]}}
我检查过数据,没有发现任何异常字符;即便存在这样的字符,我又该如何处理呢?文档中对此没有任何说明。
代码如下所示:
%% Parses the XML file FileName with xmerl_sax_parser:file/2, using event/3
%% as the SAX callback and {[], ""} as the initial callback state.
%%
%% Returns the final callback state as produced by event/3. Any parser
%% result other than {ok, State, Rest} fails the match below (badmatch),
%% so parse errors crash the caller instead of being silently ignored.
run(FileName) ->
    InitialState = {[], ""},
    Options = [
        {event_fun, fun event/3},
        {event_state, InitialState},
        {encoding, utf8},
        {file_type, normal},
        skip_external_dtd
    ],
    {ok, FinalState, _Rest} = xmerl_sax_parser:file(FileName, Options),
    FinalState.
%% SAX event callback passed to xmerl_sax_parser as event_fun.
%%
%% The callback state is {Products, Text}:
%%   Products - list of property lists, one per "product" start tag seen,
%%              most recent first; captured fields are prepended to the
%%              head entry as {Field, Text} tuples.
%%   Text     - character data collected since the last element boundary
%%              ("" right after a start tag, undefined right after an
%%              end tag).
%%
%% The character data of a single element may be delivered as several
%% `characters' events (for instance when it is interrupted by a CDATA
%% section or a comment), so chunks are appended to the buffer instead of
%% replacing it. The buffer is reset at every element boundary so that text
%% belonging to unrelated elements never leaks into a captured field.
%%
%% Events and state shapes not handled explicitly leave the state unchanged.
event({startElement, _, "product", _, _}, _Loc, {Products, _}) ->
    %% A new product begins: open a fresh property list for it.
    {[[] | Products], ""};
event({startElement, _, _, _, _}, _Loc, {Products, _}) ->
    {Products, ""};
event({characters, Chunk}, _Loc, {Products, Text}) ->
    {Products, event_append(Text, Chunk)};
event({endElement, _, Tag, _}, _Loc, {Products, Text}) ->
    {event_store(event_field(Tag), Text, Products), undefined};
event(_Event, _Loc, State) ->
    State.

%% Maps a closing tag name to the field it is stored under, or `none' for
%% tags that are not captured.
%% NOTE(review): the tag names and field names do not obviously correspond
%% (e.g. "stock" -> name); the mapping is kept exactly as it was - confirm
%% it matches the actual document schema.
event_field("stock") -> name;
event_field("date") -> brand;
event_field("open") -> price;
event_field("low") -> url;
event_field("stats") -> category;
event_field(_Other) -> none.

%% Prepends {Field, Text} to the most recent product's property list.
%% Leaves Products untouched for uncaptured tags or when no product is open.
event_store(none, _Text, Products) ->
    Products;
event_store(Field, Text, [Current | Others]) ->
    [[{Field, Text} | Current] | Others];
event_store(_Field, _Text, Products) ->
    Products.

%% Appends a chunk of character data to the buffer. A buffer that is not a
%% list (undefined after an end tag) is treated as empty.
event_append(Text, Chunk) when is_list(Text) ->
    Text ++ Chunk;
event_append(_NoText, Chunk) ->
    Chunk.
对于如何正确修复或处理这个问题,大家有什么建议吗?