0

有一个类似的字符串;

{uniqueKey=test20745,content={acostumbrado={tf=1,df=1},al={tf=1,df=6945},ante={tf=1,df=42},co={tf=1,df=9187},ello={tf=1,df=2},está={tf=2,df=21},falcao={tf=1,df=105},guardia={tf=1,df=2},http={tf=1,df=9893},nada={tf=1,df=24},no={tf=1,df=2493},osa={tf=1,df=429},para={tf=1,df=6382},partido={tf=1,df=50},pretorian={tf=1,df=1},que={tf=1,df=358},sebcfkeı={tf=1,df=1},su={tf=1,df=7368},t={tf=1,df=14423},tuvo={tf=1,df=3},un={tf=1,df=8511}}}

如何将此字符串解析为如下所示的映射(键,列表);

acostumbrado->tf=1,df=1
al ->tf=1,df=6945
ante ->tf=1,df=42
...

编辑:

所以我到目前为止做了什么;

    String delims = "[{},]";
    HashMap<String, List<String>> valueMap = new HashMap<String, List<String>>();
    for (int i = 0; i < text.size(); i++) {
        String[] tokens = val.toString().split(delims);
        ArrayList<String> tfAndDfValues = new ArrayList<String>();
        tfAndDfValues.add(tokens[4].substring(3));
        tfAndDfValues.add(tokens[5].substring(3));
        if (valueMap.containsKey(removeLastChar(tokens[3]))) {
            valueMap.get(removeLastChar(tokens[3])).addAll(tfAndDfValues);
        } else {
            valueMap.put(removeLastChar(tokens[3]), tfAndDfValues);
        }
    }
4

3 回答 3

1

你有没有想过状态模式?请参阅使用示例:

public class StatePatternProgram {

    public static void main(String[] args) {
        String example = "{uniqueKey=test20745,content={acostumbrado={tf=1,df=1},al={tf=1,df=6945},ante={tf=1,df=42},co={tf=1,df=9187},ello={tf=1,df=2},está={tf=2,df=21},falcao={tf=1,df=105},guardia={tf=1,df=2},http={tf=1,df=9893},nada={tf=1,df=24},no={tf=1,df=2493},osa={tf=1,df=429},para={tf=1,df=6382},partido={tf=1,df=50},pretorian={tf=1,df=1},que={tf=1,df=358},sebcfkeı={tf=1,df=1},su={tf=1,df=7368},t={tf=1,df=14423},tuvo={tf=1,df=3},un={tf=1,df=8511}}}";
        ParseContext context = new ParseContext();
        Map<String, List<Integer>> map = context.parseContent(example);
        System.out.println(map);
    }
}

class ParseContext {

    private Map<String, List<Integer>> contentMap;

    private String currentKey;
    private List<Integer> currentList;

    private boolean continueParsing = true;
    private ParseState state = new StartParseState();

    public Map<String, List<Integer>> parseContent(String content) {
        StringTokenizer tokenizer = new StringTokenizer(content, "{}=,", true);
        while (continueParsing) {
            state.parse(tokenizer);
        }
        return contentMap;
    }

    interface ParseState {
        void parse(StringTokenizer tokenizer);
    }

    class StartParseState implements ParseState {
        @Override
        public void parse(StringTokenizer tokenizer) {
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (!"content".equals(token)) {
                    continue;
                }

                contentMap = new LinkedHashMap<String, List<Integer>>();

                tokenizer.nextToken();
                tokenizer.nextToken();

                state = new KeyParseState();
                break;
            }
        }
    }

    class KeyParseState implements ParseState {
        @Override
        public void parse(StringTokenizer tokenizer) {
            if (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (",".equals(token)) {
                    token = tokenizer.nextToken();
                }
                if ("}".equals(token)) {
                    state = new EndParseState();
                    return;
                }
                currentKey = token;
                tokenizer.nextToken();
                state = new ListParseState();
            }
        }
    }

    class ListParseState implements ParseState {
        @Override
        public void parse(StringTokenizer tokenizer) {
            currentList = new ArrayList<Integer>();
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if ("}".equals(token)) {
                    break;
                }
                if ("=".equals(token)) {
                    currentList.add(Integer.valueOf(tokenizer.nextToken()));
                }
            }
            contentMap.put(currentKey, currentList);
            state = new KeyParseState();
        }
    }

    class EndParseState implements ParseState {
        @Override
        public void parse(StringTokenizer tokenizer) {
            continueParsing = false;
        }
    }
}

该程序打印:

{acostumbrado=[1, 1], al=[1, 6945], ante=[1, 42], co=[1, 9187], ello=[1, 2], está=[2, 21], falcao=[1, 105], guardia=[1, 2], http=[1, 9893], nada=[1, 24], no=[1, 2493], osa=[1, 429], para=[1, 6382], partido=[1, 50], pretorian=[1, 1], que=[1, 358], sebcfkeı=[1, 1], su=[1, 7368], t=[1, 14423], tuvo=[1, 3], un=[1, 8511]}
于 2012-11-05T15:10:08.237 回答
1

此解决方案使用正则表达式组捕获:

private static final String SAMPLE = "{uniqueKey=test20745,content={acostumbrado={tf=1,df=1},al={tf=1,df=6945},ante={tf=1,df=42},co={tf=1,df=9187},ello={tf=1,df=2},está={tf=2,df=21},falcao={tf=1,df=105},guardia={tf=1,df=2},http={tf=1,df=9893},nada={tf=1,df=24},no={tf=1,df=2493},osa={tf=1,df=429},para={tf=1,df=6382},partido={tf=1,df=50},pretorian={tf=1,df=1},que={tf=1,df=358},sebcfkeı={tf=1,df=1},su={tf=1,df=7368},t={tf=1,df=14423},tuvo={tf=1,df=3},un={tf=1,df=8511}}}";

private static final String CONTENT = "(\\p{L}*)=\\{((tf=\\d*),(df=\\d*))\\}";

public static void main(String[] args) {
    Pattern p = Pattern.compile(CONTENT);
    Matcher m = p.matcher(SAMPLE);

    Map<String, List<String>> result = new HashMap<String, List<String>>();
    while (m.find()) {
        String key = m.group(1);

        List<String> values = new ArrayList<String>();
        values.add(m.group(3));
        values.add(m.group(4));

        result.put(key, values);
    }

    System.out.println(result);
}
于 2012-11-05T15:59:53.000 回答
0

如果您的输入模式一直都是唯一的,试试这个

    String delims = "[{},=]";
    Map map = new HashMap() ;
    String[] tokens = text.toString().split(delims);
    for( int i=5;i<tokens.length - 7;i = i+ 7) {
        map.put(tokens[i], tokens[i+2]+"->"+tokens[i+3]+","+tokens[i+4]+"->"+tokens[i+5]);
    }
    System.out.println(map);
于 2012-11-05T14:23:42.310 回答