0

我目前正在根据结束时间对 JSON 文件中的 netflow 数据进行排序。我把所有数据放入一个字典,键是结束时间(只保留小时和分钟,因此多条数据会落在同一个键下)。然而,这需要一点时间——不超过几秒钟,但仍然太长了。有什么好方法可以改善它的时间复杂度(大 O)?我现在的做法是:先逐行读取文件,提取结束时间,并创建一个字典,其中键是结束时间的小时/分钟,值是空集合;然后再遍历这个字典,把结束时间与键相符的行添加到该键对应的集合中。

编辑:下面是 JSON 数据的一个示例,即文件中的一行。我正在处理的文件接近 300,000 行。

{
    "@timestamp": "2015-05-18T19:26:08.000Z",
    "netflow": {
        "version": "9",
        "flow_seq_num": "188185",
        "flowset_id": "257",
        "last_switched": "2015-05-15T14:28:02.999Z",
        "first_switched": "2015-05-15T14:27:38.999Z",
        "in_bytes": "71",
        "in_pkts": "1",
        "input_snmp": "5",
        "output_snmp": "4",
        "ipv4_src_addr": "192.1.44.133",
        "ipv4_dst_addr": "10.10.1.4",
        "protocol": "6",
        "src_tos": "0",
        "dst_tos": "2",
        "l4_src_port": "12373",
        "l4_dst_port": "80",
        "flow_sampler_id": "0",
        "ipv4_next_hop": "10.10.1.5",
        "dst_mask": "2",
        "src_mask": "31",
        "tcp_flags": "6",
        "direction": "0"
    },
    "@version": "1",
    "host": "192.168.19.202",
    "src_host_name": "",
    "dst_host_name": "",
    "app_name": "",
    "tcp_flags_str": "",
    "dscp": "",
    "highval": "",
    "src_blacklisted": "0",
    "dst_blacklisted": "0",
    "invalid_ToS": "0",
    "bytes_per_packet": 71,
    "tcp_nominal_payload": "0",
    "malformed_ip": "0",
    "empty_tcp": "0",
    "short_tcp_handshake": "0",
    "icmp_malformed_packets": "0",
    "snort_attack_flow": "0",
    "empty_udp": "0",
    "short_udp": "0",
    "short_tcp_rstack": "0",
    "short_tcp_pansf": "0",
    "short_tcp_synack": "0",
    "short_tcp_synrst": "0",
    "short_tcp_finack": "0",
    "short_tcp_pna": "0",
    "non_unicast_src": "0",
    "multicast": "0",
    "broadcast": "0",
    "network": "0",
    "tcp_urg": "0",
    "land_attack": "0",
    "short_tcp_ack": "0",
    "tcp_synfin": "0",
    "tcp_fin": "0",
    "malformed_tcp": "1",
    "tcp_xmas": "0",
    "udp_echo_req": "0",
    "tcp_null": "0",
    "tcp_syn": "0",
    "malformed_udp": "0",
    "tcp_rst": "0",
    "icmp_request": "0",
    "icmp_response": "0",
    "icmp_port_unreachable": "0",
    "icmp_host_unreachable": "0",
    "icmp_unreachable_for_Tos": "0",
    "icmp_network_unreachable": "0",
    "icmp_redirects": "0",
    "icmp_time_exceeded_flows": "0",
    "icmp_parameter_problem_flows": "0",
    "icmp_trace_route": "0",
    "icmp_datagram": "0",
    "udp_echo_chargen_broadcast": "0",
    "udp_chargen_echo_broadcast": "0",
    "icmp_src_quench": "0",
    "icmp_proto_unreachable": "0",
    "udp_echo_broadcast": "0",
    "udp_echo_rsp": "0"
}

至于我尝试过的代码:目前我只是把这些行转换成字典,以便访问我想用来排序的各个值。这真的很简单,只是用 json.loads 之类的方法创建字典。什么样的数据结构最适合组织这类数据?我现在用的是字典,但有没有更好的选择?

4

0 回答 0