0

我是 cassandra python 模块的新手。所以我正在尝试使用批处理语句将 json 文件插入到表中。但我收到“KeyError:0”错误。我知道提供的文件太多了。但我几乎尝试了所有东西,但不明白我的错误。请帮忙!!!!!!

import json
import logging
from cassandra.cluster import Cluster
import os
from uuid import uuid4
from cassandra.cluster import Cluster, BatchStatement
from cassandra import ConsistencyLevel
from myencoder import MyEncoder
import logging
import re
import ast


def parsing():

    with open('dfs.json', 'r', encoding="utf8") as json_file:
        data = json.load(json_file)
        aboutlegacy = data['aboutLegacy']
        accomplishments = data['accomplishments']
        profilealternative = data['profileAlternative']
        educations = data['educations']
        profileLegacy = data['profileLegacy']
        peopleAlsoviewed = data['peopleAlsoViewed']
        positions = data['positions']
        skills = data['skills']
        recommendations = data['recommendations']
        volunteerExperience = data['volunteerExperience']
        profile = data['profile']
        idd = uuid4()


        query = """
        INSERT INTO profile (id,profilelegacy,profilealternative,aboutlegacy,positions,educations,skills,recommendations,accomplishments,peoplealsoviewed,volunteerExperience,profile)
          VALUES (?,?,?,?,?,?,?,?,?,?,?,?);"""
        insert_user = session.prepare(query)
        batch = BatchStatement(consistency_level=ConsistencyLevel.ONE)

        batch.add(insert_user, (idd, profileLegacy, profilealternative, aboutlegacy, positions, educations,
                                skills, recommendations, accomplishments, peopleAlsoviewed, volunteerExperience, profile,))
        log = logging.getLogger()
        log.info('Batch Insert Completed')
        session.execute(batch)


if __name__ == "__main__":
    cluster = Cluster(['127.0.0.1'], port=9042)
    session = cluster.connect('profiles', wait_for_all_pools=True)
    session.execute('USE profiles')
    parsing()

这是产生的错误:


File "cassandratest2.py", line 61, in <module>
    parsing()
  File "cassandratest2.py", line 51, in parsing
    skills, recommendations, accomplishments, peopleAlsoviewed, volunteerExperience, profile,))
  File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 815, in add
    bound_statement = statement.bind(() if parameters is None else parameters)
  File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 501, in bind
    return BoundStatement(self).bind(values)
  File "C:\Python\Python37\lib\site-packages\cassandra\query.py", line 627, in bind
    self.values.append(col_spec.type.serialize(value, proto_version))
  File "C:\Python\Python37\lib\site-packages\cassandra\cqltypes.py", line 723, in serialize
    return cls.serialize_safe(val, protocol_version)
  File "C:\Python\Python37\lib\site-packages\cassandra\cqltypes.py", line 942, in serialize_safe
    item = val[i]
KeyError: 0

这是 json 文件的格式,因为它包含机密信息


{
    "profileLegacy": {
        "name": "",
        "headline": "",
        "location": "",
        "connections": 0,
        "summary": ""
    },
    "profileAlternative": {
        "name": "",
        "headline": "",
        "location": "",
        "connections": 0
    },
    "aboutLegacy": {
        "text": ""
    },
    "positions": [{
        "org": "",
        "title": "",
        "end": "",
        "start": "",
        "desce": ""
    }],
    "educations": [{
        "major": "",
        "end": "",
        "name": "",
        "degree": "Maestr\u00eda en Finanzas",
        "start": "",
        "desce": ""
    }],
    "skills": [
        "Key Account Development",
        "Strategic Planning"
    ],
    "recommendations": {
        "givenCount": "0",
        "receivedCount": "0",
        "given": [],
        "received": []
    },
    "accomplishments": [],
    "peopleAlsoViewed": [{
        "url": "",
        "id": ""
    }],
    "volunteerExperience": [],
    "profile": {
        "name": "",
        "headline": "",
        "location": "",
        "connections": 0
    }
}  

这是 cqlsh 中的创建表查询

CREATE TABLE profile (
    id uuid PRIMARY KEY,
    profilelegacy frozen<profilelegacy>,
    profilealternative  frozen<profilelaternative>,
    aboutlegacy text,
    positions list<frozen<positions>>,
    educations set<frozen<educations>>,
    skills list<text>,
    recommendations frozen<recommendations>,
    accomplishments list<text>,
    peoplealsoviewed list<frozen<peoplealsoviewed>>,
    volunteerExperience list<text>,
    profile frozen<profilelaternative>
) WITH bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
    AND comment = ''
    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
    AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1.0
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99PERCENTILE';
and these are the declared types

CREATE TYPE profiles.peoplealsoviewed (
    url text,
    id text
);

CREATE TYPE profiles.profilelegacy (
    name text,
    headline text,
    location text,
    connections int,
    summary text
);

CREATE TYPE profiles.positions (
    org text,
    title text,
    end text,
    start text,
    desce text
);

CREATE TYPE profiles.recommendations (
    givencount text,
    receivedcount text,
    given frozen<list<text>>,
    received frozen<list<text>>
);

CREATE TYPE profiles.skills (
    title text,
    count text
);

CREATE TYPE profiles.educations (
    major text,
    end text,
    name text,
    degree text,
    start text,
    desce text
);

CREATE TYPE profiles.profilelaternative (
    name text,
    headline text,
    location text,
    connections int
);
4

1 回答 1

1

当 Cassandra 尝试根据用户定义的类型进行填充时,它期望 a tupleof 值与类型字段的顺序相同,例如 for profilelegacy,它想要(name, headline, location, connections, summary)或者具有与所讨论名称匹配的属性的对象(例如,某些对象x是可以检索x.name, thenx.headline等)。

您正在加载一个 JSON 文件,它将 JSON 对象解码为 Python dict(使用基于键的查找),而不是 Python 对象(使用基于属性的查找)。您需要从 转换为dict适当的tuple或具有适当属性的对象。

可能最简单的方法是使用types.SimpleNamespace让您从任意dicts 创建对象。导入types并将您的更改json.load为:

data = json.load(json_file, object_hook=lambda d: types.SimpleNamespace(**d))

结果中的查找从dict-style 更改为 object-style:

aboutlegacy = data.aboutLegacy
accomplishments = data.accomplishments
# ... etc ...

应该让您更接近解决方案。

于 2019-08-12T19:27:18.167 回答