1

我的目标是为 pyLoDStorage 项目添加 Weaviate 支持。具体来说,我想使用以下示例数据:

https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py

其中有

  • 一些皇室成员的记录
  • 包含数千个条目的城市列表
  • 一个人工生成的记录列表,其中可以包含任意多条记录

作为例子。

所有数据都是表格。一些基本的python类型,如:

  • 字符串
  • 布尔
  • 整数
  • 浮点数 (float)
  • 日期
  • 日期时间 (datetime)

需要支持。

我创建了项目 http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest 和一个通过 docker compose 运行 Weaviate 的脚本。其中有一个 Python 单元测试,它原本适用于 Weaviate Python 客户端 0.4.1。

我正在尝试使用 https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html 中的信息来重构这个单元测试,但不知道该怎么做。

需要做些什么,才能让 CRUD 测试像 https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests 中的其他三个测试一样运行:

  • JSON
  • SPARQL
  • SQL

我对具有上述标准数据类型的字典列表(又名“表”)的“往返”处理特别感兴趣。所以我想创建一个字典列表,然后:

  • 通过查看一些示例记录自动派生模式 (schema)
  • 检查模式是否已经存在,如果存在则删除它
  • 创建模式
  • 检查数据是否已经存在,如果存在则删除它
  • 添加数据并存储
  • 可选地存储模式以供进一步参考
  • 使用或不使用模式信息恢复数据

检查恢复的数据(字典列表)是否与原始数据相同

    '''
    Created on 2020-07-24
    
    @author: wf
    '''
    import unittest
    import weaviate
    import time
    #import getpass
    
    class TestWeaviate(unittest.TestCase):
        '''
        test access to a Weaviate instance via the python client

        NOTE(review): uses the flat client API (create_schema, create,
        create_thing, add_reference, contains_schema) - confirm the client
        version this is supposed to run against

        see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
        '''

        def setUp(self):
            '''
            set host and port of the Weaviate instance under test
            '''
            self.port=8153
            self.host="localhost"
            #if getpass.getuser()=="wf":
            #    self.host="zeus"
            #    self.port=8080

        def getClient(self):
            '''
            create a client for the configured host and port

            Returns:
                the weaviate.Client, which is also kept in self.client
            '''
            self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
            return self.client

        def tearDown(self):
            '''
            nothing to clean up
            '''
            pass

        def testRunning(self):
            '''
            make sure weaviate is running
            '''
            w=self.getClient()
            self.assertTrue(w.is_live())
            self.assertTrue(w.is_ready())

        def testWeaviateSchema(self):
            ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
            w = self.getClient()
            #contains_schema = w.schema.contains()
            try:
                w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
            except Exception as ex:
                # best effort: schema creation is allowed to fail (e.g. on a rerun),
                # but report the reason instead of hiding it and do not
                # swallow KeyboardInterrupt/SystemExit
                print ("schema not created: %s" % ex)
            # each entry is: properties, class name, uuid
            entries=[
               [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
               [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
               [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
            ]
            for properties,className,uid in entries:
                try:
                    w.create(properties,className,uid)
                except weaviate.exceptions.ThingAlreadyExistsException:
                    # the uuids are fixed so a rerun hits the existing things
                    print ("%s already created" % properties['name'])

        def testPersons(self):
            '''
            create a Person schema and add a few persons

            currently disabled: the early return below skips the whole test
            '''
            return
            w = self.getClient()

            schema = {
                "actions": {"classes": [],"type": "action"},
                "things": {
                    "classes": [{
                        "class": "Person",
                        "description": "A person such as humans or personality known through culture",
                        "properties": [
                            {
                                "cardinality": "atMostOne",
                                "dataType": ["text"],
                                "description": "The name of this person",
                                "name": "name"
                            }
                        ]
                    }],
                    "type": "thing"
                }
            }
            w.create_schema(schema)

            for name in ["Andrew S. Tanenbaum","Alan Turing","John von Neumann","Tim Berners-Lee"]:
                w.create_thing({"name": name}, "Person")

        def testEventSchema(self):
            '''
            create an Event/City schema with mutual references and link
            one city to one event

            currently disabled: the early return below skips the whole test

            https://stackoverflow.com/a/63077495/1497139
            '''
            return
            schema = {
              "things": {
                "type": "thing",
                "classes": [
                  {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                      {
                        "name": "acronym",
                        "description": "acronym",
                        "dataType": ["text"]
                      },
                      {
                        "name": "inCity",
                        "description": "city reference",
                        "dataType": ["City"],
                        "cardinality": "many"
                      }
                    ]
                  },
                  {
                    "class": "City",
                    "description": "city",
                    "properties": [
                      {
                        "name": "name",
                        "description": "name",
                        "dataType": ["text"]
                      },
                      {
                        "name": "hasEvent",
                        "description": "event references",
                        "dataType": ["Event"],
                        "cardinality": "many"
                      }
                    ]
                  }
                ]
              }
            }

            client = self.getClient()

            if not client.contains_schema():
                client.create_schema(schema)

            eventUid="2a8d56b7-2dd5-4e68-aa40-53c9196aecde"
            cityUid="c60505f9-8271-4eec-b998-81d016648d85"
            event = {"acronym": "example"}
            client.create(event, "Event", eventUid)
            city = {"name": "Amsterdam"}
            client.create(city, "City", cityUid)

            # NOTE(review): presumably waits until both things are available
            # before they get linked - confirm that the delay is needed
            time.sleep(2.0)
            client.add_reference(cityUid, "hasEvent", eventUid)
    
    
    if __name__ == "__main__":
        # uncomment the next line to hard-code the command line arguments
        # (e.g. a single test name) that unittest.main() will see
        #import sys;sys.argv = ['', 'Test.testName']
        unittest.main()
4

2 回答 2

3

Weaviate 的新版本现已推出(v1.2.1 是撰写本文时的最新版本)。在这个版本中,很多东西被删除,同时也新增了更多内容。一个重大的破坏性变更是:`actions` 和 `things` 被移除,引入了 `objects`。Weaviate v1.2 的所有更改和功能都可以通过 Python 库 `weaviate-client` v2.3 使用。

`weaviate-client` 当前的大多数功能都在这篇文章中进行了解释,并展示了它的工作原理。

这是相同的单元测试,但针对 Weaviate v1.2.1 并使用 `weaviate-client` v2.3.1 编写:

import unittest
import weaviate
import time
#import getpass

# Schema with two classes: Person (name) and Group (name plus a
# cross-reference property "members" pointing to Person).
person_schema = {
    "classes": [
        {
            "class": "Person",
            "description": "A person such as humans or personality known through culture",
            "properties": [
                {
                    "name": "name",
                    "description": "The name of this person",
                    "dataType": ["text"],
                },
            ],
        },
        {
            "class": "Group",
            "description": "A set of persons who are associated with each other over some common properties",
            "properties": [
                {
                    "name": "name",
                    "description": "The name under which this group is known",
                    "dataType": ["text"],
                },
                {
                    "name": "members",
                    "description": "The persons that are part of this group",
                    "dataType": ["Person"],
                },
            ],
        },
    ],
}



class TestWeaviate(unittest.TestCase):
    '''
    test access to a Weaviate instance via the weaviate-client v2.x API
    (client.schema / client.data_object)

    NEW link to the page
    https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
    '''

    def setUp(self):
        '''
        set host and port of the Weaviate instance under test
        '''
        self.port=8080
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the weaviate.Client, which is also kept in self.client
        '''
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        '''
        create the module level person_schema and add two persons and a group
        '''
        # NEW link to the page
        # https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
        w = self.getClient()
        #contains_schema = w.schema.contains()

        # it is a good idea to check if Weaviate has a schema already when testing, otherwise it will result in an error
        # this way you know for sure that your current schema is known to weaviate.

        if w.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            w.schema.delete_all()
        # instead of w.create_schema(person_schema)
        w.schema.create(person_schema)
        # each entry is: properties, class name, uuid
        entries=[
            [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for properties,className,uid in entries:
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#create-a-data-object
                w.data_object.create(properties,className,uid)
            # ObjectAlreadyExistsException is the correct exception starting weaviate-client 2.0.0
            except weaviate.exceptions.ObjectAlreadyExistsException:
                print ("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and add a few persons

        currently disabled: the early return below skips the whole test
        '''
        return
        w = self.getClient()

        schema = {
            #"actions": {"classes": [],"type": "action"}, `actions` and `things` were removed in weaviate v1.0 and removed in weaviate-client v2.0
            # Now there is only `objects`
            "classes": [
                {
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            #"cardinality": "atMostOne", were removed in weaviate v1.0 and weaviate-client v2.0
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }
            ]
        }
        # the other tests of this class create a Person class as well, so
        # reset the schema first (this removes all the data objects too)
        if w.schema.contains():
            w.schema.delete_all()
        # instead of w.create_schema(schema)
        w.schema.create(schema)

        # instead of  w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        for name in ["Andrew S. Tanenbaum","Alan Turing","John von Neumann","Tim Berners-Lee"]:
            w.data_object.create({"name": name}, "Person")

    def testEventSchema(self):
        '''
        create an Event/City schema with mutual references and link
        one city to one event

        currently disabled: the early return below skips the whole test

        https://stackoverflow.com/a/63077495/1497139
        '''
        return
        schema = {
            # "things": { , were removed in weaviate v1.0 and weaviate-client v2.0
            # "type": "thing", was removed in weaviate v1.0 and weaviate-client v2.0
            "classes": [
                {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                        {
                            "name": "acronym",
                            "description": "acronym",
                            "dataType": ["text"]
                        },
                        {
                            "name": "inCity",
                            "description": "city reference",
                            "dataType": ["City"],
                            # "cardinality": "many", were removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                },
                {
                    "class": "City",
                    "description": "city",
                    "properties": [
                        {
                            "name": "name",
                            "description": "name",
                            "dataType": ["text"]
                        },
                        {
                            "name": "hasEvent",
                            "description": "event references",
                            "dataType": ["Event"],
                            # "cardinality": "many", were removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                }
            ]
        }

        client = self.getClient()

        # this test is going to fail if you are using the same Weaviate instance
        # We already created a schema in the test above so the new schema is not going to be created
        # and will result in an error.
        # we can delete the schema and create a new one.

        # instead of client.contains_schema()
        if client.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            client.schema.delete_all()
        # instead of client.create_schema(schema)
        client.schema.create(schema)

        eventUid="2a8d56b7-2dd5-4e68-aa40-53c9196aecde"
        cityUid="c60505f9-8271-4eec-b998-81d016648d85"
        event = {"acronym": "example"}
        # instead of client.create(...)
        client.data_object.create(event, "Event", eventUid)
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", cityUid)

        # NOTE(review): presumably waits until both objects are available
        # before they get linked - confirm that the delay is needed
        time.sleep(2.0)
        # instead of client.add_reference(...), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#cross-references
        client.data_object.reference.add(cityUid, "hasEvent", eventUid)


if __name__ == "__main__":
    # uncomment the next line to hard-code the command line arguments
    # (e.g. a single test name) that unittest.main() will see
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
于 2021-04-09T15:18:42.657 回答
3

上面所示的针对连接、模式和数据对象的单元测试,在 Python 客户端 v1.x 下可以改写如下(更改之处请参见内联注释):

import unittest
import weaviate
import time
#import getpass

class TestWeaviate(unittest.TestCase):
    '''
    test access to a Weaviate instance via the namespaced python client
    API (client.schema / client.data_object)

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        set host and port of the Weaviate instance under test
        '''
        self.port=8153
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the weaviate.Client, which is also kept in self.client
        '''
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            # instead of w.create_schema, see https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html#creating-your-first-schema-with-the-python-client
            w.schema.create("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: schema creation is allowed to fail (e.g. on a rerun),
            # but report the reason instead of hiding it and do not
            # swallow KeyboardInterrupt/SystemExit
            print ("schema not created: %s" % ex)
        # each entry is: properties, class name, uuid
        entries=[
            [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for properties,className,uid in entries:
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#example-request-1
                w.data_object.create(properties,className,uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                # the uuids are fixed so a rerun hits the existing things
                print ("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and add a few persons

        currently disabled: the early return below skips the whole test
        '''
        return
        w = self.getClient()

        schema = {
            "actions": {"classes": [],"type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema)

        # instead of  w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        for name in ["Andrew S. Tanenbaum","Alan Turing","John von Neumann","Tim Berners-Lee"]:
            w.data_object.create({"name": name}, "Person")

    def testEventSchema(self):
        '''
        create an Event/City schema with mutual references and link
        one city to one event

        currently disabled: the early return below skips the whole test

        https://stackoverflow.com/a/63077495/1497139
        '''
        return
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": ["text"]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": ["City"],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": ["text"]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": ["Event"],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }

        client = self.getClient()

        # instead of client.contains_schema(): the schema calls of this
        # class all go through client.schema
        if not client.schema.contains():
            # instead of client.create_schema(schema)
            client.schema.create(schema)

        eventUid="2a8d56b7-2dd5-4e68-aa40-53c9196aecde"
        cityUid="c60505f9-8271-4eec-b998-81d016648d85"
        event = {"acronym": "example"}
        # instead of client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        client.data_object.create(event, "Event", eventUid)
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", cityUid)

        # NOTE(review): presumably waits until both things are available
        # before they get linked - confirm that the delay is needed
        time.sleep(2.0)
        # instead of client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde"), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#add-a-cross-reference
        client.data_object.reference.add(cityUid, "hasEvent", eventUid)


if __name__ == "__main__":
    # uncomment the next line to hard-code the command line arguments
    # (e.g. a single test name) that unittest.main() will see
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()

尚不支持从 dict (或其他格式)列表中自动派生模式。正如您所提到的,这可能是一个很好的便利功能,因此我们将其添加到 Weaviate 的功能建议中!

于 2020-09-22T08:20:09.227 回答