我是 Google Cloud Platform 的新手,我正在尝试创建一个 Feature Store 来填充来自 Google Cloud Storage 的 csv 文件中的值。目的是从 Python 的本地笔记本中做到这一点。由于我正在使用信用卡公共数据集,因此我基本上遵循此处的代码,进行了适当的更改。运行代码时出现的错误如下:
GoogleAPICallError: None Unexpected state: Long-running operation had neither response nor error set.
它发生在从 csv 文件中提取数据的过程中。
这是我正在处理的代码:
import os
from datetime import datetime
from google.cloud import bigquery
from google.cloud import aiplatform
from google.cloud.aiplatform_v1.types import feature as feature_pb2
from google.cloud.aiplatform_v1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1.types import \
featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1.types import FeatureSelector, IdMatcher
# Point Google application-default credentials at the local service-account key.
credential_path = r"C:\Users\...\.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path
## Constants
PROJECT_ID = "my-project-ID"
REGION = "us-central1"
API_ENDPOINT = "us-central1-aiplatform.googleapis.com"
INPUT_CSV_FILE = "my-input-file.csv"
FEATURESTORE_ID = "fraud_detection"
## Output dataset: suffixed with a timestamp so each run targets a fresh dataset.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DESTINATION_DATA_SET = f"fraud_predictions_{TIMESTAMP}"
## Output table. Must NOT already exist: the BatchReadFeatureValues API
## cannot overwrite an existing table.
DESTINATION_TABLE_NAME = "training_data"
DESTINATION_PATTERN = "bq://{project}.{dataset}.{table}"
DESTINATION_TABLE_URI = DESTINATION_PATTERN.format(
    project=PROJECT_ID,
    dataset=DESTINATION_DATA_SET,
    table=DESTINATION_TABLE_NAME,
)
## Create the destination BigQuery dataset for batch-read output.
client = bigquery.Client(project=PROJECT_ID)
dataset_ref = bigquery.Dataset(f"{client.project}.{DESTINATION_DATA_SET}")
dataset_ref.location = REGION
dataset = client.create_dataset(dataset_ref)
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
## Admin client for featurestore CRUD, plus a data client for online reads.
client_options = {"api_endpoint": API_ENDPOINT}
client = aiplatform.gapic.FeaturestoreServiceClient(
    client_options=client_options)
data_client = aiplatform.gapic.FeaturestoreOnlineServingServiceClient(
    client_options=client_options)
BASE_RESOURCE_PATH = client.common_location_path(PROJECT_ID, REGION)
## Create featurestore (only the first time)
featurestore_config = featurestore_pb2.Featurestore(
    online_serving_config=featurestore_pb2.Featurestore.OnlineServingConfig(
        fixed_node_count=1,
    ),
)
create_lro = client.create_featurestore(
    featurestore_service_pb2.CreateFeaturestoreRequest(
        parent=BASE_RESOURCE_PATH,
        featurestore_id=FEATURESTORE_ID,
        featurestore=featurestore_config,
    )
)
## Block until the long-running operation finishes, then verify the store exists.
print(create_lro.result())
client.get_featurestore(
    name=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
)
## Create entity types (only the first time).
## FIX: create_entity_type returns a long-running operation; the original code
## never waited on it, so the later batch_create_features calls could race the
## entity-type creation. Wait on each LRO before proceeding.
featurestore_name = client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
## Credit card entity type.
cc_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=featurestore_name,
        entity_type_id="creditcards",
        entity_type=entity_type_pb2.EntityType(
            description="Credit card entity",
        ),
    )
)
cc_entity_type_lro.result()
## Fraud entity type.
fraud_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=featurestore_name,
        entity_type_id="frauds",
        entity_type=entity_type_pb2.EntityType(
            description="Fraud entity",
        ),
    )
)
fraud_entity_type_lro.result()
## Create features for credit card type (only the first time).
## FIX: the original hand-wrote 29 identical CreateFeatureRequest literals
## (v1..v28 plus "amount"); build them in a comprehension instead. The request
## payload sent to the API is unchanged: every feature is DOUBLE-typed with an
## empty description.
CC_FEATURE_IDS = [f"v{i}" for i in range(1, 29)] + ["amount"]
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    requests=[
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE,
                description="",
            ),
            feature_id=feature_id,
        )
        for feature_id in CC_FEATURE_IDS
    ],
).result()
## Create features for fraud type (only the first time): a single DOUBLE
## feature named "class" (the 0/1 fraud label).
fraud_feature_request = featurestore_service_pb2.CreateFeatureRequest(
    feature=feature_pb2.Feature(
        value_type=feature_pb2.Feature.ValueType.DOUBLE,
        description="",
    ),
    feature_id="class",
)
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    requests=[fraud_feature_request],
).result()
## Import feature values for credit cards from the CSV in GCS.
## FIX: the original hand-wrote 29 identical FeatureSpec literals; build the
## list in a comprehension. The request payload is unchanged.
## NOTE(review): the CSV must contain an entity-id column named "cc_id" and a
## timestamp column named "time" whose values parse as timestamps. An ingestion
## job that "completes" but adds no values usually means one of those columns
## is missing, misnamed, or holds unparseable timestamps — verify against the
## CSV header.
cc_import_feature_ids = [f"v{i}" for i in range(1, 29)] + ["amount"]
import_cc_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    csv_source=aiplatform.gapic.CsvSource(
        gcs_source=aiplatform.gapic.GcsSource(
            uris=["gs://fraud-detection-19102021/dataset/cc_details_train.csv"])),
    entity_id_field="cc_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
        for feature_id in cc_import_feature_ids
    ],
    feature_time_field="time",
    worker_count=1,
)
## Start the import and block until the long-running operation completes.
ingestion_lro = client.import_feature_values(import_cc_request)
ingestion_lro.result()
## Import feature values for frauds from the CSV in GCS.
fraud_csv_source = aiplatform.gapic.CsvSource(
    gcs_source=aiplatform.gapic.GcsSource(
        uris=["gs://fraud-detection-19102021/dataset/data_fraud_train.csv"]))
import_fraud_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    csv_source=fraud_csv_source,
    entity_id_field="fraud_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id="class"),
    ],
    feature_time_field="time",
    worker_count=1,
)
## Start the import and block until the long-running operation completes.
ingestion_lro = client.import_feature_values(import_fraud_request)
ingestion_lro.result()
当我从 Google Cloud Console 的 Feature 部分检查 Ingestion Jobs 时,我看到作业已完成,但没有向我的特征添加任何值。
任何建议都非常宝贵。
谢谢你们。
编辑 1
在下图中,有一个我用作输入的 csv 文件(cc_details_train.csv)第一行的示例。所有未显示的特征都与此类似,而特征 class 只会取 0 或 1 两个值。注入作业持续大约 5 分钟以导入(理想情况下)3000 行,但它没有报错地结束,也没有导入任何值。