我正在实施 CUDF 以加快我的 python 进程。首先,我导入 CUDF 并删除多处理代码,并使用 CUDF 初始化变量。更改为 CUDF 后,会出现字典错误。
如何删除这些循环以进行有效实施?
代码
import more_itertools
import pandas as pd
import numpy as np
import itertools
from os import cpu_count
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt
import json
import os
import gc
from tqdm import tqdm
import cudf
gc.collect()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
import logging
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)
with open(Path(__file__).parent / "ageDB.json", "r") as f:
identities = json.load(f)
positives = cudf.DataFrame()
for value in tqdm(identities.values(), desc="Positives"):
positives = positives.append(cudf.DataFrame(itertools.combinations(value, 2), columns=["file_x", "file_y"]),
ignore_index=True)
positives["decision"] = "Yes"
print(positives)
samples_list = list(identities.values())
negatives = cudf.DataFrame()
######################====================Functions=============##############
def compute_cross_samples(x):
return cudf.DataFrame(itertools.product(*x), columns=["file_x", "file_y"])
####################################
if Path("positives_negatives.csv").exists():
df = cudf.read_csv("positives_negatives.csv")
else:
for combos in tqdm(more_itertools.ichunked(itertools.combinations(identities.values(), 2), cpu_count())):
for cross_samples in (compute_cross_samples, combos):
negatives = negatives.append(cross_samples)
negatives["decision"] = "No"
negatives = negatives.sample(positives.shape[0])
df = cudf.concat([positives, negatives]).reset_index(drop=True)
df.to_csv("positives_negatives.csv", index=False)
df.file_x = "deepface/tests/dataset/" + df.file_x
df.file_y = "deepface/tests/dataset/" + df.file_y
追溯
Traceback (most recent call last):
File "Ensemble-Face-Recognition.py", line 36, in <module>
positives = positives.append(cudf.DataFrame(itertools.combinations(value, 2), columns=["file_x", "file_y"]),
File "/home/khawar/anaconda3/envs/rapids-0.17/lib/python3.7/contextlib.py", line 74, in inner
return func(*args, **kwds)
File "/home/khawar/anaconda3/envs/rapids-0.17/lib/python3.7/site-packages/cudf/core/dataframe.py", line 289, in __init__
raise TypeError("data must be list or dict-like")
TypeError: data must be list or dict-like