python - 如何在 python 中使用对 PCA 和 LDA 的绘图来实现 Q 和 T 统计测试？

Question

我一直在研究这篇文章基于主成分分析的结构健康监测：损伤检测、定位和分类，它实施 PCA 模型以识别钢框架结构中的损伤。在这里，它讨论了对 PCA 模型使用 Q、T 以及（Q 和 T）统计和 I 统计测试的组合，以识别损坏并将其定位在结构中。我知道使用 PCA 和hotelling t 统计可以使用以下链接完成：

Hotelling 在 python 中的 T^2 分数

我的问题如下：

有没有更好的方法来计算hotelling t 统计量？
如何计算python中的Q统计量？
是否可以将这些参数实现到 LDA？如果是，我需要从 LDA 中提取什么来计算 Q、T 以及（Q 和 T）和 I 统计检验的组合？
（额外问题）如何使用统计测试从上面的文章中获得贡献方法？

编辑1：

我在网上搜索并找到以下链接T-Squared Q 残差和贡献，并尝试计算 T-Squared 和 Q 残差计算的方程，但是，我在 Q 残差计算中不断出错。我的代码中可能有什么错误？（我的输入矩阵大小 [50, 7]）另外，是否可以用我的输入绘制 Q 与 T 的关系？

代码：

def hotelling_tsquared_PCA(input_features):
    n_samples = input_features.shape[0]
    
    ##### Hyperparameter optimisation:
    # Running Bayesian Optimisation to get the best parameters:
    start = time.time()
    
    # Create the algorithms
    tpe_algo = tpe.suggest
    # rand_algo = rand.suggest
    # atpe_algo = atpe.suggest
    
    # Assigning model:
    model = 'pca'
  
    # Creating the trial objects:
    hypopt_trials = Trials()
    
    # Getting the best parameters:
    best_params = fmin(obj_fnc, search_space(model), algo=tpe_algo, max_evals=500, trials=hypopt_trials)
    print("Best params: ", best_params)
    print('Best accuracy: ', hypopt_trials.best_trial['result']['loss'])
    print("[INFO] Baye. Opt. search took {:.2f} seconds".format(time.time() - start))
    
    # Calling parameters:
    ## PCA:
    svd_solver = ["auto", "full", "arpack", "randomized"]
    
    # Creating the PCA models:
    #### Implementing hyperopt Search:
    pca = PCA(n_components=2, svd_solver=svd_solver[best_params['svd_solver']])
    
    pca = pca.fit(input_features)
    PCA_scores = pca.transform(input_features)
    print('PCA Score matrix shape:', np.array(PCA_scores).shape)
    PCA_loading = (pca.components_).T
    print('PCA Loading matrix shape:', np.array(PCA_loading).shape)
    eigenvalues = pca.explained_variance_

    t2 = np.linalg.multi_dot([input_features, PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T, input_features.T])
    # print(t2)
    print('PCA hotellings T^2 matrix shape:', np.array(Q_res).shape)
    return t2

def Q_Residual_PCA(input_features):
    n_samples = input_features.shape[0]
    
    ##### Hyperparameter optimisation:
    # Running Bayesian Optimisation to get the best parameters:
    start = time.time()
    
    # Create the algorithms
    tpe_algo = tpe.suggest
    # rand_algo = rand.suggest
    # atpe_algo = atpe.suggest
    
    # Assigning model:
    model = 'pca'
  
    # Creating the trial objects:
    hypopt_trials = Trials()
    
    # Getting the best parameters:
    best_params = fmin(obj_fnc, search_space(model), algo=tpe_algo, max_evals=500, trials=hypopt_trials)
    print("Best params: ", best_params)
    print('Best accuracy: ', hypopt_trials.best_trial['result']['loss'])
    print("[INFO] Baye. Opt. search took {:.2f} seconds".format(time.time() - start))
    
    # Calling parameters:
    ## PCA:
    svd_solver = ["auto", "full", "arpack", "randomized"]
    
    # Creating the PCA models:
    #### Implementing hyperopt Search:
    pca = PCA(svd_solver=svd_solver[best_params['svd_solver']])
    
    print('Model: ', pca)
    pca = pca.fit(input_features)
    PCA_scores = pca.transform(input_features)
    print('PCA Score matrix shape:', np.array(PCA_scores).shape)
    PCA_loading = (pca.components_).T
    print('PCA Loading matrix shape:', np.array(PCA_loading).shape)
    
    Q_eq_1 = np.dot(PCA_loading, PCA_loading.T)
    # print(pd.DataFrame(Q_eq_1))
    print('PCA Loading matrix product shape:', np.array(Q_eq_1).shape)
    Q_eq_2 = np.identity(n_samples) - Q_eq_1
    # Q_eq_2 = np.eye(50,7) - Q_eq_1
    # print(pd.DataFrame(Q_eq_2))
    print('PCA Sub matrix shape:', np.array(Q_eq_2).shape)
    Q_res = np.linalg.multi_dot([input_features.T, Q_eq_2, input_features])
    # print(pd.DataFrame(Q_res))
    print('PCA Q Residual matrix shape:', np.array(Q_res).shape)
    return Q_res

编辑2：

我设法整理出使用 Q-Residual、Hotelling 的 T 平方统计量和 Phi（Q-Residual 和 Hotelling 的 T 平方的组合）到 PCA 模型的方程，我使用了以下具有贡献的方程：

def hotelling_tsquared_index_PCA(input_features, PCA_scores, PCA_loading, eigenvalues):
    PCA_scores = PCA_scores
    PCA_loading = PCA_loading
    eigenvalues = eigenvalues

    #### Working and correct:    
    # t2 = np.array(np.diag(np.linalg.multi_dot([input_features, PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T, input_features.T])))
    t2 = np.array([xi.dot(PCA_loading).dot(np.diag(eigenvalues**-1)).dot(PCA_loading.T).dot(xi.T) for xi in input_features])

    # Contribution:
    #### CDC:
    # t2_CDC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    t2_CDC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    t2_CDC = np.array(np.diag(np.linalg.multi_dot([input_features, (t2_CDC_eq_1 ** 0.5), eye_M.T, eye_M, (t2_CDC_eq_1 ** 0.5), input_features.T])))
    t2_CDC = np.where(np.isnan(t2_CDC), 0, t2_CDC)
    
    #### PDC:
    # t2_PDC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    t2_PDC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    t2_PDC = np.array(np.diag(np.linalg.multi_dot([input_features, t2_PDC_eq_1, eye_M.T, eye_M, input_features.T])))
    t2_PDC = np.where(np.isnan(t2_PDC), 0, t2_PDC)
    
    #### DC:
    # t2_DC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    t2_DC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    t2_DC = np.array(np.diag(np.linalg.multi_dot([input_features, eye_M.T, eye_M, t2_DC_eq_1, eye_M.T, eye_M, input_features.T])))
    t2_DC = np.where(np.isnan(t2_DC), 0, t2_DC)
    
    #### RBC:
    # t2_RBC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    t2_RBC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    t2_RBC = np.array(np.diag(np.linalg.multi_dot([input_features, t2_RBC_eq_1, eye_M.T, np.linalg.pinv(np.linalg.multi_dot([eye_M, t2_RBC_eq_1, eye_M.T])), eye_M, t2_RBC_eq_1, input_features.T])))
    t2_RBC = np.where(np.isnan(t2_RBC), 0, t2_RBC)
    
    #### ABC (1):
    # # t2_ABC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    # t2_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    # print(t2_ABC_eq_1)
    # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(t2_ABC_eq_1).shape)
    # t2_ABC = t2_RBC / t2_ABC_eq_1
    # # print(t2_ABC)
    # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(t2_ABC).shape)
    # t2_ABC = np.where(np.isnan(t2_ABC), 0, t2_ABC)
    # print(t2_ABC)

    # #### ABC (2):    
    # eye_M = np.identity(7)
    # t2_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), input_features])
    # t2_ABC_eq_2 = (np.linalg.multi_dot([eye_M.T, t2_ABC_eq_1, PCA_loading.T]) ** 2)
    # t2_ABC_eq_3 = np.linalg.multi_dot([eye_M.T, t2_ABC_eq_1, eye_M, input_features.T, t2_ABC_eq_1, input_features])
    # print(t2_ABC_eq_1)
    # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(t2_ABC_eq_1).shape)
    # t2_ABC = t2_ABC_eq_2 / t2_ABC_eq_3
    # # print(t2_ABC)
    # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(t2_ABC).shape)
    # t2_ABC = np.where(np.isnan(t2_ABC), 0, t2_ABC)
    # print(t2_ABC)
    # return t2
    return t2, t2_CDC, t2_PDC, t2_DC, t2_RBC

def Q_Residual_index_PCA(input_features, PCA_scores, PCA_loading, eigenvalues):
    PCA_scores = PCA_scores
    PCA_loading = PCA_loading
    eigenvalues = eigenvalues
    
    # Equation 1:
    Q_eq_1 = np.dot(PCA_scores, PCA_loading.T)
    Q_err = input_features - Q_eq_1
    Q_res = np.sum(Q_err**2, axis=1)

    # Equation 2:
    Q_eq_1 = np.linalg.multi_dot([LDA_loading.T, LDA_loading])
    Q_eq_2 = np.identity(7) - Q_eq_1
    Q_res = np.linalg.multi_dot([input_features, Q_eq_2, input_features.T])
    
    # Contribution:
    #### CDC:
    # Q_res_CDC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    Q_res_CDC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    Q_res_CDC = np.array(np.diag(np.linalg.multi_dot([input_features, (Q_res_CDC_eq_1 ** 0.5), eye_M.T, eye_M, (Q_res_CDC_eq_1 ** 0.5), input_features.T])))
    Q_res_CDC = np.where(np.isnan(Q_res_CDC), 0, Q_res_CDC)
    
    #### PDC:
    # Q_res_PDC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    Q_res_PDC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    Q_res_PDC = np.array(np.diag(np.linalg.multi_dot([input_features, Q_res_PDC_eq_1, eye_M.T, eye_M, input_features.T])))
    Q_res_PDC = np.where(np.isnan(Q_res_PDC), 0, Q_res_PDC)
    
    #### DC:
    # Q_res_DC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    Q_res_DC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    Q_res_DC = np.array(np.diag(np.linalg.multi_dot([input_features, eye_M.T, eye_M, Q_res_DC_eq_1, eye_M.T, eye_M, input_features.T])))
    Q_res_DC = np.where(np.isnan(Q_res_DC), 0, Q_res_DC)
    
    #### RBC:
    # Q_res_RBC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    Q_res_RBC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    eye_M = np.identity(7)
    Q_res_RBC_eq_2 = np.linalg.multi_dot([eye_M, Q_res_RBC_eq_1, eye_M.T])
    Q_res_RBC = np.array(np.diag(np.linalg.multi_dot([input_features, Q_res_RBC_eq_1, eye_M.T, np.linalg.pinv(Q_res_RBC_eq_2), eye_M, Q_res_RBC_eq_1, input_features.T])))
    Q_res_RBC = np.where(np.isnan(Q_res_RBC), 0, Q_res_RBC)
    
    # #### ABC (1):
    # # # Q_res_ABC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    # # Q_res_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    # # print(Q_res_ABC_eq_1)
    # # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(Q_res_ABC_eq_1).shape)
    # # Q_res_ABC = Q_res_RBC / Q_res_ABC_eq_1
    # # # print(Q_res_ABC)
    # # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(Q_res_ABC).shape)
    # # Q_res_ABC = np.where(np.isnan(Q_res_ABC), 0, Q_res_ABC)
    # # print(Q_res_ABC)

    # #### ABC (2):    
    # eye_M = np.identity(7)
    # Q_res_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), input_features])
    # Q_res_ABC_eq_2 = (np.linalg.multi_dot([eye_M.T, Q_res_ABC_eq_1, PCA_loading.T]) ** 2)
    # Q_res_ABC_eq_3 = np.linalg.multi_dot([eye_M.T, Q_res_ABC_eq_1, eye_M, input_features.T, Q_res_ABC_eq_1, input_features])
    # print(Q_res_ABC_eq_1)
    # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(Q_res_ABC_eq_1).shape)
    # Q_res_ABC = Q_res_ABC_eq_2 / Q_res_ABC_eq_3
    # # print(Q_res_ABC)
    # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(Q_res_ABC).shape)
    # Q_res_ABC = np.where(np.isnan(Q_res_ABC), 0, Q_res_ABC)
    # print(Q_res_ABC)    
    return Q_res, Q_res_CDC, Q_res_PDC, Q_res_DC, Q_res_RBC

def Phi_index_PCA(input_features, PCA_scores, PCA_loading, eigenvalues):
    PCA_scores = PCA_scores
    PCA_loading = PCA_loading
    eigenvalues = eigenvalues
    
    Q_eq_1 = np.linalg.multi_dot([PCA_loading, PCA_loading.T])
    Q_eq_2 = np.identity(7) - Q_eq_1
    t2_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    Phi_eq_1 = Q_eq_2 + t2_eq_1
    Phi_res = np.array(np.diag(np.linalg.multi_dot([input_features, Phi_eq_1, input_features.T])))
    
    # Contribution:
    #### CDC:
    Phi_res_CDC_eq_1 = Phi_eq_1
    eye_M = np.identity(7)
    Phi_res_CDC = np.array(np.diag(np.linalg.multi_dot([input_features, (Phi_res_CDC_eq_1 ** 0.5), eye_M.T, eye_M, (Phi_res_CDC_eq_1 ** 0.5), input_features.T])))
    Phi_res_CDC = np.where(np.isnan(Phi_res_CDC), 0, Phi_res_CDC)
    
    #### PDC:
    Phi_res_PDC_eq_1 = Phi_eq_1
    eye_M = np.identity(7)
    Phi_res_PDC = np.array(np.diag(np.linalg.multi_dot([input_features, Phi_res_PDC_eq_1, eye_M.T, eye_M, input_features.T])))
    Phi_res_PDC = np.where(np.isnan(Phi_res_PDC), 0, Phi_res_PDC)
    
    #### DC:
    Phi_res_DC_eq_1 = Phi_eq_1
    eye_M = np.identity(7)
    Phi_res_DC = np.array(np.diag(np.linalg.multi_dot([input_features, eye_M.T, eye_M, Phi_res_DC_eq_1, eye_M.T, eye_M, input_features.T])))
    Phi_res_DC = np.where(np.isnan(Phi_res_DC), 0, Phi_res_DC)
    
    #### RBC:
    Phi_res_RBC_eq_1 = Phi_eq_1
    eye_M = np.identity(7)
    Phi_res_RBC = np.array(np.diag(np.linalg.multi_dot([input_features, Phi_res_RBC_eq_1, eye_M.T, np.linalg.pinv(np.linalg.multi_dot([eye_M, Phi_res_RBC_eq_1, eye_M.T])), eye_M, Phi_res_RBC_eq_1, input_features.T])))
    Phi_res_RBC = np.where(np.isnan(Phi_res_RBC), 0, Phi_res_RBC)
    
    # #### ABC (1):
    # # # Phi_res_ABC_eq_1 = np.linalg.multi_dot(PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T)
    # # Phi_res_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), PCA_loading.T])
    # # print(Phi_res_ABC_eq_1)
    # # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(Phi_res_ABC_eq_1).shape)
    # # Phi_res_ABC = Phi_res_RBC / Phi_res_ABC_eq_1
    # # # print(Phi_res_ABC)
    # # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(Phi_res_ABC).shape)
    # # Phi_res_ABC = np.where(np.isnan(Phi_res_ABC), 0, Phi_res_ABC)
    # # print(Phi_res_ABC)

    # #### ABC (2):    
    # eye_M = np.identity(7)
    # Phi_res_ABC_eq_1 = np.linalg.multi_dot([PCA_loading, np.linalg.inv(np.diag(eigenvalues)), input_features])
    # Phi_res_ABC_eq_2 = (np.linalg.multi_dot([eye_M.T, Phi_res_ABC_eq_1, PCA_loading.T]) ** 2)
    # Phi_res_ABC_eq_3 = np.linalg.multi_dot([eye_M.T, Phi_res_ABC_eq_1, eye_M, input_features.T, Phi_res_ABC_eq_1, input_features])
    # print(Phi_res_ABC_eq_1)
    # print('Contribution Equation [1] of PCA hotellings T^2 matrix shape:', np.array(Phi_res_ABC_eq_1).shape)
    # Phi_res_ABC = Phi_res_ABC_eq_2 / Phi_res_ABC_eq_3
    # # print(Phi_res_ABC)
    # print('Contribution of PCA hotellings T^2 matrix shape:', np.array(Phi_res_ABC).shape)
    # Phi_res_ABC = np.where(np.isnan(Phi_res_ABC), 0, Phi_res_ABC)
    # print(Phi_res_ABC)    
    return Phi_res, Phi_res_CDC, Phi_res_PDC, Phi_res_DC, Phi_res_RBC

然而，当涉及到 LDA 模型的 Q-residual 时，它会不断地围绕这个等式出现错误：

Q_eq_2 = np.identity(7) - Q_eq_1

我使用以下内容来获取加载和分数，

主成分分析：

print('Model: ', pca)
# pca = PCA(n_components=2)
PrincipalComponents = pca.fit_transform(X_std)
pca_f = pca.fit(X_std)
PCA_scores = pca_f.transform(X_std)
PCA_loading = (pca_f.components_).T
eigenvalues = pca_f.explained_variance_

低密度脂蛋白：

DiscriminantComponents = lda.fit_transform(X_std, y_Cat)
lda_f = lda.fit(X_std, y_Cat)
LDA_scores = lda_f.transform(X_std)
# LDA_loading = (lda_f.fit_transform(X_std, y_Cat)).T
LDA_loading = (lda_f.fit_transform(X_std, y_Cat)).T
eigenvalues = lda_f.explained_variance_ratio_

如何使用 LDA 获取三个索引？

python - 如何在 python 中使用对 PCA 和 LDA 的绘图来实现 Q 和 T 统计测试？

0 回答 0

Related

Reference