我刚接触 TensorFlow 和 Python,目前正在开发一个神经网络,用于对 UrbanSound8K 数据集( https://urbansounddataset.weebly.com/urbansound8k.html )中的声音进行分类。
我以机器学习速成课程中的教程(https://colab.research.google.com/notebooks/mlcc/multi-class_classification_of_handwritten_digits.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=multiclass-colab&hl=en)作为程序的基础,模型效果很好,但现在我想分析模型对不同音频文件的预测结果。
我先在一个文件中创建分类器,然后(见下面的代码)调用该模型,并在我预留的测试数据集上进行评估:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.python.data import Dataset
from sklearn import metrics
# Load the two metadata tables: one used for training/validation and a
# separate one used only for the final test.
urbansound_dataframe = pd.read_csv(
    "C:\\Users\\UrbanSound8K\\metadata\\dataset_train_fin.csv",
    sep=",",
)
urbansound_dataframe_test = pd.read_csv(
    "C:\\Users\\UrbanSound8K\\metadata\\dataset_test_fin.csv",
    sep=",",
)
def construct_feature_columns(input_features):
    """Build one numeric TensorFlow feature column per input feature.

    Args:
      input_features: An iterable of feature names (iterating a Pandas
        `DataFrame` yields its column names).
    Returns:
      A `set` of numeric feature columns, one per name.
    """
    return {
        tf.feature_column.numeric_column(feature_name)
        for feature_name in input_features
    }
def parse_labels_and_features(dataset):
    """Split a dataframe into its class labels and its MFCC features.

    Args:
      dataset: A Pandas `DataFrame` containing a `classID` column and the
        columns `mfcc_1` through `mfcc_40`.
    Returns:
      A `tuple` `(labels, features)`:
        labels: A Pandas `Series` (the `classID` column).
        features: A Pandas `DataFrame` with the 40 MFCC columns, in order.
    """
    # Names of the features to use: mfcc_1 ... mfcc_40.
    feature_names = ["mfcc_{}".format(index) for index in range(1, 41)]
    return dataset["classID"], dataset[feature_names]
def create_predict_input_fn(features, labels, batch_size):
    """Create an input function that feeds examples for prediction.

    Args:
      features: The features to base predictions on.
      labels: The labels of the prediction examples.
      batch_size: Number of examples per batch.
    Returns:
      A function that returns features and labels for predictions.
    """
    def _input_fn():
        # One NumPy array per feature, keyed by feature name.
        feature_arrays = {}
        for name, column in dict(features).items():
            feature_arrays[name] = np.array(column)
        label_array = np.array(labels)

        batched = Dataset.from_tensor_slices(
            (feature_arrays, label_array)).batch(batch_size)

        # Return the next batch of data.
        batch_features, batch_labels = (
            batched.make_one_shot_iterator().get_next())
        return batch_features, batch_labels

    return _input_fn
# Use the first `percentage` percent of the rows for training and the
# remaining rows for validation.
percentage = 75
training_row_count = int((len(urbansound_dataframe) * percentage) / 100)
training_targets, training_examples = parse_labels_and_features(
    urbansound_dataframe.head(training_row_count))
validation_targets, validation_examples = parse_labels_and_features(
    urbansound_dataframe.tail(len(urbansound_dataframe) - training_row_count))
# Optimizer: Adagrad wrapped so that gradients are clipped by norm at 5.0.
learning_rate = 0.004
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(
    tf.train.AdagradOptimizer(learning_rate=learning_rate), 5.0)

# Classifier: three hidden layers of 2000 units each, 11 output classes,
# keeping at most one checkpoint in the model directory.
hidden_units = [2000, 2000, 2000]
my_classifier = tf.estimator.DNNClassifier(
    feature_columns=construct_feature_columns(training_examples),
    n_classes=11,
    hidden_units=hidden_units,
    optimizer=my_optimizer,
    config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1),
    model_dir="C:\\Users\\PycharmProjects\\MODEL_UrbanSound_05_NN")
###test on unused directories
test_targets, test_examples = parse_labels_and_features(
    urbansound_dataframe_test)
predict_test_input_fn = create_predict_input_fn(
    test_examples, test_targets, batch_size=400)

# Each prediction is indexed by 'class_ids'; keep its first entry.
predicted_class_ids = [
    prediction['class_ids'][0]
    for prediction in my_classifier.predict(input_fn=predict_test_input_fn)
]
test_predictions = np.array(predicted_class_ids)

# Compare the predicted classes against the true test labels.
accuracy = metrics.accuracy_score(test_targets, test_predictions)
print("Accuracy on test data: ", accuracy * 100, "%")
print("fine test")
# Output a plot of the confusion matrix.
cm = metrics.confusion_matrix(test_targets, test_predictions)

# Normalize the confusion matrix by row, i.e. divide every row by the
# number of samples in that class.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_normalized = cm.astype("float") / row_totals

ax = sns.heatmap(cm_normalized, cmap="bone_r")
ax.set_aspect(1)
plt.title("Confusion matrix on test data")
plt.ylabel("True label")
plt.xlabel("Predicted label")
plt.show()
我的主要问题是:如何修改代码,使其能够对单个文件进行预测(该文件的特征由程序的另一部分提取)。我认为我需要修改 _input_fn:
# Excerpt of the nested input function from create_predict_input_fn; it reads
# `features`, `labels` and `batch_size` from the enclosing function's scope.
def _input_fn():
# Convert every feature column to a NumPy array, keyed by feature name.
raw_features = {key: np.array(value) for key, value in dict(features).items()}
raw_targets = np.array(labels)
# Build a dataset from the (features, labels) pair, then group it into batches.
ds = Dataset.from_tensor_slices((raw_features, raw_targets))
ds = ds.batch(batch_size)
# Return the next batch of data.
feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
return feature_batch, label_batch
# The enclosing function hands back the closure itself, not its result.
return _input_fn
但我不知道该如何修改它。非常感谢任何帮助。