Code to split data into a training / test subsets is below. Note that data_points is one long vector of size items*attr, and data_labels is a vector of size items.
int split_data(int items, int attr, double *data_points, int *data_labels, double **split_train_points, int **split_train_labels, double **split_test_points, int **split_test_labels)
{
srand(time(NULL));
int i, j;
double temp0, temp1;
double sorter[items][2];
*split_train_points = malloc(floor(SPLIT_PROP*items * attr) * sizeof(double));
*split_train_labels = malloc(floor(SPLIT_PROP*items ) * sizeof(int));
*split_test_points = malloc(ceil((1-SPLIT_PROP)*items * attr) * sizeof(double));
*split_test_labels = malloc(ceil((1-SPLIT_PROP)*items ) * sizeof(int));
// create a 2d array with element number in one column and a random number in the other
for (i = 0; i < items; i++) {
sorter[i][0] = i;
sorter[i][1] = rand() / (double)RAND_MAX;
}
// sort by the random number column
for (i = items-1; i > 0; i--) {
for (j = 1; j <= i; j++) {
if (sorter[j-1][1] > sorter[j][1]) {
temp0 = sorter[j-1][0];
temp1 = sorter[j-1][1];
sorter[j-1][0] = sorter[j][0];
sorter[j-1][1] = sorter[j][1];
sorter[j][0] = temp0;
sorter[j][1] = temp1;
}
}
}
int cutoff = floor(SPLIT_PROP*items);
int element = 0;
// now we have a bunch of indices in a random order. we select the first 70% to store into our split_train datasets
for (i = 0; i < cutoff; i++) {
element = (int)sorter[i][0];
*split_train_labels[i] = data_labels[element];
printf("success!\n");
for (j = 0; j < attr; j++) {
printf("j: %d, data_points_element: %d\n",j,attr*element+j);
//SEGFAULT OCCURS HERE WHEN J=4 EVERY TIME EVEN AS ELEMENT VALUE CHANGES DUE TO RANDOMNESS
*split_train_points[attr*i+j] = data_points[attr*element+j];
printf("j out! %d\n",j);
}
}
for (i = cutoff; i < items; i++) {
*split_train_labels[i - cutoff] = data_labels[(int)sorter[i][0]];
for (j = 0; j < attr; j++) {
*split_train_points[attr*(i-cutoff)+j] = data_points[attr*(int)sorter[i][0]+j];
}
}
return 0;
}
As noted in the code, SEGFAULT occurs at the same line, at j=4, even though "element" is a random number.