-1

我正在尝试使用 strsep 去除 CSV 文件中的多余字符。问题是程序运行时会出现段错误(segmentation fault),而我不知道原因。代码如下:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * One record of the users CSV file.
 *
 * NOTE(review): `User` names a POINTER to this anonymous struct, not the
 * struct itself, so sizeof(User) is the size of a pointer. The struct has no
 * tag and no non-pointer typedef, so its size can only be spelled through a
 * dereference, e.g. sizeof(*some_user).
 */
typedef struct {

    int id, followers, following, public_gists; 
    int public_repos;
    char *login;
    char *type;
    char *created_at;
    int *follower_list;   /* presumably ids of followers; ownership not shown here */
    int *following_list;  /* presumably ids of followed users; ownership not shown here */
        
} *User;

/*
 * Forward declarations. The empty parentheses make these old-style
 * (non-prototype) declarations, so argument types are not checked at the
 * call sites.
 */
void checkUsersFile();
FILE *createCSV();
void corrigirFicheiro();
User criarUser();

/* Incremented by criarUser() every time it rejects a line because of an
 * empty field; printed by checkUsersFile(). */
int count = 0;

/*
 * Opens the input CSV, creates "users-ok.csv", runs corrigirFicheiro() over
 * the two streams and then prints the global `count`.
 *
 * Returns early, after printing a message, if either file cannot be opened.
 * Both streams are closed before the function returns.
 */
void checkUsersFile() {
    /* Input file with the raw user records. */
    FILE *file = fopen("ficheirosG1/users-set2.csv", "r");

    /* Nothing to do when the input file cannot be opened. */
    if(!file) {
        printf("Ficheiro não encontrado");
        return;
    }

    /* Output file "users-ok.csv". */
    FILE *newFile = createCSV("users-ok.csv");
    if (!newFile) {
        perror("users-ok.csv");
        fclose(file);   /* do not leak the input stream on this path */
        return;
    }

    corrigirFicheiro(file, newFile);

    fclose(file);
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(newFile) != 0) {
        perror("users-ok.csv");
    }

    printf("%d\n", count);
}

/* Opens `nome` for writing (created if missing, truncated otherwise) and
 * hands the stream straight back; the result is NULL when fopen() fails. */
FILE *createCSV(char *nome) {
    return fopen(nome, "w");
}

/*
 * Reads the users file line by line and builds a User from each line with
 * criarUser(), printing "ok" to stdout for every line that was accepted.
 *
 * file     stream to read the lines from
 * newFile  intended destination for the corrected data ("users-ok.csv");
 *          it is not written to in the code shown here
 */
void corrigirFicheiro(FILE *file, FILE *newFile) {

    //imprimirPrimeiraLinha(file, newFile);

    char string[200000];
    /* One line of the file, at most 200,000 bytes including the terminator. */

    while ((fgets(string, 200000, file))) {
        if (string[0] != '\0') {
            /* 1. Create the user */
            /* 2. Print the user */

            /* NOTE(review): `&string` has type char (*)[200000] while
             * criarUser() takes char *; passing plain `string` would match
             * the parameter type. */
            User user = criarUser(&string);
            if (user != NULL) {
                printf("ok\n");
            }
            /* free(NULL) is a no-op, so this is also fine for rejected lines. */
            free(user);
            
        }
    }

    //free(string);

}

/*
 * Creates a User from one line of the file.
 *
 * Splits `str` in place into 10 fields at ';', '\n' or '\r'. If any field is
 * empty the global `count` is incremented and NULL is returned. In the code
 * shown, no field is stored into the returned object.
 *
 * Returns a malloc()ed object that the caller must free(), or NULL.
 */
User criarUser(char *str) {
    
    User novoUser;

    /* NOTE(review): User is a pointer type, so sizeof(User) is the size of a
     * pointer, not of the struct it points to; sizeof(*novoUser) would give
     * the struct size. */
    novoUser = (User) malloc(sizeof(User));
    
    for(int i = 0; i<10; i++) {

        //char *a = strdup(strsep(&str, ";"));
        //char *b = strdup(strsep(&a, "\n"));
        /* NOTE(review): strsep() is documented to return NULL once `str` has
         * become NULL, i.e. when the line holds fewer than 10 fields; `p` is
         * not checked for NULL before strlen() below. */
        char *p = strsep(&str, ";\n\r");

        if (strlen(p) == 0) {
            count++;
            free(novoUser);
            return NULL;
        }
            
    }

    return novoUser;
}


/* Program entry point: runs the users-file check, then reports success. */
int main(void)
{
    checkUsersFile();

    return EXIT_SUCCESS;
}

用 gdb 调试时,它显示段错误发生在 `if (strlen(p) == 0) {` 这一行,所以程序甚至还没有执行到 switch case。我不知道为什么会这样。

谢谢

4

2 回答 2

2

我看不出有什么理由认为是 strsep() 调用导致了您遇到的错误。

然而这是错误的:

    User novoUser = (User) malloc(sizeof(User));

它很可能就是导致您遇到的错误的原因。

User 是指针类型,因此 sizeof(User) 是一个指针的大小,对于 User 所指向的那种结构体来说不够大。当您稍后尝试给它所指向的结构体的成员赋值(代码已省略),或在 printUser()(同样已省略)中访问这些成员时,就会越过所分配对象的边界。这正是可能导致段错误的情况。

书写这类分配语句的一个好习惯是,用接收结果的变量本身来确定要分配的空间大小,例如:

    User novoUser = malloc(sizeof(*novoUser));

请注意,我还去掉了不必要的强制类型转换(cast)。


然而,正如我在评论中所说,像您的 User 那样把指针性质隐藏在 typedef 后面是一种糟糕的风格;而且就我个人而言,即使是避开了这个陷阱的大多数 typedef,我也不太喜欢。

以下是如何使用更好的 typedef 来做到这一点:

/* User record. The typedef names the struct itself, so a pointer to it is
 * always spelled `User *` explicitly. */
typedef struct {
    int id;
    int followers;
    int following;
    int public_gists;
    int public_repos;
    char *login;
    char *type;
    char *created_at;
    int *follower_list;
    int *following_list;
} User;  /* a struct type, not a pointer type */

// ...

/*
 * Sketch of criarUser() with the non-pointer typedef: allocates one User and
 * returns it to the caller, who becomes responsible for free()ing it. The
 * parsing of `str` is elided ("// ...").
 */
User *criarUser(char *str) {
    // ...
    /* sizeof(*novoUser) is the size of the pointed-to struct, whatever its type is named. */
    User *novoUser = malloc(sizeof(*novoUser));  // Note: no change on the right-hand side
    // ...
    return novoUser;
}

但这就是我的做法,没有 typedef:

/* User record declared with a plain struct tag (no typedef). */
struct user {
    int id;
    int followers;
    int following;
    int public_gists;
    int public_repos;
    char *login;
    char *type;
    char *created_at;
    int *follower_list;
    int *following_list;
};

// ...

/*
 * Sketch of criarUser() without any typedef: allocates one struct user and
 * returns it to the caller, who becomes responsible for free()ing it. The
 * parsing of `str` is elided ("// ...").
 */
struct user *criarUser(char *str) {
    // ...
    /* sizeof(*novoUser) is the size of the pointed-to struct. */
    struct user *novoUser = malloc(sizeof(*novoUser));  // still no change on the right-hand side
    // ...
    return novoUser;
}
于 2021-11-09T19:25:03.753 回答
1

In answer to your question I wrote a short example that illustrates what is needed. In essence you need to allocate storage for string and pass the address of string to criarUser(). You cannot use an array because the type passed to criarUser() would be pointer-to-array not pointer-to-pointer. (note: you can use the array so long as it is allowed to decay to a pointer so taking the address does not result in pointer-to-array -- example at end below)

A (close) example of corrigirFicheiro() with the changes needed to use allocated storage is:

/*
 * Reads `file` line by line, builds a User from each line with criarUser()
 * and prints every successfully parsed user to `newFile` with printUser().
 *
 * The line buffer is heap-allocated. If that allocation fails, the error is
 * reported with perror() and the function returns without reading anything.
 */
void corrigirFicheiro(FILE *file, FILE *newFile)
{

    char *string = malloc (200000);           /* allocate for string */
    if (string == NULL) {                     /* validate allocation */
        perror ("malloc");
        return;
    }
    char *original_ptr = string;              /* save original pointer */

    /* Always read into the start of the buffer: only there are the full
     * 200000 bytes available. */
    while ((fgets(original_ptr, 200000, file))) {
        /* criarUser() hands &string to strsep(), which advances the pointer
         * past each field and sets it to NULL when the text is exhausted.
         * Rewind it for every line so parsing starts at the line just read. */
        string = original_ptr;
        if (string[0] != '\0') {
            User *user = criarUser(&string);  /* pass address of string */
            if (user != NULL) {
                printUser(user, newFile);
            }
            free(user);                       /* free(NULL) is a no-op */
        }
    }
    free (original_ptr);                      /* free original pointer */
}

An abbreviated struct was used in the following example, but the principle is the same. For sample input, you can simply pipe a couple of lines from printf and read on stdin and write to stdout. I used:

$ printf "one;two;3\nfour;five;6\n" | ./bin/strsep_example

A short MCVE (where I have taken liberty to remove the typedef'ed pointer) would be

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXC      128
#define DELIM     ";\r\n"
#define NFIELDS   3

/* Abbreviated user record used by this example. The two text fields are
 * stored inline, so each holds at most MAXC - 1 characters plus the
 * terminating '\0'. */
typedef struct {
  char login[MAXC];
  char type[MAXC];
  int public_repos;
} User;

/* Writes one user to `newFile`: an empty line followed by three labelled
 * lines holding the login, the type and the public repository count. */
void printUser (User *user, FILE *newFile)
{
  const char *login = user->login;
  const char *type  = user->type;
  int repos         = user->public_repos;

  fprintf (newFile, "\nlogin : %s\ntype  : %s\nrepo  : %d\n", login, type, repos);
}

/*
 * Builds a User from the first NFIELDS DELIM-separated fields of *str.
 *
 * str  in/out: address of the pointer to the text to parse. strsep()
 *      overwrites each delimiter it finds with '\0' and advances *str past
 *      it, setting *str to NULL once the text is exhausted.
 *
 * Returns a malloc()ed User that the caller must free(), or NULL when the
 * allocation fails, a field is missing or empty, or login/type does not fit
 * in its fixed-size buffer.
 */
User *criarUser(char **str)
{
    User *novoUser = malloc(sizeof *novoUser);
    if (novoUser == NULL) {                   /* validate allocation */
        return NULL;
    }

    for(int i = 0; i<NFIELDS; i++) {

        char *p = strsep(str, DELIM);

        /* strsep() returns NULL when the line has fewer than NFIELDS fields.
         * Check that, and emptiness, BEFORE the field is read or copied. */
        if (p == NULL || *p == '\0') {
            free(novoUser);
            return NULL;
        }

        size_t len = strlen(p);

        switch (i) {
          case 0:
            /* reject rather than overflow the buffer or silently truncate */
            if (len >= sizeof novoUser->login) {
                free(novoUser);
                return NULL;
            }
            memcpy (novoUser->login, p, len + 1);   /* +1 copies the '\0' */
            break;
          case 1:
            if (len >= sizeof novoUser->type) {
                free(novoUser);
                return NULL;
            }
            memcpy (novoUser->type, p, len + 1);
            break;
          case 2:
            /* atoi() does no error detection; non-numeric text yields 0 */
            novoUser->public_repos = atoi(p);
            break;
          default:
            break;
        }

    }

    return novoUser;
}

/*
 * Reads `file` line by line, builds a User from each line with criarUser()
 * and prints every successfully parsed user to `newFile` with printUser().
 *
 * The line buffer is heap-allocated. If that allocation fails, the error is
 * reported with perror() and the function returns without reading anything.
 */
void corrigirFicheiro(FILE *file, FILE *newFile)
{

    char *string = malloc (200000);           /* allocate for string */
    if (string == NULL) {                     /* validate allocation */
        perror ("malloc");
        return;
    }
    char *original_ptr = string;              /* save original pointer */

    /* Always read into the start of the buffer: only there are the full
     * 200000 bytes available. */
    while ((fgets(original_ptr, 200000, file))) {
        /* criarUser() hands &string to strsep(), which advances the pointer
         * past each field and sets it to NULL when the text is exhausted.
         * Rewind it for every line so parsing starts at the line just read. */
        string = original_ptr;
        if (string[0] != '\0') {
            User *user = criarUser(&string);  /* pass address of string */
            if (user != NULL) {
                printUser(user, newFile);
            }
            free(user);                       /* free(NULL) is a no-op */
        }
    }
    free (original_ptr);                      /* free original pointer */
}

/* Entry point: parses the file named by the first command-line argument, or
 * stdin when no argument is given, and prints the parsed users to stdout. */
int main (int argc, char **argv) {

  FILE *fp = stdin;                 /* default input */

  if (argc > 1) {                   /* a filename was supplied */
    fp = fopen (argv[1], "r");
    if (fp == NULL) {               /* could not open it for reading */
      perror ("file open failed");
      return 1;
    }
  }

  corrigirFicheiro(fp, stdout);

  if (fp != stdin) {                /* only close what was opened here */
    fclose (fp);
  }

  return 0;
}

Example Use/Output

$ printf "one;two;3\nfour;five;6\n" | ./bin/strsep_example

login : one
type  : two
repo  : 3

login : four
type  : five
repo  : 6

Memory Use/Error Check

In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.

It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.

For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.

$ printf "one;two;3\nfour;five;6\n" | valgrind ./bin/strsep_example
==6411== Memcheck, a memory error detector
==6411== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==6411== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==6411== Command: ./bin/strsep_example
==6411==

login : one
type  : two
repo  : 3

login : four
type  : five
repo  : 6
==6411==
==6411== HEAP SUMMARY:
==6411==     in use at exit: 0 bytes in 0 blocks
==6411==   total heap usage: 5 allocs, 5 frees, 205,640 bytes allocated
==6411==
==6411== All heap blocks were freed -- no leaks are possible
==6411==
==6411== For counts of detected and suppressed errors, rerun with: -v
==6411== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

Always confirm that you have freed all memory you have allocated and that there are no memory errors.

Look things over and let me know if you have questions.

Using An Array

When you pass the array as a parameter it decays to a pointer by virtue of array/pointer conversion. If that is done and the "array" type is no longer associated with the pointer you can then take the address and use automatic storage with strsep() as pointed out by @thebusybee.

The changes to the program above to do so would be:

/*
 * Builds a User from the first NFIELDS DELIM-separated fields of `str`.
 *
 * str  text to parse; it is modified in place, because strsep() overwrites
 *      each delimiter it finds with '\0'. `str` is this function's own copy
 *      of the pointer, so the caller's buffer pointer is not moved.
 *
 * Returns a malloc()ed User that the caller must free(), or NULL when the
 * allocation fails, a field is missing or empty, or login/type does not fit
 * in its fixed-size buffer.
 */
User *criarUser(char *str)
{
    User *novoUser = malloc(sizeof *novoUser);
    if (novoUser == NULL) {                   /* validate allocation */
        return NULL;
    }

    for(int i = 0; i<NFIELDS; i++) {

        char *p = strsep(&str, DELIM);

        /* strsep() returns NULL when the line has fewer than NFIELDS fields.
         * Check that, and emptiness, BEFORE the field is read or copied. */
        if (p == NULL || *p == '\0') {
            free(novoUser);
            return NULL;
        }

        size_t len = strlen(p);

        switch (i) {
          case 0:
            /* reject rather than overflow the buffer or silently truncate */
            if (len >= sizeof novoUser->login) {
                free(novoUser);
                return NULL;
            }
            memcpy (novoUser->login, p, len + 1);   /* +1 copies the '\0' */
            break;
          case 1:
            if (len >= sizeof novoUser->type) {
                free(novoUser);
                return NULL;
            }
            memcpy (novoUser->type, p, len + 1);
            break;
          case 2:
            /* atoi() does no error detection; non-numeric text yields 0 */
            novoUser->public_repos = atoi(p);
            break;
          default:
            break;
        }

    }

    return novoUser;
}

/*
 * Array-based variant: reads `file` line by line into a fixed 200000-byte
 * automatic buffer, builds a User from each non-empty line with criarUser()
 * and prints the accepted ones to `newFile` with printUser().
 */
void corrigirFicheiro(FILE *file, FILE *newFile)
{
    char line[200000];

    for (;;) {
        if (fgets(line, (int) sizeof line, file) == NULL) {
            break;                      /* end of file or read error */
        }
        if (line[0] == '\0') {
            continue;                   /* nothing to parse */
        }

        /* The array decays to char * here, so criarUser() receives its own
         * copy of the pointer. */
        User *parsed = criarUser(line);
        if (parsed) {
            printUser(parsed, newFile);
        }
        free(parsed);
    }
}

But note, without the benefit of array/pointer conversion when passing string as a parameter, you must use allocated storage. Up to you, but know the caveat.

于 2021-11-09T19:41:45.033 回答