0

我是一名 C 语言新手,所以请原谅我知识上的不足。我进步得很慢,但确实在进步。我想知道,为什么把结构体传给一个打算修改其成员的函数之后,我就无法访问这些成员了。这个问题出现在下面的代码中。基本上,我的程序分析 XML 文档,并查找某些模式来提取文本。在 main 函数中,我创建了结构体数组:artinfo artIn[128] = {}; 该结构体在我的头文件中定义为:

#ifndef FILE_TABLE_H
#define FILE_TABLE_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Fixed-size character buffers holding the text fields extracted for one
 * article. Every member is a plain char array, so the struct has no padding
 * and can be cleared with a single memset.
 */
typedef struct article_info {
    /* Six separate author slots of 512 bytes each. */
    char author1[512];
    char author2[512];
    char author3[512];
    char author4[512];
    char author5[512];
    char author6[512];

    char title[2048];
    char version[4];
    /* NOTE(review): 4 bytes cannot hold a four-digit year plus its NUL
     * terminator -- confirm how this field is meant to be filled. */
    char year[4];
    char page[64];
    char abstract[4096];
    char notes[4096];
} artinfo;

#ifdef __cplusplus
}
#endif

#endif /* FILE_TABLE_H */

在实例化它并用 memset 清零之后,我把它传给另一个函数,该函数负责提取文本并把结果写回结构体中。函数如下所示。

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[]){

int isNewline;             /* Boolean indicating we've read a CR or LF */
long lFileLen;             /* Length of file */
long lIndex;               /* Index into cThisLine array */
long lLineCount;           /* Current line number */
long lLineLen;             /* Current line length */
long lStartPos;            /* Offset of start of current line */
long lTotalChars;          /* Total characters read */
char cThisLine[BUFSIZE];     /* Contents of current line */
char *cFile;                  /* Dynamically allocated buffer (entire file) */
char *cThisPtr;               /* Pointer to current position in cFile */
char cNextLine[BUFSIZE];
char buffer[BUFSIZE];
char title[] = "<text top=\"245\"";
char ending[] = "$";
char author[] = "$</text>";
bool tfound, afound;
char *match, *cNextLinePtr;
long lNextLineCount;
int i, j;

//initialize some values
tfound = false;
afound = false;
fseek(input, 0L, SEEK_END);  /* Position to end of file */
lFileLen = ftell(input);     /* Get file length */
rewind(input);               /* Back to start of file */
memset(&cThisLine,0,sizeof(cThisLine));
memset(&cNextLine,0,sizeof(cNextLine));
memset(&buffer,0,sizeof(buffer));

printf("TEST: Entered read parser\n");
cFile = calloc(lFileLen + 1, sizeof(char));
printf("TEST:\n");
if(cFile == NULL )
{
    printf("\nInsufficient memory to read file.\n");
    return 0;
}

fread(cFile, lFileLen, 1, input); /* Read the entire file into cFile */
printf("TEST: read the file in\n");
lLineCount  = 0L;
lTotalChars = 0L;

cThisPtr    = cFile;              /* Point to beginning of array */
printf("TEST: Got to here.\n");

while (*cThisPtr)                 /* Read until reaching null char */
{
    //printf("TEST: Got to here.\n");
    lIndex    = 0L;                 /* Reset counters and flags */
    isNewline = 0;
    lStartPos = lTotalChars;

    while (*cThisPtr)               /* Read until reaching null char */
    {
        if (!isNewline)               /* Haven't read a CR or LF yet */
        {
            if (*cThisPtr == CR || *cThisPtr == LF) /* This char IS a CR or LF */
            isNewline = 1;                        /* Set flag */
            //printf("Flag was set");
            //exit(0);
        }

        else if (*cThisPtr != CR && *cThisPtr != LF) /* Already found CR or LF */
            break;                                     /* Done with line */

        cThisLine[lIndex++] = *cThisPtr++; /* Add char to output and increment */
        ++lTotalChars;

    } /* end while (*cThisPtr) */

    cThisLine[lIndex] = '\0';     /* Terminate the string */
    ++lLineCount;                 /* Increment the line counter */
    lLineLen = strlen(cThisLine); /* Get length of line */
    /* THIS is where I look for the matches to the patterns for my info. */
//    printf("TEST: Printing 1 line\n%s", cThisLine);
//    exit(0);

    if(strstr(cThisLine,title)!= NULL && tfound == false)
    {
        printf("TEST: Looking for title info.\n");
        match = strstr(cThisLine,">");
        //printf("TEST: match first points to %c\n", *match);
        //exit(0);
        j = 0;
        match++;
        //printf("TEST: match points to %c\n", *match);
        while(*match!='<')
        {
            //pridntf("TEST: match is %c\n", *match);
            //printf("TEST: %c", *match);
            buffer[j] = *match;
            //printf("TEST: %c", buffer);
            j++;
            match++;
        }
        lNextLineCount = lLineCount;
        do
        {
            lNextLineCount = lNextLineCount + 1;
            readaheadone(cFile, lNextLineCount, cNextLinePtr, lbuf);
            strcpy(cNextLine, lbuf->bline);
            cNextLinePtr = cNextLine;
            printf("TEST: the current line is - %s\nthe next line is %s\n",cThisLine,cNextLine);
            //printf("TEST: Before test exit");
            //exit(0);
            if(strstr(cNextLinePtr,ending)!=NULL)
            {
                printf("TEST: Author Info spans more than 1 line.\n");
                match = strstr(cThisLine,">");
                j++; //i DON'T RESET COUNTER SO IT JUST KEEPS FILLING THE BUFFER AFTER LEAVING A SPACE
                match++;
                //printf("TEST: match points to %c\n", *match);
                while(*match!='<')
                {
                    //pridntf("TEST: match is %c\n", *match);
                    //printf("TEST: %c", *match);
                    buffer[j] = *match;
                    //printf("TEST: %c", buffer);
                    j++;
                    match++;
                }
            }

        } while(strstr(cNextLinePtr,ending)!=NULL);

    strcpy((*artIn[0]).title, buffer);
    printf("The title is: %s\n", buffer);//artinfo[0].author);
    printf("The title is: %s\n", (*artIn[0]).title);
    tfound = true;
    }

    if(strstr(cThisLine,author)!= NULL && afound == false)
    {
        printf("TEST: Looking for author info.\n");
        match = strstr(cThisLine,">");

    }

一切似乎都很好,直到执行到 strcpy((*artIn[0]).title, buffer); 和 printf("The title is: %s\n", (*artIn[0]).title); 这两条语句。我总是收到错误和堆栈转储,即:1 [main] Parst_Text 7296 open_stackdumpfile: Dumping stack trace to Parse_Text.exe.stackdump。我不确定自己哪里做错了——我认为问题出在访问结构体数组成员的那些语句上,因为如果删除这些语句,程序就能正常运行。我不确定是不是在栈上放了太多东西,也不知道如何利用堆栈转储来找出问题所在。感谢所有建议,但请保持建设性。谢谢。另外,如果我在这个问题里塞了太多内容,也请告诉我,因为我不确定该提供多少细节。

编辑:应要求,下面是调用 readerParser 的函数(以及所在文件的开头部分):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>   //for close()
#include <ctype.h>    //for toupper()
#include <stdbool.h>  //for bool type
#include <errno.h>    //where the standard error is written for calls to OS
#include "file_table.h"
#define BUFSIZE 1024     /* Size in bytes of the fixed-length line and scratch buffers */
#define CR 13            /* Decimal code of Carriage Return char */
#define LF 10            /* Decimal code of Line Feed char */
#define EOF_MARKER 26    /* Decimal code of DOS end-of-file marker */

/*
 * Defined elsewhere. main() calls it with a directory path and afterwards
 * reads info->success, info->notes, info->fileCount and the table entries'
 * type/fileName/fileSize, so it presumably fills those in -- confirm against
 * the definition.
 */
void fillFileTable(char *filedir, ftable *table, bool populate, ftinfo *info);

/*
 * Reads the whole input stream into memory, scans it line by line for the
 * title/author patterns and copies the title it finds into the first artIn
 * record. Returns false when the file buffer cannot be allocated.
 *
 * NOTE(review): the fourth parameter is declared as an array of POINTERS to
 * artinfo (it adjusts to artinfo **), yet main() passes an artinfo[128]
 * array, which decays to artinfo *. Dereferencing artIn[0] inside the
 * function then treats struct bytes as a pointer. The parameter should be
 * "artinfo artIn[]" here and in the definition, with the body using
 * artIn[0].title.
 */
bool readerParser(FILE *InputFile, FILE *OutputFile, textlbuf *lbuf, artinfo *artIn[]);

/*
 * Defined elsewhere. readerParser() calls it with the in-memory file buffer
 * and a line number and then copies lbuf->bline, so it presumably stores the
 * requested line there -- confirm against the definition.
 */
void readaheadone(char *cFile, long nextLineCount ,char *currentPtr, textlbuf *lbuf);

/*
 * Program entry point: fills the file table for a fixed directory, lists its
 * entries, then runs readerParser() on one hard-coded input file while
 * ./test_Out.txt is held open for appending.
 *
 * Returns 0 on success and 1 on any failure (file table not filled, a file
 * could not be opened or closed, or readerParser() reported failure).
 */
int main()
{
    /*
     * 128 artinfo records need well over a megabyte (each record holds more
     * than 13 KB of char arrays), which can exceed the default stack size on
     * some platforms. Static storage keeps the table off the stack and is
     * zero-initialized, so neither an initializer nor a memset is needed.
     */
    static artinfo artIn[128];
    ftable files[128];
    ftinfo info;
    textlbuf store[1];
    char dir[260] = "C:/DTEST/Parse_Text/91_1_text"; /* would be better if user could enter this - fix for later */
    ftinfo *fti = &info;
    textlbuf *lbuf = store;   /* array decays to a pointer to its first element */
    FILE *fp;                 /* input file */
    FILE *op;                 /* output file */
    int i;
    int err;
    bool iReadReturn;

    /* Clear out the automatic structures before use. */
    memset(files, 0, sizeof(files));
    memset(&info, 0, sizeof(info));
    memset(store, 0, sizeof(store));

    /* Fill the file table. */
    printf("TEST: Preparing to fill the file table...\n");
    fillFileTable(dir, files, true, fti);
    if (!info.success)
    {
        printf("Something went wrong. Now exiting");
        return 1;
    }
    printf("File table has been filled: %s\n", info.notes);
    printf("File table contains: %d\n", info.fileCount);
    printf("TEST: 'fillFileTable' is successful? --- %s\n", info.success ? "true" : "false");
    for (i = 0; i < info.fileCount; i++)
    {
        if (files[i].type == 'd')
            printf("File Folder is: %s\t its size is %d\n", files[i].fileName, files[i].fileSize);
        else
            printf("File is: %s\t its size is %d\n", files[i].fileName, files[i].fileSize);
    }
    printf("\n\n");

    op = fopen("./test_Out.txt", "a");
    if (op == NULL)              /* Could not open file */
    {
        err = errno;             /* save before any other library call can change it */
        printf("Error opening output file: %s (%d)\n", strerror(err), err);
        return 1;
    }

    /* Only one fixed input file is processed; looping over the table is a later improvement. */
    fp = fopen("./91_1_text/test1.txt", "r");
    if (fp == NULL)              /* Could not open file */
    {
        err = errno;
        printf("Error opening input file: %s (%d)\n", strerror(err), err);
        fclose(op);
        return 1;
    }

    /*
     * NOTE(review): readerParser's prototype declares its fourth parameter as
     * "artinfo *artIn[]" (an array of pointers), but artIn here decays to
     * "artinfo *". The prototype and definition should take "artinfo artIn[]";
     * that has to be corrected at the function, not at this call.
     */
    iReadReturn = readerParser(fp, op, lbuf, artIn); /* Read the file and print output */
    if (iReadReturn == false)
    {
        printf("ERROR: The file did not read correctly.\n");
        fclose(fp);
        fclose(op);
        return 1;
    }

    /* fclose returns EOF on failure; report it instead of using EOF as the exit status. */
    if (fclose(fp) != 0)
    {
        err = errno;
        printf("Error closing input file: %s (%d)\n", strerror(err), err);
        fclose(op);
        return 1;
    }
    if (fclose(op) != 0)
    {
        err = errno;
        printf("Error closing output file: %s (%d)\n", strerror(err), err);
        return 1;
    }

    printf("Program Completed Successfuly.\n");
    return 0;
}

4

2 回答 2

2

使用此函数声明:

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[])

第 4 个参数是指向 artinfo 结构的指针数组,而不是指向 artinfo 结构数组的指针。当它试图取消引用这些假定的指针时,它会失败。

相反,它应该是:

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo (*artIn)[])

此外,修复这些至少会使您的程序更加健壮。

  1. 似乎没有对 buffer 做溢出检查。
  2. 您没有显式地在 buffer 末尾写入空字符。尽管您在进入循环之前用 memset 将其清零,但一旦发生溢出,它不仅可能过长,还可能没有以空字符结尾。
  3. 您应该使用 strncpy 而不是 strcpy,否则有可能写越界,超出目标字符串已分配的空间。另外,正如 Matt 所指出的,调用 strncpy 之后应手动在字符串末尾添加空字符。
于 2014-05-20T04:20:26.187 回答
0

已经有很多关于如何提高代码健壮性的建议。我将回答您在原始帖子中提到的主要问题。

你说,

一切似乎都很好,直到它到达:strcpy((*artIn[0]).title, buffer);printf("The title is: %s\n", ((*artIn[0]).title);语句。我总是收到错误和堆栈转储 - 即1 [main]Parst_Text 7296 open_stackdumpfile: Dumping stack trace to Parse_Text.exe.stackdump我不确定我做错了什么

这个问题的罪魁祸首是你有

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[])

但是当您调用该函数时,您正在使用:

readerParser(fp, op, lbuf, artIn);

我很惊讶编译器没有标记警告或错误。artIn声明为:

artinfo artIn[128] = {};

当您在函数调用中使用 artIn 时,它会退化为 artinfo* 类型的指针。如果形参类型是 artinfo* artIn 或 artinfo artIn[],这样传参没有问题;但形参类型是 artinfo* artIn[] 时就不行。

您应该在 readerParser 的声明和定义中更改 artIn 的类型:

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo artIn[])

然后把该函数中所有用到 *artIn 的地方都替换为 artIn(例如把 (*artIn[0]).title 改为 artIn[0].title)。

希望这可以帮助。

于 2014-05-20T17:07:48.920 回答