1

我有一个文件“data.txt”,其中包含以下内容:http://pastebin.com/FY9ZTQX6

我想获取“<”前面和后面的单词。左边是旧词,右边是新词。这是我到目前为止写的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
Name: Marcus Lorenzana
Assignment: Final
*/

/*
 * Binary tree node: stores a word, its occurrence counter, and the
 * links to the left and right child nodes.
 */
typedef struct node node;

struct node
{
    char *word;     /* the word held by this node */
    int count;      /* number of occurrences */
    node *left;     /* left child */
    node *right;    /* right child */
};

//,.?!:;-
// Seven int slots, matching the seven punctuation characters listed above
// (presumably one slot per character -- TODO confirm intended use).
// NOTE(review): nothing in the visible code reads or writes this array.
int punctuation[7];


// Forward declarations for the functions used by main().
// Adds entry to the tree whose root pointer is *dictionary.
void insert(node ** dictionary, node * entry);
// Returns the contents of the named file as a heap-allocated string,
// or NULL when the file cannot be opened.
char* readFile(char* filename);
// Prints the words of the tree, left subtree first, then the node, then
// the right subtree.
void printDictionary(node * tree);
// NOTE(review): declared here, but no definition is visible in this file.
void toLower(char** word);
// Tokenizes filecontents on spaces and newlines and looks for the "<" token.
void getReplacementWords(char *filecontents, char **newWord, char **oldWord)  ;


int main()
{

    char *word;
    char* filecontents = readFile("data.txt");
    char* oldWord;
    char* newWord;

    //create dictionary node
    node *dictionary;
    node *entry;

    //read words and punctuation in from the text file
    word = strtok (filecontents, " \n");

    dictionary = NULL;

    while (word != NULL)
    {
        //word = strlwr(word); 
        entry = (node *) malloc(sizeof(node));
        entry->left = entry->right = NULL;
        entry->word = malloc(sizeof(char)*(strlen(word)+1));
        entry->word = word;
        insert(&dictionary,entry);
        word = strtok (NULL, " \n");
    }

    //printDictionary(dictionary);

    filecontents = readFile("data.txt"); 
    getReplacementWords(filecontents,&newWord,&oldWord);




    return 0;
}

/*
 * Adds entry to the binary search tree whose root pointer is *dictionary.
 * Nodes are ordered by strcmp() on their word.
 *
 * dictionary: address of the root pointer; it is updated when the tree is
 *             empty.
 * entry:      node whose word is already set. Its left/right links are not
 *             touched here, so the caller should have set them to NULL.
 *
 * When the word is not in the tree yet, entry is linked in and its count is
 * set to 1. When the word is already present, the count of the existing
 * node is incremented instead; entry is then left unlinked and is neither
 * modified nor freed, so it still belongs to the caller.
 */
void insert(node ** dictionary, node * entry)
{
    node **slot = dictionary;

    // Walk down until an empty link or a node with the same word is found.
    while (*slot != NULL)
    {
        int result = strcmp(entry->word, (*slot)->word);

        if (result == 0)
        {
            // Repeated word: count it on the node already in the tree.
            (*slot)->count++;
            return;
        }

        slot = (result < 0) ? &(*slot)->left : &(*slot)->right;
    }

    entry->count = 1;
    *slot = entry;
}

/*
 * Reads a whole file into a newly allocated, NUL-terminated string so it
 * can be tokenized with strtok().
 *
 * filename: path of the file to read.
 *
 * Returns the contents on success; the caller must free() the result.
 * Returns NULL when the file cannot be opened, its size cannot be
 * determined, memory cannot be allocated, or a read error occurs.
 */
char* readFile(char* filename)
{
    FILE* file = fopen(filename,"r");
    if(file == NULL)
    {
        return NULL;
    }

    char* content = NULL;

    if(fseek(file, 0, SEEK_END) == 0)
    {
        // ftell() reports failure as -1, which must not be used as a size
        long int size = ftell(file);

        if(size >= 0 && fseek(file, 0, SEEK_SET) == 0)
        {
            content = malloc((size_t)size + 1);
            if(content != NULL)
            {
                size_t got = fread(content,1,(size_t)size,file);

                if(ferror(file))
                {
                    free(content);
                    content = NULL;
                }
                else
                {
                    // Terminate after the bytes actually read; in text mode
                    // this can be fewer than ftell() reported.
                    content[got] = '\0';
                }
            }
        }
    }

    fclose(file);
    return content;
}

/*
 * Prints every word of the tree, one per line: the left subtree first,
 * then the node's own word, then the right subtree.
 *
 * dictionary: root of the (sub)tree to print; NULL (an empty tree) prints
 *             nothing.
 */
void printDictionary(node * dictionary)
{
    if(dictionary == NULL) {
        return;
    }

    printDictionary(dictionary->left);
    printf("%s\n",dictionary->word);
    printDictionary(dictionary->right);
}


/*
 * Scans filecontents for the first "old < new" pattern, where "<" is a
 * token of its own separated by spaces or newlines.
 *
 * filecontents: text to scan. It is tokenized in place with strtok(), so
 *               it is modified. NULL is treated as empty input.
 * newWord:      receives the token that follows "<", or NULL if none found.
 * oldWord:      receives the token that precedes "<", or NULL if none found.
 *
 * The returned pointers point into filecontents and are valid only as long
 * as that buffer is. Either output pointer may itself be NULL to ignore
 * that result. Every token is printed while scanning, and a notice is
 * printed for each "<" token.
 */
void getReplacementWords(char *filecontents, char **newWord, char **oldWord) {
    char *lastWord = NULL;    // most recent token that was not "<"
    char *pendingOld = NULL;  // old word waiting for the token after "<"
    int found = 0;

    if (newWord != NULL) {
        *newWord = NULL;
    }
    if (oldWord != NULL) {
        *oldWord = NULL;
    }
    if (filecontents == NULL) {
        return;
    }

    for (char *word = strtok(filecontents, " \n");
         word != NULL;
         word = strtok(NULL, " \n"))
    {
        printf("\n%s",word);

        if (strcmp(word,"<") == 0) {
            printf("\nFound replacement identifier");
            // Only the first complete pair is reported. A "<" with no word
            // in front of it has nothing to pair with.
            pendingOld = found ? NULL : lastWord;
            lastWord = NULL;
        } else {
            if (pendingOld != NULL) {
                if (oldWord != NULL) {
                    *oldWord = pendingOld;
                }
                if (newWord != NULL) {
                    *newWord = word;
                }
                found = 1;
                pendingOld = NULL;
            }
            lastWord = word;
        }
    }
}
4

4 回答 4

2

您可以使用 fscanf(filename , "%s < %s" , firstStringContainer , secondStringContainer)

在使用 fseek 定位到包含 < 字符的那一行之后,这会把 < 之前的字符串存入 firstStringContainer,把 < 之后的字符串存入 secondStringContainer。下面是一段建议您使用的代码:

/* file is the FILE * returned by fopen(); chooseYourSize is a size you pick */
int found = 0;
char buffer[chooseYourSize];
char firstStringContainer[chooseYourSize] , secondStringContainer[chooseYourSize];
while(!found && fgets(buffer , sizeof(buffer) , file) != NULL)
{
    /* fgets has already consumed this line from the stream, so it is parsed
       from buffer with sscanf; fscanf on the stream would read whatever
       follows the line instead. The containers are as large as buffer, so a
       token taken from buffer always fits. */
    if(strchr(buffer , '<') != NULL
       && sscanf(buffer , "%s < %s" , firstStringContainer , secondStringContainer) == 2)
    {
         found = 1;
    }
}
/* found is non-zero when both containers were filled */

当然,这仅在目标行仅包含三个元素 string < string 时才有效,这里就是这种情况

于 2013-08-15T20:13:21.533 回答
1

如果您的数据格式为 STRING1 < STRING2,您可以执行以下操作:

fscanf(file,"%s < %s", string1, string2);

如果它位于某一行中间的某个位置,那就会更困难一些。您可以从文件中逐行读取到缓冲区中,然后找到 <,回退到第一个字符串的开头,再读取您想要的内容。

while(fgets(buff,sizeof(buff),file) != NULL)
{
    if( (pointer = strstr(buff," < ")) != NULL)
    {
        //pointer is on the space in front of the <; step back
        //in the buff until the start of string1 is reached
        while(pointer > buff && !isspace((unsigned char)pointer[-1]))
        {
            pointer--;
        }

        //string1 and string2 must be able to hold a token as long as buff
        if(sscanf(pointer,"%s < %s",string1, string2) == 2)
        {
            //string1 holds the word before the <, string2 the word after it
        }
    }
}

自从我这样做以来已经有一段时间了,所以可能存在语法错误

于 2013-08-15T20:33:16.163 回答
0

使用 fgets() 和 strchr() 找到包含 < 的那一行。

char *line;

// fgets() returns NULL at end of file, which must not be passed to strchr()
while ((line = fgets (buffer, sizeof (buffer), file)) != NULL
       && strchr (line, '<') == NULL)
     ;       // do nothing

// line is NULL here when the file ended without any line containing '<'

然后使用 strtok() 解析缓冲区中的当前行

strcpy (oldword, strtok (buffer, "<"));
strcpy (newword, strtok (NULL, "\n"));
于 2013-08-15T20:43:36.370 回答
0

您可以在循环中使用 fseek() 向前/向后跳过 1 个元素,并(借助 string.h 中的其他函数)检查它是否是 space、> 或 other needed character。

当你找到这个符号时,你可以将指针向前/向后移动到另一个 space 或 other needed character,记住跳过的字符数 N,然后将 N 个符号复制到字符串变量中。

substitute < replacement
           ^ find this symbol

substitute < replacement
          ^ make a loop that  makes `counter++` when it finds `space`
            (int counter  = 0;)

 substitute < replacement
^ the loop will continue and will find the 2nd `space`, and make `counter++`
  when `counter == 2` (1 space after and 1 before the word) the loop stops. 
  Now `file` pointer points to the `space` symbol before the 1st word.
  Then skip 1 element forward (using `fseek()`) and now you have
  `file` pointer that points to the 1st word.
  And now you can do whatever you want!

执行相同的操作来查找第二个单词(file 指针将指向第二个单词,因此您可以再次调用此函数:它将在您的文本中查找第 2 个 >),并创建一个函数 findWordsNearArrow() 或类似的东西。

您可以在循环中调用此函数,以便在找到EOF时返回特定值,您可以使用该值退出循环。

再想想。(C)

于 2013-08-15T20:14:31.883 回答