我想用以下代码在 C(Cygwin/GCC)程序中读取一个 Unicode 编码的文件:
#include <stdio.h>
#include <stdlib.h>
#include <glib.h>
/*
 * Splits a "key=value" line at the first '=' and prints both parts to stdout.
 *
 * text: NUL-terminated line; it is not modified. Nothing is stripped from it,
 *       so a trailing line terminator in text is printed as part of the value.
 *
 * A missing key (empty input) or a missing value (no '=' in the line) is
 * printed as an empty string rather than handing a NULL pointer to printf.
 */
void split_parse(char* text){
    /* At most two pieces: everything after the first '=' stays in res[1]. */
    char **res = g_strsplit(text, "=", 2);
    const char *key = "";
    const char *value = "";

    if (res != NULL && res[0] != NULL) {
        key = res[0];
        /* res[1] is the NULL terminator when the line contains no '='. */
        if (res[1] != NULL) {
            value = res[1];
        }
    }

    printf("Key = %s : ", key);
    printf("Value = %s", value);
    printf("\n");

    /* g_strsplit returns a newly allocated vector owned by the caller. */
    g_strfreev(res);
}
int main(int argc, char **argv)
{
setenv ("CYGWIN", "nodosfilewarning", 1);
GIOChannel *channel;
GError *err = NULL;
int reading = 0;
const gchar* enc;
guchar magic[2] = { 0 };
gsize bytes_read = 0;
const char* filename = "C:\\CONFIG";
channel = g_io_channel_new_file (filename, "r", &err);
if (!channel) {
g_print("%s", err->message);
return 1;
}
if (g_io_channel_set_encoding(channel, NULL, &err) != G_IO_STATUS_NORMAL) {
g_print("g_io_channel_set_encoding: %s\n", err->message);
return 1;
}
if (g_io_channel_read_chars(channel, (gchar*) magic, 2, &bytes_read, &err) != G_IO_STATUS_NORMAL) {
g_print("g_io_channel_read_chars: %s\n", err->message);
return 1;
}
if (magic[0] == 0xFF && magic[1] == 0xFE)
{
enc = "UTF-16LE";
}
else if (magic[0] == 0xFE && magic[1] == 0xFF)
{
enc = "UTF-16BE";
}
else
{
enc = "UTF-8";
if (g_io_channel_seek_position(channel, 0, G_SEEK_CUR, &err) == G_IO_STATUS_ERROR)
{
g_print("g_io_channel_seek: failed\n");
return 1;
}
}
if (g_io_channel_set_encoding (channel, enc, &err) != G_IO_STATUS_NORMAL) {
g_print("%s", err->message);
return 1;
}
reading = 1;
GIOStatus status;
char* str = NULL;
size_t len;
while(reading){
status = g_io_channel_read_line(channel, &str, &len, NULL, &err);
switch(status){
case G_IO_STATUS_EOF:
reading = 0;
break;
case G_IO_STATUS_NORMAL:
if(len == 0) continue;
split_parse(str);
break;
case G_IO_STATUS_AGAIN: continue;
case G_IO_STATUS_ERROR:
default:
//throw error;
reading = 0;
break;
}
}
g_free(str);
g_io_channel_unref(channel);
return(EXIT_SUCCESS);
}
文件(C:\CONFIG)内容如下:
h-debug="1"
name=ME
ÃÆÿЮ©=2¾1¼
读取该文件时,while 循环中的 g_io_channel_read_line 调用总是返回如下错误信息:
0x800474f8 "Invalid byte sequence in conversion input"(转换输入中的字节序列无效)
我究竟做错了什么?如何使用 glib 在 C 中读取这样的文件?
编辑:文件的十六进制转储