0

我编写了这个 Java hadoop 程序,它将执行文件的并行索引。该文件是在 eclipse 中创建的

package org.myorg;

import java.io.*;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class ParallelIndexation {


public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { 
     private final static IntWritable zero = new IntWritable(0); 
     private Text word = new Text();
     public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { 
        String line = value.toString();
        int CountComputers;
        //DataInputStream ConfigFile = new DataInputStream( new FileInputStream("countcomputers.txt"));
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // путь к файлу
        DataInputStream in = new DataInputStream(fstream);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String result = br.readLine(); // читаем как строку
        CountComputers = Integer.parseInt(result); // переводим строку в число
        //CountComputers=ConfigFile.readInt();
        in.close();
        fstream.close();
        ArrayList<String> paths = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(line, "\n");
        while (tokenizer.hasMoreTokens()) 
        {
          paths.add(tokenizer.nextToken());
        }
        String[] ConcatPaths= new String[CountComputers];
        int NumberOfElementConcatPaths=0;
        if (paths.size()%CountComputers==0)
        {
            for (int i=0; i<CountComputers; i++)
            {
                ConcatPaths[i]=paths.get(NumberOfElementConcatPaths);
                NumberOfElementConcatPaths+=paths.size()/CountComputers;
                for (int j=1; j<paths.size()/CountComputers; j++)
                {
                    ConcatPaths[i]+="\n"+paths.get(i*paths.size()/CountComputers+j);
                }
            }
        }
        else
        {
            NumberOfElementConcatPaths=0;
            for (int i=0; i<paths.size()%CountComputers; i++)
            {
                ConcatPaths[i]=paths.get(NumberOfElementConcatPaths);
                NumberOfElementConcatPaths+=paths.size()/CountComputers+1;              
                for (int j=1; j<paths.size()/CountComputers+1; j++)
                {
                    ConcatPaths[i]+="\n"+paths.get(i*(paths.size()/CountComputers+1)+j);
                }           
            }
            for (int k=paths.size()%CountComputers; k<CountComputers; k++)
            {
                ConcatPaths[k]=paths.get(NumberOfElementConcatPaths);
                NumberOfElementConcatPaths+=paths.size()/CountComputers;                
                for (int j=1; j<paths.size()/CountComputers; j++)
                {
                    ConcatPaths[k]+="\n"+paths.get((k-paths.size()%CountComputers)*paths.size()/CountComputers+paths.size()%CountComputers*(paths.size()/CountComputers+1)+j);
                }                   
            }
        }
        //CountComputers=ConfigFile.readInt();
        for (int i=0; i<ConcatPaths.length; i++)
        {
            word.set(ConcatPaths[i]);
            output.collect(word, zero);
        }
     }
}



public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { 
    public native long Traveser(String Path);
    public native void Configure(String Path);
    public void reduce(Text key, IntWritable value, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException { 
        long count;
        String line = key.toString();
        ArrayList<String> ProcessedPaths = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(line, "\n");
        while (tokenizer.hasMoreTokens()) 
        {
          ProcessedPaths.add(tokenizer.nextToken());
        }       
        Configure("/etc/nsindexer.conf");
        for (int i=0; i<ProcessedPaths.size(); i++)
        {
            count=Traveser(ProcessedPaths.get(i));
        }
        output.collect(key, new LongWritable(count));
      }
    static
    {
        System.loadLibrary("nativelib");
    } 
}

public static void main(String[] args) throws Exception { 
      JobConf conf = new JobConf(ParallelIndexation.class); 
      conf.setJobName("parallelindexation");
      conf.setOutputKeyClass(Text.class);
      conf.setOutputValueClass(LongWritable.class);
      conf.setMapperClass(Map.class);
      conf.setCombinerClass(Reduce.class);
      conf.setReducerClass(Reduce.class);
      conf.setInputFormat(TextInputFormat.class);
      conf.setOutputFormat(TextOutputFormat.class);
      FileInputFormat.setInputPaths(conf, new Path(args[0]));
      FileOutputFormat.setOutputPath(conf, new Path(args[1]));
      JobClient.runJob(conf);
  } 
}

通过团队在 Nexenta Illumos 操作系统 (solaris) 中编译的结果

javac -classpath hadoop-examples-1.0.1.jar -d folder/classes folder/src/ParallelIndexation.java,

收到以下错误

folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                     ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                      ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                       ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                        ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                          ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                            ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                             ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                              ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                               ^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
        FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
                                                                                                ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                          ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                           ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                            ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                             ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                              ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                               ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                 ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                  ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                   ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                     ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                      ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                       ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                        ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                         ^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
        String result = br.readLine(); // ������ ��� ������
                                                          ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                      ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                       ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                        ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                         ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                          ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                           ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                            ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                             ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                              ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                 ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                  ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                   ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                    ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                     ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                       ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                         ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                          ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                           ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                            ^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
        CountComputers = Integer.parseInt(result); // ��������� ������ � �����
                                                                             ^
46 errors

如何在eclipse中更改UTF8的编码?

4

2 回答 2

1

在 Eclipse 中,您可以在三个地方设置文本文件编码:

  • 在工作区级别:首选项/常规/工作区/文本文件编码
  • 在项目级别:属性(在项目上)/资源/文本文件编码
  • 在文件级别:属性(文件上)/资源/文本文件编码

我建议将其设置在工作区和项目级别,并且仅在必要时在文件级别进行设置。

设置文件编码并不能真正转换文件本身。您可能需要为此使用单独的工具(例如iconv)。

于 2012-10-23T07:01:39.703 回答
0

Java 6 对不可映射的字符给出了“警告”。

Java 7 为不可映射的字符提供了一个“错误”,并且失败了。

检查文件编码

file -bi [filename]
于 2014-10-18T00:57:33.167 回答