0

我需要比较两个文件,每个文件大约有 5 万条记录。记录保存在文本文件中,格式如下:

文件格式

每行都有记录。

  1. 如果两个文件中的记录相同(逐行比较),那么我们必须找到相应记录的差异。
  2. 如果记录不同(在第 4 行检查),则把 file2 的这条记录作为 file1 中未找到的新记录打印出来,并将 file2 向后移动一行;如此重复,直到在 file2 中找到与 file1 当前记录对应的记录,然后再比较两个文件中这条记录所在的行。

是否可以比较这种格式的两个文件?

编辑

    /**
     * Compares two fixed-format record files and reports their differences.
     *
     * <p>The first line of each file is read and discarded. Every following source line is
     * parsed into a {@code Record1} and matched against target lines parsed into
     * {@code Record2}. Two records are considered the same record when their trimmed block
     * and trimmed id are equal.
     *
     * <ul>
     *   <li>When the records match, the two raw lines are compared character by character and
     *       each differing range is passed to {@code resultFile.addRowData}.</li>
     *   <li>When they do not match, a message is collected for that target record and only the
     *       target file advances, until a target record matching the current source record is
     *       found or the target file ends.</li>
     * </ul>
     *
     * <p>Limitation: if a source record has no counterpart later in the target file, the rest
     * of the target file is consumed while searching for it and the comparison stops.
     *
     * @param sourceFile file whose records are the reference side of the comparison
     * @param targetFile file that is checked against {@code sourceFile}
     * @param resultFile sink that receives one row per differing character range
     * @throws IOException if either file cannot be opened or read
     */
    private void compareFiles(File sourceFile, File targetFile, XlxsDataUtility resultFile)
            throws IOException {

        FixedFormatManager manager = new FixedFormatManagerImpl();
        StringBuilder differentLines = new StringBuilder();

        // try-with-resources closes both readers even when parsing or reporting throws.
        // Decoding still uses the platform default charset, exactly as before.
        try (BufferedReader sourceReader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(sourceFile)));
                BufferedReader targetReader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(targetFile)))) {

            // The first line of each file is skipped and never compared.
            sourceReader.readLine();
            targetReader.readLine();

            // Line numbers are tracked per file because the two files drift apart as soon as
            // the target contains a record the source does not have.
            int sourceLineNumber = 1;
            int targetLineNumber = 1;

            String sourceLine;
            while ((sourceLine = sourceReader.readLine()) != null) {
                sourceLineNumber++;
                Record1 record1 = manager.load(Record1.class, sourceLine);

                // Hold the current source record and advance only the target until the
                // matching record shows up; every target record passed on the way is logged.
                boolean matched = false;
                String targetLine;
                while (!matched && (targetLine = targetReader.readLine()) != null) {
                    targetLineNumber++;
                    Record2 record2 = manager.load(Record2.class, targetLine);

                    if (isSameRecord(record1, record2)) {
                        reportCharacterDifferences(sourceLine, targetLine, record1, record2,
                                resultFile, sourceLineNumber);
                        matched = true;
                    } else {
                        // The number in this message is the target file's line number.
                        differentLines.append(record1.getBlock()).append(record1.getId().trim())
                                .append(" is not found in ")
                                .append(record2.getBlock()).append(record2.getId().trim())
                                .append(" at line Number :: ").append(targetLineNumber)
                                .append("\n");
                    }
                }

                if (!matched) {
                    // Target file is exhausted; nothing is left to compare against.
                    break;
                }
            }
        }

        writeDifferenceTofile(differentLines.toString(), "Flat_File New_Records");
    }

    /**
     * Tells whether two parsed records denote the same record, i.e. their trimmed block and
     * trimmed id are both equal.
     */
    private boolean isSameRecord(Record1 record1, Record2 record2) {
        return record1.getBlock().trim().equals(record2.getBlock().trim())
                && record1.getId().trim().equals(record2.getId().trim());
    }

    /**
     * Reports every range of differing characters between two raw lines of the same record.
     *
     * <p>Ranges are formatted as {@code "start - end"} with 1-based, inclusive positions. A
     * range that reaches the end of the shorter line is extended to the end of the longer one.
     * If the common prefix ends on matching characters and the lengths differ, the surplus
     * tail is reported as its own range.
     *
     * @param sourceLine raw line from the source file
     * @param targetLine raw line from the target file
     * @param record1 parsed form of {@code sourceLine}
     * @param record2 parsed form of {@code targetLine}
     * @param resultFile sink that receives one row per differing range
     * @param line line number written into each reported row (the source file's line number)
     */
    private void reportCharacterDifferences(String sourceLine, String targetLine, Record1 record1,
            Record2 record2, XlxsDataUtility resultFile, int line) {

        int minimum = Math.min(sourceLine.length(), targetLine.length());
        int maximum = Math.max(sourceLine.length(), targetLine.length());

        int index = 0;
        while (index < minimum) {
            if (sourceLine.charAt(index) == targetLine.charAt(index)) {
                index++;
                continue;
            }

            // Start of a differing run: remember where it begins and its first characters.
            int start = index;
            char sourceChar = sourceLine.charAt(start);
            char targetChar = targetLine.charAt(start);

            // Re-read both characters on every step so the run ends at the first match.
            while (index < minimum && sourceLine.charAt(index) != targetLine.charAt(index)) {
                index++;
            }

            int end;
            if (index == minimum) {
                // The run reached the end of the shorter line, so the surplus tail of the
                // longer line belongs to the same range.
                end = maximum;
                index = maximum;
            } else {
                // index is the 0-based first matching position, which equals the 1-based
                // position of the last differing character.
                end = index;
            }

            String range = (start + 1) + " - " + end;
            // Argument order is unchanged from the original call.
            resultFile.addRowData(record2.getId().trim(), String.valueOf(sourceChar),
                    String.valueOf(targetChar), range, record1.getBlock(),
                    String.valueOf(line));
        }

        if (minimum != maximum && index < maximum) {
            // Common prefix ended on a match but one line is longer: report the surplus tail.
            String range = (minimum + 1) + " - " + maximum;
            resultFile.addRowData(record1.getId().trim(), record2.getId().trim(), range,
                    record1.getBlock(), String.valueOf(line));
        }
    }

}

4

0 回答 0