javascript - 如何解析光标ANSI转义码？

Question

我正在编写用于处理 jQuery 终端光标的 ANSI 转义码的代码。但是有问题，不知道它应该如何工作，我得到了奇怪的结果。

我正在使用ervy library进行测试。

并使用此代码：

function scatter_plot() {
     const scatterData = [];

     for (let i = 1; i < 17; i++) {
         i < 6 ? scatterData.push({ key: 'A', value: [i, i], style: ervy.fg('red', '*') })
           : scatterData.push({ key: 'A', value: [i, 6], style: ervy.fg('red', '*') });
     }

     scatterData.push({ key: 'B', value: [2, 6], style: ervy.fg('blue', '# '), side: 2 });
     scatterData.push({ key: 'C', value: [0, 0], style: ervy.bg('cyan', 2) });

     var plot = ervy.scatter(scatterData, { legendGap: 18, width: 15 });
     // same as Linux XTERM where 0 code is interpreted as 1.
     var formatting = $.terminal.from_ansi(plot.replace(/\x1b\[0([A-D])/g, '\x1b[1$1'));
     return formatting;
}

$.terminal.defaults.formatters = [];
var term = $('body').terminal();
term.echo(scatter_plot());

它在 Linux Xterm 中应该是这样的：

但是看起来是这样的，看codepen demo

当我写这个问题时，在移动光标时改变了几个 +1 和 -1（请参阅处理代码中的 AF ANSI 转义）给出了这个结果（代码片段有最新的代码）。

第一行被空格覆盖，整个情节从上到下，从右到右（除了应该在“|”下方的 0,0 青色点和 2 个字符宽，所以你应该看到它的右半部分，这个是正确的，但是其余的不是）

这是我处理光标的新代码，我在处理颜色之前这样做，所以代码并不复杂。

// -------------------------------------------------------------------------------
var ansi_re = /(\x1B\[[0-9;]*[A-Za-z])/g;
var cursor_re = /(.*)\r?\n\x1b\[1A\x1b\[([0-9]+)C/;
var move_cursor_split = /(\x1b\[[0-9]+[A-G])/g;
var move_cursor_match = /^\x1b\[([0-9]+)([A-G])/;
// -------------------------------------------------------------------------------
function parse_ansi_cursor(input) {
    /*
        (function(log) {
            console.log = function(...args) {
                if (true || cursor.y === 11) {
                    return log.apply(console, args);
                }
            };
        })(console.log);
        */
    function length(text) {
        return text.replace(ansi_re, '').length;
    }
    function get_index(text, x) {
        var splitted = text.split(ansi_re);
        var format = 0;
        var count = 0;
        var prev_count = 0;
        for (var i = 0; i < splitted.length; i++) {
            var string = splitted[i];
            if (string) {
                if (string.match(ansi_re)) {
                    format += string.length;
                } else {
                    count += string.length;
                    if (count >= x) {
                        var rest = x - prev_count;
                        return format + rest;
                    }
                    prev_count = count;
                }
            }
        }
        return i;
    }
    // ansi aware substring, it just and add removed ansi escapes
    // at the beginning we don't care if the were disabled with 0m
    function substring(text, start, end) {
        var result = text.substring(start, end);
        if (start === 0 || !text.match(ansi_re)) {
            return result;
        }
        var before = text.substring(0, start);
        var match = before.match(ansi_re);
        if (match) {
            return before.match(ansi_re).join('') + result;
        }
        return result;
    }
    // insert text at cursor position
    // result is array of splitted arrays that form single line
    function insert(text) {
        if (!text) {
            return;
        }
        if (!result[cursor.y]) {
            result[cursor.y] = [];
        }
        var index = 0;
        var sum = 0;
        var len, after;
        function inject() {
            index++;
            if (result[cursor.y][index]) {
                result[cursor.y].splice(index, 0, null);
            }
        }
        if (cursor.y === 11) {
            //debugger;
        }
        if (text == "[46m  [0m") {
            //debugger;
        }
        console.log({...cursor, text});
        if (cursor.x === 0 && result[cursor.y][index]) {
            source = result[cursor.y][0];
            len = length(text);
            var i = get_index(source, len);
            if (length(source) < len) {
                after = result[cursor.y][index + 1];
                if (after) {
                    i = get_index(after, len - length(source));
                    after = substring(after, i);
                    result[cursor.y].splice(index, 2, null, after);
                } else {
                    result[cursor.y].splice(index, 1, null);
                }
            } else {
                after = substring(source, i);
                result[cursor.y].splice(index, 1, null, after);
            }
        } else {
            var limit = 100000; // infite loop guard
            var prev_sum = 0;
            // find in which substring to insert the text
            while (index < cursor.x) {
                if (!limit--) {
                    warn('[WARN] To many loops');
                    break;
                }
                var source = result[cursor.y][index];
                if (!source) {
                    result[cursor.y].push(new Array(cursor.x - prev_sum).join(' '));
                    index++;
                    break;
                }
                if (sum === cursor.x) {
                    inject();
                    break;
                }
                len = length(source);
                prev_sum = sum;
                sum += len;
                if (sum === cursor.x) {
                    inject();
                    break;
                }
                if (sum > cursor.x) {
                    var pivot = get_index(source, cursor.x - prev_sum);
                    var before = substring(source, 0, pivot);
                    var end = get_index(source, length(text));
                    after = substring(source, pivot + end);
                    if (!after.length) {
                        result[cursor.y].splice(index, 1, before);
                    } else {
                        result[cursor.y].splice(index, 1, before, null, after);
                    }
                    index++;
                    break;
                } else {
                    index++;
                }
            }
        }
        cursor.x += length(text);
        result[cursor.y][index] = text;
    }
    if (input.match(move_cursor_split)) {
        var lines = input.split('\n').filter(Boolean);
        var cursor = {x: 0, y: -1};
        var result = [];
        for (var i = 0; i < lines.length; ++i) {
            console.log('-------------------------------------------------');
            var string = lines[i];
            cursor.x = 0;
            cursor.y++;
            var splitted = string.split(move_cursor_split).filter(Boolean);
            for (var j = 0; j < splitted.length; ++j) {
                var part = splitted[j];
                console.log(part);
                var match = part.match(move_cursor_match);
                if (match) {
                    var ansi_code = match[2];
                    var value = +match[1];
                    console.log({code: ansi_code, value, ...cursor});
                    if (value === 0) {
                        continue;
                    }
                    switch (ansi_code) {
                        case 'A': // UP
                            cursor.y -= value;
                            break;
                        case 'B': // Down
                            cursor.y += value - 1;
                            break;
                        case 'C': // forward
                            cursor.x += value + 1;
                            break;
                        case 'D': // Back
                            cursor.x -= value + 1;
                            break;
                        case 'E': // Cursor Next Line
                            cursor.x = 0;
                            cursor.y += value - 1;
                            break;
                        case 'F': // Cursor Previous Line
                            cursor.x = 0;
                            cursor.y -= value + 1;
                            break;
                    }
                    if (cursor.x < 0) {
                        cursor.x = 0;
                    }
                    if (cursor.y < 0) {
                        cursor.y = 0;
                    }
                } else {
                    insert(part);
                }
            }
        }
        return result.map(function(line) {
            return line.join('');
        }).join('\n');
    }
    return input;
}

in 代码是行数组，result = [];当在光标处插入文本时，单行可能会被拆分为多个子字符串，如果它们是字符串数组，代码可能会更简单。现在我只想修复光标位置。

这是嵌入了 from_ansi 函数的codepen 演示（里面有 parse_ansi_cursor 是有问题的）。抱歉，代码很多，但解析 ANSI 转义码并不简单。

我不确定应该如何移动光标（现在它有 + 1 或 - 1，我不确定）我也不确定是否应该在每行之前增加 cursor.y。我不是 100% 确定这应该如何工作。我查看了 Linux Xterm 代码，但没有找到任何线索。查看了 Xterm.js，但那些散点图的 ervy 图完全被破坏了。

我的 from_ansi 函数的原始代码正在处理一些像这样的 ANSI 光标代码：

        input = input.replace(/\x1b\[([0-9]+)C/g, function(_, num) {
            return new Array(+num + 1).join(' ');
        });

只有 C，向前只是添加空白，它适用于 ANSI 艺术，但不适用于 ervy 散点图。

我认为它并不太宽泛，只是关于使用 ANSI 转义码移动光标和处理换行符的问题。也假设是简单的情况，光标应该只在单个字符串内移动，而不是像在真实终端中那样移动（ervy plot 输出 ANSI 转义码）。

我对解释如何处理字符串以及如何移动有效的光标的答案很好，但如果你能提供对代码的修复，我会很棒。我更喜欢修复我的代码现在全新的实现，除非它更简单并且它是一个函数parse_ansi_cursor(input)并且与其余代码相同但具有固定的光标移动。

编辑： 我发现我input.split('\n').filter(Boolean)错了，应该是：

            var lines = input.split('\n');
            if (input.match(/^\n/)) {
                lines.shift();
            }
            if (input.match(/\n$/)) {
                lines.pop();
            }

似乎一些旧的 ANSI 转义规范说 0 不是零，而是默认占位符 1。这已从规范中删除，但 Xterm 仍在使用它。所以我添加了这一行来解析代码，如果有 0A 或 A 得到值 1。

var value = match[1].match(/^0?$/) ? 1 : +match[1];

情节看起来更好，但光标仍然存在问题。（我认为它是光标 - 我不是 100% 确定）。

我再次更改了 +1/-1，现在它更接近了（几乎与 XTerm 中的相同）。巴斯仍然需要在我的代码中存在错误。

编辑：

@jerch 的回答我尝试使用node ansi parser，有同样的问题不知道如何处理光标：

var cursor = {x:0,y:0};
result = [];
var terminal = {
    inst_p: function(s) {
        var line = result[cursor.y];
        if (!line) {
            result[cursor.y] = s;
        } else if (cursor.x === 0) {
            result[cursor.y] = s + line.substring(s.length);
        } else if (line.length < cursor.x) {
            var len = cursor.x - (line.length - 1);
            result[cursor.y] += new Array(len).join(' ') + s;
        } else if (line.length === cursor.x) {
            result[cursor.y] += s;
        } else {
            var before = line.substring(0, cursor.x);
            var after = line.substring(cursor.x + s.length);
            result[cursor.y] = before + s + after;
        }
        cursor.x += s.length;
        console.log({s, ...cursor, line: result[cursor.y]});
    },
    inst_o: function(s) {console.log('osc', s);},
    inst_x: function(flag) {
        var code = flag.charCodeAt(0);
        if (code === 10) {
            cursor.y++;
            cursor.x = 0;
        }
    },
    inst_c: function(collected, params, flag) {
        console.log({collected, params, flag});
        var value = params[0] === 0 ? 1 : params[0];
        switch(flag) {
            case 'A': // UP
                cursor.y -= value;
                break;
            case 'B': // Down
                cursor.y += value - 1;
                break;
            case 'C': // forward
                cursor.x += value;
                break;
            case 'D': // Back
                cursor.x -= value;
                break;
            case 'E': // Cursor Next Line
                cursor.x = 0;
                cursor.y += value;
                break;
            case 'F': // Cursor Previous Line
                cursor.x = 0;
                cursor.y -= value;
                break;
        }
    },
    inst_e: function(collected, flag) {console.log('esc', collected, flag);},
    inst_H: function(collected, params, flag) {console.log('dcs-Hook', collected, params, flag);},
    inst_P: function(dcs) {console.log('dcs-Put', dcs);},
    inst_U: function() {console.log('dcs-Unhook');}
};
var parser = new AnsiParser(terminal);
parser.parse(input);
return result.join('\n');

这只是一个简单的例子，它忽略了除换行符和光标移动之外的所有内容。

这是输出：

更新：

It seems that every cursor movement should be just += value or -= value and my value - 1; was just correcting to bug in ervy library that was not working on clear terminal.

score 2 · Accepted Answer

To begin with - a Regexp based approach is not ideal to handle escape sequences. The reason for this are complicated interactions between various terminal sequences, as some break a former not yet closed one while others keep working in the middle of another (like some control codes) and the "outer" sequence would still finish correctly. You would have to pull in all these edge cases into every single regexp (see https://github.com/xtermjs/xterm.js/issues/2607#issuecomment-562648768 for an illustration).

In general parsing escape sequences is quite tricky, we even have an issue regarding that in terminal-wg. Hopefully we manage to get some minimal parsing requirements from this in the future. Most certainly it will not be regexp-based ;)

All that said, its much easier to go with a real parser, that deals with all the edge cases. A good starting point for a DEC compatible parser is https://vt100.net/emu/dec_ansi_parser. For cursor handling you have to handle at least these states with all actions:

ground
escape
csi_entry
csi_ignore
csi_param
csi_intermediate

plus all other states as dummy entries. Also control codes need special care (action execute), as they might interfer anytime with any other sequence with different results.

更糟糕的是，官方的 ECMA-48 规范在某些方面与 DEC 解析器略有不同。目前使用的大多数模拟器仍然试图以兼容 DEC VT100+ 为目标。

如果您不想自己编写解析器，您可以使用/修改我的旧解析器或我们在xterm.js中的解析器（后者可能更难集成，因为它在 UTF32 代码点上运行）。

javascript - 如何解析光标ANSI转义码？

1 回答 1

Related

Reference