11

我正在开发一个为自定义语言提供一些功能的 Visual Studio 扩展。我已经完成了简单的语法高亮,接下来希望实现语法错误高亮、大括号匹配、大纲等功能。目前我看到的主要问题是,这些功能各自需要不同的标记(tag)类型,而这(据我所知)意味着需要不同的标记器(tagger)。然而,这三件事其实都可以在对内容的一次解析中完成,我却找不到任何直观的方式在标记器之间共享信息。当然,我可以把内容解析三遍,但这听起来不是一个好的解决方案。

如何从一个标记器返回多个标记类型(也许使用 ITag?)或在多个标记器之间共享信息?

我目前的结构是这样的:

    /// <summary>
    /// Classification tagger that runs the custom <c>Lexer</c> over a text snapshot and
    /// reports keyword, comment and literal classification tags for it.
    /// </summary>
    /// <remarks>
    /// Only the result for the most recently lexed snapshot is cached. Caching a result
    /// per snapshot in a dictionary would keep every snapshot ever seen (and its tag list)
    /// alive for as long as the tagger lives.
    /// </remarks>
    internal class HighlightWordTagger : ITagger<ClassificationTag>
    {
        ITextBuffer TextBuffer;
        IClassificationType Keyword;
        IClassificationType Comment;
        IClassificationType Literal;

        // Single-entry cache: the last snapshot that was lexed and the tags produced for it.
        ITextSnapshot CachedSnapshot;
        List<TagSpan<ClassificationTag>> CachedTags;

        /// <summary>
        /// Creates the tagger, resolves the three classification types it emits and
        /// subscribes to buffer changes so every edit triggers a re-lex.
        /// </summary>
        /// <param name="sourceBuffer">Buffer whose snapshots are lexed.</param>
        /// <param name="typeService">Registry used to look up the "WideKeyword",
        /// "WideComment" and "WideLiteral" classification types.</param>
        public HighlightWordTagger(ITextBuffer sourceBuffer, IClassificationTypeRegistryService typeService)
        {
            TextBuffer = sourceBuffer;

            TextBuffer.Changed += (sender, args) =>
            {
                LexSnapshot(args.After);

                // The lexer does not report what changed, so invalidate the whole snapshot.
                TagsChanged(this, new SnapshotSpanEventArgs(new SnapshotSpan(args.After, new Span(0, args.After.Length))));
            };
            Keyword = typeService.GetClassificationType("WideKeyword");
            Comment = typeService.GetClassificationType("WideComment");
            Literal = typeService.GetClassificationType("WideLiteral");
        }

        /// <summary>
        /// Returns every cached tag that intersects at least one of the requested spans.
        /// </summary>
        /// <param name="spans">Requested spans; the snapshot of the first span is the one lexed.</param>
        /// <returns>Each intersecting tag exactly once; nothing when <paramref name="spans"/>
        /// is null or empty.</returns>
        public IEnumerable<ITagSpan<ClassificationTag>> GetTags(NormalizedSnapshotSpanCollection spans)
        {
            if (spans == null || spans.Count == 0)
                yield break;

            // Hold on to the list in a local so a re-lex triggered during enumeration
            // cannot swap the collection out from under this iterator.
            var tags = LexSnapshot(spans[0].Snapshot);
            foreach (var tag in tags)
            {
                foreach (var span in spans)
                {
                    if (tag.Span.IntersectsWith(span))
                    {
                        yield return tag;
                        break; // a tag touching several requested spans is reported only once
                    }
                }
            }
        }

        /// <summary>Converts a lexer range (begin/end offsets) into an editor <see cref="Span"/>.</summary>
        Span SpanFromLexer(Lexer.Range range)
        {
            return new Span((int)range.begin.offset, (int)(range.end.offset - range.begin.offset));
        }

        /// <summary>
        /// Lexes <paramref name="shot"/> unless it is the snapshot already cached, and
        /// returns the classification tags for it.
        /// </summary>
        /// <param name="shot">Snapshot to lex.</param>
        /// <returns>The tag list for <paramref name="shot"/>.</returns>
        List<TagSpan<ClassificationTag>> LexSnapshot(ITextSnapshot shot)
        {
            if (CachedSnapshot == shot && CachedTags != null)
                return CachedTags;

            var lexer = new Lexer();
            var list = new List<TagSpan<ClassificationTag>>();
            lexer.Read(
                shot.GetText(),
                // Error callback: unterminated comments/strings are classified from the
                // failure position to the end of the snapshot.
                (where, what) =>
                {
                    if (what == Lexer.Failure.UnlexableCharacter)
                        return false;
                    // Clamp the start so the span length can never become negative if the
                    // lexer reports a position past the end of the snapshot.
                    int start = Math.Min((int)where.offset, shot.Length);
                    var loc = new Span(start, shot.Length - start);
                    if (what == Lexer.Failure.UnterminatedComment)
                        list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Comment)));
                    if (what == Lexer.Failure.UnterminatedStringLiteral)
                        list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Literal)));
                    return false;
                },
                // Comment callback.
                where =>
                {
                    // Clamp this so it doesn't go over the end when we add \n in the lexer.
                    where.end.offset = where.end.offset > shot.Length ? (uint)(shot.Length) : where.end.offset;
                    var loc = SpanFromLexer(where);
                    list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, loc), new ClassificationTag(Comment)));
                },
                // Token callback: string/integer tokens become literals, keyword tokens keywords.
                token =>
                {
                    var location = SpanFromLexer(token.location);
                    if (token.type == Lexer.TokenType.String || token.type == Lexer.TokenType.Integer)
                    {
                        list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, location), new ClassificationTag(Literal)));
                    }
                    if (lexer.IsKeyword(token.type))
                    {
                        list.Add(new TagSpan<ClassificationTag>(new SnapshotSpan(shot, location), new ClassificationTag(Keyword)));
                    }
                    return false;
                }
            );

            // Replace the previous entry so older snapshots can be garbage collected.
            CachedSnapshot = shot;
            CachedTags = list;
            return list;
        }

        /// <summary>Raised after every buffer change, covering the whole new snapshot.</summary>
        public event EventHandler<SnapshotSpanEventArgs> TagsChanged = delegate { };
    }

我或许还可以进一步优化,避免如此频繁地重新进行词法分析,但那是另一个问题了。

4

2 回答 2

5

我最终不得不把这些关注点分离开来。您可以使用 ITextBuffer.Properties.GetOrCreateSingletonProperty 将任意对象与文本缓冲区关联起来。我最终创建了一个单独的词法分析器类,把它与文本缓冲区关联,并把除打标记之外的几乎所有逻辑都放在这个类里。然后在每个标记器的实现中,我只需向词法分析器查询结果,再为这些结果打上标记。这样,多个标记器就可以依赖同一个词法分析器实例。

考虑到大多数词法分析器和解析器都会产生不止一种标记,我很惊讶 VS 竟然要用这么别扭的变通办法才能做到这一点。

于 2013-06-26T22:10:44.653 回答
3

正如 puppy 所说,您可以使用多个单独的标记器,它们可以通过 GetOrCreateSingletonProperty 相互访问;但我认为标记器的创建顺序没有任何保证,因此初始化过程可能会比较麻烦。

同时,我已经成功地把多个标记器合并到一个类中,甚至还做了一个完整的“示例语言”来演示这种技术。代码如下:

/// <summary>
/// MEF entry point for the sample language. Declares the "Sample Language" content
/// type, maps the .samplelang file extension to it, and hands out one shared
/// <see cref="SampleLanguageForVS"/> per text buffer as both classifier and tagger.
/// </summary>
[Export(typeof(IClassifierProvider))]
[Export(typeof(ITaggerProvider))]
[TagType(typeof(ClassificationTag))]
[TagType(typeof(ErrorTag))]
[ContentType("Sample Language")]
internal class SampleLanguageForVSProvider : IClassifierProvider, ITaggerProvider
{
    // Content type definition; the name must match the [ContentType] attributes.
    [Export]
    [Name("Sample Language")]
    [BaseDefinition("code")]
    internal static ContentTypeDefinition _ = null;

    // Associates the .samplelang extension with the content type above.
    [Export]
    [FileExtension(".samplelang")]
    [ContentType("Sample Language")]
    internal static FileExtensionToContentTypeDefinition _1 = null;

    // Populated by MEF.
    [Import] IClassificationTypeRegistryService _registry = null;

    /// <summary>
    /// Returns the <see cref="SampleLanguageForVS"/> stored in the buffer's property bag,
    /// creating and storing it on first use.
    /// </summary>
    /// <param name="registry">Classification registry passed to a newly created instance.</param>
    /// <param name="buffer">Buffer that owns the instance.</param>
    public static SampleLanguageForVS Get(IClassificationTypeRegistryService registry, ITextBuffer buffer)
    {
        return buffer.Properties.GetOrCreateSingletonProperty<SampleLanguageForVS>(
            () => new SampleLanguageForVS(registry, buffer));
    }

    /// <summary>Supplies the buffer's shared instance as its classifier.</summary>
    public IClassifier GetClassifier(ITextBuffer buffer)
    {
        SampleLanguageForVS shared = Get(_registry, buffer);
        return shared;
    }

    /// <summary>
    /// Supplies the buffer's shared instance as a tagger for <typeparamref name="T"/>;
    /// the result is null when the instance does not implement <c>ITagger&lt;T&gt;</c>.
    /// </summary>
    public ITagger<T> CreateTagger<T>(ITextBuffer buffer) where T : ITag
    {
        SampleLanguageForVS shared = Get(_registry, buffer);
        return shared as ITagger<T>;
    }
}

/// <summary>
/// Per-buffer language service for the sample language. It acts as the classifier
/// (whole-line comment detection), as a tagger for both <see cref="ClassificationTag"/>
/// and <see cref="ErrorTag"/>, and as the analysis callback target of
/// <c>BackgroundAnalyzerForVS</c>.
/// </summary>
internal class SampleLanguageForVS : IClassifier,
    ITagger<ClassificationTag>,
    ITagger<ErrorTag>,
    IBackgroundAnalyzerImpl<object, IList<ITagSpan<ITag>>>
{
    protected IClassificationTypeRegistryService _registry;
    protected ITextBuffer _buffer;
    protected IClassificationType _commentType;
    protected ClassificationTag _outerParenTag;
    // Tags from the most recent successful analysis; null until the first run completes.
    protected IList<ITagSpan<ITag>> _resultTags;
    protected BackgroundAnalyzerForVS<object, IList<ITagSpan<ITag>>> _parseHelper;

    /// <summary>
    /// Resolves the classification types used by this class and creates the background
    /// analyzer helper for <paramref name="buffer"/>.
    /// </summary>
    public SampleLanguageForVS(IClassificationTypeRegistryService registry,ITextBuffer buffer)
    {
        _registry = registry;
        _buffer = buffer;
        _commentType = registry.GetClassificationType(PredefinedClassificationTypeNames.Comment);
        _outerParenTag = MakeTag(PredefinedClassificationTypeNames.Keyword);
        _parseHelper = new BackgroundAnalyzerForVS<object, IList<ITagSpan<ITag>>>(buffer, this, true);
    }

    /// <summary>Builds a classification tag for the classification type called <paramref name="name"/>.</summary>
    ClassificationTag MakeTag(string name)
    {
        return new ClassificationTag(_registry.GetClassificationType(name));
    }

    #region Classifier (lexical analysis)

    // Required by IClassifier; this class never raises it.
    public event EventHandler<ClassificationChangedEventArgs> ClassificationChanged;

    /// <summary>
    /// Classifies every line that overlaps <paramref name="span"/>. The line containing
    /// the span start is always examined.
    /// </summary>
    /// <param name="span">Span to classify.</param>
    /// <returns>One classification span per comment line found; possibly empty.</returns>
    public IList<ClassificationSpan> GetClassificationSpans(SnapshotSpan span)
    {
        List<ClassificationSpan> spans = new List<ClassificationSpan>();
        ITextSnapshot snapshot = span.Snapshot;
        var line = snapshot.GetLineFromPosition(span.Start);
        while (true)
        {
            var cspan = GetLineClassification(line);
            if (cspan != null)
                spans.Add(cspan);

            // Stop at the end of the buffer, or when the next line would start at or
            // beyond the end of the requested span. Testing the next line's START (not
            // its end) keeps the final, possibly partially covered, line from being skipped.
            int nextLineStart = line.EndIncludingLineBreak.Position;
            if (nextLineStart >= snapshot.Length || nextLineStart >= span.End.Position)
                break;
            line = snapshot.GetLineFromPosition(nextLineStart);
        }
        return spans;
    }

    /// <summary>
    /// Classifies a whole line as a comment when its first non-whitespace characters
    /// are <c>#</c> or <c>//</c>.
    /// </summary>
    /// <param name="line">Line to inspect.</param>
    /// <returns>A comment classification covering the line (without its line break),
    /// or null when the line is not a comment.</returns>
    public ClassificationSpan GetLineClassification(ITextSnapshotLine line)
    {
        ITextSnapshot snapshot = line.Snapshot;
        var span = new Span(line.Start.Position, line.Length);
        var sspan = new SnapshotSpan(snapshot, span);

        // Scan only inside this line. Line-break characters count as whitespace, so an
        // unbounded scan would run into the following lines and classify a blank line
        // by the content of whatever line comes after it.
        int lineEnd = line.End.Position;
        int i = span.Start;
        while (i < lineEnd && char.IsWhiteSpace(snapshot[i]))
            i++;

        if (i >= lineEnd)
            return null;

        char first = snapshot[i];
        bool isComment = first == '#' ||
            (first == '/' && i + 1 < lineEnd && snapshot[i + 1] == '/');
        return isComment ? new ClassificationSpan(sspan, _commentType) : null;
    }

    #endregion

    #region Background analysis (the two taggers)

    /// <summary>Returns the state handed to <see cref="RunAnalysis"/>; always null here.</summary>
    public object GetInputSnapshot()
    {
        return null; // this example has no state to pass to the analysis thread.
    }

    /// <summary>
    /// Scans the whole snapshot and produces warning tags for backslashes, error tags
    /// for unmatched closing brackets, and classification tags for outermost brackets.
    /// </summary>
    /// <param name="snapshot">Snapshot to analyze.</param>
    /// <param name="input">Value from <see cref="GetInputSnapshot"/>; unused.</param>
    /// <param name="cancelToken">Not consulted by this implementation.</param>
    /// <returns>All tags found, in document order.</returns>
    public IList<ITagSpan<ITag>> RunAnalysis(ITextSnapshot snapshot, object input, System.Threading.CancellationToken cancelToken)
    {
        List<ITagSpan<ITag>> results = new List<ITagSpan<ITag>>();
        // On analysis thread: produce classification tags for nested [(parens)]
        // and warning tags for backslashes.
        int parenLevel = 0;
        for (int i = 0; i < snapshot.Length; i++)
        {
            char c = snapshot[i];
            if (c == '\\')
                results.Add(new TagSpan<ErrorTag>(
                    new SnapshotSpan(snapshot, new Span(i, 1)),
                    new ErrorTag("compiler warning", "Caution: that's not really a slash, it's a backslash!!")));
            bool open = (c == '[' || c == '(');
            bool close = (c == ']' || c == ')');
            if (close) {
                if (parenLevel > 0)
                    parenLevel--;
                else {
                    // Underline up to two characters, clipped at the end of the snapshot.
                    results.Add(new TagSpan<ErrorTag>(
                        new SnapshotSpan(snapshot, new Span(i, Math.Min(2, snapshot.Length-i))),
                        new ErrorTag("syntax error", "Caution: closing parenthesis without matching opener")));
                }
            }
            // Level 0 is checked after handling a closer and before handling an opener,
            // so only the outermost pair (and unmatched closers) get highlighted.
            if ((open || close) && parenLevel == 0)
                results.Add(new TagSpan<ClassificationTag>(
                    new SnapshotSpan(snapshot, new Span(i, 1)), 
                    _outerParenTag));
            if (open)
                parenLevel++;
        }
        return results;
    }

    /// <summary>
    /// Stores the latest analysis results and announces that tags changed across the
    /// whole current snapshot.
    /// </summary>
    /// <param name="results">Tags returned by <see cref="RunAnalysis"/>.</param>
    public void OnRunSucceeded(IList<ITagSpan<ITag>> results)
    {
        _resultTags = results;
        // We don't know which tags changed unless we do some fancy diff, so
        // act as if everything changed.
        // Read the snapshot once so the span length always belongs to the snapshot it
        // is applied to, and copy the handler so it cannot become null after the check.
        ITextSnapshot current = _buffer.CurrentSnapshot;
        EventHandler<SnapshotSpanEventArgs> handler = TagsChanged;
        if (handler != null) // should always be true
            handler(this, new SnapshotSpanEventArgs(new SnapshotSpan(current, new Span(0, current.Length))));
    }

    #endregion

    #region ITagger<ClassificationTag> and ITagger<ErrorTag> Members

    IEnumerable<ITagSpan<ErrorTag>> ITagger<ErrorTag>.GetTags(NormalizedSnapshotSpanCollection spans)
    {
        return GetTags<ErrorTag>(spans);
    }
    IEnumerable<ITagSpan<ClassificationTag>> ITagger<ClassificationTag>.GetTags(NormalizedSnapshotSpanCollection spans)
    {
        return GetTags<ClassificationTag>(spans);
    }

    /// <summary>
    /// Returns the stored tags of type <typeparamref name="TTag"/> lying between the
    /// start of the first requested span and the end of the last one.
    /// </summary>
    /// <param name="spans">Requested spans.</param>
    /// <returns>Matching tags; an empty sequence (never null) when no analysis result
    /// is available yet or <paramref name="spans"/> is null or empty.</returns>
    public IEnumerable<ITagSpan<TTag>> GetTags<TTag>(NormalizedSnapshotSpanCollection spans) where TTag : ITag
    {
        IList<ITagSpan<ITag>> tags = _resultTags;
        if (tags == null || spans == null || spans.Count == 0)
            return Enumerable.Empty<ITagSpan<TTag>>();

        // TODO: make more efficient for large files with e.g. binary search
        // NOTE(review): positions are compared as raw offsets; the stored tags may
        // belong to an older snapshot than the requested spans - confirm this is acceptable.
        int start = spans[0].Start.Position, end = spans[spans.Count-1].End.Position;
        return tags.Where(ts => ts.Span.End.Position >= start && ts.Span.Start.Position <= end).OfType<ITagSpan<TTag>>();
    }

    public event EventHandler<SnapshotSpanEventArgs> TagsChanged;

    #endregion
}

上面缺少的只是using语句(请参阅完整的源文件)和BackgroundAnalyzerForVS类。如果将此代码插入 vsix 项目,您将获得“词法分析”、延迟“解析”、警告和错误标记。演示文件:

Open this in Visual Studio to see "sample" syntax highlighting.
  // Backslashes are underlined.
  \\ <-- Such as those ones.
When you start a parenthetical (like this) the parens are highlighted, 
but ([nested parens (like this)]) are not highlighted.
# Do not write a closing ")" without an opening "(".
于 2014-07-24T00:15:02.583 回答