c++ - 是否会针对 STL 迭代器优化后缀 ++/-- 运算符的低效率？

Question

我知道对于内置类型，编译器通常会优化后缀版本的递增/递减运算符（即不会产生复制），但是对于迭代器（iterator）也是这种情况吗？

它们本质上只是重载的运算符，并且可以通过多种方式实现，但是由于它们的行为是严格定义的，它们可以被优化吗？如果可以，是否有（许多）编译器实现了这种优化？

#include <vector> 

// Adds 20 to every element of v in place, stepping the iterator with postfix ++.
void foo(std::vector<int>& v){
  typedef std::vector<int>::iterator Iter;
  for (Iter it = v.begin(); it != v.end(); it++){  //will this get optimised by the compiler?
    *it += 20;
  }
}

score 9 · Accepted Answer

就 GNU GCC 的 STL 实现（版本 4.6.1）中 std::vector 这一特定情况而言，我认为在足够高的优化级别下不会有性能差异。

vector 的前向迭代器由 __gnu_cxx::__normal_iterator<typename _Iterator, typename _Container> 实现。让我们看看它的构造函数和后缀 ++ 运算符：

  // Constructs the wrapper by copying the underlying iterator __i into _M_current.
  // Marked explicit, so an _Iterator is never converted implicitly.
  explicit
  __normal_iterator(const _Iterator& __i) : _M_current(__i) { }

  // Postfix increment: applies postfix ++ to _M_current and returns a new
  // __normal_iterator built from the result of that expression (for a plain
  // pointer _Iterator, the value held before the increment).
  __normal_iterator
  operator++(int)
  { return __normal_iterator(_M_current++); }

以及它在 vector 中的实例化：

  // The container's iterator type: __normal_iterator instantiated with `pointer` and `vector`.
  typedef __gnu_cxx::__normal_iterator<pointer, vector> iterator;

如您所见，它在内部对普通指针执行后缀递增，然后将递增前的原始值传递给自己的构造函数，由构造函数把它保存到成员变量中。这段代码应该很容易通过死值分析消除。

但它真的优化了吗？让我们来了解一下。测试代码：

#include <vector>

// Advances the referenced vector iterator by one position using prefix increment.
// Prefix counterpart of test_postfix.
void test_prefix(std::vector<int>::iterator &it)
{
    ++it;
}

// Advances the referenced vector iterator by one position using postfix increment.
// The value returned by it++ is deliberately discarded.
void test_postfix(std::vector<int>::iterator &it)
{
    it++;
}

输出的汇编代码（开启 -Os）：

    .file   "test.cpp"
    .text
    .globl  _Z11test_prefixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE
    .type   _Z11test_prefixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE, @function
_Z11test_prefixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE:
.LFB442:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    movl    8(%ebp), %eax
    addl    $4, (%eax)
    popl    %ebp
    .cfi_def_cfa 4, 4
    .cfi_restore 5
    ret
    .cfi_endproc
.LFE442:
    .size   _Z11test_prefixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE, .-_Z11test_prefixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE
    .globl  _Z12test_postfixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE
    .type   _Z12test_postfixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE, @function
_Z12test_postfixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE:
.LFB443:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    movl    8(%ebp), %eax
    addl    $4, (%eax)
    popl    %ebp
    .cfi_def_cfa 4, 4
    .cfi_restore 5
    ret
    .cfi_endproc
.LFE443:
    .size   _Z12test_postfixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE, .-_Z12test_postfixRN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEE
    .ident  "GCC: (Debian 4.6.0-10) 4.6.1 20110526 (prerelease)"
    .section    .note.GNU-stack,"",@progbits

如您所见，两种情况下输出的汇编代码完全相同。

当然，对于自定义迭代器或更复杂的数据类型，情况可能未必如此。但是，具体就 vector 而言，前缀和后缀（不使用后缀运算符的返回值时）似乎具有相同的性能。

c++ - 是否会针对 STL 迭代器优化后缀 ++/-- 运算符的低效率？

1 回答 1

Related

Reference