3

我刚刚发现了 SDL 的一个奇怪行为。

我编写了一个简单的粒子渲染器,由于某种原因,软件渲染器的运行速度比硬件渲染器快 6 倍。

这是源代码:

主文件

#define _USE_MATH_DEFINES
#include <math.h>
#include <time.h>

#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

#include <Windows.h>
#include <SDL.h>

#include "Particle.h"

// Window size in pixels. Particle::tick() also wraps particle positions
// around at these bounds.
const int SCREEN_WIDTH = 1024;
const int SCREEN_HEIGHT = 600;
// Number of particles simulated and drawn every frame.
const int PARTICLE_NUMBER = 50000;
// Initial particle speeds are picked from [MIN_SPEED, MAX_SPEED). tick()
// multiplies speed by elapsed seconds, so the unit is pixels per second.
const int MAX_SPEED = 200;
const int MIN_SPEED = 5;

/**
 * Returns a monotonic timestamp in milliseconds.
 *
 * The absolute value has no calendar meaning; only the difference between
 * two calls is meaningful (this is how the frame delta is computed).
 *
 * The previous implementation summed the fields of the local wall-clock time.
 * That was wrong in three ways:
 *   - `wDay * 86400000` was evaluated in 32-bit int and overflowed from the
 *     25th of each month onwards;
 *   - treating every month as 30 days made the value jump backwards at the
 *     end of 31-day months;
 *   - local time can be adjusted (DST, NTP, the user), producing negative or
 *     huge deltas.
 * A steady clock has none of these problems.
 *
 * @return milliseconds elapsed since the steady clock's (unspecified) epoch.
 */
long long getMs (void) {
    using namespace std::chrono;
    const steady_clock::duration sinceEpoch = steady_clock::now().time_since_epoch();
    return static_cast<long long>(duration_cast<milliseconds>(sinceEpoch).count());
}

int main(int argc, char *argv[])
{
    bool hardwareAccelerated = true;

    if (argc == 2)
    {
        if (strncmp(argv[1], "-software", 9) == 0)
        {
            hardwareAccelerated = false;
        }
    }

    char title [100];
    sprintf(title, "Particles: %d - (%s)", PARTICLE_NUMBER, (hardwareAccelerated ? "HARDWARE ACCELERATED" : "SOFTWARE RENDERING"));

    Particle<double> *particles = (Particle<double>*) malloc(sizeof(Particle<double>) * PARTICLE_NUMBER);

    for (int i = 0; i < PARTICLE_NUMBER; i++)
    {
        double x = rand() % SCREEN_WIDTH;
        double y = rand() % SCREEN_HEIGHT;
        double direction = (((double) rand() / (double) RAND_MAX) - 0.5f) * 2 * M_PI;
        double speed = rand() % (MAX_SPEED - MIN_SPEED) + MIN_SPEED;
        (particles+i)->setPos(x, y);
        (particles+i)->setDirection(direction);
        (particles+i)->setSpeed(speed);
        // std::cout << (particles+i) << std::endl;
    }



    if (SDL_Init(SDL_INIT_EVERYTHING) != 0) {
        return 1;
    }

    SDL_Window *window = SDL_CreateWindow(title,
        SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
        SCREEN_WIDTH, SCREEN_HEIGHT, SDL_WINDOW_SHOWN);
    if (window == nullptr) {
        return 2;
    }

    SDL_RendererFlags flags = (hardwareAccelerated ? SDL_RENDERER_ACCELERATED : SDL_RENDERER_SOFTWARE);
    SDL_Renderer *renderer = SDL_CreateRenderer(window, -1,
        flags);
    if (renderer == nullptr) {
        return 3;
    }

    bool quit = false;
    SDL_Event evt;

    long long lastFrame = getMs();
    double delta = 0.f;
    while (!quit)
    {
        long long currentTime = getMs();
        delta = currentTime - lastFrame;
        lastFrame = currentTime;

        std::cout << "delta: " << delta << std::endl;

        while(SDL_PollEvent(&evt) != 0)
        {
            if (evt.type == SDL_QUIT)
            {
                quit = true;
            }
        }
        SDL_SetRenderDrawColor(renderer, 0,0,0,1);
        SDL_RenderClear(renderer);
        SDL_SetRenderDrawColor(renderer, 255,0,0,255);
        for (int i = 0; i < PARTICLE_NUMBER; i++)
        {
            (particles+i)->tick(delta);
            double *pos = (particles+i)->getPos();
            SDL_RenderDrawPoint(renderer, pos[0], pos[1]);
        }
        SDL_RenderPresent(renderer);
    }

    SDL_DestroyRenderer(renderer);
    SDL_DestroyWindow(window);
    SDL_Quit();

    return 0;
}

Particle.h

#ifndef _H_PARTICLE
#define _H_PARTICLE
#include <math.h>

// World bounds used by Particle::tick() for wrap-around. They are defined by
// the file that includes this header. Declaring them here makes the names
// visible at the point where the template is defined, which compilers that
// implement two-phase name lookup require.
extern const int SCREEN_WIDTH;
extern const int SCREEN_HEIGHT;

/**
 * A point particle moving in a straight line at constant speed.
 *
 * @tparam T numeric type used for the position and the speed.
 *
 * Position is in pixels, speed in pixels per second and direction in radians
 * (it is passed to cos()/sin()).
 */
template <class T>
class Particle
{
public:
    /** Creates a particle at (0, 0) with zero speed and zero direction. */
    Particle(void);

    /** Advances the particle by `delta` milliseconds. */
    void tick(double);

    void setPos(T, T);
    /**
     * Returns a pointer to two consecutive values {x, y} stored inside this
     * particle. The pointer stays valid for as long as the particle itself
     * is alive and is not moved.
     */
    T* getPos(void);
    void setDirection(double);
    double getDirection(void) const;
    void setSpeed(T);
    T getSpeed(void) const;
    // No user-declared destructor: the class owns no resources.
private:
    // {x, y}. Kept as an array member so that getPos() can hand out a pointer
    // to storage that outlives the call (it used to return the address of a
    // function-local array, which dangled as soon as getPos() returned).
    T pos[2];
    T speed;
    double direction;
};

template <class T>
Particle<T>::Particle(void)
    : pos(), speed(), direction(0.0)
{
}

/**
 * Moves the particle along its direction by speed * elapsed time, then wraps
 * it to the opposite edge if it left the SCREEN_WIDTH x SCREEN_HEIGHT area.
 *
 * @param delta elapsed time in milliseconds.
 */
template <class T>
void Particle<T>::tick(double delta)
{
    const double dt = delta / 1000;     // milliseconds -> seconds
    const T distance = this->speed * dt;

    this->pos[0] += cos(this->direction) * distance;
    this->pos[1] += sin(this->direction) * distance;

    // Wrap around; the order of the checks matches the original behaviour.
    if (this->pos[0] > SCREEN_WIDTH) this->pos[0] = 0;
    if (this->pos[1] > SCREEN_HEIGHT) this->pos[1] = 0;
    if (this->pos[0] < 0) this->pos[0] = SCREEN_WIDTH;
    if (this->pos[1] < 0) this->pos[1] = SCREEN_HEIGHT;
}

template <class T>
void Particle<T>::setPos(T x, T y)
{
    this->pos[0] = x;
    this->pos[1] = y;
}

template <class T>
T* Particle<T>::getPos(void)
{
    return this->pos;
}

template <class T>
void Particle<T>::setDirection(double direction)
{
    this->direction = direction;
}

template <class T>
double Particle<T>::getDirection(void) const
{
    return this->direction;
}

template <class T>
void Particle<T>::setSpeed(T speed)
{
    this->speed = speed;
}

template <class T>
T Particle<T>::getSpeed(void) const
{
    return this->speed;
}

#endif

为什么会这样?硬件渲染器不应该比软件渲染器快很多吗?

4

1 回答 1

1

SDL_RenderDrawPoint() 只是以计数 1 调用 SDL_RenderDrawPoints()。SDL_RenderDrawPoints() 在渲染所需的点之前会调用 SDL_stack_alloc(),渲染完成后再调用 SDL_stack_free()。这可能就是你的问题所在:你在每一帧里都为系统中的每一个粒子做了一次 malloc 和 free。

我认为 Retired Ninja 的想法是正确的——改用 SDL_RenderDrawPoints(),这样每帧只执行一次 malloc 和 free。

或者——换一种思路。只创建一次 SDL_Surface。每一帧直接操作这个 SDL_Surface 的像素,把所有需要的像素写进去(用 SDL_MapRGB() 生成颜色值);渲染时,再把这个 SDL_Surface 转换为 SDL_Texture 并交给渲染器呈现。

一些示例代码——如果 Particle 是一个类,并且包含一个指向 SDL_Surface 的指针,那么您可以编写一个如下所示的绘图函数:

void Particle::draw()
{
  Uint32 x = m_position.getX();
  Uint32 y = m_position.getY();
  Uint32 * pixel = (Uint32*)m_screen->pixels+(y*(m_pitch/4))+x;

  Uint8 r1 = 0;
  Uint8 g1 = 0;
  Uint8 b1 = 0;
  Uint8 a1 = 0;
  GFX_RGBA_FROM_PIXEL(*pixel, m_screen->format, &r1, &g1, &b1, &a1);

  Uint32 * p = (Uint32*)m_screen->pixels+(y*(m_pitch/4))+x;
  *p = SDL_MapRGB(m_screen->format, m_r, m_g, m_b);
}

其中 GFX_RGBA_FROM_PIXEL 借用自 Andreas Schiffler 的 SDL2_gfx 库,定义如下:

///////////////////////////////////////////////////////////////////
/**
 * Splits a packed pixel value into its red, green, blue and alpha channels.
 *
 * Each channel is isolated with the format's mask, shifted down by the
 * format's shift and then shifted up by the format's loss; the result is
 * truncated to 8 bits when stored.
 *
 * @param pixel packed pixel value to decode.
 * @param fmt   pixel format supplying the masks, shifts and losses.
 * @param r,g,b,a outputs receiving the decoded channel values.
 */
void GFX_RGBA_FROM_PIXEL(Uint32 pixel, SDL_PixelFormat * fmt, Uint8* r, Uint8* g, Uint8* b, Uint8* a)
{
  const Uint32 redBits   = (pixel & fmt->Rmask) >> fmt->Rshift;
  const Uint32 greenBits = (pixel & fmt->Gmask) >> fmt->Gshift;
  const Uint32 blueBits  = (pixel & fmt->Bmask) >> fmt->Bshift;
  const Uint32 alphaBits = (pixel & fmt->Amask) >> fmt->Ashift;

  *r = static_cast<Uint8>(redBits   << fmt->Rloss);
  *g = static_cast<Uint8>(greenBits << fmt->Gloss);
  *b = static_cast<Uint8>(blueBits  << fmt->Bloss);
  *a = static_cast<Uint8>(alphaBits << fmt->Aloss);
}

这样可能会更快。我没有做过任何计时测试,但值得一试:你直接修改像素内存中的颜色,然后每一帧只需把它 blit 出去即可,整个过程中没有任何 malloc 或 free。

于 2013-11-13T23:00:40.433 回答