Close but no cigar |

However, the biggest stumbling block came after the initial success as a seemingly identical image slowly filled up an image buffer. The two images are close but, alas, not identical. The basic impression of the shader, which is one of the most complicated in the game, has been successfully reproduced. Unfortunately, they need to be identical so this isn't of any use for the particular problem I'm trying to solve.

The problem is with differences between the floating (or possibly fixed) point number implementations on the GPU and on the CPU - small differences in these can lead to big differences when you're generating random numbers. This is why when building a procedural world in a game like Minecraft, running the generator with the same seed on different platforms can yield very different random worlds, whilst still following all of the rules for building that world. Similarly, the two galaxies both work but they're both different places. If you're wondering: yes, you can zoom down to each star and they all have solar systems, all 20 million of them - that's for the next blog post however.

Whilst it's hardly complete, below you'll find the helper header I've used to enable me to compile that particular GLSL shader. There are a few gotchas - I couldn't get vector swizzling to work with my vector classes as the GLSL vector swizzlers (e.g. 'vec.xy', 'vec.zw' etc.) aren't functions but a special case of structure access. Using anonymous unions within a struct might do it for contiguous elements but that would mess up constructors, which are also required by the syntax.

For many shaders though, just include this code, initialise your shader variables and pick up the result in 'gl_FragColor'. This was designed for shaders run as full-screen rectangles such as would be used for generating textures or ray casting scenes, although it could be adapted for a full rasterizer.

One thing this code isn't is fast: it's a proof of principle and is unoptimized. It's a clear candidate for implementation in a parallel library such as TBB. Also, obvious optimizations such as passing larger objects by const reference were omitted due to the fact that GLSL doesn't bother with that sort of thing and I wanted to compile the code within C++ with as little massaging as possible.

Despite having made no real attempts at optimizing anything, the tremendous speed difference between the CPU and GPU has given me renewed respect for the GPU engineers, as well as the author of the shader optimizer.

//--------8<--------- start glsl helper include file ------------------------------------------------

// GLSL-style pow for floats: raises v to the power exp using the
// single-precision C routine ::powf.
inline float pow(float v, float exp)
{
    const float raised = ::powf(v, exp);
    return raised;
}

// Component-wise sine for the vector types: every overload applies ::sinf
// to each component of its argument and builds a vector of the same width.
inline vec2 sin(const vec2 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    return vec2(sx, sy);
}

inline vec3 sin(const vec3 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    const float sz = ::sinf(a.z);
    return vec3(sx, sy, sz);
}

inline vec4 sin(const vec4 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    const float sz = ::sinf(a.z);
    const float sw = ::sinf(a.w);
    return vec4(sx, sy, sz, sw);
}

// Component-wise cosine for the vector types: every overload applies ::cosf
// to each component of its argument and builds a vector of the same width.
inline vec2 cos(const vec2 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    return vec2(cx, cy);
}

inline vec3 cos(const vec3 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    const float cz = ::cosf(a.z);
    return vec3(cx, cy, cz);
}

inline vec4 cos(const vec4 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    const float cz = ::cosf(a.z);
    const float cw = ::cosf(a.w);
    return vec4(cx, cy, cz, cw);
}

// Component-wise fract for the vector types. Each component is passed to the
// single-value fract overload (declared outside this excerpt) and the results
// are packed into a vector of the same width.
inline vec2 fract(const vec2 &a)
{
    return vec2(fract(a.x),
                fract(a.y));
}

inline vec3 fract(const vec3 &a)
{
    return vec3(fract(a.x),
                fract(a.y),
                fract(a.z));
}

inline vec4 fract(const vec4 &a)
{
    return vec4(fract(a.x),
                fract(a.y),
                fract(a.z),
                fract(a.w));
}

// Component-wise floor for the vector types: every overload applies ::floorf
// to each component of its argument and builds a vector of the same width.
inline vec2 floor(const vec2 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    return vec2(fx, fy);
}

inline vec3 floor(const vec3 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    const float fz = ::floorf(a.z);
    return vec3(fx, fy, fz);
}

inline vec4 floor(const vec4 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    const float fz = ::floorf(a.z);
    const float fw = ::floorf(a.w);
    return vec4(fx, fy, fz, fw);
}

// Component-wise clamp for the vector types. Every component of v is passed,
// together with the shared bounds a and b, to the single-value clamp overload
// (declared outside this excerpt); the results form a vector of the same width.
inline vec2 clamp(const vec2 &v, float a, float b)
{
    return vec2(clamp(v.x, a, b),
                clamp(v.y, a, b));
}

inline vec3 clamp(const vec3 &v, float a, float b)
{
    return vec3(clamp(v.x, a, b),
                clamp(v.y, a, b),
                clamp(v.z, a, b));
}

inline vec4 clamp(const vec4 &v, float a, float b)
{
    return vec4(clamp(v.x, a, b),
                clamp(v.y, a, b),
                clamp(v.z, a, b),
                clamp(v.w, a, b));
}

// GLSL-style modulus: returns a - b * floor(a / b).
// This is deliberately NOT ::fmod. fmod truncates the quotient toward zero,
// so its result takes the sign of 'a' (fmod(-1, 4) == -1), whereas GLSL's mod
// floors the quotient, so the result takes the sign of 'b' (mod(-1, 4) == 3).
// Shaders that wrap negative coordinates rely on the floored behaviour.
inline float mod(float a, float b)
{
    return a - b * ::floorf(a / b);
}

// GLSL-style normalize for the vector types: each overload forwards to the
// vector's own Normal() member and returns its result.
// NOTE(review): Normal() is presumed to return a unit-length copy of v -
// confirm against the vector class definitions.
inline vec2 normalize(const vec2 &v)
{
    return v.Normal();
}

inline vec3 normalize(const vec3 &v)
{
    return v.Normal();
}

inline vec4 normalize(const vec4 &v)
{
    return v.Normal();
}

// GLSL-style smoothstep.
// Maps x from the range [edge0, edge1] onto [0, 1], saturates it, then
// evaluates the polynomial 3x^2 - 2x^3 on the result.
//   edge0, edge1 : lower and upper edge of the transition
//   x            : value to remap (taken by value and reused as the
//                  normalised parameter)
// There is no guard for edge0 == edge1; that case divides by zero.
// Relies on the single-value clamp overload declared outside this excerpt.
template <class T>
inline T smoothstep(const T &edge0, const T &edge1, float x)
{
    // Scale, bias and saturate x to 0..1 range
    x = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);

    // Evaluate polynomial
    return x * x * (3 - 2 * x);
}

// GLSL-style mix (linear interpolation).
// Evaluates edge0 + (edge1 - edge0) * x, so x == 0 yields edge0 and values
// of x towards 1 move the result towards edge1. x is not clamped.
//   edge0 : value at x == 0
//   edge1 : value approached as x goes to 1
//   x     : interpolation weight
template <class T>
inline T mix(const T &edge0, const T &edge1, float x)
{
    return edge0 + (edge1 - edge0) * x;
}

// vec3 / mat3 products in both operand orders, so shader expressions such as
// 'v * m' and 'm * v' compile. Both forward to mat.PostMultiplyVector(v).
// NOTE(review): the vector-on-the-left form was tagged "pre" by the original
// author, which suggests it may have been intended to use a pre-multiply
// routine instead - confirm against the mat3 class before relying on 'v * m'.
inline vec3 operator * (const vec3 &v, const mat3 &mat)
{
    return mat.PostMultiplyVector(v); //pre
}

inline vec3 operator * (const mat3 &mat, const vec3 &v)
{
    return mat.PostMultiplyVector(v);
}

// Texture lookup entry points named after GLSL's texture2D. Only declared
// here; the definitions live outside this excerpt.
// NOTE(review): 'bias' defaults to 1.0 in both overloads - confirm how the
// implementation interprets it.
vec4 texture2D(const sampler2D &sampler, const vec2 &pt, float bias = 1.0);

vec4 texture2D(const sampler2D &sampler, const vec3 &pt, float bias = 1.0);

// Stand-ins for the GLSL fragment built-ins. Declared extern here, so the
// storage is defined elsewhere; the driving loop writes gl_FragCoord and
// reads gl_FragColor around each call to the shader's main().
extern vec4 gl_FragColor;

extern vec4 gl_FragCoord;

extern float gl_FragDepth;

}

//--------8<------------------------ end glsl helper include file --------------------------------

for (int y = 0; y < imagedata->mHeight; ++y)

{

glsl::gl_FragCoord.x = 0.0f;

for (int x = 0; x < imagedata->mWidth; ++x)

{

glsl::gl_FragColor = glsl::vec4(0, 0, 0, 0); //initialise the fragment colour

glsl::main(); //run the shader

glsl::gl_FragColor = glsl::vec4(glsl::gl_FragColor.w, glsl::gl_FragColor.z, glsl::gl_FragColor.y, glsl::gl_FragColor.x); //have to swap the channel order

*pixels = glsl::gl_FragColor.ToDWORDsat(); //saturated 4 float colour to

++pixels;

glsl::gl_FragCoord.x += 1.0f; //move to the next pixel to the right

}

glsl::gl_FragCoord.y -= 1.0f; //move to the next scanline (flipped in the y-axis)

}

<!--------------------------end--></class></class><!--------------------------start-->

// GLSL-style pow for floats: raises v to the power exp using the
// single-precision C routine ::powf.
inline float pow(float v, float exp)
{
    const float raised = ::powf(v, exp);
    return raised;
}

// Component-wise sine for the vector types: every overload applies ::sinf
// to each component of its argument and builds a vector of the same width.
inline vec2 sin(const vec2 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    return vec2(sx, sy);
}

inline vec3 sin(const vec3 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    const float sz = ::sinf(a.z);
    return vec3(sx, sy, sz);
}

inline vec4 sin(const vec4 &a)
{
    const float sx = ::sinf(a.x);
    const float sy = ::sinf(a.y);
    const float sz = ::sinf(a.z);
    const float sw = ::sinf(a.w);
    return vec4(sx, sy, sz, sw);
}

// Component-wise cosine for the vector types: every overload applies ::cosf
// to each component of its argument and builds a vector of the same width.
inline vec2 cos(const vec2 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    return vec2(cx, cy);
}

inline vec3 cos(const vec3 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    const float cz = ::cosf(a.z);
    return vec3(cx, cy, cz);
}

inline vec4 cos(const vec4 &a)
{
    const float cx = ::cosf(a.x);
    const float cy = ::cosf(a.y);
    const float cz = ::cosf(a.z);
    const float cw = ::cosf(a.w);
    return vec4(cx, cy, cz, cw);
}

// Component-wise fract for the vector types. Each component is passed to the
// single-value fract overload (declared outside this excerpt) and the results
// are packed into a vector of the same width.
inline vec2 fract(const vec2 &a)
{
    return vec2(fract(a.x),
                fract(a.y));
}

inline vec3 fract(const vec3 &a)
{
    return vec3(fract(a.x),
                fract(a.y),
                fract(a.z));
}

inline vec4 fract(const vec4 &a)
{
    return vec4(fract(a.x),
                fract(a.y),
                fract(a.z),
                fract(a.w));
}

// Component-wise floor for the vector types: every overload applies ::floorf
// to each component of its argument and builds a vector of the same width.
inline vec2 floor(const vec2 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    return vec2(fx, fy);
}

inline vec3 floor(const vec3 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    const float fz = ::floorf(a.z);
    return vec3(fx, fy, fz);
}

inline vec4 floor(const vec4 &a)
{
    const float fx = ::floorf(a.x);
    const float fy = ::floorf(a.y);
    const float fz = ::floorf(a.z);
    const float fw = ::floorf(a.w);
    return vec4(fx, fy, fz, fw);
}

// Component-wise clamp for the vector types. Every component of v is passed,
// together with the shared bounds a and b, to the single-value clamp overload
// (declared outside this excerpt); the results form a vector of the same width.
inline vec2 clamp(const vec2 &v, float a, float b)
{
    return vec2(clamp(v.x, a, b),
                clamp(v.y, a, b));
}

inline vec3 clamp(const vec3 &v, float a, float b)
{
    return vec3(clamp(v.x, a, b),
                clamp(v.y, a, b),
                clamp(v.z, a, b));
}

inline vec4 clamp(const vec4 &v, float a, float b)
{
    return vec4(clamp(v.x, a, b),
                clamp(v.y, a, b),
                clamp(v.z, a, b),
                clamp(v.w, a, b));
}

// GLSL-style modulus: returns a - b * floor(a / b).
// This is deliberately NOT ::fmod. fmod truncates the quotient toward zero,
// so its result takes the sign of 'a' (fmod(-1, 4) == -1), whereas GLSL's mod
// floors the quotient, so the result takes the sign of 'b' (mod(-1, 4) == 3).
// Shaders that wrap negative coordinates rely on the floored behaviour.
inline float mod(float a, float b)
{
    return a - b * ::floorf(a / b);
}

// GLSL-style normalize for the vector types: each overload forwards to the
// vector's own Normal() member and returns its result.
// NOTE(review): Normal() is presumed to return a unit-length copy of v -
// confirm against the vector class definitions.
inline vec2 normalize(const vec2 &v)
{
    return v.Normal();
}

inline vec3 normalize(const vec3 &v)
{
    return v.Normal();
}

inline vec4 normalize(const vec4 &v)
{
    return v.Normal();
}

// GLSL-style smoothstep.
// Maps x from the range [edge0, edge1] onto [0, 1], saturates it, then
// evaluates the polynomial 3x^2 - 2x^3 on the result.
//   edge0, edge1 : lower and upper edge of the transition
//   x            : value to remap (taken by value and reused as the
//                  normalised parameter)
// There is no guard for edge0 == edge1; that case divides by zero.
// Relies on the single-value clamp overload declared outside this excerpt.
template <class T>
inline T smoothstep(const T &edge0, const T &edge1, float x)
{
    // Scale, bias and saturate x to 0..1 range
    x = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0);

    // Evaluate polynomial
    return x * x * (3 - 2 * x);
}

// GLSL-style mix (linear interpolation).
// Evaluates edge0 + (edge1 - edge0) * x, so x == 0 yields edge0 and values
// of x towards 1 move the result towards edge1. x is not clamped.
//   edge0 : value at x == 0
//   edge1 : value approached as x goes to 1
//   x     : interpolation weight
template <class T>
inline T mix(const T &edge0, const T &edge1, float x)
{
    return edge0 + (edge1 - edge0) * x;
}

// vec3 / mat3 products in both operand orders, so shader expressions such as
// 'v * m' and 'm * v' compile. Both forward to mat.PostMultiplyVector(v).
// NOTE(review): the vector-on-the-left form was tagged "pre" by the original
// author, which suggests it may have been intended to use a pre-multiply
// routine instead - confirm against the mat3 class before relying on 'v * m'.
inline vec3 operator * (const vec3 &v, const mat3 &mat)
{
    return mat.PostMultiplyVector(v); //pre
}

inline vec3 operator * (const mat3 &mat, const vec3 &v)
{
    return mat.PostMultiplyVector(v);
}

// Texture lookup entry points named after GLSL's texture2D. Only declared
// here; the definitions live outside this excerpt.
// NOTE(review): 'bias' defaults to 1.0 in both overloads - confirm how the
// implementation interprets it.
vec4 texture2D(const sampler2D &sampler, const vec2 &pt, float bias = 1.0);

vec4 texture2D(const sampler2D &sampler, const vec3 &pt, float bias = 1.0);

// Stand-ins for the GLSL fragment built-ins. Declared extern here, so the
// storage is defined elsewhere; the driving loop writes gl_FragCoord and
// reads gl_FragColor around each call to the shader's main().
extern vec4 gl_FragColor;

extern vec4 gl_FragCoord;

extern float gl_FragDepth;

}

//--------8<------------------------ end glsl helper include file --------------------------------

for (int y = 0; y < imagedata->mHeight; ++y)

{

glsl::gl_FragCoord.x = 0.0f;

for (int x = 0; x < imagedata->mWidth; ++x)

{

glsl::gl_FragColor = glsl::vec4(0, 0, 0, 0); //initialise the fragment colour

glsl::main(); //run the shader

glsl::gl_FragColor = glsl::vec4(glsl::gl_FragColor.w, glsl::gl_FragColor.z, glsl::gl_FragColor.y, glsl::gl_FragColor.x); //have to swap the channel order

*pixels = glsl::gl_FragColor.ToDWORDsat(); //saturated 4 float colour to

++pixels;

glsl::gl_FragCoord.x += 1.0f; //move to the next pixel to the right

}

glsl::gl_FragCoord.y -= 1.0f; //move to the next scanline (flipped in the y-axis)

}

<!--------------------------end--></class></class><!--------------------------start-->

## No comments:

## Post a Comment