// Xbox 1 G-Buffer Attribute Packing Pixel Shader

// Single scene pass deferred shading on Xbox 1 using attribute packing.

// Written by Rich Geldreich, 2003

// This is the pixel shader I wanted to put in my GDC 2004 presentation on deferred shading.

// At the time, this idea was pretty far out, and this "shader" (really combiner setup) was the most complex I ever wrote for Xbox 1.

// It effectively simulates two render targets of two components each: one for packed colors and a couple bits of gloss in the BA components,

// and a packed normal in RG. To pack the normal, the shader below uses 1 bit to hold the sign of Z, 7 bits for (I think) X, and 8 bits for Y.

// Later lighting passes use a couple of 2D textures to unpack this format.

// This shader carefully shifts around bits and makes assumptions about the combiner precision on Xbox 1, so it probably only works on NV2A (or very similar) NVIDIA GPUs.

// c = A,R,G,B

// c0 = 0,1,0,0

// c1 = 0,0,1,0

// c4 = 0F,00,00,00

// c5 = 128,0,3,0

// c6 = 0F,00,C0,80

// c7 = 3F,00,40,80

// c8 = 00,00,40,7F

// c9 = 0,1,0,0 (R/X mask)

// c10 = BF,3F,00,00

// Diffuse/detail texture color component layout (all 0-255):

// texture.a = blue

// texture.r = green

// texture.g = gloss

// texture.b = red

// A A R R G G B B

// B R G X Y Y Z Z

// 4 4 6 2

// Shader body: packs diffuse color, gloss and a world-space normal into a single ARGB output.
// NOTE(review): the per-instruction notes below assume the following operand semantics for the
// Xbox extended combiner instructions; this file does not define them -- confirm against the
// Xbox pixel shader reference before relying on these comments:
//   xdd  d0, d1, s0, s1, s2, s3      : d0 = s0 dot s1, d1 = s2 dot s3
//   xdm  d0, d1, s0, s1, s2, s3      : d0 = s0 dot s1, d1 = s2 * s3
//   xmma d0, d1, d2, s0, s1, s2, s3  : d0 = s0 * s1, d1 = s2 * s3, d2 = d0 + d1
//   xfc  s0, s1, s2, s3, s4, s5, s6  : rgb = s0 * s1 + (1 - s0) * s2 + s3, alpha = s6
// A leading '+' co-issues an alpha-pipe instruction with the color-pipe instruction before it.
xps.1.1

// Texture fetches for stages 0-2.
tex t0 // diffuse texture

tex t1 // cubemap normalize N (full range)

tex t2 // normal map, alpha has G

//dp3 r0, t0, c1

//xfc r0.rgb, 1-zero.rgb, zero.rgb, zero.rgb, zero.rgb, zero.rgb, 1-zero.a // c9.rgb = FF0000

// v0.rgb = S (range compressed)

// v1.rgb = T (range compressed)

// r0 = t2_bx2.rgb . c0.rgb, i.e. the tangent-space X component given c0.rgb = (1,0,0).
// t3 = t0.rgb . c10.rgb.
xdd r0.rgba, t3.rgba, t2_bx2.rgb, c0.rgb, t0.rgb, c10.rgb //c0.rgb = (1,0,0)

// r0.a = tspace x

// t2_bx2.a = tspace y

// t2.b = tspace z

// Color: r0.rgb = r0.a * v0_bx2.rgb + t2_bx2.a * v1_bx2.rgb (tspace x * S + tspace y * T); the two products are discarded.
// Alpha: t3.a = t2_bx2.b * 1 (tspace z), t1.a = t0.a * c4.a; the sum is discarded.
xmma discard.rgb, discard.rgb, r0.rgb, r0.a, v0_bx2.rgb, t2_bx2.a, v1_bx2.rgb

+xmma t3.a, t1.a, discard.a, t2_bx2.b, 1-zero.a, t0.a, c4.a

// t1.rgb = (r0.rgb + tspace.z * N) -- the destination is t1, not r0; r0.rgb is left unchanged here.
// Alpha: t3.a = (t3.b + t3.b) * 2 (the _x2 modifier doubles the result).
// NOTE(review): t3.a is read by the color op and written by the alpha op in the same pair;
// presumably the color op sees the value from before this pair -- confirm.

mad t1.rgb, t3.a, t1_bx2.rgb, r0.rgb

+add_x2 t3.a, t3.b, t3.b

//------

// Color: r1 = t1.rgb . c9.rgb (c9 is the R/X mask), r0 = t0.rgb . c5.rgb.
// Alpha: t1.a = (t1.a + t1.a) * 4 (the _x4 modifier).
xdd r1.rgb, r0.rgb, t1.rgb, c9.rgb, t0.rgb, c5.rgb

+add_x4 t1.a, t1.a, t1.a

// Color: t2.rgb = t1.rgb . t1.rgb (squared length of the still unnormalized normal), v1.rgb = 1 * t3.a.
// Alpha: r0.a = r1.b + c5.a; r0.a is the selector read by the cnd that follows.
xdm t2.rgb, v1.rgb, t1.rgb, t1.rgb, 1-zero.rgb, t3.a

+add r0.a, r1.b, c5.a

// Color: v0.rgb = c6.rgb when r0.a > 0.5, otherwise c7.rgb.
// Alpha: t2.a = (1 - t2.b) / 2 (the _d2 modifier halves the result).
cnd v0.rgb, r0.a, c6.rgb, c7.rgb // select between 00C080 or 004080

+mov_d2 t2.a, 1-t2.b

// 2 * (.75*v - x*v)

// Color: t1.rgb = t1.rgb * 1 + t1.rgb * t2.a. With t2.a = (1 - t1.t1) / 2 this looks like a
// single approximate renormalization step for the normal -- TODO confirm.
// Alpha: t1.a = t1.a + t1.a.
xmma discard.rgb, discard.rgb, t1.rgb, t1.rgb, 1-zero.rgb, t1.rgb, t2.a

+add t1.a, t1.a, t1.a

// Color: t1.rgb = t1.rgb * c8.rgb + v0.rgb (scale by c8, then add the constant selected by cnd).
// Alpha: t1.a = t1.a * 1 + t0.b * c6.a; both products are discarded, only the sum is kept.
mad t1.rgb, t1.rgb, c8.rgb, v0.rgb

+xmma discard.a, discard.a, t1.a, t1.a, 1-zero.a, t0.b, c6.a // t1.a+(t0.b>>4) -- the operand is t0.b; ">>4" assumes c6.a = 0F

// Final combiner: rgb = sum.rgb * c9.rgb + t1.rgb (the (1 - s0) * zero term vanishes), alpha = t1.a.
// NOTE(review): "sum" is presumably the final combiner's r0 + v1 input -- confirm.
xfc sum.rgb, c9.rgb, zero.rgb, t1.rgb, zero.rgb, zero.rgb, t1.a // c9.rgb = FF0000