// Xbox 1 G-Buffer Attribute Packing Pixel Shader
// Single scene pass deferred shading on Xbox 1 using attribute packing.
// Written by Rich Geldreich, 2003
// This is the pixel shader I wanted to put in my GDC 2004 presentation on deferred shading.
// At the time, this idea was pretty far out, and this "shader" (really combiner setup) was the most complex I ever wrote for Xbox 1.
// It effectively simulates two render targets of two components each: one for packed colors and a couple bits of gloss in the BA components,
// and a packed normal in RG. To pack the normal, the below shader uses 1-bit to hold the sign of Z, 7 bits for (I think X), and 8 for Y.
// Later lighting passes use a couple of 2D textures to unpack this format.
// This shader carefully shifts bits around and makes assumptions about the combiner precision on Xbox 1, so it probably only works on NV2A (or very similar) NVIDIA GPUs.
// c = A,R,G,B
// c0 = 0,1,0,0
// c1 = 0,0,1,0
// c4 = 0F,00,00,00
// c5 = 128,0,3,0
// c6 = 0F,00,C0,80
// c7 = 3F,00,40,80
// c8 = 00,00,40,7F
// c9 = 0,1,0,0 (R/X mask)
// c10 = BF,3F,00,00
// Diffuse/detail texture color component layout (all 0-255):
// texture.a = blue
// texture.r = green
// texture.g = gloss
// texture.b = red
// A A R R G G B B
// B R G X Y Y Z Z
// 4 4 6 2
xps.1.1
// Purpose: pack diffuse color, gloss and a per-pixel normal into one RGBA output using the register combiners.
// Constant values quoted below (c0, c4, c5, ...) come from the constant table in the file header (listed there in A,R,G,B order).
//
// Opcode summary used in the comments below (from memory of the Xbox combiner assembler -- TODO confirm against the XDK docs):
//   xmma d0, d1, d2, s0, s1, s2, s3 : d0 = s0*s1, d1 = s2*s3, d2 = s0*s1 + s2*s3
//   xdd  d0, d1, s0, s1, s2, s3     : d0 = s0 dot s1, d1 = s2 dot s3
//   xdm  d0, d1, s0, s1, s2, s3     : d0 = s0 dot s1, d1 = s2*s3
//   xfc  s0, s1, ..., s6            : out.rgb = s0*s1 + (1-s0)*s2 + s3, out.a = s6 ("sum" = r0 + v1, "prod" = s4*s5)
// A leading '+' co-issues an alpha-pipe instruction with the RGB-pipe instruction on the line above it.
// NOTE(review): several pairs read a register that the other half of the pair writes (e.g. t3.a, t1.a); this presumably
// relies on both halves reading their sources before either writes -- confirm.
tex t0 // diffuse texture
tex t1 // cubemap normalize N (full range)
tex t2 // normal map, alpha has G
// Disabled code, left as found:
//dp3 r0, t0, c1
//xfc r0.rgb, 1-zero.rgb, zero.rgb, zero.rgb, zero.rgb, zero.rgb, 1-zero.a // c9.rgb = FF0000
// v0.rgb = S (range compressed)
// v1.rgb = T (range compressed)
// r0.rgba = dot(t2_bx2.rgb, c0.rgb); with c0.rgb = (1,0,0) this selects t2_bx2.r.
// t3.rgba = dot(t0.rgb, c10.rgb); with c10.rgb = (3F,00,00) this is t0.r (green) scaled by 3F/FF.
xdd r0.rgba, t3.rgba, t2_bx2.rgb, c0.rgb, t0.rgb, c10.rgb //c0.rgb = (1,0,0)
// r0.a = tspace x
// t2_bx2.a = tspace y
// t2.b = tspace z
// RGB pipe: r0.rgb = r0.a * v0_bx2.rgb + t2_bx2.a * v1_bx2.rgb   (tspace.x * S + tspace.y * T)
xmma discard.rgb, discard.rgb, r0.rgb, r0.a, v0_bx2.rgb, t2_bx2.a, v1_bx2.rgb
// Alpha pipe: t3.a = t2_bx2.b * 1 (tspace z); t1.a = t0.a * c4.a (blue scaled by 0F/FF).
+xmma t3.a, t1.a, discard.a, t2_bx2.b, 1-zero.a, t0.a, c4.a
// t1.rgb = tspace.z * N + r0.rgb   (the destination is t1, not r0; r0.rgb is only read here)
mad t1.rgb, t3.a, t1_bx2.rgb, r0.rgb
// t3.a = 2 * (t3.b + t3.b) = 4 * t3.b, where t3.b still holds the scaled green from the first xdd.
+add_x2 t3.a, t3.b, t3.b
//------
// r1.rgb = dot(t1.rgb, c9.rgb); with c9.rgb = (1,0,0) this selects t1.r (taken before the renormalization below).
// r0.rgb = dot(t0.rgb, c5.rgb); per the header c5.rgb = (0,3,0), so this is t0.g (gloss) scaled by 3/255
// (assuming the c5 entries are decimal byte values -- TODO confirm).
xdd r1.rgb, r0.rgb, t1.rgb, c9.rgb, t0.rgb, c5.rgb
// t1.a = 4 * (t1.a + t1.a) = 8 * t1.a
+add_x4 t1.a, t1.a, t1.a
// t2.rgb = dot(t1.rgb, t1.rgb) (squared length of the combined normal); v1.rgb = 1 * t3.a (the scaled green term).
xdm t2.rgb, v1.rgb, t1.rgb, t1.rgb, 1-zero.rgb, t3.a
// r0.a = r1.b + c5.a; c5.a is 128 in the header (~0.5), biasing t1.r for the cnd below.
+add r0.a, r1.b, c5.a
// cnd picks its first source when r0.a > 0.5 and the second otherwise (ps.1.1 cnd semantics -- confirm for xps),
// so the sign of t1.r chooses the bias constant that is added in the later mad.
cnd v0.rgb, r0.a, c6.rgb, c7.rgb // select between 00C080 or 004080
// t2.a = (1 - t2.b) / 2 = (1 - dot(N,N)) / 2
+mov_d2 t2.a, 1-t2.b
// 2 * (.75*v - x*v)
// t1.rgb = t1.rgb * 1 + t1.rgb * t2.a = t1.rgb * (1 + (1 - dot(N,N)) / 2)
// NOTE(review): this looks like a one-step approximate renormalization of the normal -- confirm.
xmma discard.rgb, discard.rgb, t1.rgb, t1.rgb, 1-zero.rgb, t1.rgb, t2.a
// t1.a = 2 * t1.a; together with the earlier add_x4 this is 16 * (t0.a * c4.a).
// NOTE(review): presumably moves the top bits of blue into the high nibble, relying on combiner precision -- confirm.
+add t1.a, t1.a, t1.a
// t1.rgb = t1.rgb * c8.rgb + v0.rgb; with c8.rgb = (00,40,7F) the red component is zeroed (v0.r is 00 for both cnd
// constants), green is scaled by 40/FF and biased by C0 or 40, and blue is scaled by 7F/FF and biased by 80.
mad t1.rgb, t1.rgb, c8.rgb, v0.rgb
// t1.a = t1.a * 1 + t0.b * c6.a, with c6.a = 0F and t0.b = red per the texture layout in the header.
+xmma discard.a, discard.a, t1.a, t1.a, 1-zero.a, t0.b, c6.a // t1.a + (t0.b >> 4) approximately; the operand read is t0.b, not t1.b
// Final combiner: out.rgb = sum.rgb * c9.rgb + (1 - sum.rgb) * zero + t1.rgb, out.a = t1.a (see opcode summary above).
// With sum = r0 + v1 (gloss bits + scaled green) masked to red by c9.rgb, the output appears to be:
//   R = green + gloss, G/B = biased normal components from t1, A = packed blue/red -- TODO confirm against the unpack pass.
xfc sum.rgb, c9.rgb, zero.rgb, t1.rgb, zero.rgb, zero.rgb, t1.a // c9.rgb = FF0000