diff --git a/progs/demos/Makefile b/progs/demos/Makefile
index 5b1d2a0..67bb854 100644
--- a/progs/demos/Makefile
+++ b/progs/demos/Makefile
@@ -19,6 +19,7 @@ PROGS = \
 	clearspd \
 	copypix \
 	cubemap \
+	cubemap_compressed \
 	dinoshade \
 	dissolve \
 	drawpix \
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index 8986191..14f0ba5 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -305,13 +305,20 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
    rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
 							"def_max_anisotropy");
 
-   if ( sPriv->drm_version.major == 1
-       && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+   if /*( sPriv->drm_version.major == 1 
+       && */ (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" )) {
+
+    if (screen->kernel_mm) {
+           rmesa->using_hyperz = GL_TRUE;
+
+      } else {
+
       if ( sPriv->drm_version.minor < 13 )
 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
 			  "disabling.\n", sPriv->drm_version.minor );
       else
 	 rmesa->using_hyperz = GL_TRUE;
+	 }
    }
  
    if ( sPriv->drm_version.minor >= 15 )
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index a1b5057..d71ed32 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -54,6 +54,65 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R200_TIMEOUT             512
 #define R200_IDLE_RETRY           16
 
+/**
+ * Emit a HyperZ depth clear for the bound depth buffer.  Writes the depth
+ * clear value, zeroes the Z-mask offset and flushes the Z cache; on chips
+ * flagged RADEON_CHIPSET_HIERZ it also emits CLEAR_ZMASK packets and, when
+ * RADEON_USE_HIERZ is set in \p flags, a CLEAR_HIZ packet.
+ */
+static void r200_hyper_clear(GLcontext *ctx, int flags)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   struct radeon_renderbuffer *depth_rb = radeon_get_depthbuffer(&rmesa->radeon);
+   const uint32_t clear_value = rmesa->radeon.state.depth.clear;
+   uint32_t zmask_pattern = 0;
+   int pix_per_line;
+
+   if (!depth_rb)
+      return;
+
+   pix_per_line = depth_rb->pitch / depth_rb->cpp;
+
+   if (flags & RADEON_USE_HIERZ)
+      zmask_pattern = (0xff << 22) | (0xff << 6) | 0x003f003f;
+
+   BEGIN_BATCH_NO_AUTOSTATE(6);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHCLEARVALUE, 0));
+   OUT_BATCH(clear_value);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZMASKOFFSET, 0));
+   OUT_BATCH(0);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
+   OUT_BATCH(RADEON_RB3D_ZC_FLUSH_ALL);
+   END_BATCH();
+
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_HIERZ) {
+      const int tiles_x = depth_rb->base.Width >> 5;
+      const int tiles_y = depth_rb->base.Height >> 3;
+      int first_tile = 0, band;
+
+      /* one CLEAR_ZMASK packet per band of 8 lines (Height >> 3 of them) */
+      for (band = 0; band < tiles_y; band++) {
+         BEGIN_BATCH_NO_AUTOSTATE(4);
+         OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_ZMASK, 2));
+         OUT_BATCH(first_tile * 16);
+         OUT_BATCH(tiles_x + 1);
+         OUT_BATCH(zmask_pattern);
+         END_BATCH();
+         first_tile += pix_per_line >> 5;
+      }
+
+      if (flags & RADEON_USE_HIERZ) {
+         BEGIN_BATCH_NO_AUTOSTATE(4);
+         OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_HIZ, 2));
+         OUT_BATCH(0x0); /* First tile */
+         OUT_BATCH(0x3cc0);
+         OUT_BATCH((0xff<<22)|(0xff<<6)| 0x003f003f); /* clearmask */
+         END_BATCH();
+      }
+   }
+}
+
 static void r200KernelClear(GLcontext *ctx, GLuint flags)
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -233,21 +292,56 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
    if ( !flags )
       return;
 
+/*  hyper-z experiment */
    if (rmesa->using_hyperz) {
-      flags |= RADEON_USE_COMP_ZBUF;
+//      flags |= RADEON_USE_COMP_ZBUF;
 /*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
-	 flags |= RADEON_USE_HIERZ; */
-      if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
-	  flags |= RADEON_CLEAR_FASTZ;
-      }
-   }
-
-   if (rmesa->radeon.radeonScreen->kernel_mm)
+//	 flags |= RADEON_USE_HIERZ; */
+//      if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+//	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+//	  flags |= RADEON_CLEAR_FASTZ;
+//      } 
+
+       int hw_stencil = 0;
+       struct radeon_renderbuffer *rrbStencil
+	   = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+	
+       if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+        flags |= RADEON_USE_HIERZ; 
+
+       flags |= RADEON_USE_COMP_ZBUF;
+       hw_stencil = (rrbStencil && rrbStencil->bo);
+
+       /* fast Z clear needs a depth clear; with a hw stencil buffer it also
+	  needs a stencil clear with every stencil write-mask bit set */
+
+       if (flags & RADEON_DEPTH) {
+	   if (!hw_stencil)
+	       flags |= RADEON_CLEAR_FASTZ;
+	   else {
+	       if ((flags & RADEON_STENCIL) &&
+		   ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))
+		   flags |= RADEON_CLEAR_FASTZ;
+	   }
+       }
+
+    }
+
+ /*  if (rmesa->radeon.radeonScreen->kernel_mm)
       radeonUserClear(ctx, orig_mask);
    else {
       r200KernelClear(ctx, flags);
-      rmesa->radeon.hw.all_dirty = GL_TRUE;
+      rmesa->radeon.hw.all_dirty = GL_TRUE; */
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+        if (flags & RADEON_CLEAR_FASTZ) {
+	   orig_mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+	   /* clear depth using hyperz if we can */
+	   r200_hyper_clear(ctx, flags);
+        }
+        radeonUserClear(ctx, orig_mask);
+   } else {
+       r200KernelClear(ctx, flags);
+       rmesa->radeon.hw.all_dirty = GL_TRUE;
    }
 }
 
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index e06437b..b7750ba 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -555,6 +555,10 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
    drb = radeon_get_depthbuffer(&r200->radeon);
    if (drb) {
      zbpitch = (drb->pitch / drb->cpp);
+// r200 hyperz experiment
+     if (r200->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
+// r200 hyperz exp end
      if (drb->cpp == 4)
         depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
      else
@@ -1266,8 +1270,8 @@ void r200InitState( r200ContextPtr rmesa )
    if (rmesa->using_hyperz) {
       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
 						  R200_Z_DECOMPRESSION_ENABLE;
-/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
-	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) 
+	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE; */
    }
 
    rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 46a9cd5..c24d0fa 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -446,5 +446,6 @@ enum {
 #define RADEON_CHIPSET_TCL		(1 << 2)	/* tcl support - any radeon */
 #define RADEON_CHIPSET_BROKEN_STENCIL	(1 << 3)	/* r100 stencil bug */
 #define R200_CHIPSET_YCBCR_BROKEN	(1 << 4)	/* r200 ycbcr bug */
+#define RADEON_CHIPSET_HIERZ	        (1 << 5)	/* r200 HierZ */
 
 #endif /* _RADEON_CHIPSET_H */
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 475e93b..b39b2c5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -241,11 +241,15 @@ r100CreateContext( const __GLcontextModes *glVisual,
                                                  "def_max_anisotropy");
 
    if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-      if ( sPriv->drm_version.minor < 13 )
-	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
-			  "disabling.\n", sPriv->drm_version.minor );
-      else
-	 rmesa->using_hyperz = GL_TRUE;
+      if (screen->kernel_mm) {
+	    rmesa->using_hyperz = GL_TRUE;	 
+      } else {
+	 if ( sPriv->drm_version.minor < 13 )
+	    fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+		     "disabling.\n", sPriv->drm_version.minor );
+	 else
+	    rmesa->using_hyperz = GL_TRUE;
+      }
    }
 
    if ( sPriv->drm_version.minor >= 15 )
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index db0e4f2..cf90c3e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -553,6 +553,51 @@ static void radeonKernelClear(GLcontext *ctx, GLuint flags)
    UNLOCK_HARDWARE( &rmesa->radeon );
 }
 
+/**
+ * Emit a HyperZ depth clear for the bound depth buffer.  Writes the depth
+ * clear value, zeroes the Z-mask offset and flushes the Z cache; chips flagged
+ * RADEON_CHIPSET_HIERZ also get CLEAR_ZMASK packets.  \p flags is unused.
+ */
+static void r100_hyper_clear(GLcontext *ctx, int flags)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   struct radeon_renderbuffer *depth_rb = radeon_get_depthbuffer(&rmesa->radeon);
+   const uint32_t clear_value = rmesa->radeon.state.depth.clear;
+   const uint32_t zmask_pattern = 0;
+   int pix_per_line;
+
+   if (!depth_rb)
+      return;
+
+   pix_per_line = depth_rb->pitch / depth_rb->cpp;
+
+   BEGIN_BATCH_NO_AUTOSTATE(6);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHCLEARVALUE, 0));
+   OUT_BATCH(clear_value);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZMASKOFFSET, 0));
+   OUT_BATCH(0);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
+   OUT_BATCH(RADEON_RB3D_ZC_FLUSH_ALL);
+   END_BATCH();
+
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_HIERZ) {
+      const int tiles_x = (depth_rb->base.Width & ~63) >> 4;
+      const int tiles_y = depth_rb->base.Height >> 3;
+      int first_tile = 0, band;
+      /* one CLEAR_ZMASK packet per band of 8 lines (Height >> 3 of them) */
+      for (band = 0; band < tiles_y; band++) {
+         BEGIN_BATCH_NO_AUTOSTATE(4);
+         OUT_BATCH(CP_PACKET3(RADEON_CP_PACKET3_3D_CLEAR_ZMASK, 2));
+         OUT_BATCH(first_tile * 8);
+         OUT_BATCH(tiles_x + 4);
+         OUT_BATCH(zmask_pattern);
+         END_BATCH();
+         first_tile += pix_per_line >> 6;
+      }
+   }
+}
+
 static void radeonClear( GLcontext *ctx, GLbitfield mask )
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
@@ -610,20 +655,38 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
       return;
 
    if (rmesa->using_hyperz) {
-      flags |= RADEON_USE_COMP_ZBUF;
-/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL)
-         flags |= RADEON_USE_HIERZ; */
-      if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-	    ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
-	  flags |= RADEON_CLEAR_FASTZ;
-      }
+       int hw_stencil = 0;
+       struct radeon_renderbuffer *rrbStencil
+	   = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+
+       flags |= RADEON_USE_COMP_ZBUF;
+
+       hw_stencil = (rrbStencil && rrbStencil->bo);
+       
+       /* fast Z clear needs a depth clear; with a hw stencil buffer it also
+	  needs a stencil clear with every stencil write-mask bit set */
+
+       if (flags & RADEON_DEPTH) {
+	   if (!hw_stencil)
+	       flags |= RADEON_CLEAR_FASTZ;
+	   else {
+	       if ((flags & RADEON_STENCIL) &&
+		   ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))
+		   flags |= RADEON_CLEAR_FASTZ;
+	   }
+       }
    }
 
-   if (rmesa->radeon.radeonScreen->kernel_mm)
-     radeonUserClear(ctx, orig_mask);
-   else {
-      radeonKernelClear(ctx, flags);
-      rmesa->radeon.hw.all_dirty = GL_TRUE;
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+       if (flags & RADEON_CLEAR_FASTZ) {
+	   orig_mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+	   /* clear depth using hyperz if we can */
+	   r100_hyper_clear(ctx, flags);
+       }
+       radeonUserClear(ctx, orig_mask);
+   } else {
+       radeonKernelClear(ctx, flags);
+       rmesa->radeon.hw.all_dirty = GL_TRUE;
    }
 }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 93b6399..c95cfb8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -417,7 +417,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
    case PCI_CHIP_RADEON_QG:
       /* all original radeons (7200) presumably have a stencil op bug */
       screen->chip_family = CHIP_FAMILY_R100;
-      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL;
+      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL | RADEON_CHIPSET_HIERZ;
       break;
 
    case PCI_CHIP_RV200_QW:
@@ -425,7 +425,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
    case PCI_CHIP_RADEON_LW:
    case PCI_CHIP_RADEON_LX:
       screen->chip_family = CHIP_FAMILY_RV200;
-      screen->chip_flags = RADEON_CHIPSET_TCL;
+      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ;
       break;
 
    case PCI_CHIP_R200_BB:
@@ -434,7 +434,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
    case PCI_CHIP_R200_QL:
    case PCI_CHIP_R200_QM:
       screen->chip_family = CHIP_FAMILY_R200;
-      screen->chip_flags = RADEON_CHIPSET_TCL;
+      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ;
       break;
 
    case PCI_CHIP_RV250_If:
@@ -454,7 +454,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
    case PCI_CHIP_RV280_5C61:
    case PCI_CHIP_RV280_5C63:
       screen->chip_family = CHIP_FAMILY_RV280;
-      screen->chip_flags = RADEON_CHIPSET_TCL;
+      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_HIERZ;
       break;
 
    case PCI_CHIP_RS300_5834:
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index 91718a4..d576f4c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -457,6 +457,8 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
    drb = radeon_get_depthbuffer(&r100->radeon);
    if (drb) {
      zbpitch = (drb->pitch / drb->cpp);
+     if (r100->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
      if (drb->cpp == 4)
         depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
      else
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
index 1b33de1..ab80663 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
@@ -1569,12 +1569,19 @@
 #       define RADEON_COLOR_ENDIAN_WORD_SWAP  (1 << 18)
 #       define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
 #define RADEON_RB3D_DEPTHOFFSET             0x1c24
+#define RADEON_RB3D_DEPTHCLEARVALUE	    0x3230
 #define RADEON_RB3D_DEPTHPITCH              0x1c28
 #       define RADEON_DEPTHPITCH_MASK         0x00001ff8
 #       define RADEON_DEPTH_HYPERZ            (3 << 16)
 #       define RADEON_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
 #       define RADEON_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
 #       define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
+#define RADEON_RB3D_ZCACHE_MODE		0x3250
+#define RADEON_RB3D_ZCACHE_CTLSTAT	0x3254
+#	define RADEON_RB3D_ZC_FLUSH		(1 << 0)
+#	define RADEON_RB3D_ZC_FREE		(1 << 2)
+#	define RADEON_RB3D_ZC_FLUSH_ALL		0x5
+#	define RADEON_RB3D_ZC_BUSY		(1u << 31) /* unsigned: 1 << 31 overflows int */
 #define RADEON_RB3D_PLANEMASK               0x1d84
 #define RADEON_RB3D_ROPCNTL                 0x1d80
 #       define RADEON_ROP_MASK              (15 << 8)
@@ -1603,6 +1610,7 @@
 #       define RADEON_STENCIL_WRITE_MASK      (0xff << 24)
 #define RADEON_RB3D_ZPASS_DATA              0x3290
 #define RADEON_RB3D_ZPASS_ADDR              0x3294
+#define RADEON_RB3D_ZMASKOFFSET		    0x3234
 #define RADEON_RB3D_ZSTENCILCNTL            0x1c2c
 #       define RADEON_DEPTH_FORMAT_MASK          (0xf << 0)
 #       define RADEON_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
@@ -2059,6 +2067,8 @@
 #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
 #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
+#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK	    0xC0003200
+#define RADEON_CP_PACKET3_3D_CLEAR_HIZ              0xC0003700
 #define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
 #define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
 #define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
