/*
 * Copyright 2006 Thomas Hellström. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "via_3d.h"
#include "via_3d_reg.h"
#include "h1hwreg.h"
#include <picturestr.h>
#include "via_driver.h"
#include "via_rotate.h" /* for rotate feature.*/
#include "via_exa.h"    /* for MarkSync()/WaitMarker() */
#include "via_exa_h5.h"
#include "via_exa_h6.h"

/*
 * Blend-unit settings for one compositing operator.
 *
 * Entries live in viaOperatorModes[], are filled from viaOpCodes[] by
 * viaInit3DState() and are read by viaSet3DCompositeOperator().
 */
typedef struct
{
    Bool supported;  /* TRUE once viaInit3DState() has populated this slot */
    CARD32 col0;     /* stored as (col0 << 4) in Via3DState.blendCol0 */
    CARD32 col1;     /* stored as (col1 << 2) in Via3DState.blendCol1 */
    CARD32 al0;      /* stored as (al0 << 4) in Via3DState.blendAl0 */
    CARD32 al1;      /* stored as (al1 << 2) in Via3DState.blendAl1 */
} ViaCompositeOperator;

/*
 * Texture-blending settings for one texture blending mode.
 *
 * Entries live in viaTextureOperatorModes[], are filled from
 * viaOpTextureCodes[] by viaInit3DState() and are packed into
 * ViaTextureUnit.texCsat / texAsat by viaSet3DCompositeTextureOperater().
 * The bit ranges quoted below are the original author's notes.
 */
typedef struct
{
    Bool supported;         /* TRUE once viaInit3DState() has populated this slot */
    CARD32 clamp;           /* texCsat: shifted left by 23 (note: bit 23) */
    CARD32 camode;          /* texCsat: shifted left by 14 (note: bits 14..19) */
    CARD32 cbmode;          /* texCsat: shifted left by 7 (note: bits 7..12) */
    CARD32 ccmode;          /* note: bits 0..5; stored, but the packer writes a fixed 0x00 instead */
    CARD32 Alclamp_mask;    /* texAsat: shifted left by 23 (note says bits 20..23) */
    CARD32 Aamode;          /* texAsat: shifted left by 14 (note: bits 14..18) */
    CARD32 Abmode;          /* texAsat: shifted left by 7 (note: bits 7..11) */
    CARD32 Acmode;          /* note: bits 0..4; stored, but the packer writes a fixed 0x03 instead */
} ViaCompositeOperatorTexture;

/*
 * One slot of the picture-format lookup table via3DFormats[].
 *
 * Slots are selected with VIA_FMT_HASH(pictFormat) and filled from
 * viaFormats[] by viaInit3DState().
 */
typedef struct
{
    CARD32 pictFormat;   /* picture format stored in this slot; 0 marks an empty slot */
    Bool dstSupported;   /* format may be used as a destination */
    Bool texSupported;   /* format may be used as a texture */
    CARD32 dstFormat;    /* hardware destination-buffer format code */
    CARD32 texFormat;    /* hardware texture format code */
} Via3DFormat;
/* Row counts of the constant tables viaOpCodes[], viaFormats[] and viaOpTextureCodes[]. */
#define VIA_NUM_3D_OPCODES  21
#define VIA_NUM_3D_FORMATS 17
#define VIA_NUM_3D_TEXT_OPCODES 3

/* Smaller of a and b. Each argument is evaluated twice: avoid side effects. */
#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

/* Blend settings indexed directly by compositing operator code; filled by viaInit3DState(). */
static ViaCompositeOperator viaOperatorModes[256];
/* Texture blend settings indexed by (blending mode - src_Aa); filled by viaInit3DState(). */
static ViaCompositeOperatorTexture viaTextureOperatorModes[256];
/*
 * Texture blending equations (as given by the original author):
 *   RGB:    Cout = (Ca * (Cb Cop Cc) + Cbias) << Cshift
 *   Alpha:  Aout = (Aa * (Ab Aop Ac) + Abias) << Ashift
 *
 *   OutReComp: Asrc = srA * maA,  Rsrc = srA * maR
 *   AddComp:   Asrc = srA * maA,  Rsrc = srR * maR
 *   (presumably "sr" = source and "ma" = mask -- TODO confirm)
 *
 * Column layout of each row, as unpacked by viaInit3DState():
 *   { blending mode, clamp, camode, cbmode, ccmode,
 *     Alclamp_mask, Aamode, Abmode, Acmode }
 */
static const CARD32 viaOpTextureCodes[VIA_NUM_3D_TEXT_OPCODES][9] = {
    {src_Aa,        0x01, 0x10, 0x07, 0x00,/**/0x00,0x0B,0x04,0x03},      /*(inv 0)(Atex) (InvclrAa 0)(Atex)*/
    {src_Ca,        0x01, 0x10, 0x03, 0x00,/**/0x00,0x0B,0x04,0x03},      /*(inv 0)(RGBtex)(InvclrAa 0)(Atex)*/
    {mask_Ca,       0x01, 0x03, 0x04, 0x00,/**/0x01,0x04,0x02,0x03},      /*(RGBtex)(RGBcur) (Atex)(Acur)*/   
};
/* Picture-format lookup table, indexed by VIA_FMT_HASH(); filled by viaInit3DState(). */
static Via3DFormat via3DFormats[256];

/*
 * Reduce a picture format code to a slot index in the range 0..255.
 * The argument is expanded twice, so it must not have side effects.
 */
#define VIA_FMT_HASH(arg) (((((arg) >> 1) + (arg)) >> 8) & 0xFF)

/*
 * Driver-private pseudo operators: slots in viaOperatorModes[] that hold the
 * component-alpha variants of PictOpOutReverse and PictOpInReverse.
 *
 * The values must stay below 256 (the table size) and must not coincide
 * with any operator defined by the Render protocol. In particular 0x13 and
 * 0x14 are PictOpDisjointOver and PictOpDisjointOverReverse; reusing them
 * would make via3DOpSupported() report those operators as supported while
 * programming the wrong blend equation.
 */
#define PictOpOutReverseComp 0xFE
#define PictOpInReverseComp  0xFF
/*
 * Blend-unit settings per compositing operator.
 *
 * Column layout of each row, as unpacked by viaInit3DState():
 *   { operator, col0, col1, al0, al1 }
 * The per-row remarks are the original author's notes on the selected
 * source/destination factors and on which channels were found correct.
 */
static const CARD32 viaOpCodes[VIA_NUM_3D_OPCODES][5] = {
    {PictOpClear, 0x05, 0x45, 0x40, 0x80},            /* (RGBsrc_cntRGB, RGBdst_cntRGB) (Asrc_0, Adst_Inv0)  [0,0] */
    {PictOpSrc, 0x15, 0x45, 0x50, 0x80},              /* (RGBsrc_InvcntRGB, RGBdst_cntRGB) (Asrc_Inv0,)  [Ca] */
    {PictOpDst, 0x05, 0x55, 0x40, 0x90},              /* (RGBsrc_cntRGB, RGBdst_InvcntRGB) (Asrc_0, Adst_Inv0)  [0,0] */
    
    /* Csat_Ca_FCa, Cop_Cb_FCb,  Asat_Aa_FAa, Aop_Ab_FAb,  HABLRFCaCb 0x00 */
    {PictOpOver, 0x15, 0x52, 0x50, 0x91},             /* (RGBsrc_InvcntRGB, RGBdst_[InvAsrc]) (Asrc_Inv0, Adst_InvAsrc)  [Ca,Aa]  RGB wrong / A correct */
    {PictOpOverReverse, 0x13, 0x45, 0x52, 0x90},      /* (RGBsrc_InvAdst, RGBdst_cntRGB) (Asrc_InvAdst, Adst_Inv0)  [Ca] */
    {PictOpIn, 0x03, 0x45, 0x42, 0x80},               /* (RGBsrc_Adst, RGBdst_cntRGB) (Asrc_Adst, Adst_0)  [Ca] */
    {PictOpInReverse, 0x05, 0x42, 0x40, 0x81},        /* (RGBsrc_cntRGB, RGBdst_[Asrc]) (Asrc_0, Adst_Asrc)  [Aa]  RGB wrong / A correct */
    {PictOpOut, 0x13, 0x45, 0x52, 0x80},              /* (RGBsrc_InvAdst, RGBdst_cntRGB) (Asrc_InvAdst, Adst_Inv0)  [Ca] */
    {PictOpOutReverse, 0x05, 0x52, 0x40, 0x91},       /* (RGBsrc_cntRGB, RGBdst_[InvAsrc]) (Asrc_0, Adst_InvAsrc)  [Aa]  RGB wrong / A correct */
    {PictOpAtop, 0x03, 0x52, 0x42, 0x91},             /* [Ca,Aa] */
    {PictOpAtopReverse, 0x13, 0x42, 0x52, 0x81},      /* [Ca,Aa] */
    {PictOpXor, 0x15, 0x52, 0x52, 0x91},              /* [Ca,Aa] */
    {PictOpAdd, 0x15, 0x55, 0x50, 0x90},              /* (RGBsrc_InvcntRGB, RGBdst_InvcntRGB) (Asrc_Inv0, Adst_Inv0)  correct: Asrc=srA*maA Rsrc=srR*maR */

    {PictOpDisjointClear, 0x05, 0x45, 0x40, 0x80},    /* same values as the first three rows (Clear, Src, Dst) */
    {PictOpDisjointSrc, 0x15, 0x45, 0x50, 0x80},      
    {PictOpDisjointDst, 0x05, 0x55, 0x40, 0x90},    
    
    {PictOpConjointClear, 0x05, 0x45, 0x40, 0x80},    /* same values as the first three rows (Clear, Src, Dst) */
    {PictOpConjointSrc, 0x15, 0x45, 0x50, 0x80},   
    {PictOpConjointDst, 0x05, 0x55, 0x40, 0x90},   
    {PictOpOutReverseComp, 0x05, 0x50, 0x40, 0x91},   /* (RGBsrc_cntRGB, RGBdst_InvRGBsrc) (Asrc_0, Adst_InvAsrc)  correct: Asrc=srA*maA Rsrc=srA*maR */
    {PictOpInReverseComp, 0x05, 0x00, 0x40, 0x91},    /* (RGBsrc_cntRGB, RGBdst_RGBsrc) (Asrc_0, Adst_Asrc)  correct: Asrc=srA*maA Rsrc=srA*maR */
};

/*
 * Picture formats known to the 3D engine.
 *
 * Column layout of each row, as unpacked by viaInit3DState():
 *   { picture format, destination format code, texture format code,
 *     usable as destination (non-zero = yes), usable as texture (non-zero = yes) }
 */
static const CARD32 viaFormats[VIA_NUM_3D_FORMATS][5] = {
    {PICT_x1r5g5b5, EXA_HC_HDBFM_RGB555, EXA_HC_HTXnFM_RGB555, 1, 1},
    {PICT_r5g6b5,   EXA_HC_HDBFM_RGB565, EXA_HC_HTXnFM_RGB565, 1, 1},
    {PICT_a4r4g4b4, EXA_HC_HDBFM_ARGB4444, EXA_HC_HTXnFM_ARGB4444, 1, 1},
    {PICT_a1r5g5b5, EXA_HC_HDBFM_ARGB1555, EXA_HC_HTXnFM_ARGB1555, 1, 1},
    {PICT_x1b5g5r5, EXA_HC_HDBFM_BGR555, EXA_HC_HTXnFM_BGR555, 1, 1},
    {PICT_b5g6r5,   EXA_HC_HDBFM_BGR565, EXA_HC_HTXnFM_BGR565, 1, 1},
    {PICT_a4b4g4r4, EXA_HC_HDBFM_ABGR4444, EXA_HC_HTXnFM_ABGR4444, 1, 1},
    {PICT_a1b5g5r5, EXA_HC_HDBFM_ABGR1555, EXA_HC_HTXnFM_ABGR1555, 1, 1},
    {PICT_x8r8g8b8, EXA_HC_HDBFM_ARGB0888, EXA_HC_HTXnFM_ARGB0888, 1, 1},
    {PICT_a8r8g8b8, EXA_HC_HDBFM_ARGB8888, EXA_HC_HTXnFM_ARGB8888, 1, 1},
    {PICT_x8b8g8r8, EXA_HC_HDBFM_ABGR0888, EXA_HC_HTXnFM_ABGR0888, 1, 1},
    {PICT_a8b8g8r8, EXA_HC_HDBFM_ABGR8888, EXA_HC_HTXnFM_ABGR8888, 1, 1},
    /* Alpha-only formats: texture use only */
    {PICT_a8, 0x00, EXA_HC_HTXnFM_A8, 0, 1},
    {PICT_a4, 0x00, EXA_HC_HTXnFM_A4, 0, 1},
    {PICT_a1, 0x00, EXA_HC_HTXnFM_A1, 0, 1},
    /* YUV formats: texture use only */
    {PIXMAN_yv12, 0x00, EXA_HC_HTXnFM_YV12, 0, 1},
    {PIXMAN_yuy2, 0x00, EXA_HC_HTXnFM_YUY2, 0, 1}
};

/*
 * Report whether a picture transform is affine, i.e. whether the first two
 * entries of its bottom row are zero. A NULL transform counts as affine.
 */
static Bool
PictureTransformIsAffine(PictTransform *t)
{
    if (t != NULL) {
	if (t->matrix[2][0] != 0 || t->matrix[2][1] != 0)
	    return FALSE;
    }
    return TRUE;
}

/*
 * Convert a fixed-point value to floating point: the integer part plus the
 * fractional part scaled by 1/65536.
 */
#define xFixedToFloat(val) \
	((float)xFixedToInt(val) + ((float)xFixedFrac(val) / 65536.0))
/*
 * Convert a floating-point value to fixed point by scaling with 65536.
 * The argument is parenthesised so that compound expressions such as
 * FloatToxFixed(a + b) are scaled as a whole.
 */
#define FloatToxFixed(val) \
	((pixman_fixed_t)((val) * 65536.0))

/*
 * Multiply the 3x3 transform matrix with the column vector (x, y, 1) and
 * store the three resulting components in result[].
 *
 * Returns FALSE when the third component is zero, TRUE otherwise.
 * transform must not be NULL.
 */
static Bool
_exa_transform_point (PictTransformPtr transform,
		       float		x,
		       float		y,
		       float		result[3])
{
    int row = 0;

    while (row < 3) {
	result[row] = (xFixedToFloat (transform->matrix[row][0]) * x +
		       xFixedToFloat (transform->matrix[row][1]) * y +
		       xFixedToFloat (transform->matrix[row][2]));
	row++;
    }

    if (result[2] == 0)
	return FALSE;
    return TRUE;
}

/**
 * Compute the floating-point coordinates that (x, y) maps to under the
 * given transform and store them in *x_out and *y_out.
 *
 * A NULL transform is the identity. With a transform, the point is
 * multiplied by the matrix and divided by the third result component.
 *
 * Returns FALSE, leaving the outputs untouched, when that third component
 * is zero; TRUE otherwise.
 */
Bool
exa_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
				 float *x_out, float *y_out)
{
    float homogeneous[3];

    if (!transform) {
	*x_out = x;
	*y_out = y;
	return TRUE;
    }

    if (!_exa_transform_point (transform, (float) x, (float) y, homogeneous))
	return FALSE;

    *x_out = (homogeneous[0] / homogeneous[2]);
    *y_out = (homogeneous[1] / homogeneous[2]);
    return TRUE;
}

static CARD32
via3DDstFormat(int format)
{
    return via3DFormats[VIA_FMT_HASH(format)].dstFormat;
}

static CARD32
via3DTexFormat(int format)
{
    return via3DFormats[VIA_FMT_HASH(format)].texFormat;
}

/*
 * Tell whether the picture format can be used as a destination: its table
 * slot must hold exactly this format and have dstSupported set.
 */
static Bool
via3DDstSupported(int format)
{
    const Via3DFormat *slot = &via3DFormats[VIA_FMT_HASH(format)];

    return (slot->pictFormat == format) ? slot->dstSupported : FALSE;
}

/*
 * Tell whether the picture format can be used as a texture: its table
 * slot must hold exactly this format and have texSupported set.
 */
static Bool
via3DTexSupported(int format)
{
    const Via3DFormat *slot = &via3DFormats[VIA_FMT_HASH(format)];

    return (slot->pictFormat == format) ? slot->texSupported : FALSE;
}

/*
 * Record the destination buffer (offset, pitch, format) in the 3D state
 * and flag the destination and drawing state for re-emission.
 */
static void
viaSet3DDestination(Via3DState * v3d, CARD32 offset, CARD32 pitch, int format)
{
    v3d->destOffset = offset;
    v3d->destPitch = pitch;
    v3d->destFormat = via3DDstFormat(format);

    /* Format codes below ARGB0888 are treated as 16 bit deep, the rest as 32. */
    if (v3d->destFormat < EXA_HC_HDBFM_ARGB0888)
        v3d->destDepth = 16;
    else
        v3d->destDepth = 32;

    v3d->destDirty = TRUE;
    v3d->drawingDirty = TRUE;   /* Affects planemask format. */
}

/*
 * Record raster operation, plane mask and solid colour/alpha in the 3D
 * state and flag the drawing state for re-emission.
 */
static void
viaSet3DDrawing(Via3DState * v3d, int rop,
                CARD32 planeMask, CARD32 solidColor, CARD32 solidAlpha)
{
    v3d->rop = rop;
    v3d->planeMask = planeMask;

    v3d->solidColor = solidColor;
    v3d->solidAlpha = solidAlpha;

    v3d->drawingDirty = TRUE;
}

/*
 * Record the number of active textures and the alpha-write, colour-write
 * and blend switches, and flag the enable and blend state for re-emission.
 */
static void
viaSet3DFlags(Via3DState * v3d, int numTextures,
              Bool writeAlpha, Bool writeColor, Bool blend)
{
    v3d->numTextures = numTextures;

    v3d->writeColor = writeColor;
    v3d->writeAlpha = writeAlpha;
    v3d->blend = blend;

    v3d->blendDirty = TRUE;
    v3d->enableDirty = TRUE;
}

/*
 * Compute the smallest exponent e (capped at 31) with val <= 2^e and store
 * it in *shift.
 *
 * Returns TRUE when val is exactly 2^e, FALSE otherwise (including val == 0
 * and any val above 2^31, for which *shift is 31).
 *
 * The shift is done on an unsigned operand and the exponent is bounded so
 * that no shift by 31 or more bits of a signed int, and no shift by the
 * full width of the type, can occur.
 */
Bool
viaOrder(CARD32 val, CARD32 * shift)
{
    CARD32 order = 0;

    while (order < 31 && val > ((CARD32) 1 << order))
        order++;

    *shift = order;
    return (val == ((CARD32) 1 << order));
}

/*
 * Program the colour and alpha blend words of a texture unit from the
 * viaTextureOperatorModes[] entry selected by opTexture.
 *
 * Nothing happens when vTex is NULL or the entry is not marked supported.
 * When src is TRUE the unit's reference colour and alpha are additionally
 * cleared and flagged for upload.
 */
static void
viaSet3DCompositeTextureOperater(ViaTextureUnit *vTex /*,Via3DState * v3d*/, CARD8 opTexture,Bool src)
{
    ViaCompositeOperatorTexture *mode = &viaTextureOperatorModes[opTexture];

    if (!vTex || !mode->supported)
        return;

    /* The lowest field of each word is a fixed value, not ccmode / Acmode. */
    vTex->texCsat = ((mode->clamp << 23) | (mode->camode << 14) |
                     (mode->cbmode << 7) | 0x00);
    vTex->texAsat = ((mode->Alclamp_mask << 23) | (mode->Aamode << 14) |
                     ((mode->Abmode) << 7) | 0x03);

    if (!src)
        return;

    vTex->texRCa = 0x00000000;
    vTex->texRAa = 0x00000000;
    vTex->texBColDirty = TRUE;
}
/*
 * Record the U and V plane offsets of texture unit `tex`. The offsets are
 * consumed by via3DEmitState() for YV12 textures.
 *
 * Always returns TRUE. The function is declared Bool, so it must return a
 * value: falling off the end would hand an indeterminate result to any
 * caller that inspects it.
 */
static Bool viaSet3DTexUVOffset(Via3DState * v3d, int tex, CARD32 uoffset, CARD32 voffset)
{
    ViaTextureUnit *vTex = v3d->tex + tex;

    vTex->textureLevel0UOffset = uoffset;
    vTex->textureLevel0VOffset = voffset;
    return TRUE;
}

/*
 * Configure texture unit `tex` of the 3D state.
 *
 * Stores location, pitch and dimensions (width and height are rounded up
 * to powers of two through viaOrder()), the hardware texture format, the
 * transform, the blend words for the requested blending mode, the filter
 * word and the S/T addressing modes, then flags the unit for upload.
 *
 * Returns FALSE when the transform is not affine or the blending mode is
 * not handled; in both cases the fields written up to that point keep
 * their new values. Returns TRUE on success.
 */
static Bool
viaSet3DTexture(Via3DState * v3d, int tex, CARD32 offset, CARD32 pitch, 
   Bool npot, CARD32 width, CARD32 height, int format, ViaTextureModes sMode,
   ViaTextureModes tMode, ViaTexBlendingModes blendingMode, Bool agpTexture, 
   PictTransformPtr matrix,  ViaTexFilterModes filter)
{
    ViaTextureUnit *unit = &v3d->tex[tex];

    /* Location and layout. */
    unit->textureLevel0Offset = offset;
    unit->npot = npot;
    unit->textureLevel0Pitch = pitch;

    /* Dimensions as power-of-two exponents and the matching sizes. */
    viaOrder(width, &unit->textureLevel0WExp);
    viaOrder(height, &unit->textureLevel0HExp);
    unit->textureLevel0Width = 1 << unit->textureLevel0WExp;
    unit->textureLevel0Height = 1 << unit->textureLevel0HExp;

    if (pitch <= 4) {
        DEBUG(ErrorF("Warning: texture pitch <= 4 !\n"));
    }

    unit->textureFormat = via3DTexFormat(format);
    unit->textureBlendMode = blendingMode;
    unit->transform = matrix;
    if (!PictureTransformIsAffine(unit->transform))
        return FALSE;

    /* Blend words for the requested mode. */
    switch (blendingMode) {
        case via_src:
            unit->texCsat = (0x01 << 23) | (0x10 << 14) | (0x03 << 7) | 0x00;
            unit->texAsat = ((0x0B << 14)
                             | ((PICT_FORMAT_A(format) ? 0x04 : 0x02) << 7)
                             | 0x03);
            unit->texRCa = 0x00000000;
            unit->texRAa = 0x00000000;
            unit->texBColDirty = TRUE;
            break;

        case via_src_onepix_mask:
            unit->texCsat = (0x01 << 23) | (0x09 << 14) | (0x03 << 7) | 0x00;
            unit->texAsat = ((0x03 << 14)
                             | ((PICT_FORMAT_A(format) ? 0x04 : 0x02) << 7)
                             | 0x03);
            break;

        case via_mask:
            unit->texCsat = (0x01 << 23) | (0x07 << 14) | (0x04 << 7) | 0x00;
            unit->texAsat = (0x01 << 23) | (0x04 << 14) | (0x02 << 7) | 0x03;
            break;

        case src_Aa:
        case src_Ca:
            viaSet3DCompositeTextureOperater(unit, (blendingMode - src_Aa), TRUE);
            break;

        case mask_Ca:
            viaSet3DCompositeTextureOperater(unit, (blendingMode - src_Aa), FALSE);
            break;

        case via_comp_mask:
        case via_src_onepix_comp_mask:
        default:
            /* Not handled by this engine path. */
            return FALSE;
    }

    /* Filter word: the smoothing filters share one setting, all else is 0. */
    if (filter == via_FilterBest || filter == via_FilterGood ||
        filter == via_FilterBilinear)
        unit->texturefilter = 0x2490;
    else
        unit->texturefilter = 0x0;

    unit->textureModesS = sMode - via_single;
    unit->textureModesT = tMode - via_single;
    unit->agpTexture = agpTexture;
    unit->textureDirty = TRUE;

    return TRUE;
}

/*
 * Set the reference blend colour of texture unit `tex` from an ARGB value.
 *
 * The reference alpha takes the top byte of `color`, placed in bits 16..23.
 * With `component` set the reference colour is the low 24 bits of `color`;
 * otherwise the top byte is replicated into all four bytes. The unit is
 * flagged so the values get uploaded.
 */
static void
viaSet3DTexBlendCol(Via3DState * v3d, int tex, Bool component, CARD32 color)
{
    ViaTextureUnit *unit = &v3d->tex[tex];

    unit->texRAa = (color >> 8) & 0x00FF0000;

    if (!component) {
        CARD32 a = color >> 24;

        unit->texRCa = a | (a << 8) | (a << 16) | (a << 24);
    } else {
        unit->texRCa = (color & 0x00FFFFFF);
    }

    unit->texBColDirty = TRUE;
}

/*
 * Load the blend-unit settings for a compositing operator into the 3D
 * state.
 *
 * v3d   state to update; a NULL pointer makes the call a no-op.
 * op    compositing operator code.
 * comp  when exactly TRUE, PictOpOutReverse and PictOpInReverse select
 *       their dedicated component variants instead.
 *
 * The blend state is always flagged dirty. The blend words themselves are
 * only rewritten when the selected entry is marked supported; callers can
 * check that beforehand with via3DOpSupported().
 */
static void
viaSet3DCompositeOperator(Via3DState * v3d, CARD8 op,Bool comp)
{
    ViaCompositeOperator *vOp;

    /* Validate the state pointer before the first write through it. */
    if (v3d == NULL)
        return;

    vOp = viaOperatorModes + op;
    if (comp == TRUE) {
        if (op == PictOpOutReverse)
            vOp = viaOperatorModes + PictOpOutReverseComp;
        else if (op == PictOpInReverse)
            vOp = viaOperatorModes + PictOpInReverseComp;
    }

    v3d->blendDirty = TRUE;
    if (vOp->supported) {
        v3d->blendCol0 = vOp->col0 << 4;
        v3d->blendCol1 = vOp->col1 << 2;
        v3d->blendAl0 = vOp->al0 << 4;
        v3d->blendAl1 = vOp->al1 << 2;
    }
}

/*
 * Tell whether viaOperatorModes[] holds settings for the given operator.
 */
static Bool
via3DOpSupported(CARD8 op)
{
    const ViaCompositeOperator *mode = viaOperatorModes + op;

    return mode->supported;
}

/*
 * Emit one textured (or untextured) rectangle to the command buffer as
 * two triangles.
 *
 * v3d           3D state; numTextures and the texture units are read, and
 *               a texture unit's base offset may be modified (see below).
 * cb            command buffer written through the ring macros.
 * dstX, dstY    top-left corner of the destination rectangle.
 * src0X, src0Y  top-left source coordinate for texture unit 0.
 * src1X, src1Y  top-left source coordinate for texture unit 1.
 * w, h          rectangle size.
 *
 * Returns without emitting anything when a source corner cannot be
 * transformed (exa_get_transformed_coordinates() fails).
 *
 * NOTE(review): textureDrawable_width / textureDrawable_height are unused,
 * and textureLevel0Offset is advanced with += and not restored here, so
 * repeated calls may accumulate -- confirm against the callers.
 */
static void
via3DEmitQuad(Via3DState * v3d, ViaCommandBuffer * cb, int dstX, int dstY,
              int src0X, int src0Y, int src1X, int src1Y, int w, int h)
{
    CARD32 acmd;
    float dx1, dx2, dy1, dy2, sx1[2], sx2[2], sx3[2], sx4[2],sy1[2], sy2[2], sy3[2], sy4[2],wf;
    double scalex, scaley;
    int i, numTex;
    float min_X,min_Y,min_X_wasted,min_Y_wasted;
    ViaTextureUnit *vTex;
    CARD32 textureDrawable_width,textureDrawable_height;

    /* Destination corners. */
    numTex = v3d->numTextures;
    dx1 = dstX;
    dx2 = dstX + w;
    dy1 = dstY;
    dy2 = dstY + h;

    if (numTex) {
        sx1[0] = src0X;
        sx1[1] = src1X;
        sy1[0] = src0Y;
        sy1[1] = src1Y;
		
        for (i = 0; i < numTex; ++i) {
            vTex = v3d->tex + i;
            /*
             * Source corners: 1 = top-left, 2 = top-right,
             * 3 = bottom-left, 4 = bottom-right; each is then run
             * through the texture unit's transform.
             */
            sx2[i] = sx1[i] + w;
            sy2[i] = sy1[i];
            sx3[i] = sx1[i];
            sy3[i] = sy1[i] + h;
            sx4[i] = sx1[i] + w;
            sy4[i] = sy1[i] + h;			
            if (!exa_get_transformed_coordinates(sx1[i], sy1[i],
                vTex->transform,
                &sx1[i], &sy1[i]))
                return;

            if (!exa_get_transformed_coordinates(sx2[i], sy2[i],
                vTex->transform,
                &sx2[i], &sy2[i]))
                return;
            
            if (!exa_get_transformed_coordinates(sx3[i], sy3[i],
                vTex->transform,
                &sx3[i], &sy3[i]))
                return;

            if (!exa_get_transformed_coordinates(sx4[i], sy4[i],
                vTex->transform,
                &sx4[i], &sy4[i]))
                return;	

            /*
             * If any transformed coordinate exceeds the texture dimension
             * limit, rebase the texture: move its base offset towards the
             * smallest corner and shift the coordinates by the same amount.
             */
            if((sx1[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx2[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx3[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx4[i]> TEX_DIMENSION_LIMIT_INPIXEL
                || sy1[i] > TEX_DIMENSION_LIMIT_INPIXEL || sy2[i] > TEX_DIMENSION_LIMIT_INPIXEL || sy3[i]> TEX_DIMENSION_LIMIT_INPIXEL || sy4[i]> TEX_DIMENSION_LIMIT_INPIXEL)){
                min_X = MIN(sx1[i], sx2[i]);
                min_X = MIN(min_X, sx3[i]);
                min_X = MIN(min_X, sx4[i]);
                min_Y = MIN(sy1[i], sy2[i]);
                min_Y = MIN(min_Y, sy3[i]);
                min_Y = MIN(min_Y, sy4[i]); 
                
                /* Drop the remainder (integer part modulo 256) from the minima. */
                min_X_wasted = (float)((int)min_X % 256);
                min_X =min_X - min_X_wasted;
                min_Y_wasted = (float)((int)min_Y % 256);
                min_Y = min_Y - min_Y_wasted;

               /* The base offset is left alone for repeated textures with component alpha. */
               if(!(vTex->textureRepeat && v3d->componentAlpha)) {
                vTex->textureLevel0Offset += (CARD32)min_Y * vTex->textureLevel0Pitch + (CARD32)min_X*vTex->bytePerPixel; 
                vTex->textureDirty = TRUE;
                v3d->emitState(v3d, cb, 0);
                }
                sx1[i] = sx1[i] - min_X;
                sx2[i] = sx2[i] - min_X;
                sx3[i] = sx3[i] - min_X;
                sx4[i] = sx4[i] - min_X;
                sy1[i] = sy1[i] - min_Y;
                sy2[i] = sy2[i] - min_Y;
                sy3[i] = sy3[i] - min_Y;
                sy4[i] = sy4[i] - min_Y;
                }
            }
        }

        /* Scale the source coordinates by 1 / (power-of-two texture size). */
        for (i = 0; i < numTex; ++i) {
            vTex = v3d->tex + i;
            scalex = 1. / (double)((1 << vTex->textureLevel0WExp));
            scaley = 1. / (double)((1 << vTex->textureLevel0HExp));
            sx1[i] *= scalex;
            sy1[i] *= scaley;
            sx2[i] *= scalex;
            sy2[i] *= scaley;
            sx3[i] *= scalex;
            sy3[i] *= scaley;
            sx4[i] *= scalex;
            sy4[i] *= scaley;

        }
    
    wf = 0.05;

    /*
     * Vertex buffer. Emit two 3-point triangles. The W or Z coordinate
     * is needed for AGP DMA, and the W coordinate is for some obscure
     * reason needed for texture mapping to be done correctly. So emit
     * a w value after the x and y coordinates.
     */

    BEGIN_HEAD2_3D(EXA_HC_ParaType_CmdVdata, 22 + numTex * 6);
    acmd = ((1 << 14) | (1 << 13) | (1 << 11));
    if (numTex)
        acmd |= ((1 << 7) | (1 << 8));
    OUT_RING_SubA(0xEC, acmd);

    acmd = 2 << 16;
    OUT_RING_SubA(0xEE, acmd);

    /* Each float is written to the ring as its raw 32-bit pattern. */

    /* Triangle 1, vertex 1: top-left. */
    OUT_RING(*((CARD32 *) (&dx1)));
    OUT_RING(*((CARD32 *) (&dy1)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx1 + i)));
        OUT_RING(*((CARD32 *) (sy1 + i)));
    }

    /* Triangle 1, vertex 2: top-right. */
    OUT_RING(*((CARD32 *) (&dx2)));
    OUT_RING(*((CARD32 *) (&dy1)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx2 + i)));
        OUT_RING(*((CARD32 *) (sy2 + i)));
    }

    /* Triangle 1, vertex 3: bottom-left. */
    OUT_RING(*((CARD32 *) (&dx1)));
    OUT_RING(*((CARD32 *) (&dy2)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx3 + i)));
        OUT_RING(*((CARD32 *) (sy3 + i)));
    }

    /* Triangle 2, vertex 1: bottom-left. */
    OUT_RING(*((CARD32 *) (&dx1)));
    OUT_RING(*((CARD32 *) (&dy2)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx3 + i)));
        OUT_RING(*((CARD32 *) (sy3 + i)));
    }

    /* Triangle 2, vertex 2: top-right. */
    OUT_RING(*((CARD32 *) (&dx2)));
    OUT_RING(*((CARD32 *) (&dy1)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx2 + i)));
        OUT_RING(*((CARD32 *) (sy2 + i)));
    }

    /* Triangle 2, vertex 3: bottom-right. */
    OUT_RING(*((CARD32 *) (&dx2)));
    OUT_RING(*((CARD32 *) (&dy2)));
    OUT_RING(*((CARD32 *) (&wf)));
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx4 + i)));
        OUT_RING(*((CARD32 *) (sy4 + i)));
    }
    /* The closing command word is written twice. */
    OUT_RING_SubA(0xEE,
                  acmd | EXA_HC_HPLEND_MASK | EXA_HC_HPMValidN_MASK | EXA_HC_HE3Fire_MASK);
    OUT_RING_SubA(0xEE,
                  acmd | EXA_HC_HPLEND_MASK | EXA_HC_HPMValidN_MASK | EXA_HC_HE3Fire_MASK);

}

/*
 * Write the parts of the 3D state that are flagged dirty to the command
 * buffer and clear their dirty flags.
 *
 * v3d          state to emit.
 * cb           command buffer written through the ring macros.
 * forceUpload  when TRUE every state group is emitted regardless of its
 *              dirty flag.
 *
 * Groups, in emission order: destination buffer, blend unit, drawing
 * (raster op, plane mask, solid colour), enable bits and general texture
 * setup, per-texture setup, per-texture reference blend colour.
 */
static void
via3DEmitState(Via3DState * v3d, ViaCommandBuffer * cb, Bool forceUpload)
{
    int i;
    Bool saveHas3dState;
    ViaTextureUnit *vTex;

    /*
     * Destination buffer location, format and pitch.
     */

    if (forceUpload || v3d->destDirty) {
        v3d->destDirty = FALSE;
        BEGIN_HEAD2_3D(EXA_HC_ParaType_NotTex, 3);

        /* The offset is split into its low 24 bits and the remaining high bits. */
        OUT_RING_SubA(EXA_HC_SubA_HDBBasL, v3d->destOffset & 0x00FFFFFF);
        OUT_RING_SubA(EXA_HC_SubA_HDBBasH, v3d->destOffset >> 24);
        OUT_RING_SubA(EXA_HC_SubA_HDBFM, v3d->destFormat |
                      (v3d->destPitch & EXA_HC_HDBPit_MASK) | EXA_HC_HDBLoc_Local);
    }

    /*
     * Blend unit: the words prepared by the composite-operator setter.
     */
    if (forceUpload || v3d->blendDirty) {
        v3d->blendDirty = FALSE;
        BEGIN_HEAD2_3D(EXA_HC_ParaType_NotTex, 6);
        OUT_RING_SubA(EXA_HC_SubA_HABLRFCa, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLRFCb, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLCsat, v3d->blendCol0);
        OUT_RING_SubA(EXA_HC_SubA_HABLCop, v3d->blendCol1);
        OUT_RING_SubA(EXA_HC_SubA_HABLAsat, v3d->blendAl0);
        OUT_RING_SubA(EXA_HC_SubA_HABLAop, v3d->blendAl1);
    }

    if (forceUpload || v3d->drawingDirty) {

        CARD32 planeMaskLo, planeMaskHi;

        v3d->drawingDirty = FALSE;
        BEGIN_HEAD2_3D(EXA_HC_ParaType_NotTex, 4);

        /*
         * Raster operation and Planemask.
         */

        /* The 16-bit variant is disabled by the constant FALSE condition. */
        if ( /* v3d->destDepth == 16 Bad Docs? */ FALSE) {
            planeMaskLo = (v3d->planeMask & 0x000000FF) << 16;
            planeMaskHi = (v3d->planeMask & 0x0000FF00) >> 8;
        } else {
            planeMaskLo = v3d->planeMask & 0x00FFFFFF;
            planeMaskHi = v3d->planeMask >> 24;
        }

        OUT_RING_SubA(EXA_HC_SubA_HROP, ((v3d->rop & 0x0F) << 8) | planeMaskHi);
        OUT_RING_SubA(EXA_HC_SubA_HFBBMSKL, planeMaskLo);

        /*
         * Solid shading color and alpha. Pixel center at 
         * floating coordinates (X.5,Y.5).
         */

        OUT_RING_SubA(EXA_HC_SubA_HSolidCL,
                      (v3d->solidColor & 0x00FFFFFF) | (0 << 23));
        OUT_RING_SubA(EXA_HC_SubA_HPixGC,
                      (((v3d->solidColor & 0xFF000000) >> 16) | (0 << 23)
                       | (v3d->solidAlpha & 0xFF)));
    }

    /*
     * Enable bits, followed by the general texture setup when textures
     * are in use.
     */
    if (forceUpload || v3d->enableDirty) {
        v3d->enableDirty = FALSE;
        BEGIN_HEAD2_3D(EXA_HC_ParaType_NotTex, 1);

        /* The YV12 test reads texture unit 0, whatever numTextures is. */
        OUT_RING_SubA(EXA_HC_SubA_HEnable,
                      ((v3d->writeColor) ? EXA_HC_HenCW_MASK : 0) |
                      ((v3d->blend) ? EXA_HC_HenABL_MASK : 0) |
                      ((v3d->numTextures) ? EXA_HC_HenTXMP_MASK : 0) |
                      ((v3d->writeAlpha) ? EXA_HC_HenAW_MASK : 0)|
                      ((v3d->tex->textureFormat == EXA_HC_HTXnFM_YV12 ) ?
                      EXA_HC_HenForce1P_MASK : 0));

        if (v3d->numTextures) {
            BEGIN_HEAD2_3D((EXA_HC_ParaType_Tex | (EXA_HC_SubType_TexGeneral << 8)), 5);
            /* HTXSMD is written twice: first with bit 0 set, then with it cleared. */
            OUT_RING_SubA(EXA_HC_SubA_HTXSMD, (0 << 7) | (0 << 6) |
                          (((v3d->numTextures - 1) & 0x1) << 3) | (0 << 1) | 1);
            OUT_RING_SubA(EXA_HC_SubA_HTXSMD, (0 << 7) | (0 << 6) |
                          (((v3d->numTextures - 1) & 0x1) << 3) | (0 << 1) | 0);
            /* Use the built-in SDTV mode for YUV to RGB conversion. */
            OUT_RING_SubA(EXA_HC_SubA_HTXYUV2RGB1, EXA_HTXYUV2RGB4BT601|0x13000B);
            OUT_RING_SubA(EXA_HC_SubA_HTXYUV2RGB2, 0xED1316);
            OUT_RING_SubA(EXA_HC_SubA_HTXYUV2RGB3, 0x071000);
        }
    }

    /*
     * Per-texture setup: format, location, pitch, size exponents, filter,
     * addressing modes and blend words.
     */
    for (i = 0; i < v3d->numTextures; ++i) {
        vTex = v3d->tex + i;

        if (forceUpload || vTex->textureDirty) {
            vTex->textureDirty = FALSE;

            BEGIN_HEAD2_3D((EXA_HC_ParaType_Tex |
                      (((i == 0) ? EXA_HC_SubType_Tex0 : EXA_HC_SubType_Tex1) << 8)), 20);

            OUT_RING_SubA(EXA_HC_SubA_HTXnFM, (vTex->textureFormat |
                                           (vTex->agpTexture ? EXA_HC_HTXnLoc_AGP :
                                            EXA_HC_HTXnLoc_Local)));
            /*
             * YV12 video textures only: three base addresses (main offset,
             * V offset, U offset) and three pitches, the second and third
             * at half the level-0 pitch.
             */
            if(vTex->textureFormat == EXA_HC_HTXnFM_YV12 ) {
                OUT_RING_SubA(EXA_HC_SubA_HTXnL0BasL,
                              vTex->textureLevel0Offset & 0x00FFFFFF);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL1BasL,
                              vTex->textureLevel0VOffset & 0x00FFFFFF);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL2BasL,
                              vTex->textureLevel0UOffset & 0x00FFFFFF);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL012BasH,
                              ((vTex->textureLevel0Offset & 0xFF000000) >> 24) | 
                              ((vTex->textureLevel0VOffset & 0xFF000000)>>16) |
                              ((vTex->textureLevel0UOffset & 0xFF000000)>> 8));
                OUT_RING_SubA(EXA_HC_SubA_HTXnL0Pit,
                            (vTex->textureLevel0Pitch & EXA_HC_HTXnLnPit_MASK) |
                            EXA_HC_HTXnEnPit_MASK);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL1Pit,
                            ((vTex->textureLevel0Pitch>>1) & EXA_HC_HTXnLnPit_MASK) |
                            EXA_HC_HTXnEnPit_MASK);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL2Pit,
                            ((vTex->textureLevel0Pitch>>1) & EXA_HC_HTXnLnPit_MASK) |
                            EXA_HC_HTXnEnPit_MASK);
            } else {            
                /* All other formats: a single base address and pitch. */
                OUT_RING_SubA(EXA_HC_SubA_HTXnL0BasL,
                              vTex->textureLevel0Offset & 0x00FFFFFF);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL012BasH,
                              vTex->textureLevel0Offset >> 24);
                OUT_RING_SubA(EXA_HC_SubA_HTXnL0Pit,
                              (vTex->textureLevel0Pitch & EXA_HC_HTXnLnPit_MASK) |
                              EXA_HC_HTXnEnPit_MASK);
            }
            
            OUT_RING_SubA(EXA_HC_SubA_HTXnL0_5WE, vTex->textureLevel0WExp);
            OUT_RING_SubA(EXA_HC_SubA_HTXnL0_5HE, vTex->textureLevel0HExp);
            OUT_RING_SubA(EXA_HC_SubA_HTXnL0OS, 0x00);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTB, vTex->texturefilter);
            OUT_RING_SubA(EXA_HC_SubA_HTXnMPMD,
                          ((((unsigned)vTex->textureModesT) << 19)
                           | (((unsigned)vTex->textureModesS) << 16)));

            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLCsat, vTex->texCsat);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLCop, (0x00 << 22) | (0x00 << 19) |
                          (0x00 << 14) | (0x02 << 11) |
                          (0x00 << 7) | (0x03 << 3) | 0x02);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLAsat, vTex->texAsat);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLRFog, 0x00);
        }
    }

    /*
     * Per-texture reference blend colour and alpha. cb->has3dState is
     * saved before and restored after this upload.
     */
    for (i = 0; i < v3d->numTextures; ++i) {
        vTex = v3d->tex + i;

        if (forceUpload || vTex->texBColDirty) {
            saveHas3dState = cb->has3dState;
            vTex->texBColDirty = FALSE;
            BEGIN_HEAD2_3D((EXA_HC_ParaType_Tex |
                      (((i == 0) ? EXA_HC_SubType_Tex0 : EXA_HC_SubType_Tex1) << 8)),
                     2);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLRAa, vTex->texRAa);
            OUT_RING_SubA(EXA_HC_SubA_HTXnTBLRCa, vTex->texRCa);
            cb->has3dState = saveHas3dState;
        }
    }

}

/*
 * Pixel-shader hook for this engine: intentionally does nothing. It is
 * installed as Via3DState.emitPixelShader by viaInit3DState().
 */
static void via3DEmitPixelShader(Via3DState * v3d, ViaCommandBuffer * cb, 
    int srcFormat, int maskFormat, int  dstFormat)
{
    /* No pixel shader state to emit. */
}
/*
 * Emit the clip rectangle (x, y, w, h) to the command buffer.
 *
 * The top/bottom word holds y in the upper field (shifted left by 12) and
 * y + h in the lower field; the left/right word does the same with x and
 * x + w. Each value is passed through HW_H2_CLIPRANG() first.
 *
 * The clip rectangle is considered not important for the DRM 3D state, so
 * cb->has3dState is saved before and restored after the emission.
 *
 * NOTE(review): BEGIN_HEAD2_3D is given a count of 4 while two words are
 * written here -- confirm what the count means for this macro.
 */
static void
via3DEmitClipRect(Via3DState * v3d, ViaCommandBuffer * cb, int x, int y,
                  int w, int h)
{
    Bool saveHas3dState;

    saveHas3dState = cb->has3dState;
    BEGIN_HEAD2_3D(EXA_HC_ParaType_NotTex, 4);
    OUT_RING_SubA(EXA_HC_SubA_HClipTB, (HW_H2_CLIPRANG(y) << 12) | HW_H2_CLIPRANG(y + h));
    OUT_RING_SubA(EXA_HC_SubA_HClipLR, (HW_H2_CLIPRANG(x) << 12) | HW_H2_CLIPRANG(x + w));
    cb->has3dState = saveHas3dState;
}

/*
 * H5 variant: emit the clip rectangle (x, y, w, h) to the command buffer.
 *
 * The same rectangle is written to both the color clipping window and the
 * scissor clipping window. Each word holds the first edge (y or x) in the
 * upper field, shifted left by 12, and the opposite edge (y + h or x + w)
 * in the lower field; values are passed through HW_H5_CLIPRANG() first.
 *
 * The clip rectangle is considered not important for the DRM 3D state, so
 * cb->has3dState is saved before and restored after the emission.
 *
 * NOTE(review): BEGIN_HEADER2_3D_H5 is given a count of 10 while four
 * words are written here -- confirm what the count means for this macro.
 */
static void
via3DEmitClipRect_H5(Via3DState * v3d, ViaCommandBuffer * cb, int x, int y,
                  int w, int h)
{
    Bool saveHas3dState;
    saveHas3dState = cb->has3dState;
    BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,10);/* Parameter type: 0x01 */
    OUT_RING_SubA(INV_HC_SubA_HCClipTL, (HW_H5_CLIPRANG(y)<<12) | HW_H5_CLIPRANG(y+h));    /* Color clipping window: top & bottom */
    OUT_RING_SubA(INV_HC_SubA_HCClipBL, (HW_H5_CLIPRANG(x)<<12) | HW_H5_CLIPRANG(x+w));       /* Color clipping window: left & right */
    OUT_RING_SubA(INV_HC_SubA_HSClipTL, (HW_H5_CLIPRANG(y)<<12) | HW_H5_CLIPRANG(y+h));    /* Scissor clipping window: top & bottom */
    OUT_RING_SubA(INV_HC_SubA_HSClipBL, (HW_H5_CLIPRANG(x)<<12) | HW_H5_CLIPRANG(x+w));       /* Scissor clipping window: left & right */
    cb->has3dState = saveHas3dState;
}

/*
 * Initialise a Via3DState: install this file's implementations as its
 * callbacks and (re)build the three file-level lookup tables
 * (compositing operators, texture blend operators, picture formats) from
 * their constant source tables.
 *
 * A hash collision between two picture formats is only reported through
 * DEBUG(); the later entry overwrites the earlier one.
 */
void
viaInit3DState(Via3DState * v3d)
{
    int n;

    /* State setters. */
    v3d->setDestination = viaSet3DDestination;
    v3d->setDrawing = viaSet3DDrawing;
    v3d->setFlags = viaSet3DFlags;
    v3d->setTexture = viaSet3DTexture;
    v3d->setTexUVOffset = viaSet3DTexUVOffset;
    v3d->setTexBlendCol = viaSet3DTexBlendCol;
    v3d->setCompositeOperator = viaSet3DCompositeOperator;

    /* Capability queries. */
    v3d->opSupported = via3DOpSupported;
    v3d->dstSupported = via3DDstSupported;
    v3d->texSupported = via3DTexSupported;

    /* Command emission and synchronisation. */
    v3d->emitQuad = via3DEmitQuad;
    v3d->emitState = via3DEmitState;
    v3d->emitPixelShader = via3DEmitPixelShader;
    v3d->emitClipRect = via3DEmitClipRect;
    v3d->MarkSync = viaAccelMarkSync;
    v3d->WaitMarker = viaAccelWaitMarker;

    /* Mark every slot of all three tables as empty. */
    for (n = 0; n < 256; ++n) {
        viaOperatorModes[n].supported = FALSE;
        viaTextureOperatorModes[n].supported = FALSE;
        via3DFormats[n].pictFormat = 0x00;
    }

    /* Compositing operators, indexed by operator code. */
    for (n = 0; n < VIA_NUM_3D_OPCODES; ++n) {
        const CARD32 *row = viaOpCodes[n];
        ViaCompositeOperator *slot = &viaOperatorModes[row[0]];

        slot->supported = TRUE;
        slot->col0 = row[1];
        slot->col1 = row[2];
        slot->al0 = row[3];
        slot->al1 = row[4];
    }

    /* Texture blend operators, indexed relative to src_Aa. */
    for (n = 0; n < VIA_NUM_3D_TEXT_OPCODES; ++n) {
        const CARD32 *row = viaOpTextureCodes[n];
        ViaCompositeOperatorTexture *slot =
            &viaTextureOperatorModes[row[0] - src_Aa];

        slot->supported = TRUE;
        slot->clamp = row[1];
        slot->camode = row[2];
        slot->cbmode = row[3];
        slot->ccmode = row[4];
        slot->Alclamp_mask = row[5];
        slot->Aamode = row[6];
        slot->Abmode = row[7];
        slot->Acmode = row[8];
    }

    /* Picture formats, indexed by hash of the format code. */
    for (n = 0; n < VIA_NUM_3D_FORMATS; ++n) {
        const CARD32 *row = viaFormats[n];
        Via3DFormat *slot = &via3DFormats[VIA_FMT_HASH(row[0])];

        if (slot->pictFormat) {
            DEBUG(ErrorF("BUG: Bad hash function\n"));
        }
        slot->pictFormat = row[0];
        slot->dstFormat = row[1];
        slot->texFormat = row[2];
        slot->dstSupported = (row[3] != 0x00);
        slot->texSupported = (row[4] != 0x00);
    }
}

/*
 * H5 variant of the destination setter: record offset, pitch and format
 * and flag the destination and drawing state for re-emission. Unlike the
 * base variant, the destination format code is taken from the texture
 * format column of the format table.
 */
static void
viaSet3DDestination_H5(Via3DState * v3d, CARD32 offset, CARD32 pitch, int format)
{
    v3d->destOffset = offset;
    v3d->destPitch = pitch;
    v3d->destFormat = via3DTexFormat(format);

    /* Format codes below ARGB0888 are treated as 16 bit deep, the rest as 32. */
    if (v3d->destFormat < EXA_HC_HTXnFM_ARGB0888)
        v3d->destDepth = 16;
    else
        v3d->destDepth = 32;

    v3d->destDirty = TRUE;
    v3d->drawingDirty = TRUE;   /* Affects planemask format. */
}


static void
via3DEmitQuad_H5(Via3DState * v3d, ViaCommandBuffer * cb, int dstX, int dstY,
              int src0X, int src0Y, int src1X, int src1Y, int w, int h)
{
    CARD32 acmd;
    float dx1, dx2, dy1, dy2, sx1[2], sx2[2], sx3[2], sx4[2],sy1[2], sy2[2], sy3[2], sy4[2],wf;

    double scalex, scaley;
    int i, numTex;
    float min_X,min_Y,min_X_wasted,min_Y_wasted;
    ViaTextureUnit *vTex;
    CARD32 textureDrawable_width,textureDrawable_height;

    numTex = v3d->numTextures;
    dx1 = dstX - 0.5f;
    dx2 = dstX + w - 0.5f;
    dy1 = dstY - 0.5f;
    dy2 = dstY + h - 0.5f;

    if (numTex) {
        sx1[0] = src0X;
        sx1[1] = src1X;
        sy1[0] = src0Y;
        sy1[1] = src1Y;
        for (i = 0; i < numTex; ++i) {
            vTex = v3d->tex + i;
            sx2[i] = sx1[i] + w;
            sy2[i] = sy1[i];
            sx3[i] = sx1[i];
            sy3[i] = sy1[i] + h;
            sx4[i] = sx1[i] + w;
            sy4[i] = sy1[i] + h;
			
            if (!exa_get_transformed_coordinates(sx1[i], sy1[i],
                vTex->transform,
                &sx1[i], &sy1[i]))
                return;

            if (!exa_get_transformed_coordinates(sx2[i], sy2[i],
                vTex->transform,
                &sx2[i], &sy2[i]))
                return;

            if (!exa_get_transformed_coordinates(sx3[i], sy3[i],
                vTex->transform,
                &sx3[i], &sy3[i]))
                return;

            if (!exa_get_transformed_coordinates(sx4[i], sy4[i],
                vTex->transform,
                &sx4[i], &sy4[i]))
                return;			
            if((sx1[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx2[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx3[i] > TEX_DIMENSION_LIMIT_INPIXEL || sx4[i]> TEX_DIMENSION_LIMIT_INPIXEL
                || sy1[i] > TEX_DIMENSION_LIMIT_INPIXEL || sy2[i] > TEX_DIMENSION_LIMIT_INPIXEL || sy3[i]> TEX_DIMENSION_LIMIT_INPIXEL || sy4[i]> TEX_DIMENSION_LIMIT_INPIXEL)){
                min_X = MIN(sx1[i], sx2[i]);
                min_X = MIN(min_X, sx3[i]);
                min_X = MIN(min_X, sx4[i]);
                min_Y = MIN(sy1[i], sy2[i]);
                min_Y = MIN(min_Y, sy3[i]);
                min_Y = MIN(min_Y, sy4[i]); 
                
                min_X_wasted = (float)((int)min_X % 256);
                min_X =min_X - min_X_wasted;
                min_Y_wasted = (float)((int)min_Y % 256);
                min_Y = min_Y - min_Y_wasted;

               if(!(vTex->textureRepeat && v3d->componentAlpha)) {
                vTex->textureLevel0Offset += (CARD32)min_Y * vTex->textureLevel0Pitch + (CARD32)min_X*vTex->bytePerPixel; 
                vTex->textureDirty = TRUE;
                v3d->emitState(v3d, cb, 0);
               }
                sx1[i] = sx1[i] - min_X;
                sx2[i] = sx2[i] - min_X;
                sx3[i] = sx3[i] - min_X;
                sx4[i] = sx4[i] - min_X;
                sy1[i] = sy1[i] - min_Y;
                sy2[i] = sy2[i] - min_Y;
                sy3[i] = sy3[i] - min_Y;
                sy4[i] = sy4[i] - min_Y;
                }
        }
    }
    
        for (i = 0; i < numTex; ++i) {
            vTex = v3d->tex + i;
            scalex = 1. / (double)((vTex->textureLevel0Width));
            scaley = 1. / (double)((vTex->textureLevel0Height));
            sx1[i] *= scalex;
            sy1[i] *= scaley;
            sx2[i] *= scalex;
            sy2[i] *= scaley;
            sx3[i] *= scalex;
            sy3[i] *= scaley;
            sx4[i] *= scalex;
            sy4[i] *= scaley;

        }

    wf = 0.05;

    /*
     * Vertex buffer. Emit two 3-point triangles. The W or Z coordinate
     * is needed for AGP DMA, and the W coordinate is for some obscure
     * reason needed for texture mapping to be done correctly. So emit
     * a w value after the x and y coordinates.
     */

    BEGIN_HEADER2_3D_H5(0x00000000,60); /*Parameter type:0x00, total 24 Dwords*/
	
    OUT_RING(*((CARD32 *) (&dx1)));
    OUT_RING(*((CARD32 *) (&dy1)));
    OUT_RING(0x00000000);         /* Z */
    OUT_RING(0x3f800000);         /* W=1.0 */
    OUT_RING(0xffff0000);         /* Cd */
    OUT_RING(0xffff0000);         /* Cs */
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx1 + i)));
        OUT_RING(*((CARD32 *) (sy1 + i)));
    }

    OUT_RING(*((CARD32 *) (&dx2)));
    OUT_RING(*((CARD32 *) (&dy1)));
    OUT_RING(0x00000000);         /* Z */
    OUT_RING(0x3f800000);         /* W=1.0 */
    OUT_RING(0xffff0000);         /* Cd */
    OUT_RING(0xffff0000);         /* Cs */
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx2 + i)));
        OUT_RING(*((CARD32 *) (sy2 + i)));
    }

    OUT_RING(*((CARD32 *) (&dx2)));
    OUT_RING(*((CARD32 *) (&dy2)));
    OUT_RING(0x00000000);         /* Z */
    OUT_RING(0x3f800000);         /* W=1.0 */
    OUT_RING(0xffff0000);         /* Cd */
    OUT_RING(0xffff0000);         /* Cs */
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx4 + i)));
        OUT_RING(*((CARD32 *) (sy4 + i)));
    }

	OUT_RING(*((CARD32 *) (&dx1)));
	OUT_RING(*((CARD32 *) (&dy2)));
    OUT_RING(0x00000000);         /* Z */
    OUT_RING(0x3f800000);         /* W=1.0 */
    OUT_RING(0xffff0000);         /* Cd */
    OUT_RING(0xffff0000);         /* Cs */
    for (i = 0; i < numTex; ++i) {
        OUT_RING(*((CARD32 *) (sx3 + i)));
        OUT_RING(*((CARD32 *) (sy3 + i)));
    }
	
}

/*
 * Emit the H5 pixel-shader (PS) program that combines two texture stages
 * when the output alpha must not come from stage 0:
 *
 *   temp0     = texld(stage 0)
 *   temp2     = texld(stage 1)
 *   temp1.rgb = temp0.rgb * temp2.a
 *   temp1.a   = temp2.a   * V0.a      (V0 = colour register 0)
 *   oC0       = temp1
 *
 * v3d: 3D state; its viaTAUIns/viaALUIns records are used as scratch space
 *      when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set.
 * cb:  command buffer the ring macros write to.
 *
 * With VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE set, each instruction is
 * assembled field by field and its uint[] words are emitted; otherwise the
 * fixed hex words in the #else branches are emitted instead (presumably the
 * pre-encoded form of the same instructions).
 */
void 
Modulate_H5_via_mask2_no_alpha(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-Time configure , use less than 4 texture register or less than 3 temporary register*/
        OUT_RING(0x91000003); /* tn is as an input to TAU?*/
        OUT_RING((0x92<<24) | (2<<16) | 3); /* Length of ALU and TAU: 2 TAU instructions and 3 ALU instructions */
        OUT_RING(0x93000002); /* Instruction switch from TAU to ALU after the second TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


	 BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); /* PS instruction update: the two TAU (texture load) instructions */
        /* texld temp0.rgba  t0.rgba sample0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301001b0);
#endif

        /* texld temp2.rgba  t1.rgba sample1*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 1;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 2;
        viaTAUIns->s0index = 1;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 1;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x311211b1);
#endif

        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);

		
        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); 
        /* PS instruction update: 3 ALU instructions, 4 dwords each */
        /* mad temp1.rgb0, temp0.rgb0, temp2.aaa0 (0,0,0,0)*/
        /* Fires temp0 only; temp2 is still needed by the next instruction. */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_r|mask_g|mask_b;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_0f;
        viaALUIns->s1reg = src_temp;
        viaALUIns->s1index = 2;
        viaALUIns->s1sel_r = selA_a;
        viaALUIns->s1sel_g = selA_a;
        viaALUIns->s1sel_b = selA_a;
        viaALUIns->s1sel_a = selA_0f;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0x500026dd);
        OUT_RING(0x00720005);
        OUT_RING(0x00101060);
#endif

        /* mad temp1.000a, temp2.000a, V0.000a (0,0,0,0)*/  
        /* Fires temp2; only the alpha channel of temp1 is written. */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_a;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 2;
        viaALUIns->s0sel_r = selA_0f;
        viaALUIns->s0sel_g = selA_0f;
        viaALUIns->s0sel_b = selA_0f;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_color;
        viaALUIns->s1index = 0;
        viaALUIns->s1sel_r = selA_0f;
        viaALUIns->s1sel_g = selA_0f;
        viaALUIns->s1sel_b = selA_0f;
        viaALUIns->s1sel_a = selA_a;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0xb0040b6b);
        OUT_RING(0x008202b6);
        OUT_RING(0x00101060);
#endif

        /* mov oC0.rgba temp1.rgba*/
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
       OUT_RING(0x000d0820);
 #endif

        /* Terminate the ALU block with the two NULL commands. */
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);
}
/*
 * Emit the H5 pixel-shader (PS) program that multiplies texture stage 0 by
 * texture stage 1, with the stage-0 swizzle chosen by 'mode':
 *
 *   temp0 = texld(stage 0)
 *   temp2 = texld(stage 1)
 *   temp1 = temp0.aaaa * temp2.rgba     when mode == src_Aa
 *   temp1 = temp0.rgba * temp2.rgba     when mode == src_Ca
 *   oC0   = temp1
 *
 * v3d:  3D state; its viaTAUIns/viaALUIns records are used as scratch space.
 * cb:   command buffer the ring macros write to.
 * mode: src_Aa or src_Ca.  Any other value only produces a debug message
 *       and leaves the stage-0 selectors at the zero written by memset().
 *
 * Unlike the sibling Modulate_H5_* functions, the instructions are always
 * assembled through the instruction records; there is no pre-encoded path.
 */
void 
Modulate_H5_src_comp_Aa_Ca2(Via3DState *v3d, ViaCommandBuffer * cb,ViaTexBlendingModes mode)
{
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-Time configure , use less than 4 texture register or less than 3 temporary register*/
        OUT_RING(0x91000003); /* tn is as an input to TAU?*/
        OUT_RING((0x92<<24) | (2<<16) | 2); /* Length of ALU and TAU: 2 TAU instructions and 2 ALU instructions */
        OUT_RING(0x93000002); /* Instruction switch from TAU to ALU after the second TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);
	    BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); /* PS instruction update: the two TAU (texture load) instructions */

        /* texld temp0.rgba, stage 0 */
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));
        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        OUT_RING(viaTAUIns->uint[0]);

        /* texld temp2.rgba, stage 1 */
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));
        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 1;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 2;
        viaTAUIns->s0index = 1;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 1;
        OUT_RING(viaTAUIns->uint[0]);

        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);
        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); /* PS instruction update: 2 ALU instructions, 4 dwords each */

        /* mad temp1.rgba, temp0.<swizzle>, temp2.rgba, (0,0,0,0); fires temp0 and temp2 */
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));
        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        
        /* Stage-0 swizzle: alpha replicated for src_Aa, identity for src_Ca. */
        if(mode==src_Aa){
        viaALUIns->s0sel_r = selA_a;
        viaALUIns->s0sel_g = selA_a;
        viaALUIns->s0sel_b = selA_a;
        viaALUIns->s0sel_a = selA_a;
        }else if(mode==src_Ca){
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        }else{
        	/* Unexpected mode: only reported in debug builds, emission continues. */
        	DEBUG(ErrorF("\n Ca or Aa"));
        }
        viaALUIns->s1fire = 1;
        viaALUIns->s1reg = src_temp;
        viaALUIns->s1index = 2;
        viaALUIns->s1sel_r = selA_r;
        viaALUIns->s1sel_g = selA_g;
        viaALUIns->s1sel_b = selA_b;
        viaALUIns->s1sel_a = selA_a;

        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);

        /* mov oC0.rgba, temp1.rgba; fires temp1 */
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));
        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);

        /*
         * NOTE(review): no trailing HC_CmdNULL1/HC_CmdNULL2 pair is emitted
         * here, unlike Modulate_H5_via_mask2 -- confirm this is intentional.
         */
}

/*
 * Emit the H5 pixel-shader (PS) program that modulates texture stage 0 by
 * the alpha of texture stage 1:
 *
 *   temp0 = texld(stage 0)
 *   temp2 = texld(stage 1)
 *   temp1 = temp0.rgba * temp2.aaaa
 *   oC0   = temp1
 *
 * v3d: 3D state; its viaTAUIns/viaALUIns records are used as scratch space
 *      when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set.
 * cb:  command buffer the ring macros write to.
 *
 * With VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE set, each instruction is
 * assembled field by field and its uint[] words are emitted; otherwise the
 * fixed hex words in the #else branches are emitted instead (presumably the
 * pre-encoded form of the same instructions).
 */
void 
Modulate_H5_via_mask2(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-Time configure , use less than 4 texture register or less than 3 temporary register*/
        OUT_RING(0x91000003); /* tn is as an input to TAU?*/
        OUT_RING((0x92<<24) | (2<<16) | 2); /* Length of ALU and TAU: 2 TAU instructions and 2 ALU instructions */
        OUT_RING(0x93000002); /* Instruction switch from TAU to ALU after the second TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


	 BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); /* PS instruction update: the two TAU (texture load) instructions */
        /* texld temp0.rgba  t0.rgba sample0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301001b0);
#endif

        /* texld temp2.rgba  t1.rgba sample1*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 1;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 2;
        viaTAUIns->s0index = 1;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 1;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x311211b1);
#endif

        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);

		
        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); /* PS instruction update: 2 ALU instructions, 4 dwords each */
	
        /* mad temp1.rgba, temp0.rgba, temp2.aaaa (0,0,0,0)*/
        /* fire temp0, temp2 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1fire = 1;
        viaALUIns->s1reg = src_temp;
        viaALUIns->s1index = 2;
        viaALUIns->s1sel_r = selA_a;
        viaALUIns->s1sel_g = selA_a;
        viaALUIns->s1sel_b = selA_a;
        viaALUIns->s1sel_a = selA_a;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0x302026db);
        OUT_RING(0x00f20005);
        OUT_RING(0x00101060);
#endif

        /* mov oC0.rgba temp1.rgba*/
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
        OUT_RING(0x000d0820);
#endif

        /* Terminate the ALU block with the two NULL commands. */
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);
}

/*
 * Emit the H5 pixel-shader (PS) program that modulates a single texture
 * stage by constant register 0 (used, judging by the name, when the mask is
 * a single pixel -- TODO confirm against the caller):
 *
 *   temp0 = texld(stage 0)
 *   temp1 = temp0.rgba * const0.rgba
 *   oC0   = temp1
 *
 * v3d: 3D state; its viaTAUIns/viaALUIns records are used as scratch space
 *      when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set.
 * cb:  command buffer the ring macros write to.
 *
 * With VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE set, each instruction is
 * assembled field by field and its uint[] words are emitted; otherwise the
 * fixed hex words in the #else branches are emitted instead (presumably the
 * pre-encoded form of the same instructions).
 */
void 
Modulate_H5_via_src_onepix_mask(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-Time configure , use less than 4 texture register or less than 3 temporary register*/
        OUT_RING(0x91000001); /* tn is as an input to TAU?*/
        OUT_RING((0x92<<24) | (1<<16) | 2); /* Length of ALU and TAU: 1 TAU instruction and 2 ALU instructions */
        OUT_RING(0x93000001); /* Instruction switch from TAU to ALU after the first TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); 
	 /* PS instruction update: the single TAU (texture load) instruction */
        /* texld temp0.rgba  t0.rgba sample0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301001b0);
#endif

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,15); 
	/* PS instruction update: 2 ALU instructions, 4 dwords each */
		
	/* mad temp1.rgba, temp0.rgba, const0.rgba, (0,0,0,0)*/
	/*fire temp0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_const;
        viaALUIns->s1index = 0;
        viaALUIns->s1sel_r = selA_r;
        viaALUIns->s1sel_g = selA_g;
        viaALUIns->s1sel_b = selA_b;
        viaALUIns->s1sel_a = selA_a;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
	 OUT_RING(0x0140b6d0);
        OUT_RING(0x30080053);
        OUT_RING(0x00f20005);
        OUT_RING(0x00101060);
#endif

        /* mov oC0.rgba temp1.rgba*/
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else        
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
        OUT_RING(0x000d0820);
#endif
        /*
         * NOTE(review): no HC_CmdNULL1/HC_CmdNULL2 pair follows the TAU or
         * ALU block here, unlike Modulate_H5_via_mask2 -- confirm intended.
         */
}


/*
 * Emit the H5 pixel-shader (PS) program that modulates a single texture
 * stage by constant register 0 and then replaces the resulting alpha with
 * const0.a * V0.a (V0 = colour register 0):
 *
 *   temp0   = texld(stage 0)
 *   temp1   = temp0.rgba * const0.rgba
 *   temp1.a = const0.a * V0.a
 *   oC0     = temp1
 *
 * v3d: 3D state; its viaTAUIns/viaALUIns records are used as scratch space
 *      when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set.
 * cb:  command buffer the ring macros write to.
 *
 * With VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE set, each instruction is
 * assembled field by field and its uint[] words are emitted; otherwise the
 * fixed hex words in the #else branches are emitted instead (presumably the
 * pre-encoded form of the same instructions).
 */
void 
Modulate_H5_via_src_onepix_mask_no_alpha(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-Time configure , use less than 4 texture register or less than 3 temporary register*/
        OUT_RING(0x91000001); /* tn is as an input to TAU?*/
        OUT_RING((0x92<<24) | (1<<16) | 3); /* Length of ALU and TAU: 1 TAU instruction and 3 ALU instructions */
        OUT_RING(0x93000001); /* Instruction switch from TAU to ALU after the first TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


	 BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); 
        /* PS instruction update: the single TAU (texture load) instruction */
        /* texld temp0.rgba  t0.rgba sample0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301001b0);
#endif

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,15); 
        /* PS instruction update: 3 ALU instructions, 4 dwords each */

        /* mad temp1.rgba, temp0.rgba, const0.rgba, (0,0,0,0)*/
        /*fire temp0*/
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_const;
        viaALUIns->s1index = 0;
        viaALUIns->s1sel_r = selA_r;
        viaALUIns->s1sel_g = selA_g;
        viaALUIns->s1sel_b = selA_b;
        viaALUIns->s1sel_a = selA_a;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0x30080053);
        OUT_RING(0x00f20005);
        OUT_RING(0x00101060);
#endif

        /* mad temp1.000a, const0.000a, V0.000a (0,0,0,0)*/
        /* Overwrites only temp1.a (dwmask = mask_a); no source is fired. */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_a;
        viaALUIns->s0reg = src_const;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_0f;
        viaALUIns->s0sel_g = selA_0f;
        viaALUIns->s0sel_b = selA_0f;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_color;
        viaALUIns->s1index = 0;
        viaALUIns->s1sel_r = selA_0f;
        viaALUIns->s1sel_g = selA_0f;
        viaALUIns->s1sel_b = selA_0f;
        viaALUIns->s1sel_a = selA_a;
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0xb0040b6b);
        OUT_RING(0x008080b6);
        OUT_RING(0x00101060);
#endif

        /* mov oC0.rgba temp1.rgba*/
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
        OUT_RING(0x000d0820);
#endif
        /*
         * NOTE(review): no HC_CmdNULL1/HC_CmdNULL2 pair follows the TAU or
         * ALU block here, unlike Modulate_H5_via_mask2 -- confirm intended.
         */
}

/*
 * Modulate_H5_via_src - emit the H5 pixel-shader program for the plain
 * "source" blend mode (selected for via_src by via3DEmitPixelShader_H5).
 *
 * The program is one TAU instruction (texld into temp1) followed by one
 * ALU instruction (mov temp1 -> oC0).
 *
 * v3d: 3D state; when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set its
 *      viaTAUIns / viaALUIns records are used as scratch space to build the
 *      instruction words.
 * cb:  command buffer; not referenced directly here, presumably consumed by
 *      the BEGIN_HEADER2_3D_H5 / OUT_RING macros -- TODO confirm.
 *
 * When the translate option is off, pre-encoded instruction words are
 * emitted instead (assumed to be equivalent -- not verifiable from here).
 */
void 
Modulate_H5_via_src(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-time configuration: fewer than 4 texture registers or fewer than 3 temporary registers */
        OUT_RING(0x91000001); /* tn is used as an input to the TAU (unconfirmed in the original comment) */
        OUT_RING((0x92<<24) | (1<<16) | 1); /* Program length: 1 TAU instruction and 1 ALU instruction */
        OUT_RING(0x93000001); /* Switch from TAU to ALU after the first TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); 
        /* PS instruction update: 1 TAU (texture load) instruction, 1 dword. */
        /* texld temp1.rgba  t0.rgba sample0 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        /* Start from an all-zero record so unset fields encode as 0. */
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 1;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301101b0);
#endif

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); 
        /* PS instruction update: 1 ALU instruction of 4 dwords. */
        
        /* mov oC0.rgba temp1.rgba */
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        /* The ALU instruction record is emitted as its four raw dwords. */
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else        
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
        OUT_RING(0x000d0820);
#endif
}


/*
 * Modulate_H5_via_src_no_alpha - emit an H5 pixel-shader program that takes
 * the colour channels from the texture and the alpha channel from v0.
 *
 * The program is one TAU instruction (texld into temp1) followed by three
 * ALU instructions:
 *   1. mov temp0.rgb <- temp1.rgb   (write mask r|g|b)
 *   2. mov temp0.a   <- v0.a        (write mask a, source src_color index 0)
 *   3. mov oC0.rgba  <- temp0.rgba
 *
 * v3d: 3D state; when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set its
 *      viaTAUIns / viaALUIns records are used as scratch space to build the
 *      instruction words.
 * cb:  command buffer; not referenced directly here, presumably consumed by
 *      the BEGIN_HEADER2_3D_H5 / OUT_RING macros -- TODO confirm.
 *
 * When the translate option is off, pre-encoded instruction words are
 * emitted instead (assumed to be equivalent -- not verifiable from here).
 */
void 
Modulate_H5_via_src_no_alpha(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-time configuration: fewer than 4 texture registers or fewer than 3 temporary registers */
        OUT_RING(0x91000001); /* tn is used as an input to the TAU (unconfirmed in the original comment) */
        OUT_RING((0x92<<24) | (1<<16) | 3); /* Program length: 1 TAU instruction and 3 ALU instructions */
        OUT_RING(0x93000001); /* Switch from TAU to ALU after the first TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); 
        /* PS instruction update: 1 TAU (texture load) instruction, 1 dword. */
        /* texld temp1.rgba  t0.rgba sample0 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        /* Start from an all-zero record so unset fields encode as 0. */
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 1;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301101b0);
#endif

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); 
        /* PS instruction update: 3 ALU instructions, each 4 dwords. */

        /* ALU 1: mov temp0.rgb0 temp1.rgb0 */
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        /* Only the colour channels are written; alpha comes from ALU 2. */
        viaALUIns->dwmask = mask_r|mask_g|mask_b;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_0f;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x601C0053);
        OUT_RING(0x00720106);
        OUT_RING(0x000d0020);
#endif

        /* ALU 2: mov temp0.000a v0.000a (alpha only, taken from v0) */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_a;
        viaALUIns->s0reg = src_color;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_0f;
        viaALUIns->s0sel_g = selA_0f;
        viaALUIns->s0sel_b = selA_0f;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x601C0053);
        OUT_RING(0x0080040b);
        OUT_RING(0x000d0020);
#endif

        /* ALU 3: mov oC0.rgba temp0.rgba */
        /* fire temp0 */
        /*
         * NOTE(review): the comment above says "fire temp0", but the
         * translate path below never sets s0fire (it stays 0 after the
         * memset), unlike the final mov in Modulate_H5_via_src.  Confirm
         * against the hardware documentation whether this is intentional.
         */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else        
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20005);
        OUT_RING(0x000d0820);
#endif
}

/*
 * Modulate_H5_No_Tex - emit the H5 pixel-shader program used when no
 * texture is bound (the default case of via3DEmitPixelShader_H5).
 *
 * The program has no TAU instruction and a single ALU instruction that
 * copies v0 (src_color index 0) to the colour output oC0.
 *
 * v3d: 3D state; when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set its
 *      viaALUIns record is used as scratch space to build the instruction.
 * cb:  command buffer; not referenced directly here, presumably consumed by
 *      the BEGIN_HEADER2_3D_H5 / OUT_RING macros -- TODO confirm.
 *
 * When the translate option is off, pre-encoded instruction words are
 * emitted instead (assumed to be equivalent -- not verifiable from here).
 */
void 
Modulate_H5_No_Tex(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        /* Only the ALU scratch record is needed: this program has no TAU. */
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90058000); /* 4-time configuration: fewer than 4 texture registers or fewer than 3 temporary registers */
        OUT_RING(0x91000000); /* tn is not used as an input to the TAU (unconfirmed in the original comment) */
        OUT_RING((0x92<<24) | (0<<16) | 1); /* Program length: 0 TAU instructions and 1 ALU instruction */
        OUT_RING(0x93000000); /* No TAU instruction precedes the switch to the ALU */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,20); 
        /* PS instruction update: 1 ALU instruction of 4 dwords. */
        /* mov oC0.rgba v0.rgba */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        /* Start from an all-zero record so unset fields encode as 0. */
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0reg = src_color;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        /* The ALU instruction record is emitted as its four raw dwords. */
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else        
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f04005);
        OUT_RING(0x000d0820);
#endif

}


/*
 * Modulate_H5_via_mask - emit the H5 pixel-shader program for the
 * single-texture "mask" blend mode (selected for via_mask when one texture
 * is bound, see via3DEmitPixelShader_H5).
 *
 * The program is one TAU instruction (texld into temp0) followed by two
 * ALU instructions:
 *   1. mad temp1.rgba <- temp0.aaaa * v0.rgba + 0
 *      (every channel of v0 is scaled by the texture's alpha)
 *   2. mov oC0.rgba   <- temp1.rgba
 *
 * v3d: 3D state; when VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE is set its
 *      viaTAUIns / viaALUIns records are used as scratch space to build the
 *      instruction words.
 * cb:  command buffer; not referenced directly here, presumably consumed by
 *      the BEGIN_HEADER2_3D_H5 / OUT_RING macros -- TODO confirm.
 *
 * When the translate option is off, pre-encoded instruction words are
 * emitted instead (assumed to be equivalent -- not verifiable from here).
 */
void 
Modulate_H5_via_mask(Via3DState *v3d, ViaCommandBuffer * cb)
{
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        Via3DTAUInstructionPtr    viaTAUIns = &v3d->viaTAUIns;
        Via3DALUInstructionPtr    viaALUIns = &v3d->viaALUIns;
#endif

        /* Texture blending, PS path. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr,20); /* Parameter type 01: update PS control */
        OUT_RING(0x90018000); /* 4-time configuration: fewer than 4 texture registers or fewer than 3 temporary registers */
        OUT_RING(0x91000001); /* tn is used as an input to the TAU (unconfirmed in the original comment) */
        OUT_RING((0x92<<24) | (1<<16) | 2); /* Program length: 1 TAU instruction and 2 ALU instructions */
        OUT_RING(0x93000001); /* Switch from TAU to ALU after the first TAU instruction */
        OUT_RING(0x94000000);
        OUT_RING(0x95000000);
        OUT_RING(0x96000000);
        OUT_RING(0x97000000);
        OUT_RING(0x98000000);
        OUT_RING(0x99000000);
        OUT_RING(0x9A000000);
        OUT_RING(HC_CmdNULL1);
        OUT_RING(HC_CmdNULL2);


        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x21000000,10); 
        /* PS instruction update: 1 TAU (texture load) instruction, 1 dword. */
        /* texld temp0.rgba  t0.rgba sample0 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        /* Start from an all-zero record so unset fields encode as 0. */
        memset(viaTAUIns, 0x00, sizeof(Via3DTAUInstructionRec));

        viaTAUIns->pvalid = 1;
        viaTAUIns->sfire = 1;
        viaTAUIns->tstage = 0;
        viaTAUIns->op = via_texld;
        viaTAUIns->dreg = dstT_temp;
        viaTAUIns->dindex = 0;
        viaTAUIns->s0index = 0;
        viaTAUIns->s0sel_r = selT_r;
        viaTAUIns->s0sel_g = selT_g;
        viaTAUIns->s0sel_b = selT_b;
        viaTAUIns->s0sel_a = selT_a;
        viaTAUIns->s1index = 0;
        
        OUT_RING(viaTAUIns->uint[0]);
#else
        OUT_RING(0x301001b0);
#endif

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x20000000,15); 
        /* PS instruction update: 2 ALU instructions, each 4 dwords. */

        /* ALU 1: mad temp1.rgba, temp0.aaaa, V0.xyza, (0, 0, 0, 0) */
        /* fire temp0 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mad;
        viaALUIns->dreg0 = dst_temp;
        viaALUIns->dindex0 = 1;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        /* Source 0: the sampled texel, alpha replicated to all channels. */
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 0;
        viaALUIns->s0sel_r = selA_a;
        viaALUIns->s0sel_g = selA_a;
        viaALUIns->s0sel_b = selA_a;
        viaALUIns->s0sel_a = selA_a;
        /* Source 1: v0, channels passed straight through. */
        viaALUIns->s1reg = src_color;
        viaALUIns->s1index = 0;
        viaALUIns->s1sel_r = selA_r;
        viaALUIns->s1sel_g = selA_g;
        viaALUIns->s1sel_b = selA_b;
        viaALUIns->s1sel_a = selA_a;
        /* Source 2: the additive term, selected as 0 on every channel. */
        viaALUIns->s2reg = src_preConstant;
        viaALUIns->s2index = 0;
        viaALUIns->s2sel_r = selA_0f;
        viaALUIns->s2sel_g = selA_0f;
        viaALUIns->s2sel_b = selA_0f;
        viaALUIns->s2sel_a = selA_0f;
        
        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x0140b6d0);
        OUT_RING(0xb0040053);
        OUT_RING(0x00f2006d);
        OUT_RING(0x00101060);
#endif
        
		
        /* ALU 2: mov oC0.rgba temp1.rgba */
        /* fire temp1 */
#if VIA_H5_SHADER_INSTRUCTION_TRANSLATE_ENABLE
        memset(viaALUIns, 0x00, sizeof(Via3DALUInstructionRec));

        viaALUIns->op = via_mov;
        viaALUIns->dreg0 = dst_output;
        viaALUIns->dindex0 = 0;
        viaALUIns->dreg1 = dst_nodefined;
        viaALUIns->dindex1 = NO_DEST_INDEX;
        viaALUIns->dwmask = mask_all;
        viaALUIns->s0fire = 1;
        viaALUIns->s0reg = src_temp;
        viaALUIns->s0index = 1;
        viaALUIns->s0sel_r = selA_r;
        viaALUIns->s0sel_g = selA_g;
        viaALUIns->s0sel_b = selA_b;
        viaALUIns->s0sel_a = selA_a;
        viaALUIns->s1reg = src_notrequired;
        viaALUIns->s2reg = src_notrequired;

        OUT_RING(viaALUIns->uint[0]);
        OUT_RING(viaALUIns->uint[1]);
        OUT_RING(viaALUIns->uint[2]);
        OUT_RING(viaALUIns->uint[3]);
#else
        OUT_RING(0x01c00530);
        OUT_RING(0x301C0053);
        OUT_RING(0x00f20105);
        OUT_RING(0x000d0820);
#endif
}

/*
 * via3DEmitState_H5 - write the cached 3D pipeline state to the command
 * buffer for the H5 engine.
 *
 * Each state group (destination, blend, per-texture setup, drawing, enable
 * bits, PS constant) is guarded by its own dirty flag and is emitted only
 * when that flag is set or forceUpload is TRUE; the flag is cleared once
 * the group has been emitted.  The general texture attributes and the
 * primitive/vertex setup are emitted unconditionally on every call.
 *
 * v3d:         3D state to emit; its dirty flags are cleared as a side effect.
 * cb:          command buffer (used by the ring macros; cb->has3dState is
 *              saved and restored around the PS-constant upload).
 * forceUpload: emit every state group regardless of the dirty flags.
 */
static void
via3DEmitState_H5(Via3DState * v3d, ViaCommandBuffer * cb, Bool forceUpload)
{
    int i;
    ViaTextureUnit *vTex;

    /*
     * Destination buffer location, format and pitch.
     */
    if (forceUpload || v3d->destDirty) {
        v3d->destDirty = FALSE;

        /* Parameter type 0x01. */
        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr, 3);
        /* Destination base is programmed in 256-byte units. */
        OUT_RING_SubA(0x50, v3d->destOffset >> 8);
        /* Render-target memory location. */
        OUT_RING_SubA(0x51, 0);
        /* Destination pitch is in units of 32 bytes for linear mode. */
        OUT_RING_SubA(0x52, v3d->destFormat | (v3d->destPitch >> 5));
    }

    /*
     * Alpha-blending equation.
     */
    if (forceUpload || v3d->blendDirty) {
        v3d->blendDirty = FALSE;

        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr, 8);
        OUT_RING_SubA(EXA_HC_SubA_HABLRCa, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLRCb, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLRFCa, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLRFCb, 0x00);
        OUT_RING_SubA(EXA_HC_SubA_HABLCsat, v3d->blendCol0);
        OUT_RING_SubA(EXA_HC_SubA_HABLCop, v3d->blendCol1);
        OUT_RING_SubA(EXA_HC_SubA_HABLAsat, v3d->blendAl0);
        OUT_RING_SubA(EXA_HC_SubA_HABLAop, v3d->blendAl1);
    }

    /*
     * Per-texture and per-sampler settings.
     */
    for (i = 0; i < v3d->numTextures; ++i) {
        vTex = v3d->tex + i;

        if (forceUpload || vTex->textureDirty) {
            vTex->textureDirty = FALSE;

            BEGIN_HEADER2_3D_H5(HC_ParaType_Tex |
                (((i == 0) ? EXA_HC_SubType_Tex0 : EXA_HC_SubType_Tex1) << 24), 15);
            /* Texture base is programmed in 256-byte units. */
            OUT_RING_SubA(0x00, (vTex->textureLevel0Offset >> 8));
            /* 2D texture, offset mode (unconfirmed), memory location. */
            OUT_RING_SubA(0x18, 0x1100 | (vTex->agpTexture ? LOC_SF : LOC_SL));
            /* Width and height of the texture. */
            OUT_RING_SubA(0x20, (vTex->textureLevel0Height << 12) | vTex->textureLevel0Width);
            OUT_RING_SubA(0x21, 0x01);

            /* Bit 15 is set only when both width and height are exact powers of two. */
            if ((vTex->textureLevel0Width == (1 << vTex->textureLevel0WExp)) &&
                (vTex->textureLevel0Height == (1 << vTex->textureLevel0HExp))) {
                OUT_RING_SubA(0x22, (1 << 15) | (vTex->textureLevel0HExp << 4) | vTex->textureLevel0WExp);
            } else {
                OUT_RING_SubA(0x22, (vTex->textureLevel0HExp << 4) | vTex->textureLevel0WExp);
            }

            /* Texture format; YUY2 additionally selects the YUV->RGB mode. */
            if (vTex->textureFormat == EXA_HC_HTXnFM_YUY2) {
                /* Per the 3D spec: 0x01 = BT601 (SDTV), 0x10 = BT709 (HDTV). */
                OUT_RING_SubA(0x30, vTex->textureFormat | EXA_HC_HTXnYUV2RGBMode_SDTV);
            } else {
                OUT_RING_SubA(0x30, vTex->textureFormat);
            }

            /* Sampler header for this texture unit. */
            BEGIN_HEADER2_3D_H5(HC_ParaType_Tex |
                (((i == 0) ? EXA_HC_SubType_Samp0 : EXA_HC_SubType_Samp1) << 24), 10);

            /* Filter mode. */
            OUT_RING_SubA(0x31, (0x10 << 16) | vTex->texturefilter);

            /* Clamp/wrap mode for T and S. */
            OUT_RING_SubA(0x32, ((1 << 6) | (((unsigned)vTex->textureModesT) << 3) |
                                 ((unsigned)vTex->textureModesS)));
        }
    }

    /*
     * Solid colour and render-target write setup.
     */
    if (forceUpload || v3d->drawingDirty) {
        v3d->drawingDirty = FALSE;

        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr, 15);  /* Parameter type 0x01 */
        /* Solid shading colour, low 24 bits. */
        OUT_RING_SubA(INV_HC_SubA_HSolidCL, (v3d->solidColor & 0x00FFFFFF));
        /* Solid shading colour, top byte, combined with the solid alpha. */
        OUT_RING_SubA(INV_HC_SubA_HSolidCH, (((v3d->solidColor & 0xFF000000) >> 16) |
                                             (v3d->solidAlpha & 0xFF)));
        /* Render target: raster operation and A/R/G/B channel masks. */
        OUT_RING_SubA(0x53, 0x10c0f);
    }

    /*
     * Enable bits.
     */
    if (forceUpload || v3d->enableDirty) {
        v3d->enableDirty = FALSE;

        BEGIN_HEADER2_3D_H5(HC_ParaType_Attr, 15);  /* Parameter type 0x01 */
        /* Colour write, pixel shader, texture cache, alpha blending. */
        OUT_RING_SubA(INV_HC_SubA_HEnable1, ((v3d->blend) ? INV_HC_HenABLMRT0_MASK : 0) |
                                            INV_HC_HenDTMRT0_MASK);
        OUT_RING_SubA(INV_HC_SubA_HEnable2, INV_HC_HenLUL2DR_MASK |
                                            INV_HC_HenLDIAMOND_MASK |
                                            INV_HC_HenVC_MASK |
                                            INV_HC_HenPS_MASK |
                                            INV_HC_HenTXCH_MASK |
                                            ((v3d->writeColor) ? INV_HC_HenCW_MASK : 0));
        /* Guard band, left (value taken from the XP driver). */
        OUT_RING_SubA(INV_HC_SubA_HGBClipGL, 0x001ed0);
        /* Guard band, right (value taken from the XP driver). */
        OUT_RING_SubA(INV_HC_SubA_HGBClipGR, 0x000800);
    }

    /*
     * General texture attributes (always emitted).
     */
    BEGIN_HEADER2_3D_H5(HC_ParaType_Tex | 0xfe000000, 10);
    /* Number of textures; clear the texture cache. */
    OUT_RING_SubA(0x00, ((unsigned)v3d->numTextures) << 4 | 0x01);
    /* Source and dimension of texture 0 and texture 1. */
    OUT_RING_SubA(0x01, (0x25 << 8) | 0x21);
    /* Number of textures in the vertex buffer, 2 dimensions. */
    OUT_RING_SubA(0x08, (0x01 << 16) | (0x01 << 8) | ((unsigned)v3d->numTextures));

    OUT_RING_SubA(0x0d, 0x810000);

    /*
     * Primitive and vertex-format setup (always emitted).
     */
    BEGIN_HEADER2_3D_H5(INV_HC_ParaType_Vetex, 50);
    /* FVF vertex length in 32-bit words: 6 plus 2 per texture. */
    OUT_RING_SubA(0x00, (0x06 + v3d->numTextures * 2));

    /* Vertex sequence select: x, y, z, w, cs, cd, s, t. */
    OUT_RING_SubA(0x01, 0x20100);
    OUT_RING_SubA(0x02, 0x60503);
    /* Texture-coordinate slots depend on the number of textures. */
    switch (v3d->numTextures) {
    case 0:
        break;
    case 1:
        OUT_RING_SubA(0x03, 0x908);
        break;
    case 2:
        OUT_RING_SubA(0x03, 0x0c0908);
        OUT_RING_SubA(0x04, 0x0d);
        break;
    default:
        DEBUG(ErrorF("\n Error number of texture"));
        break;
    }

    /* Triangle fan setting. */
    OUT_RING_SubA(0x20, 0x1c5c00);
    /* Solid shading, back face, primitive type. */
    OUT_RING_SubA(0x23, 0x000f02);
    /* 4 vertices. */
    OUT_RING_SubA(0x24, 0x000004);
    /* x, y, z, w, Cd, Cs, S, T. */
    OUT_RING_SubA(0x25, 0xf6cc00);
    /* Vertex parameter mask. */
    OUT_RING_SubA(0x28, 0xffcc00);

    /* Raw register values carried over unchanged; their meaning is not
     * documented in this file. */
    OUT_RING(0x40800000);
    OUT_RING(0x41400000);
    OUT_RING(0x4200493f);

    OUT_RING(0x43800000);
    OUT_RING(0x44400000);
    OUT_RING(0x4500493f);

    OUT_RING(0x46800000);
    OUT_RING(0x47400000);
    OUT_RING(0x4800003f);

    OUT_RING(0x49000000);
    OUT_RING(0x50000000);
    OUT_RING(0x51000000);
    OUT_RING(0x52147fff);

    /*
     * Pixel-shader constant 0, taken from texture unit 0's blend colour.
     */
    vTex = v3d->tex;
    if (forceUpload || vTex->texBColDirty) {
        /*
         * The constant is uploaded as the raw bit pattern of four floats.
         * A union is used for the float -> CARD32 reinterpretation; casting
         * the float's address to CARD32* would violate C's aliasing rules.
         */
        union {
            float f;
            CARD32 u;
        } colorkey[4];
        CARD32 ulR, ulG, ulB, ulA;
        /* cb->has3dState is restored after the upload below. */
        Bool saveHas3dState = cb->has3dState;

        vTex->texBColDirty = FALSE;

        BEGIN_HEADER2_3D_H5(HC_ParaType_Pal | 0x22000000, 10);

        /*
         * Expand each 8-bit channel c to 10 bits as (c << 2) | (c >> 6):
         * R, G, B come from texRCa bits 23:16, 15:8, 7:0 and A from texRAa
         * bits 23:16.  The result is then scaled by 1/1024.
         */
        ulR = ((vTex->texRCa & 0xFF0000) >> 14) | ((vTex->texRCa & 0xFF0000) >> 22);
        ulG = ((vTex->texRCa & 0xFF00) >> 6) | ((vTex->texRCa & 0xFF00) >> 14);
        ulB = ((vTex->texRCa & 0xFF) << 2) | ((vTex->texRCa & 0xFF) >> 6);
        ulA = ((vTex->texRAa & 0xFF0000) >> 14) | ((vTex->texRAa & 0xFF0000) >> 22);
        colorkey[0].f = (float)(ulR / 1024.0);
        colorkey[1].f = (float)(ulG / 1024.0);
        colorkey[2].f = (float)(ulB / 1024.0);
        colorkey[3].f = (float)(ulA / 1024.0);

        OUT_RING(colorkey[0].u);
        OUT_RING(colorkey[1].u);
        OUT_RING(colorkey[2].u);
        OUT_RING(colorkey[3].u);

        cb->has3dState = saveHas3dState;
    }
}

/*
 * via3DEmitPixelShader_H5 - pick and emit the pixel-shader program that
 * matches the current texture configuration.
 *
 * With one texture the program is chosen from texture unit 0's blend mode;
 * with two textures it is chosen from unit 1's blend mode (and, for
 * mask_Ca, additionally from unit 0's).  Any other texture count emits the
 * no-texture program.  Blend modes without a matching program emit nothing.
 *
 * srcFormat only matters for via_src_onepix_mask, where the presence of an
 * alpha channel selects between two programs.  maskFormat and dstFormat are
 * accepted but not used.
 */
static void via3DEmitPixelShader_H5(Via3DState * v3d, ViaCommandBuffer * cb, 
    int srcFormat, int maskFormat, int  dstFormat)
{
    ViaTextureUnit *unit0 = v3d->tex;
    ViaTextureUnit *unit1 = v3d->tex + 1;

    if (v3d->numTextures == 1) {
        switch (unit0->textureBlendMode) {
        case via_src:
            /* The same program serves sources with and without alpha. */
            Modulate_H5_via_src(v3d, cb);
            break;
        case via_src_onepix_mask:
            if (PICT_FORMAT_A(srcFormat))
                Modulate_H5_via_src_onepix_mask(v3d, cb);
            else
                Modulate_H5_via_src_onepix_mask_no_alpha(v3d, cb);
            break;
        case via_mask:
            Modulate_H5_via_mask(v3d, cb);
            break;
        default:
            /* via_src_onepix_comp_mask, via_comp_mask and everything else:
             * no program is emitted. */
            break;
        }
        return;
    }

    if (v3d->numTextures == 2) {
        switch (unit1->textureBlendMode) {
        case via_mask:
            /* The same program serves sources with and without alpha. */
            Modulate_H5_via_mask2(v3d, cb);
            break;
        case mask_Ca:
            /* Component-alpha mask: the program variant follows unit 0. */
            switch (unit0->textureBlendMode) {
            case src_Aa:
                Modulate_H5_src_comp_Aa_Ca2(v3d, cb, src_Aa);
                break;
            case src_Ca:
                Modulate_H5_src_comp_Aa_Ca2(v3d, cb, src_Ca);
                break;
            default:
                break;
            }
            break;
        default:
            /* via_comp_mask and everything else: no program is emitted. */
            break;
        }
        return;
    }

    Modulate_H5_No_Tex(v3d, cb);
}

/*
 * viaSet3DTexture_H5 - record the parameters of texture unit `tex` in the
 * cached 3D state for later emission.
 *
 * v3d:          3D state holding the texture units.
 * tex:          index of the texture unit to configure.
 * offset:       level-0 texture offset.
 * pitch:        level-0 pitch (treated as bytes when deriving the width).
 * npot:         stored as-is in the unit.
 * width/height: texture size; their orders are stored via viaOrder().
 * format:       PICT format; translated with via3DTexFormat().
 * sMode/tMode:  texture modes, stored relative to via_single.
 * blendingMode: blend mode stored in the unit.
 * agpTexture:   stored as-is in the unit.
 * matrix:       transform stored in the unit; must be affine.
 * filter:       filter mode, mapped to the hardware filter field.
 *
 * Returns FALSE when the transform is not affine.  In that case the unit
 * has already been partially updated but is not marked dirty.  Returns TRUE
 * otherwise, with the unit marked dirty.
 */
static Bool
viaSet3DTexture_H5(Via3DState * v3d, int tex, CARD32 offset, CARD32 pitch, 
   Bool npot, CARD32 width, CARD32 height, int format, ViaTextureModes sMode,
   ViaTextureModes tMode, ViaTexBlendingModes blendingMode, Bool agpTexture, 
   PictTransformPtr matrix, ViaTexFilterModes filter)
{
    ViaTextureUnit *vTex = v3d->tex + tex;
    CARD32 bpp = PICT_FORMAT_BPP(format);
    CARD32 bytesPerPixel = bpp >> 3;

    vTex->textureLevel0Offset = offset;

    vTex->npot = npot;
    vTex->textureLevel0Pitch = pitch;
    viaOrder(width, &vTex->textureLevel0WExp);
    viaOrder(height, &vTex->textureLevel0HExp);

    /*
     * If the pitch is not the one expected for `width` (row size in bits
     * rounded up to a multiple of 256, expressed in bytes), derive the
     * width from the pitch instead.  Formats narrower than 8 bpp have no
     * whole bytes per pixel; keep the caller's width for them rather than
     * dividing by zero.
     */
    if (bytesPerPixel != 0 && (((width * bpp + 255) >> 8) << 5) != pitch) {
        vTex->textureLevel0Width = pitch / bytesPerPixel;
    } else {
        vTex->textureLevel0Width = width;
    }
    vTex->textureLevel0Height = height;

    vTex->textureFormat = via3DTexFormat(format);
    vTex->textureBlendMode = blendingMode;
    vTex->transform = matrix;
    if (!PictureTransformIsAffine(vTex->transform))
        return FALSE;

    switch (filter) {
    case via_FilterFast:
    case via_FilterNearest:
        vTex->texturefilter = 0x0;
        break;
    case via_FilterBest:
    case via_FilterGood:
    case via_FilterBilinear:
        vTex->texturefilter = 0x2490;
        break;
    default:
        vTex->texturefilter = 0x0;
        break;
    }
    vTex->textureDirty = TRUE;
    vTex->textureModesS = sMode - via_single;
    vTex->textureModesT = tMode - via_single;

    vTex->agpTexture = agpTexture;
    return TRUE;
}

/*
 * viaInit3DState_H5 - install the H5 implementations of the 3D state hooks
 * into *v3d and (re)build the file-level lookup tables for composite
 * operators and picture formats.
 *
 * viaOperatorModes is indexed by the operator code in column 0 of
 * viaOpCodes; via3DFormats is indexed by VIA_FMT_HASH() of the picture
 * format in column 0 of viaFormats.  A later table row that hashes to an
 * occupied slot silently overwrites it.
 */
void
viaInit3DState_H5(Via3DState * v3d)
{
    int idx;

    /* State setters. */
    v3d->setDestination = viaSet3DDestination_H5;
    v3d->setDrawing = viaSet3DDrawing;
    v3d->setFlags = viaSet3DFlags;
    v3d->setTexture = viaSet3DTexture_H5;
    v3d->setTexUVOffset = viaSet3DTexUVOffset;
    v3d->setTexBlendCol = viaSet3DTexBlendCol;
    v3d->setCompositeOperator = viaSet3DCompositeOperator;

    /* Capability queries. */
    v3d->opSupported = via3DOpSupported;
    v3d->dstSupported = via3DDstSupported;
    v3d->texSupported = via3DTexSupported;

    /* Command emitters. */
    v3d->emitQuad = via3DEmitQuad_H5;
    v3d->emitState = via3DEmitState_H5;
    v3d->emitPixelShader = via3DEmitPixelShader_H5;
    v3d->emitClipRect = via3DEmitClipRect_H5;

    /* Synchronisation (H5 variants). */
    v3d->MarkSync = viaAccelMarkSync_H5;
    v3d->WaitMarker = viaAccelWaitMarker_H5;

    /* Reset both 256-entry tables before filling them. */
    for (idx = 0; idx < 256; ++idx) {
        viaOperatorModes[idx].supported = FALSE;
        via3DFormats[idx].pictFormat = 0x00;
    }

    /* Composite operators: columns 1..4 are col0, col1, al0, al1. */
    for (idx = 0; idx < VIA_NUM_3D_OPCODES; ++idx) {
        ViaCompositeOperator *entry = &viaOperatorModes[viaOpCodes[idx][0]];

        entry->supported = TRUE;
        entry->col0 = viaOpCodes[idx][1];
        entry->col1 = viaOpCodes[idx][2];
        entry->al0 = viaOpCodes[idx][3];
        entry->al1 = viaOpCodes[idx][4];
    }

    /*
     * Picture formats.  Both dstFormat and texFormat are taken from
     * column 2; columns 3 and 4 are the destination/texture support flags.
     */
    for (idx = 0; idx < VIA_NUM_3D_FORMATS; ++idx) {
        CARD32 pict = viaFormats[idx][0];
        CARD32 slotIdx = VIA_FMT_HASH(pict);
        Via3DFormat *slot = &via3DFormats[slotIdx];

        slot->pictFormat = pict;
        slot->dstSupported = (viaFormats[idx][3] != 0x00);
        slot->texSupported = (viaFormats[idx][4] != 0x00);
        slot->dstFormat = viaFormats[idx][2];
        slot->texFormat = viaFormats[idx][2];
    }
}

/*
 * viaInit3DState_H6 - install the 3D state hooks for the H6 engine into
 * *v3d and (re)build the file-level lookup tables for composite operators
 * and picture formats.
 *
 * All hooks are the H5 implementations except MarkSync and WaitMarker,
 * which use the H6 variants.
 *
 * viaOperatorModes is indexed by the operator code in column 0 of
 * viaOpCodes; via3DFormats is indexed by VIA_FMT_HASH() of the picture
 * format in column 0 of viaFormats.  A later table row that hashes to an
 * occupied slot silently overwrites it.
 */
void
viaInit3DState_H6(Via3DState * v3d)
{
    int idx;

    /* State setters. */
    v3d->setDestination = viaSet3DDestination_H5;
    v3d->setDrawing = viaSet3DDrawing;
    v3d->setFlags = viaSet3DFlags;
    v3d->setTexture = viaSet3DTexture_H5;
    v3d->setTexUVOffset = viaSet3DTexUVOffset;
    v3d->setTexBlendCol = viaSet3DTexBlendCol;
    v3d->setCompositeOperator = viaSet3DCompositeOperator;

    /* Capability queries. */
    v3d->opSupported = via3DOpSupported;
    v3d->dstSupported = via3DDstSupported;
    v3d->texSupported = via3DTexSupported;

    /* Command emitters. */
    v3d->emitQuad = via3DEmitQuad_H5;
    v3d->emitState = via3DEmitState_H5;
    v3d->emitPixelShader = via3DEmitPixelShader_H5;
    v3d->emitClipRect = via3DEmitClipRect_H5;

    /* Synchronisation (H6 variants). */
    v3d->MarkSync = viaAccelMarkSync_H6;
    v3d->WaitMarker = viaAccelWaitMarker_H6;

    /* Reset both 256-entry tables before filling them. */
    for (idx = 0; idx < 256; ++idx) {
        viaOperatorModes[idx].supported = FALSE;
        via3DFormats[idx].pictFormat = 0x00;
    }

    /* Composite operators: columns 1..4 are col0, col1, al0, al1. */
    for (idx = 0; idx < VIA_NUM_3D_OPCODES; ++idx) {
        ViaCompositeOperator *entry = &viaOperatorModes[viaOpCodes[idx][0]];

        entry->supported = TRUE;
        entry->col0 = viaOpCodes[idx][1];
        entry->col1 = viaOpCodes[idx][2];
        entry->al0 = viaOpCodes[idx][3];
        entry->al1 = viaOpCodes[idx][4];
    }

    /*
     * Picture formats.  Both dstFormat and texFormat are taken from
     * column 2; columns 3 and 4 are the destination/texture support flags.
     */
    for (idx = 0; idx < VIA_NUM_3D_FORMATS; ++idx) {
        CARD32 pict = viaFormats[idx][0];
        CARD32 slotIdx = VIA_FMT_HASH(pict);
        Via3DFormat *slot = &via3DFormats[slotIdx];

        slot->pictFormat = pict;
        slot->dstSupported = (viaFormats[idx][3] != 0x00);
        slot->texSupported = (viaFormats[idx][4] != 0x00);
        slot->dstFormat = viaFormats[idx][2];
        slot->texFormat = viaFormats[idx][2];
    }
}

/*
 * viaAccel2DBlt - perform a 2D engine operation: either a solid fill of the
 * destination rectangle or a copy from source to destination.
 *
 * pScrn:  screen whose 2D context (pVia->td) is used.
 * src:    source description; src->color is the fill colour for a solid
 *         fill, otherwise x/y/offset/pitch/colorKey describe the copy source.
 * dst:    destination rectangle, offset, pitch, bpp and colour key.
 * config: selects solid fill vs. copy and which colour keys are applied.
 *
 * Returns without emitting anything if the 2D mode cannot be set for
 * dst->bpp.  Source and destination depth are not required to match, and
 * no plane mask is programmed.
 */
void viaAccel2DBlt(ScrnInfoPtr pScrn, via2DBltSrcPtr src, 
    via2DBltDstPtr dst, via2DBltConfigPtr config)
{
    VIAPtr pVia = VIAPTR(pScrn);
    ViaTwodContext *td = &pVia->td;
    
    RING_VARS;

    if(!td->setModeHelper(dst->bpp, td)) {
        return;
    }

    /*
     * The raster operation lives in the top byte of the command word.  The
     * constants are unsigned so that shifting into bit 31 is well defined.
     */
    if(config->solidfill) {
        /* ROP 0xF0: result = pattern (fixed colour). */
        td->cmd = VIA_GEC_BLT | VIA_GEC_FIXCOLOR_PAT | (0xF0u << 24);
        td->solidHelper(cb, dst->x, dst->y, dst->w, dst->h, dst->offset, 
            dst->pitch, td->mode, src->color, td->cmd);
    } else {
        /* ROP 0xCC: result = source. */
        td->cmd = VIA_GEC_BLT | (0xCCu << 24);
        td->transparentHelper(td, cb, config->dstey? dst->colorKey: 0,
            config->srckey? src->colorKey: 0, TRUE);
        td->copyHelper(cb, src->x, src->y, dst->x, dst->y, dst->w, dst->h, src->offset,
            dst->offset, td->mode, src->pitch,  dst->pitch, td->cmd);
    }
        
    ADVANCE_RING;
    return;
}

void
viaAccelTexture3DBlt(ScrnInfoPtr pScrn, viaTexture3DBltSrcPtr src, viaTexture3DBltDstPtr dst,
                    viaTexture3DBltRotationPtr rotation, RegionPtr clip_region)
{
    VIAPtr pVia = VIAPTR(pScrn);
    Via3DState *v3d = &pVia->v3d;
    /* transform : src --> dst ; inverse: dst --> src */
    PictTransform transform, invTrans;
    PictTransform rrTransform, rrInvTrans;

    RegionPtr fake_clip_region = NullRegion;
    BoxRec dst_box;
    BoxPtr pbox;
    int nbox;
    int syncMarker = -1;
    RING_VARS;
    v3d->forceUpload = viaCheckUpload(pScrn, v3d);

    /* if the clip_region is not used, fake a clip_region parameter */
    if(clip_region == NULL)
    {
        dst_box.x1 = dst->x;
        dst_box.y1 = dst->y;
        dst_box.x2 = dst->x + dst->w;
        dst_box.y2 = dst->y + dst->h;
        
        fake_clip_region = REGION_CREATE(pScrn->pScreen, &dst_box, 1);
        clip_region = fake_clip_region;
    }

    viaTransformCompute(src->x, src->y, src->w, src->h, 
                                         dst->x, dst->y, dst->w, dst->h, 
                                         rotation->rotate, (rotation->width+1), 
                                         (rotation->height+1), 
                                         &transform, &invTrans, 
                                         &rrTransform, &rrInvTrans);
    
    /* start of program 3D api for seting 3D Pipeline */
    
    /* the 3D api : step 1 setDestination */
    v3d->setDestination(v3d, dst->offset, dst->pitch, dst->format);
    /* the 3D api : step 2 setDrawing */
    v3d->setDrawing(v3d, 0x0c, 0xFFFFFFFF, 0x000000FF, 0x00);
    /* the 3D api : step 3 setFlags */
    v3d->setFlags(v3d, 1, TRUE, TRUE, FALSE);

    /* the 3D api : step 4 setTexture */
    v3d->setTexture(v3d, 0, src->offset, src->pitch, TRUE,
                            src->surfwidth, src->surfheight, src->format, via_single, via_single,
                            via_src, (src->memLoc == LOC_SF), &invTrans, src->filter);
    if(src->format == PIXMAN_yv12) {
        v3d->setTexUVOffset(v3d, 0, src->uoffset, src->voffset);
    }
    pbox = REGION_RECTS(clip_region);
    nbox = REGION_NUM_RECTS(clip_region);
    
    while(nbox--)
    {
        dst_box = *pbox;

        if(rotation->rotate != RR_Rotate_0) {
            /* xorg version 1.60 */
#if XORG_VERSION_CURRENT  >= (((1) * 10000000) + ((6) * 100000) + ((0) * 1000) + 0)
            pixman_transform_bounds(&rrTransform, &dst_box);
#else
            PictureTransformBounds(&dst_box, &rrTransform);
#endif
        }

        if(!H2_UMA_CHIPID) {
            Command_CRSync_2D3D(&pVia->cb,(unsigned int)pVia->ChipId, FLAG_WAIT_3D_IDLE);
        }

        /* the 3D api : step 5 emitState */
        v3d->emitState(v3d, &pVia->cb, v3d->forceUpload);

        /* the 3D api : step 6 emitPixelShader */
        v3d->emitPixelShader(v3d, &pVia->cb, src->format, 0, dst->format);

        /* the 3D api : step 7 emitClipRect */
        if(rotation->rotate != RR_Rotate_0)
        {
            dst_box.x1 = dst->x;
            dst_box.y1 = dst->y;
            dst_box.x2 = dst->x + dst->w;
            dst_box.y2 = dst->y + dst->h;

            /* X.Org server >= 1.6.0 uses pixman_transform_bounds; older servers use PictureTransformBounds */
#if XORG_VERSION_CURRENT  >= (((1) * 10000000) + ((6) * 100000) + ((0) * 1000) + 0)
            pixman_transform_bounds(&rrTransform, &dst_box);
#else
            PictureTransformBounds(&dst_box, &rrTransform);
#endif

            v3d->emitClipRect(v3d, &pVia->cb, dst_box.x1, dst_box.y1, 
            (dst_box.x2 -dst_box.x1), 
            (dst_box.y2 -dst_box.y1));
        }
        else
            v3d->emitClipRect(v3d, &pVia->cb, dst->x, dst->y, dst->w, dst->h);

        if(H2_UMA_CHIPID) {
            if(syncMarker >= 0)
                v3d->WaitMarker(pScrn->pScreen, syncMarker);
        }

        /* the 3D api : step 8 emitQuad */
        v3d->emitQuad(v3d, &pVia->cb, dst_box.x1, dst_box.y1, dst_box.x1, dst_box.y1, 
        0, 0, (dst_box.x2 -dst_box.x1), (dst_box.y2 -dst_box.y1));

        pbox++;

        if(H2_UMA_CHIPID) {
            syncMarker = v3d->MarkSync(pScrn->pScreen);
        }
    }

    if(H2_UMA_CHIPID) {
        if(syncMarker >= 0)
            v3d->WaitMarker(pScrn->pScreen, syncMarker);
    }

    if(!H2_UMA_CHIPID) {
        Command_CRSync_2D3D(&pVia->cb,(unsigned int)pVia->ChipId, FLAG_WAIT_3D_IDLE);
    }

    if(fake_clip_region != NullRegion)
        REGION_DESTROY(pScrn->pScreen, fake_clip_region);

    ADVANCE_RING;
    return;
}
