NGLE

This page aims to document the pieces needed for hardware acceleration on the NGLE family of graphics cards, which the vendor decided to keep obscure.

Copy area

ngledoblt.S comes from the X11 support patch for MkLinux on PA-RISC. It is available in assembler form only, so here is an annotated translation:

.import ngleScreenPrivIndex,data

; ngleDepth8_CopyAreaFromToScreen
;
; Programs the NGLE register block for one screen-to-screen rectangle copy.
;
; Inputs (as annotated at the spill/load sites below):
;   arg0 = pSrcDraw, arg1 = pDstDraw, arg2 = pptSrc->x, arg3 = pptSrc->y
;   -0x74(sp) = pbox (arg4), -0x78(sp) = alu (arg5), -0x7c(sp) = planeMask (arg6)
;   (stack offsets are relative to sp after the 0x40-byte frame is allocated)
;
; Frame locals used below:
;   -0x38(sp) pScreenPriv          -0x34(sp) pDregs
;   -0x30(sp)/-0x2e(sp) halfword pair that is reloaded as one 32-bit word
;   -0x2c(sp) val1 (byte)          -0x28(sp) address of REG_15b0
;
; Register offsets from pDregs as used here:
;   REG_7 = 0x804, REG_24 = 0x808, REG_25 = 0xb00,
;   REG_10 = 0x18000, REG_13 = 0x18018, REG_14 = 0x1801c, REG_15b0 = 0x200000
;
; No value is returned; the routine only stores to the register block.
ngleDepth8_CopyAreaFromToScreen
    ldo 0x40(sp),sp ;allocate a 0x40-byte frame
    stw arg0,-0x64(sp) ;pSrcDraw
    stw arg1,-0x68(sp) ;pDstDraw
    stw arg2,-0x6c(sp) ;pptSrc->x
    stw arg3,-0x70(sp) ;pptSrc->y
    ldw -0x68(sp),r1  ;pDstDraw
    ldw 0x10(r1),r31  ;->pScreen
    ldw 0x19c(r31),t4 ;->devPrivates
    ldil L%ngleScreenPrivIndex,t3 ;upper part of &ngleScreenPrivIndex
    ldw  R%ngleScreenPrivIndex(t3),t3 ;t3 = ngleScreenPrivIndex
    ldwx,s t3(t4),t2 ;devPrivates[ngleScreenPrivIndex].ptr => NgleScreenPrivPtr pScreenPriv
    stw t2,-0x38(sp) ;save pScreenPriv
    ldw -0x38(sp),t1 ;pScreenPriv
    ldw 0x14(t1),r1  ;->pDregs
    stw r1,-0x34(sp) ;save pDregs
    ldw -0x34(sp),r31 ;pDregs
    zdepi 1,10,1,t4 ;t4 = 1 << (31-10) = 0x200000
    add t4,r31,t3 ;pDregs+0x200000 => REG_15b0
    stw t3,-0x28(sp) ;save address of REG_15b0
    ldw -0x28(sp),t2 ;t2 = address of REG_15b0
;SETUP_HW(): poll the byte at REG_15b0 until it reads zero.
;When the first read is zero the byte is read a second time, and that
;second value decides whether the loop exits.
;NOTE(review): presumably a hardware-busy flag - confirm against NGLE docs.
$ngle2:
    ldb 0(t2),t1 ;read1(REG_15b0) => val1
    extrs t1,31,8,r1 ;sign-extend the low byte
    stb r1,-0x2c(sp) ;store val1
    ldb -0x2c(sp),r31 ;reload val1
    extrs r31,31,8,t4 ;sign-extend the low byte
    comibf,=,n 0,t4,$ngle1 ;if (val1) goto ngle1
    ldw -0x28(sp),t3 ;address of REG_15b0
    ldb 0(t3),t2 ;read1(REG_15b0) => val1
    extrs t2,31,8,t1 ;sign-extend the low byte
    stb t1,-0x2c(sp) ;store val1 (second read)
$ngle1:
    ldb -0x2c(sp),r1 ;reload val1
    extrs r1,31,8,r31 ;sign-extend the low byte
    comibf,=,n 0,r31,$ngle2 ;if (val1) goto ngle2
;Select the REG_10 value by device ID.
    ldw -0x28(sp),t2 ;t2 is overwritten by the ldil two instructions below
    ldw -0x38(sp),t4 ;pScreenPriv
    ldw 0x10(t4),t3  ;->deviceID
    .word 0x22B88578    /*  ldil 0x2bcb0000,t2 */
    ldo 0x15a(t2),t1 ;t1 = 0x2bcb015a (S9000_ID_HCRX)
    combf,=,n t3,t1,$ngle3 ;if (deviceID != S9000_ID_HCRX) goto ngle3
    ldw -0x34(sp),r1 ;pDregs
    .word 0x23e10274    /* ldil 0x13a02000,r31 */
    .word 0x282c0000    /* addil 0x18000,r1 */
    stw r31,0(r1) ;write4(0x13a02000,REG_10)
    b $ngle4
    ldw -0x78(sp),t2 ;arg5 = alu
$ngle3:
    ldw -0x34(sp),t4 ;pDregs
    .word 0x22802274    /* ldil 0x13a01000,t3 */
    .word 0x2a6c0000    /* addil 0x18000,t4 */
    stw t3,0(r1) ;write4(0x13a01000,REG_10)
    ldw -0x78(sp),t2 ;arg5 = alu (same load as on the HCRX path)
$ngle4:
    .word 0x23e00460    /* ldil 0x23000000,r31 */
    zdep t2,23,4,r1 ;(alu << (31-23)) & 0x00000f00
    or r1,r31,t4    ;| 0x23000000 => val2
    ldw -0x34(sp),t3 ;pDregs
    .word 0x2a8c0000    /* addil 0x18000,t3 */
    stw t4,0x1c(r1) ;write4(val2,REG_14)
    ldw -0x34(sp),t2 ;pDregs
    ldw -0x7c(sp),t1 ;arg6 = planeMask
    .word 0x2aac0000    /* addil 0x18000,t2 */
    stw t1,0x18(r1) ;write4(planeMask,REG_13)
;Source origin: x and y are stored as adjacent halfwords, then reloaded
;as one word, so each value is truncated to 16 bits before packing.
    ldw -0x6c(sp),r1 ;pptSrc->x
    sth r1,-0x30(sp) ;x
    ldw -0x70(sp),r31 ;pptSrc->y
    sth r31,-0x2e(sp) ;y
    ldw -0x34(sp),t4 ;pDregs
    ldw -0x30(sp),t3 ;packed halfword pair
    stw t3,0x808(t4) ;write4((x << 16) | y,REG_24)
;Rectangle size taken from pbox.
    ldw -0x74(sp),t2 ;arg4 = pbox
    ldh 4(t2),t1     ;->x2
    extrs t1,31,16,r1 ;sign-extend 16-bit x2
    ldw -0x74(sp),r31 ;pbox
    ldh 0(r31),t4     ;->x1
    extrs t4,31,16,t3 ;sign-extend 16-bit x1
    sub r1,t3,t2 ;x2 - x1
    sth t2,-0x30(sp) ;x2 - x1 => w
    ldw -0x74(sp),t1 ;pbox
    ldh 6(t1),r1     ;->y2
    extrs r1,31,16,r31 ;sign-extend 16-bit y2
    ldw -0x74(sp),t4 ;pbox
    ldh 2(t4),t3     ;->y1
    extrs t3,31,16,t2 ;sign-extend 16-bit y1
    sub r31,t2,t1 ;y2 - y1
    sth t1,-0x2e(sp) ;y2 - y1 => h
    ldw -0x34(sp),r1 ;pDregs
    ldw -0x30(sp),r31 ;packed halfword pair
    stw r31,0x804(r1) ;write4((w << 16) | h,REG_7)
;Destination origin = (pbox->x1, pbox->y1); REG_25 is the last register written.
    ldw -0x74(sp),t4 ;pbox
    ldh 0(t4),t3     ;->x1
    extrs t3,31,16,t2 ;sign-extend 16-bit x1
    sth t2,-0x30(sp) ;x1
    ldw -0x74(sp),t1 ;pbox
    ldh 2(t1),r1     ;->y1
    extrs r1,31,16,r31 ;sign-extend 16-bit y1
    sth r31,-0x2e(sp) ;y1
    ldw -0x34(sp),t4 ;pDregs
    ldw -0x30(sp),t3 ;packed halfword pair
    stw t3,0xb00(t4) ;write4((x1 << 16) | y1,REG_25)
    bv r0(rp) ;return to caller
    ldo -0x40(sp),sp ;release the 0x40-byte frame

ngleDepth24_CopyAreaFromToScreen differs only in the reg10 initialisation: write4(-0x445f6000,reg10), i.e. 0xbba0a000 as an unsigned 32-bit value, which resembles the SETUP_FB() routine.

ngledoblt.o.8.07 is the original binary blob implementing the bit-blit function, as provided by the vendor to the X11 project; it matches ngledoblt.S. The 8.07 suffix denotes the maximum supported STI ROM revision:

$CODE$:00000000 # Input MD5   : 195E3A8D90B0DFE5D09F3E05E2B4D9B7
$CODE$:00000000
$CODE$:00000000 ngleDepth8_CopyAreaFromToScreen:
$CODE$:00000000
$CODE$:00000000 planeMask       =  0x3C
$CODE$:00000000 alu             =  0x38
$CODE$:00000000 pbox            =  0x34
$CODE$:00000000 REG_15b0        = -0x3C
$CODE$:00000000 val1            = -0x43
$CODE$:00000000 x               = -0x44
$CODE$:00000000 y               = -0x46
$CODE$:00000000 pDregs          = -0x48
$CODE$:00000000 pScreenPriv     = -0x4C
$CODE$:00000000
$CODE$:00000000                 ldo             0x80(%sp), %sp  # allocate a 0x80-byte frame
$CODE$:00000004                 ldw             0x10(%r25), %r1  # presumably pDstDraw->pScreen (same 0x10 offset as in ngledoblt.S)
$CODE$:00000008                 ldw             0x19C(%r1), %r31  # ->devPrivates (same 0x19C offset as in ngledoblt.S)
$CODE$:0000000C                 addil           0, %dp, %r1  # NOTE(review): looks like an unrelocated dp-relative reference to ngleScreenPrivIndex - confirm
$CODE$:00000010                 ldw             0(%r1), %r19  # r19 = index into devPrivates
$CODE$:00000014                 ldw,s           %r19(%r31), %r20  # devPrivates[index] => pScreenPriv
$CODE$:00000018                 stw             %r20, -(0x80+pScreenPriv)(%sp)
$CODE$:0000001C                 ldw             -(0x80+pScreenPriv)(%sp), %r21
$CODE$:00000020                 ldw             0x14(%r21), %r22  # pScreenPriv->pDregs
$CODE$:00000024                 stw             %r22, -(0x80+pDregs)(%sp)
$CODE$:00000028                 ldw             -(0x80+pDregs)(%sp), %r1
$CODE$:0000002C                 addil           0x200000, %r1, %r1  # pDregs + 0x200000 => REG_15b0
$CODE$:00000030                 stw             %r1, -(0x80+REG_15b0)(%sp)
$CODE$:00000034                 ldw             -(0x80+REG_15b0)(%sp), %r31
$CODE$:00000034
$CODE$:00000038
$CODE$:00000038 ngle2:                                  # CODE XREF: ngleDepth8_CopyAreaFromToScreen+58
$CODE$:00000038                 ldb             0(%r31), %r19  # val1 = byte read from REG_15b0
$CODE$:0000003C                 stb             %r19, -(0x80+val1)(%sp)
$CODE$:00000040                 ldb             -(0x80+val1)(%sp), %r20
$CODE$:00000044                 cmpib,<>,n      0, %r20, ngle1  # if (val1) goto ngle1
$CODE$:00000044
$CODE$:00000048                 ldw             -(0x80+REG_15b0)(%sp), %r21
$CODE$:0000004C                 ldb             0(%r21), %r22  # val1 was zero: read REG_15b0 a second time
$CODE$:00000050                 stb             %r22, -(0x80+val1)(%sp)
$CODE$:00000050
$CODE$:00000054
$CODE$:00000054 ngle1:                                  # CODE XREF: ngleDepth8_CopyAreaFromToScreen+44
$CODE$:00000054                 ldb             -(0x80+val1)(%sp), %r1
$CODE$:00000058                 cmpib,<>,n      0, %r1, ngle2  # if (val1) goto ngle2: loop until the byte reads zero
$CODE$:00000058
$CODE$:0000005C                 ldw             -(0x80+REG_15b0)(%sp), %r31  # r31 is overwritten by the next instruction
$CODE$:00000060                 ldw             -(0x80+pDregs)(%sp), %r31
$CODE$:00000064                 ldil            0x13A01000, %r19  # no deviceID test precedes this in the listing: 0x13A01000 is written unconditionally
$CODE$:00000068                 addil           0x18000, %r31, %r1
$CODE$:0000006C                 stw             %r19, 0(%r1)  # write4(0x13A01000, pDregs+0x18000)
$CODE$:00000070                 ldw             -(0x80+alu)(%sp), %r20
$CODE$:00000074                 ldil            0x23000000, %r1
$CODE$:00000078                 depw,z          %r20, 23, 4, %r22  # (alu << 8) & 0x00000f00
$CODE$:0000007C                 or              %r22, %r1, %r31  # | 0x23000000
$CODE$:00000080                 ldw             -(0x80+pDregs)(%sp), %r19
$CODE$:00000084                 addil           0x18000, %r19, %r1
$CODE$:00000088                 stw             %r31, 0x1C(%r1)  # write4(val, pDregs+0x1801C)
$CODE$:0000008C                 ldw             -(0x80+planeMask)(%sp), %r20
$CODE$:00000090                 ldw             -(0x80+pDregs)(%sp), %r21
$CODE$:00000094                 addil           0x18000, %r21, %r1
$CODE$:00000098                 stw             %r20, 0x18(%r1)  # write4(planeMask, pDregs+0x18018)
$CODE$:0000009C                 sth             %r24, -(0x80+x)(%sp)  # x halfword taken directly from r24
$CODE$:000000A0                 sth             %r23, -(0x80+y)(%sp)  # y halfword taken directly from r23
$CODE$:000000A4                 ldo             -(0x80+x)(%sp), %r22  # r22 = address of the x/y halfword pair; reused for all three packed reads
$CODE$:000000A8                 ldw             -(0x80+pDregs)(%sp), %r1
$CODE$:000000AC                 ldo             0x808(%r1), %r31
$CODE$:000000B0                 ldw             0(%r22), %r19  # reload the pair as one word
$CODE$:000000B4                 stw             %r19, 0(%r31)  # write4(packed x/y, pDregs+0x808)
$CODE$:000000B8                 ldw             -(0x80+pbox)(%sp), %r20
$CODE$:000000BC                 ldh             4(%r20), %r21  # pbox halfword at offset 4
$CODE$:000000C0                 extrw           %r21, 31, 16, %r1
$CODE$:000000C4                 ldw             -(0x80+pbox)(%sp), %r31
$CODE$:000000C8                 ldh             0(%r31), %r19  # pbox halfword at offset 0
$CODE$:000000CC                 extrw           %r19, 31, 16, %r20
$CODE$:000000D0                 sub             %r1, %r20, %r21  # offset 4 minus offset 0
$CODE$:000000D4                 sth             %r21, -(0x80+x)(%sp)
$CODE$:000000D8                 ldw             -(0x80+pbox)(%sp), %r1
$CODE$:000000DC                 ldh             6(%r1), %r31  # pbox halfword at offset 6
$CODE$:000000E0                 extrw           %r31, 31, 16, %r19
$CODE$:000000E4                 ldw             -(0x80+pbox)(%sp), %r20
$CODE$:000000E8                 ldh             2(%r20), %r21  # pbox halfword at offset 2
$CODE$:000000EC                 extrw           %r21, 31, 16, %r1
$CODE$:000000F0                 sub             %r19, %r1, %r31  # offset 6 minus offset 2
$CODE$:000000F4                 sth             %r31, -(0x80+y)(%sp)
$CODE$:000000F8                 ldw             -(0x80+pDregs)(%sp), %r19
$CODE$:000000FC                 ldo             0x804(%r19), %r20
$CODE$:00000100                 ldw             0(%r22), %r21  # reload the pair as one word
$CODE$:00000104                 stw             %r21, 0(%r20)  # write4(packed size, pDregs+0x804)
$CODE$:00000108                 ldw             -(0x80+pbox)(%sp), %r1
$CODE$:0000010C                 ldh             0(%r1), %r31  # pbox halfword at offset 0
$CODE$:00000110                 sth             %r31, -(0x80+x)(%sp)
$CODE$:00000114                 ldw             -(0x80+pbox)(%sp), %r19
$CODE$:00000118                 ldh             2(%r19), %r20  # pbox halfword at offset 2
$CODE$:0000011C                 sth             %r20, -(0x80+y)(%sp)
$CODE$:00000120                 ldw             -(0x80+pDregs)(%sp), %r21
$CODE$:00000124                 ldo             0xB00(%r21), %r1
$CODE$:00000128                 ldw             0(%r22), %r31  # reload the pair as one word
$CODE$:0000012C                 stw             %r31, 0(%r1)  # write4(packed origin, pDregs+0xB00)
$CODE$:00000130                 bv              %r0(%rp)  # return to caller
$CODE$:00000134                 ldo             -0x80(%sp), %sp  # release the 0x80-byte frame

A reconstructed, properly working ngledoblt.c for use with X11R6.3 and the vendor cfb X server:

/*
 * Pack two coordinates into one 32-bit register value: `hi` occupies bits
 * 31..16 and `lo` bits 15..0.  Each component is truncated to 16 bits first,
 * so a negative `lo` cannot sign-extend into the upper half and a negative
 * `hi` is never left-shifted as a signed value.
 */
static unsigned long nglePackHalves(int hi, int lo)
{
        return (((unsigned long) hi & 0xffffUL) << 16) |
                ((unsigned long) lo & 0xffffUL);
}

/*
 * ngleDepth8_CopyAreaFromToScreen -- program the NGLE registers for one
 * screen-to-screen rectangle copy.
 *
 *   pSrcDraw   source drawable (not used by this routine)
 *   pDstDraw   destination drawable; its pScreen selects the NGLE private data
 *   srcx,srcy  source origin, written to reg24
 *   pbox       destination box; (x2 - x1, y2 - y1) is written to reg7 and
 *              (x1, y1) to reg25
 *   alu        raster operation; only its low four bits are used
 *   planeMask  written unchanged to reg13
 *
 * The register write order is kept exactly as in the original, with reg25
 * (destination) written last.
 */
void ngleDepth8_CopyAreaFromToScreen(DrawablePtr pSrcDraw, DrawablePtr pDstDraw,
                short srcx, short srcy, BoxPtr pbox, int alu, unsigned long planeMask)
{
        NgleScreenPrivPtr   pScreenPriv;
        NgleHdwPtr          pDregs;

        pScreenPriv = NGLE_SCREEN_PRIV(pDstDraw->pScreen);
        pDregs = (NgleHdwPtr) pScreenPriv->pDregs;

        SETUP_HW(pDregs);

        /* HCRX takes a different reg10 value from the other devices. */
        if (pScreenPriv->deviceID != S9000_ID_HCRX)
             pDregs->reg10 = 0x13a01000;
        else
             pDregs->reg10 = 0x13a02000;

        /* Raster op lives in bits 11..8; same value as ((alu << 8) & 0xf00). */
        pDregs->reg14.all = (((unsigned long) alu & 0xfUL) << 8) | 0x23000000UL;
        pDregs->reg13 = planeMask;

        /* Packed (high half, low half) values; each half is masked to 16 bits. */
        pDregs->reg24.all = nglePackHalves(srcx, srcy);               /* source origin */
        pDregs->reg7.all = nglePackHalves(pbox->x2 - pbox->x1,
                                          pbox->y2 - pbox->y1);       /* width, height */
        pDregs->reg25.all = nglePackHalves(pbox->x1, pbox->y1);       /* destination */
}

Rough x11perf results under MkLinux, comparing the software path (soft) with the hardware-accelerated path (hw):

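| Type | scroll10 | scroll100 | scroll500 | copywinwin10 | copywinwin100 | copywinwin500 |
|------|----------|-----------|-----------|--------------|---------------|---------------|
| soft | 12300    | 457       | 21        | 15700        | 432           | 20            |
| hw   | 26700    | 3380      | 157       | 26800        | 3380          | 158           |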