Subversion Repositories pentevo

Rev

Rev 716 | Blame | Last modification | View Log | Download | RSS feed

  1. #include "std.h"
  2.  
  3. #include "emul.h"
  4. #include "vars.h"
  5. #include "dxrend.h"
  6. #include "dxrcopy.h"
  7. #include "dxr_512.h"
  8. #include "dxr_4bpp.h"
  9. #include "dxr_prof.h"
  10. #include "dxr_atm.h"
  11. #include "draw.h"
  12. #include "util.h"
  13.  
  14. void rend_small(unsigned char *dst, unsigned pitch)
  15. {
  16.     if (temp.obpp == 8)  { rend_copy8 (dst, pitch); return; }
  17.     if (temp.obpp == 16) { rend_copy16(dst, pitch); return; }
  18.     if (temp.obpp == 32) { rend_copy32(dst, pitch); return; }
  19. }
  20.  
  21. void __fastcall render_small(unsigned char *dst, unsigned pitch)
  22. {
  23.    if (conf.noflic)
  24.    {
  25.       if (temp.obpp == 8)  { rend_copy8_nf (dst, pitch); }
  26.       if (temp.obpp == 16) { rend_copy16_nf(dst, pitch); }
  27.       if (temp.obpp == 32) { rend_copy32_nf(dst, pitch); }
  28.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  29.       return;
  30.    }
  31.  
  32.    if (comp.pEFF7 & EFF7_4BPP)
  33.    {
  34.        rend_p4bpp_small(dst, pitch);
  35.        return;
  36.    }
  37.  
  38.    if (conf.mem_model == MM_ATM450)
  39.    {
  40.        rend_atm_1_small(dst, pitch);
  41.        return;
  42.    }
  43.  
  44.    if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3)
  45.    {
  46.        rend_atm_2_small(dst, pitch);
  47.        return;
  48.    }
  49.    rend_small(dst, pitch);
  50. }
  51.  
  52. void rend_dbl(unsigned char *dst, unsigned pitch)
  53. {
  54.    if (temp.oy > temp.scy && conf.fast_sl)
  55.        pitch *= 2;
  56.  
  57.    if (conf.noflic)
  58.    {
  59.       if (temp.obpp == 8)
  60.       {
  61.           if (conf.fast_sl)
  62.               rend_copy8d1_nf (dst, pitch);
  63.           else
  64.               rend_copy8d_nf (dst, pitch);
  65.       }
  66.       else if (temp.obpp == 16)
  67.       {
  68.           if (conf.fast_sl)
  69.               rend_copy16d1_nf(dst, pitch);
  70.           else
  71.               rend_copy16d_nf(dst, pitch);
  72.       }
  73.       else if (temp.obpp == 32)
  74.       {
  75.           if (conf.fast_sl)
  76.               rend_copy32d1_nf(dst, pitch);
  77.           else
  78.               rend_copy32d_nf(dst, pitch);
  79.       }
  80.  
  81.       memcpy(rbuf_s, rbuf, temp.scy * temp.scx / 4);
  82.    }
  83.    else
  84.    {
  85.       if (temp.obpp == 8)
  86.       {
  87.           if (conf.fast_sl)
  88.               rend_copy8d1 (dst, pitch);
  89.           else
  90.               rend_copy8d (dst, pitch);
  91.           return;
  92.       }
  93.       if (temp.obpp == 16)
  94.       {
  95.           if (conf.fast_sl)
  96.               rend_copy16d1(dst, pitch);
  97.           else
  98.               rend_copy16d(dst, pitch);
  99.           return;
  100.       }
  101.       if (temp.obpp == 32)
  102.       {
  103.           if (conf.fast_sl)
  104.               rend_copy32d1(dst, pitch);
  105.           else
  106.               rend_copy32d(dst, pitch);
  107.           return;
  108.       }
  109.    }
  110. }
  111.  
  112. void __fastcall render_dbl(unsigned char *dst, unsigned pitch)
  113. {
  114.    #ifdef MOD_VID_VD
  115.    if ((comp.pVD & 8) && temp.obpp == 8)
  116.    {
  117.        rend_vd8dbl(dst, pitch);
  118.        return;
  119.    }
  120.    #endif
  121.  
  122.    // todo: add ini option to show zx-screen with palette or with MC
  123.    if (comp.pEFF7 & EFF7_512)
  124.    {
  125.        rend_512(dst, pitch);
  126.        return;
  127.    }
  128.    if (comp.pEFF7 & EFF7_4BPP)
  129.    {
  130.        rend_p4bpp(dst, pitch);
  131.        return;
  132.    }
  133.    if ((comp.pDFFD & 0x80) && conf.mem_model == MM_PROFI)
  134.    {
  135.        rend_profi(dst, pitch);
  136.        return;
  137.    }
  138.    if (conf.mem_model == MM_ATM450)
  139.    {
  140.        rend_atm_1(dst, pitch);
  141.        return;
  142.    }
  143.    if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3)
  144.    {
  145.        rend_atm_2(dst, pitch);
  146.        return;
  147.    }
  148.  
  149.    rend_dbl(dst, pitch);
  150. }
  151.  
  152. void __fastcall render_3x(unsigned char *dst, unsigned pitch)
  153. {
  154.    if (conf.noflic) {
  155.       if (temp.obpp == 8)  rend_copy8t_nf (dst, pitch);
  156.       if (temp.obpp == 16) rend_copy16t_nf(dst, pitch);
  157.       if (temp.obpp == 32) rend_copy32t_nf(dst, pitch);
  158.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  159.    }
  160.    else
  161.    {
  162.       if (temp.obpp == 8)  { rend_copy8t (dst, pitch); return; }
  163.       if (temp.obpp == 16) { rend_copy16t(dst, pitch); return; }
  164.       if (temp.obpp == 32) { rend_copy32t(dst, pitch); return; }
  165.    }
  166. }
  167.  
  168. void __fastcall render_quad(unsigned char *dst, unsigned pitch)
  169. {
  170.    if (conf.noflic) {
  171.       if (temp.obpp == 8)  rend_copy8q_nf (dst, pitch);
  172.       if (temp.obpp == 16) rend_copy16q_nf(dst, pitch);
  173.       if (temp.obpp == 32) rend_copy32q_nf(dst, pitch);
  174.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  175.    } else {
  176.       if (temp.obpp == 8)  { rend_copy8q (dst, pitch); return; }
  177.       if (temp.obpp == 16) { rend_copy16q(dst, pitch); return; }
  178.       if (temp.obpp == 32) { rend_copy32q(dst, pitch); return; }
  179.    }
  180. }
  181.  
  182.  
  183. void __fastcall render_scale(unsigned char *dst, unsigned pitch)
  184. {
  185.    unsigned char *src = rbuf;
  186.    unsigned dx = temp.scx / 4;
  187.    unsigned char buf[MAX_WIDTH*2];
  188.    unsigned x; //Alone Coder 0.36.7
  189.    for (unsigned y = 0; y < temp.scy-1; y++)
  190.    {
  191.       for (x = 0; x < dx; x += 2)
  192.       {
  193.          unsigned xx = (t.dbl[src[x]] << 16) + t.dbl[src[x+2]];
  194.          unsigned yy = (t.dbl[src[x+dx]] << 16) + t.dbl[src[x+dx+2]];
  195.          unsigned x1 = xx | (yy & ((xx>>1) | (xx<<1)));
  196.          unsigned *tab0 = t.sctab8[0] + (src[x+1] << 4);
  197.          *(unsigned*)(dst+x*8+ 0)   = tab0[(x1>>28) & 0x0F];
  198.          *(unsigned*)(dst+x*8+ 4)   = tab0[(x1>>24) & 0x0F];
  199.          *(unsigned*)(dst+x*8+ 8)   = tab0[(x1>>20) & 0x0F];
  200.          *(unsigned*)(dst+x*8+12)   = tab0[(x1>>16) & 0x0F];
  201.          unsigned *tab1 = t.sctab8[0] + src[x+3];
  202.          *(unsigned*)(dst+x*8+16)   = tab1[(x1>>12) & 0x0F];
  203.          *(unsigned*)(dst+x*8+20)   = tab1[(x1>> 8) & 0x0F];
  204.          *(unsigned*)(dst+x*8+24)   = tab1[(x1>> 4) & 0x0F];
  205.          *(unsigned*)(dst+x*8+28)   = tab1[(x1>> 0) & 0x0F];
  206.          x1 = yy | (xx & ((yy>>1) | (yy<<1)));
  207.          *(unsigned*)(buf+x*8+ 0)   = tab0[(x1>>28) & 0x0F];
  208.          *(unsigned*)(buf+x*8+ 4)   = tab0[(x1>>24) & 0x0F];
  209.          *(unsigned*)(buf+x*8+ 8)   = tab0[(x1>>20) & 0x0F];
  210.          *(unsigned*)(buf+x*8+12)   = tab0[(x1>>16) & 0x0F];
  211.          *(unsigned*)(buf+x*8+16)   = tab1[(x1>>12) & 0x0F];
  212.          *(unsigned*)(buf+x*8+20)   = tab1[(x1>> 8) & 0x0F];
  213.          *(unsigned*)(buf+x*8+24)   = tab1[(x1>> 4) & 0x0F];
  214.          *(unsigned*)(buf+x*8+28)   = tab1[(x1>> 0) & 0x0F];
  215.       }
  216.       dst += pitch;
  217.       for (x = 0; x < temp.ox; x += 4)
  218.           *(unsigned*)(dst+x) = *(unsigned*)(buf+x);
  219.       src += dx; dst += pitch;
  220.    }
  221. }
  222.  
  223. static u64 mask49 = 0x4949494949494949ULL;
  224. static u64 mask92 = 0x9292929292929292ULL;
  225.  
  226. static void /*__declspec(naked)*/ __fastcall _bil_line1(unsigned char *dst, unsigned char *src)
  227. {
  228.     for (unsigned i = 0; i < temp.scx; i += 2)
  229.     {
  230.        dst[i] = src[i];
  231.        dst[i+1] = ((src[i] + src[i+1]) >> 1);
  232.     }
  233. /*
  234.    __asm {
  235.  
  236.       push ebx
  237.       push edi
  238.       push ebp
  239.  
  240.       mov  ebp, [temp.scx]
  241.       xor  eax, eax
  242.       xor  ebx, ebx // ebx - prev. pixel
  243.       shr ebp,1
  244.  
  245. l1:
  246.       mov  al, [edx]
  247.       xadd eax, ebx
  248.       shr  eax, 1
  249.       mov  [ecx+1], bl
  250.       mov  [ecx], al
  251.       mov  al, [edx+1]
  252.       add  ecx, 4
  253.       xadd eax, ebx
  254.       add  edx, 2
  255.       shr  eax, 1
  256.       mov  [ecx-1], bl
  257.       dec  ebp
  258.       mov  [ecx-2], al
  259.       jnz l1
  260.  
  261.       pop ebp
  262.       pop edi
  263.       pop ebx
  264.       retn
  265.    }
  266. */
  267. }
  268.  
  269. static void /*__declspec(naked)*/ __fastcall _bil_line2(unsigned char *dst, unsigned char *s1)
  270. {
  271.       u32 *s = (u32 *)s1;
  272.       u32 *d = (u32 *)dst;
  273.  
  274.       for (unsigned j = 0; j < temp.ox/4; j++)
  275.       {
  276.           u32 a = s[j];
  277.           u32 b = s[j+2*MAX_WIDTH/4];
  278.           u32 x = a & b;
  279.           u32 y = (a ^ b) >> 1;
  280.           u32 z = a | b;
  281.           u32 n = x << 1;
  282.           u32 v1 = x ^ y;
  283.           v1 &= 0x49494949;
  284.           u32 v2 = z & n;
  285.           v2 |= x;
  286.           v2 &= 0x92929292;
  287.  
  288.           d[j] = v1 | v2;
  289.       }
  290.  
  291. /*
  292.    __asm {
  293.  
  294.       mov  eax, [temp.ox]
  295.       movq mm2, [mask49]
  296.       movq mm3, [mask92]
  297.       shr  eax, 3
  298.  
  299. m2:   movq  mm0, [edx]
  300.       movq  mm1, [edx+MAX_WIDTH*2]
  301.       movq  mm4, mm0
  302.       movq  mm5, mm0
  303.       pand  mm4, mm1    // mm4 = a & b
  304.       pxor  mm5, mm1    // mm5 = a ^ b
  305.       movq  mm6, mm0
  306.       psrlq mm5, 1      // mm5 = (a ^ b) >> 1
  307.       por   mm6, mm1    // mm6 = a | b
  308.       movq  mm7, mm4
  309.       pxor  mm5, mm4    // mm5 = (a & b) ^ ((a ^ b) >> 1)
  310.       psllq mm7, 1      // mm7 = (a & b) << 1
  311.       pand  mm5, mm2    // mm5 = 0x49494949 & ((a & b) ^ ((a ^ b) >> 1))
  312.       pand  mm7, mm6    // mm7 = (a|b) & ((a & b) << 1)
  313.       por   mm7, mm4    // mm7 = (a&b) | ((a|b)&((a&b)<<1))
  314.       add   ecx, 8
  315.       pand  mm7, mm3    // mm7 &= 0x92929292
  316.       add  edx, 8
  317.       por   mm7, mm5
  318.       dec  eax
  319.       movq [ecx-8], mm7
  320.       jnz  m2
  321.  
  322.       retn
  323.    }
  324. */
  325. }
  326.  
  327. void __fastcall render_bil(unsigned char *dst, unsigned pitch)
  328. {
  329.    render_small(snbuf, MAX_WIDTH);
  330.  
  331.    unsigned char *src = snbuf;
  332.    unsigned char ATTR_ALIGN(16) l1[MAX_WIDTH*4];
  333.    #define l2 (l1+MAX_WIDTH*2)
  334.    _bil_line1(l1, src); src += MAX_WIDTH;
  335.    memcpy(dst, l1, temp.ox);
  336.    dst += pitch;
  337.  
  338.    for (unsigned i = temp.scy/2-1; i; i--)
  339.    {
  340.       _bil_line1(l2, src); src += MAX_WIDTH;
  341.       _bil_line2(dst, l1); dst += pitch;
  342.       memcpy(dst, l2, temp.ox);
  343.       dst += pitch;
  344.  
  345.       _bil_line1(l1, src); src += MAX_WIDTH;
  346.       _bil_line2(dst, l1); dst += pitch;
  347.       memcpy(dst, l1, temp.ox);
  348.       dst += pitch;
  349.    }
  350.    _bil_line1(l2, src); src += MAX_WIDTH;
  351.    _bil_line2(dst, l1); dst += pitch;
  352.    memcpy(dst, l2, temp.ox);
  353.    dst += pitch;
  354.    memcpy(dst, l2, temp.ox);
  355.    #undef l2
  356.  
  357. //   _mm_empty();
  358. }
  359.  
  360. void __fastcall render_tv(unsigned char *dst, unsigned pitch)
  361. {
  362. // ripped from ccs and *highly* simplified and optimized
  363.  
  364.    unsigned char midbuf[MAX_WIDTH*2];
  365.    unsigned char line[MAX_WIDTH*2+4*2], line2[MAX_WIDTH*2];
  366.  
  367.    unsigned j; //Alone Coder 0.36.7
  368.    for (/*unsigned*/ j = 0; j < MAX_WIDTH/2; j++)
  369.       *(unsigned*)(midbuf+j*4) = WORD4(0,0x80,0,0x80);
  370.  
  371.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  372.  
  373.    for (unsigned i = temp.scy; i; i--) {
  374.       *(unsigned*)line = *(unsigned*)(line+4) = WORD4(0,0x80,0,0x80);
  375.  
  376.       if (conf.noflic) line16_nf(line+8, src, t.sctab16_nf[0]);
  377.       else line16(line+8, src, t.sctab16[0]);
  378.  
  379.       src += delta;
  380.  
  381.       for (j = 0; j < temp.scx; j++) {
  382.  
  383.          unsigned Y = line[j*2+8]*9+
  384.                       line[j*2-2+8]*4+
  385.                       line[j*2-4+8]*2+
  386.                       line[j*2-8+8];
  387. /*
  388.          unsigned U = line[j*2+8+1]*12 +
  389.                       line[j*2-2+8+1]*2+
  390.                       line[j*2-4+8+1]+
  391.                       line[j*2-8+8+1];
  392. */
  393.          line2[j*2] = Y>>4;
  394. //         line2[j*2+1] = U>>4;
  395.          line2[j*2+1] = line[j*2+9];
  396.       }
  397.       // there must be only fixed length fader buffer
  398.       for (j = 0; j < temp.scx/2; j++) {
  399.          *(unsigned*)(midbuf+j*4) = *(unsigned*)(dst + j*4) =
  400.          ((*(unsigned*)(midbuf+j*4) & 0xFEFEFEFE)/2 + (*(unsigned*)(line2+j*4) & 0xFEFEFEFE)/2);
  401.       }
  402.       dst += pitch;
  403.    }
  404.    if (conf.noflic) memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  405. }
  406.  
  407.