Subversion Repositories pentevo

Rev

Rev 716 | Blame | Last modification | View Log | Download | RSS feed

  1. #include "std.h"
  2.  
  3. #include "emul.h"
  4. #include "vars.h"
  5. #include "draw.h"
  6. #include "dxrcopy.h"
  7.  
  8. // #define QUAD_BUFFER  // tests show that this variant is slower, even in noflic mode
  9.  
  10. void line32_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  11. {
  12.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  13.       unsigned char byte = *src;
  14.       unsigned *t1 = tab + src[1];
  15.       unsigned char byt1 = src[rb2_offs];
  16.       unsigned *t2 = tab + src[rb2_offs+1];
  17.       src += 2;
  18.  
  19.       *(unsigned*)(dst+x)    = t1[(byte << 1) & 0x100] +
  20.                                t2[(byt1 << 1) & 0x100];
  21.       *(unsigned*)(dst+x+4)  = t1[(byte << 2) & 0x100] +
  22.                                t2[(byt1 << 2) & 0x100];
  23.       *(unsigned*)(dst+x+8)  = t1[(byte << 3) & 0x100] +
  24.                                t2[(byt1 << 3) & 0x100];
  25.       *(unsigned*)(dst+x+12) = t1[(byte << 4) & 0x100] +
  26.                                t2[(byt1 << 4) & 0x100];
  27.       *(unsigned*)(dst+x+16) = t1[(byte << 5) & 0x100] +
  28.                                t2[(byt1 << 5) & 0x100];
  29.       *(unsigned*)(dst+x+20) = t1[(byte << 6) & 0x100] +
  30.                                t2[(byt1 << 6) & 0x100];
  31.       *(unsigned*)(dst+x+24) = t1[(byte << 7) & 0x100] +
  32.                                t2[(byt1 << 7) & 0x100];
  33.       *(unsigned*)(dst+x+28) = t1[(byte << 8) & 0x100] +
  34.                                t2[(byt1 << 8) & 0x100];
  35.    }
  36. }
  37.  
  38. void line32d_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  39. {
  40.    for (unsigned x = 0; x < temp.scx*8; x += 64) {
  41.       unsigned char byte = *src;
  42.       unsigned *t1 = tab + src[1];
  43.       unsigned char byt1 = src[rb2_offs];
  44.       unsigned *t2 = tab + src[rb2_offs+1];
  45.       src += 2;
  46.  
  47.       *(unsigned*)(dst+x)    =
  48.       *(unsigned*)(dst+x+4)  =
  49.                                t1[(byte << 1) & 0x100] +
  50.                                t2[(byt1 << 1) & 0x100];
  51.       *(unsigned*)(dst+x+8)  =
  52.       *(unsigned*)(dst+x+12) =
  53.                                t1[(byte << 2) & 0x100] +
  54.                                t2[(byt1 << 2) & 0x100];
  55.       *(unsigned*)(dst+x+16)  =
  56.       *(unsigned*)(dst+x+20)  =
  57.                                t1[(byte << 3) & 0x100] +
  58.                                t2[(byt1 << 3) & 0x100];
  59.       *(unsigned*)(dst+x+24) =
  60.       *(unsigned*)(dst+x+28) =
  61.                                t1[(byte << 4) & 0x100] +
  62.                                t2[(byt1 << 4) & 0x100];
  63.       *(unsigned*)(dst+x+32) =
  64.       *(unsigned*)(dst+x+36) =
  65.                                t1[(byte << 5) & 0x100] +
  66.                                t2[(byt1 << 5) & 0x100];
  67.       *(unsigned*)(dst+x+40) =
  68.       *(unsigned*)(dst+x+44) =
  69.                                t1[(byte << 6) & 0x100] +
  70.                                t2[(byt1 << 6) & 0x100];
  71.       *(unsigned*)(dst+x+48) =
  72.       *(unsigned*)(dst+x+52) =
  73.                                t1[(byte << 7) & 0x100] +
  74.                                t2[(byt1 << 7) & 0x100];
  75.       *(unsigned*)(dst+x+56) =
  76.       *(unsigned*)(dst+x+60) =
  77.                                t1[(byte << 8) & 0x100] +
  78.                                t2[(byt1 << 8) & 0x100];
  79.    }
  80. }
  81.  
  82. void line32t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  83. {
  84.    u32 *d = (u32 *)dst;
  85.    for (unsigned x = 0,  i = 0; x < temp.scx*3; x += 24, i += 2)
  86.    {
  87.       u8 byte1 = src[i+0];
  88.       unsigned *t1 = tab + src[i+1];
  89.       u8 byte2 = src[i+rb2_offs];
  90.       unsigned *t2 = tab + src[i+rb2_offs+1];
  91.  
  92.       u32 paper1 = t1[0];
  93.       u32 ink1 = t1[0x100];
  94.  
  95.       u32 paper2 = t2[0];
  96.       u32 ink2 = t2[0x100];
  97.  
  98.       d[x+0]  =
  99.       d[x+1]  =
  100.       d[x+2]  = ((byte1 & 0x80) ? ink1 : paper1) + ((byte2 & 0x80) ? ink2 : paper2);
  101.  
  102.       d[x+3]  =
  103.       d[x+4]  =
  104.       d[x+5]  = ((byte1 & 0x40) ? ink1 : paper1) + ((byte2 & 0x40) ? ink2 : paper2);
  105.  
  106.       d[x+6]  =
  107.       d[x+7]  =
  108.       d[x+8]  = ((byte1 & 0x20) ? ink1 : paper1) + ((byte2 & 0x20) ? ink2 : paper2);
  109.  
  110.       d[x+9]  =
  111.       d[x+10] =
  112.       d[x+11] = ((byte1 & 0x10) ? ink1 : paper1) + ((byte2 & 0x10) ? ink2 : paper2);
  113.  
  114.       d[x+12] =
  115.       d[x+13] =
  116.       d[x+14] = ((byte1 & 0x08) ? ink1 : paper1) + ((byte2 & 0x08) ? ink2 : paper2);
  117.  
  118.       d[x+15] =
  119.       d[x+16] =
  120.       d[x+17] = ((byte1 & 0x04) ? ink1 : paper1) + ((byte2 & 0x04) ? ink2 : paper2);
  121.  
  122.       d[x+18] =
  123.       d[x+19] =
  124.       d[x+20] = ((byte1 & 0x02) ? ink1 : paper1) + ((byte2 & 0x02) ? ink2 : paper2);
  125.  
  126.       d[x+21] =
  127.       d[x+22] =
  128.       d[x+23] = ((byte1 & 0x01) ? ink1 : paper1) + ((byte2 & 0x01) ? ink2 : paper2);
  129.    }
  130. }
  131.  
  132. void line32q_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  133. {
  134.    for (unsigned x = 0; x < temp.scx*16; x += 128) {
  135.       unsigned char byte = *src;
  136.       unsigned *t1 = tab + src[1];
  137.       unsigned char byt1 = src[rb2_offs];
  138.       unsigned *t2 = tab + src[rb2_offs+1];
  139.       src += 2;
  140.  
  141.       *(unsigned*)(dst+x+0x00) =
  142.       *(unsigned*)(dst+x+0x04) =
  143.       *(unsigned*)(dst+x+0x08) =
  144.       *(unsigned*)(dst+x+0x0C) =
  145.                                t1[(byte << 1) & 0x100] +
  146.                                t2[(byt1 << 1) & 0x100];
  147.       *(unsigned*)(dst+x+0x10) =
  148.       *(unsigned*)(dst+x+0x14) =
  149.       *(unsigned*)(dst+x+0x18) =
  150.       *(unsigned*)(dst+x+0x1C) =
  151.                                t1[(byte << 2) & 0x100] +
  152.                                t2[(byt1 << 2) & 0x100];
  153.       *(unsigned*)(dst+x+0x20) =
  154.       *(unsigned*)(dst+x+0x24) =
  155.       *(unsigned*)(dst+x+0x28) =
  156.       *(unsigned*)(dst+x+0x2C) =
  157.                                t1[(byte << 3) & 0x100] +
  158.                                t2[(byt1 << 3) & 0x100];
  159.       *(unsigned*)(dst+x+0x30) =
  160.       *(unsigned*)(dst+x+0x34) =
  161.       *(unsigned*)(dst+x+0x38) =
  162.       *(unsigned*)(dst+x+0x3C) =
  163.                                t1[(byte << 4) & 0x100] +
  164.                                t2[(byt1 << 4) & 0x100];
  165.       *(unsigned*)(dst+x+0x40) =
  166.       *(unsigned*)(dst+x+0x44) =
  167.       *(unsigned*)(dst+x+0x48) =
  168.       *(unsigned*)(dst+x+0x4C) =
  169.                                t1[(byte << 5) & 0x100] +
  170.                                t2[(byt1 << 5) & 0x100];
  171.       *(unsigned*)(dst+x+0x50) =
  172.       *(unsigned*)(dst+x+0x54) =
  173.       *(unsigned*)(dst+x+0x58) =
  174.       *(unsigned*)(dst+x+0x5C) =
  175.                                t1[(byte << 6) & 0x100] +
  176.                                t2[(byt1 << 6) & 0x100];
  177.       *(unsigned*)(dst+x+0x60) =
  178.       *(unsigned*)(dst+x+0x64) =
  179.       *(unsigned*)(dst+x+0x68) =
  180.       *(unsigned*)(dst+x+0x6C) =
  181.                                t1[(byte << 7) & 0x100] +
  182.                                t2[(byt1 << 7) & 0x100];
  183.       *(unsigned*)(dst+x+0x70) =
  184.       *(unsigned*)(dst+x+0x74) =
  185.       *(unsigned*)(dst+x+0x78) =
  186.       *(unsigned*)(dst+x+0x7C) =
  187.                                t1[(byte << 8) & 0x100] +
  188.                                t2[(byt1 << 8) & 0x100];
  189.    }
  190. }
  191.  
  192. #ifdef MOD_SSE2
  193. void line32(unsigned char *dst, unsigned char *src, unsigned *tab)
  194. {
  195.    __m128i *d = (__m128i *)dst;
  196.    __m128i m1, m2;
  197.    m1 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
  198.    m2 = _mm_set_epi32(0x1, 0x2, 0x4, 0x8);
  199.  
  200.    for (unsigned x = 0,  i = 0; x < temp.scx / 4; x += 2,  i += 2)
  201.    {
  202.       unsigned byte = src[i];
  203.       unsigned attr = src[i+1];
  204.       unsigned ink = tab[attr + 0x100];
  205.       unsigned paper = tab[attr];
  206.  
  207.       __m128i b, b1, b2;
  208.       __m128i r1, r2;
  209.       __m128i iv, pv;
  210.       __m128i im1, pm1, im2, pm2;
  211.       __m128i vr1, vr2;
  212.  
  213.       b = _mm_set1_epi32(byte);
  214.       iv = _mm_set1_epi32(ink);
  215.       pv = _mm_set1_epi32(paper);
  216.  
  217.       b1 = _mm_and_si128(b, m1);
  218.       r1 = _mm_cmpeq_epi32(b1, m1);
  219.       im1 = _mm_and_si128(r1, iv);
  220.       pm1 = _mm_andnot_si128(r1, pv);
  221.       vr1 = _mm_or_si128(im1, pm1);
  222.       _mm_store_si128(&d[x], vr1);
  223.  
  224.       b2 = _mm_and_si128(b, m2);
  225.       r2 = _mm_cmpeq_epi32(b2, m2);
  226.       im2 = _mm_and_si128(r2, iv);
  227.       pm2 = _mm_andnot_si128(r2, pv);
  228.       vr2 = _mm_or_si128(im2, pm2);
  229.       _mm_store_si128(&d[x+1], vr2);
  230.    }
  231. }
  232. #else
  233. void line32(unsigned char *dst, unsigned char *src, unsigned *tab)
  234. {
  235.    unsigned *d = (unsigned *)dst;
  236.    for (unsigned x = 0,  i = 0; x < temp.scx; x += 8,  i += 2)
  237.    {
  238.       unsigned byte = src[i];
  239.       unsigned attr = src[i+1];
  240.       unsigned ink = tab[attr + 0x100];
  241.       unsigned paper = tab[attr];
  242.  
  243.       d[x]   = (byte & 0x80) ? ink : paper; // 7
  244.       d[x+1] = (byte & 0x40) ? ink : paper; // 6
  245.       d[x+2] = (byte & 0x20) ? ink : paper; // 5
  246.       d[x+3] = (byte & 0x10) ? ink : paper; // 4
  247.  
  248.       d[x+4] = (byte & 0x08) ? ink : paper; // 3
  249.       d[x+5] = (byte & 0x04) ? ink : paper; // 2
  250.       d[x+6] = (byte & 0x02) ? ink : paper; // 1
  251.       d[x+7] = (byte & 0x01) ? ink : paper; // 0
  252.    }
  253. }
  254. #endif
  255.  
  256. #ifdef MOD_SSE2
  257. void line32d(unsigned char *dst, unsigned char *src, unsigned *tab)
  258. {
  259.    __m128i *d = (__m128i *)dst;
  260.    __m128i m1, m2;
  261.    m1 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
  262.    m2 = _mm_set_epi32(0x1, 0x2, 0x4, 0x8);
  263.  
  264.    for (unsigned x = 0,  i = 0; x < temp.scx / 2; x += 4,  i += 2)
  265.    {
  266.       unsigned byte = src[i];
  267.       unsigned attr = src[i+1];
  268.       unsigned ink = tab[attr + 0x100];
  269.       unsigned paper = tab[attr];
  270.  
  271.       __m128i b, b1, b2;
  272.       __m128i r1, r2;
  273.       __m128i iv, pv;
  274.       __m128i im1, pm1, im2, pm2;
  275.       __m128i vr1, vr2;
  276.       __m128i l1, l2;
  277.       __m128i h1, h2;
  278.  
  279.       b = _mm_set1_epi32(byte);
  280.       iv = _mm_set1_epi32(ink);
  281.       pv = _mm_set1_epi32(paper);
  282.  
  283.       b1 = _mm_and_si128(b, m1);
  284.       r1 = _mm_cmpeq_epi32(b1, m1);
  285.       im1 = _mm_and_si128(r1, iv);
  286.       pm1 = _mm_andnot_si128(r1, pv);
  287.       vr1 = _mm_or_si128(im1, pm1);
  288.  
  289.       l1 = _mm_unpacklo_epi32(vr1, vr1);
  290.       _mm_store_si128(&d[x], l1);
  291.       h1 = _mm_unpackhi_epi32(vr1, vr1);
  292.       _mm_store_si128(&d[x+1], h1);
  293.  
  294.       b2 = _mm_and_si128(b, m2);
  295.       r2 = _mm_cmpeq_epi32(b2, m2);
  296.       im2 = _mm_and_si128(r2, iv);
  297.       pm2 = _mm_andnot_si128(r2, pv);
  298.       vr2 = _mm_or_si128(im2, pm2);
  299.  
  300.       l2 = _mm_unpacklo_epi32(vr2, vr2);
  301.       _mm_store_si128(&d[x+2], l2);
  302.       h2 = _mm_unpackhi_epi32(vr2, vr2);
  303.       _mm_store_si128(&d[x+3], h2);
  304.    }
  305. }
  306. #else
  307. void line32d(unsigned char *dst, unsigned char *src, unsigned *tab)
  308. {
  309.    unsigned *d = (unsigned *)dst;
  310.    for (unsigned x = 0, i = 0; x < temp.scx * 2; x += 16, i+= 2)
  311.    {
  312.       // [vv] ╥ръющ яюЁ фюъ чряшёш яючтюы хЄ icl ухэхЁшЁютрЄ№ cmovcc тьхёЄю jcc
  313.       unsigned char byte = src[i];
  314.       unsigned char attr = src[i+1];
  315.       unsigned ink = tab[attr + 0x100];
  316.       unsigned paper = tab[attr];
  317.  
  318.       d[x]    = d[x+1]  = (byte & 0x80) ? ink : paper; // 7
  319.       d[x+2]  = d[x+3]  = (byte & 0x40) ? ink : paper; // 6
  320.       d[x+4]  = d[x+5]  = (byte & 0x20) ? ink : paper; // 5
  321.       d[x+6]  = d[x+7]  = (byte & 0x10) ? ink : paper; // 4
  322.       d[x+8]  = d[x+9]  = (byte & 0x08) ? ink : paper; // 3
  323.       d[x+10] = d[x+11] = (byte & 0x04) ? ink : paper; // 2
  324.       d[x+12] = d[x+13] = (byte & 0x02) ? ink : paper; // 1
  325.       d[x+14] = d[x+15] = (byte & 0x01) ? ink : paper; // 0
  326.    }
  327. }
  328. #endif
  329.  
  330. void line32t(unsigned char *dst, const unsigned char *src, const unsigned *tab)
  331. {
  332.    unsigned *d = (unsigned *)dst;
  333.    for (unsigned x = 0, i = 0; x < temp.scx * 3; x += 3*8,  i += 2)
  334.    {
  335.       unsigned char byte = src[i];
  336.       unsigned attr = src[i + 1];
  337.       unsigned ink = tab[attr + 0x100];
  338.       unsigned paper = tab[attr];
  339.  
  340.       d[x]      = d[x + 1]  = d[x + 2]  = (byte & 0x80) ? ink : paper;
  341.       d[x + 3]  = d[x + 4]  = d[x + 5]  = (byte & 0x40) ? ink : paper;
  342.       d[x + 6]  = d[x + 7]  = d[x + 8]  = (byte & 0x20) ? ink : paper;
  343.       d[x + 9]  = d[x + 10] = d[x + 11] = (byte & 0x10) ? ink : paper;
  344.       d[x + 12] = d[x + 13] = d[x + 14] = (byte & 0x08) ? ink : paper;
  345.       d[x + 15] = d[x + 16] = d[x + 17] = (byte & 0x04) ? ink : paper;
  346.       d[x + 18] = d[x + 19] = d[x + 20] = (byte & 0x02) ? ink : paper;
  347.       d[x + 21] = d[x + 22] = d[x + 23] = (byte & 0x01) ? ink : paper;
  348.    }
  349. }
  350.  
  351. void line32q(unsigned char *dst, unsigned char *src, unsigned *tab)
  352. {
  353.    for (unsigned x = 0; x < temp.scx*16; x += 128) {
  354.       unsigned char byte = *src++;
  355.       unsigned *t = tab + *src++;
  356.       *(unsigned*)(dst+x+0x00) =
  357.       *(unsigned*)(dst+x+0x04) =
  358.       *(unsigned*)(dst+x+0x08) =
  359.       *(unsigned*)(dst+x+0x0C) =
  360.                                t[(byte << 1) & 0x100];
  361.       *(unsigned*)(dst+x+0x10) =
  362.       *(unsigned*)(dst+x+0x14) =
  363.       *(unsigned*)(dst+x+0x18) =
  364.       *(unsigned*)(dst+x+0x1C) =
  365.                                t[(byte << 2) & 0x100];
  366.       *(unsigned*)(dst+x+0x20) =
  367.       *(unsigned*)(dst+x+0x24) =
  368.       *(unsigned*)(dst+x+0x28) =
  369.       *(unsigned*)(dst+x+0x2C) =
  370.                                t[(byte << 3) & 0x100];
  371.       *(unsigned*)(dst+x+0x30) =
  372.       *(unsigned*)(dst+x+0x34) =
  373.       *(unsigned*)(dst+x+0x38) =
  374.       *(unsigned*)(dst+x+0x3C) =
  375.                                t[(byte << 4) & 0x100];
  376.       *(unsigned*)(dst+x+0x40) =
  377.       *(unsigned*)(dst+x+0x44) =
  378.       *(unsigned*)(dst+x+0x48) =
  379.       *(unsigned*)(dst+x+0x4C) =
  380.                                t[(byte << 5) & 0x100];
  381.       *(unsigned*)(dst+x+0x50) =
  382.       *(unsigned*)(dst+x+0x54) =
  383.       *(unsigned*)(dst+x+0x58) =
  384.       *(unsigned*)(dst+x+0x5C) =
  385.                                t[(byte << 6) & 0x100];
  386.       *(unsigned*)(dst+x+0x60) =
  387.       *(unsigned*)(dst+x+0x64) =
  388.       *(unsigned*)(dst+x+0x68) =
  389.       *(unsigned*)(dst+x+0x6C) =
  390.                                t[(byte << 7) & 0x100];
  391.       *(unsigned*)(dst+x+0x70) =
  392.       *(unsigned*)(dst+x+0x74) =
  393.       *(unsigned*)(dst+x+0x78) =
  394.       *(unsigned*)(dst+x+0x7C) =
  395.                                t[(byte << 8) & 0x100];
  396.    }
  397. }
  398.  
  399. void line16_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  400. {
  401.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  402.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  403.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 6) & 0x3FC;
  404.       *(unsigned*)(dst+x)   = (tab[((s >> 6) & 3) + attr]) +
  405.                               (tab[((r >> 6) & 3) + atr2]);
  406.       *(unsigned*)(dst+x+4) = (tab[((s >> 4) & 3) + attr]) +
  407.                               (tab[((r >> 4) & 3) + atr2]);
  408.       *(unsigned*)(dst+x+8) = (tab[((s >> 2) & 3) + attr]) +
  409.                               (tab[((r >> 2) & 3) + atr2]);
  410.       *(unsigned*)(dst+x+12)= (tab[((s >> 0) & 3) + attr]) +
  411.                               (tab[((r >> 0) & 3) + atr2]);
  412.       attr = (s >> 22) & 0x3FC; atr2 = (r >> 22) & 0x3FC;
  413.       *(unsigned*)(dst+x+16)= (tab[((s >>22) & 3) + attr]) +
  414.                               (tab[((r >>22) & 3) + atr2]);
  415.       *(unsigned*)(dst+x+20)= (tab[((s >>20) & 3) + attr]) +
  416.                               (tab[((r >>20) & 3) + atr2]);
  417.       *(unsigned*)(dst+x+24)= (tab[((s >>18) & 3) + attr]) +
  418.                               (tab[((r >>18) & 3) + atr2]);
  419.       *(unsigned*)(dst+x+28)= (tab[((s >>16) & 3) + attr]) +
  420.                               (tab[((r >>16) & 3) + atr2]);
  421.       src += 4;
  422.    }
  423. }
  424.  
  // Name aliases: these 16-bit variants have no code of their own and expand
  // to the 32-bit routine named on the right-hand side.
  #define line16d_nf line32_nf

  #define line16q line32d
  #define line16q_nf line32d_nf
  429.  
  430. void line16t(unsigned char *dst, unsigned char *src, unsigned *tab)
  431. {
  432.    u16 *d = (u16 *)dst;
  433.    for (unsigned x = 0; x < temp.scx*3; x += 24)
  434.    {
  435.       unsigned char byte = *src++;
  436.       unsigned *t = tab + *src++;
  437.       u16 paper_yu = t[0];
  438.       u16 paper_yv = t[0] >> 16;
  439.       u16 ink_yu = t[0x100];
  440.       u16 ink_yv = t[0x100] >> 16;
  441.  
  442.       d[x+0]  = (byte & 0x80) ? ink_yu : paper_yu;
  443.       d[x+1]  = (byte & 0x80) ? ink_yv : paper_yv;
  444.       d[x+2]  = (byte & 0x80) ? ink_yu : paper_yu;
  445.  
  446.       d[x+3]  = (byte & 0x40) ? ink_yv : paper_yv;
  447.       d[x+4]  = (byte & 0x40) ? ink_yu : paper_yu;
  448.       d[x+5]  = (byte & 0x40) ? ink_yv : paper_yv;
  449.  
  450.       d[x+6]  = (byte & 0x20) ? ink_yu : paper_yu;
  451.       d[x+7]  = (byte & 0x20) ? ink_yv : paper_yv;
  452.       d[x+8]  = (byte & 0x20) ? ink_yu : paper_yu;
  453.  
  454.       d[x+9]  = (byte & 0x10) ? ink_yv : paper_yv;
  455.       d[x+10] = (byte & 0x10) ? ink_yu : paper_yu;
  456.       d[x+11] = (byte & 0x10) ? ink_yv : paper_yv;
  457.  
  458.       d[x+12] = (byte & 0x08) ? ink_yu : paper_yu;
  459.       d[x+13] = (byte & 0x08) ? ink_yv : paper_yv;
  460.       d[x+14] = (byte & 0x08) ? ink_yu : paper_yu;
  461.  
  462.       d[x+15] = (byte & 0x04) ? ink_yv : paper_yv;
  463.       d[x+16] = (byte & 0x04) ? ink_yu : paper_yu;
  464.       d[x+17] = (byte & 0x04) ? ink_yv : paper_yv;
  465.  
  466.       d[x+18] = (byte & 0x02) ? ink_yu : paper_yu;
  467.       d[x+19] = (byte & 0x02) ? ink_yv : paper_yv;
  468.       d[x+20] = (byte & 0x02) ? ink_yu : paper_yu;
  469.  
  470.       d[x+21] = (byte & 0x01) ? ink_yv : paper_yv;
  471.       d[x+22] = (byte & 0x01) ? ink_yu : paper_yu;
  472.       d[x+23] = (byte & 0x01) ? ink_yv : paper_yv;
  473.    }
  474. }
  475.  
  476. void line16t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  477. {
  478.    u16 *d = (u16 *)dst;
  479.    for (unsigned x = 0,  i = 0; x < temp.scx*3; x += 24, i += 2)
  480.    {
  481.       u8 byte1 = src[i+0];
  482.       unsigned *t1 = tab + src[i+1];
  483.       u8 byte2 = src[i+rb2_offs];
  484.       unsigned *t2 = tab + src[i+rb2_offs+1];
  485.  
  486.       u16 paper_yu1 = t1[0];
  487.       u16 paper_yv1 = t1[0] >> 16;
  488.       u16 ink_yu1 = t1[0x100];
  489.       u16 ink_yv1 = t1[0x100] >> 16;
  490.  
  491.       u16 paper_yu2 = t2[0];
  492.       u16 paper_yv2 = t2[0] >> 16;
  493.       u16 ink_yu2 = t2[0x100];
  494.       u16 ink_yv2 = t2[0x100] >> 16;
  495.  
  496.       d[x+0]  = ((byte1 & 0x80) ? ink_yu1 : paper_yu1) + ((byte2 & 0x80) ? ink_yu2 : paper_yu2);
  497.       d[x+1]  = ((byte1 & 0x80) ? ink_yv1 : paper_yv1) + ((byte2 & 0x80) ? ink_yv2 : paper_yv2);
  498.       d[x+2]  = ((byte1 & 0x80) ? ink_yu1 : paper_yu1) + ((byte2 & 0x80) ? ink_yu2 : paper_yu2);
  499.  
  500.       d[x+3]  = ((byte1 & 0x40) ? ink_yv1 : paper_yv1) + ((byte2 & 0x40) ? ink_yv2 : paper_yv2);
  501.       d[x+4]  = ((byte1 & 0x40) ? ink_yu1 : paper_yu1) + ((byte2 & 0x40) ? ink_yu2 : paper_yu2);
  502.       d[x+5]  = ((byte1 & 0x40) ? ink_yv1 : paper_yv1) + ((byte2 & 0x40) ? ink_yv2 : paper_yv2);
  503.  
  504.       d[x+6]  = ((byte1 & 0x20) ? ink_yu1 : paper_yu1) + ((byte2 & 0x20) ? ink_yu2 : paper_yu2);
  505.       d[x+7]  = ((byte1 & 0x20) ? ink_yv1 : paper_yv1) + ((byte2 & 0x20) ? ink_yv2 : paper_yv2);
  506.       d[x+8]  = ((byte1 & 0x20) ? ink_yu1 : paper_yu1) + ((byte2 & 0x20) ? ink_yu2 : paper_yu2);
  507.  
  508.       d[x+9]  = ((byte1 & 0x10) ? ink_yv1 : paper_yv1) + ((byte2 & 0x10) ? ink_yv2 : paper_yv2);
  509.       d[x+10] = ((byte1 & 0x10) ? ink_yu1 : paper_yu1) + ((byte2 & 0x10) ? ink_yu2 : paper_yu2);
  510.       d[x+11] = ((byte1 & 0x10) ? ink_yv1 : paper_yv1) + ((byte2 & 0x10) ? ink_yv2 : paper_yv2);
  511.  
  512.       d[x+12] = ((byte1 & 0x08) ? ink_yu1 : paper_yu1) + ((byte2 & 0x08) ? ink_yu2 : paper_yu2);
  513.       d[x+13] = ((byte1 & 0x08) ? ink_yv1 : paper_yv1) + ((byte2 & 0x08) ? ink_yv2 : paper_yv2);
  514.       d[x+14] = ((byte1 & 0x08) ? ink_yu1 : paper_yu1) + ((byte2 & 0x08) ? ink_yu2 : paper_yu2);
  515.  
  516.       d[x+15] = ((byte1 & 0x04) ? ink_yv1 : paper_yv1) + ((byte2 & 0x04) ? ink_yv2 : paper_yv2);
  517.       d[x+16] = ((byte1 & 0x04) ? ink_yu1 : paper_yu1) + ((byte2 & 0x04) ? ink_yu2 : paper_yu2);
  518.       d[x+17] = ((byte1 & 0x04) ? ink_yv1 : paper_yv1) + ((byte2 & 0x04) ? ink_yv2 : paper_yv2);
  519.  
  520.       d[x+18] = ((byte1 & 0x02) ? ink_yu1 : paper_yu1) + ((byte2 & 0x02) ? ink_yu2 : paper_yu2);
  521.       d[x+19] = ((byte1 & 0x02) ? ink_yv1 : paper_yv1) + ((byte2 & 0x02) ? ink_yv2 : paper_yv2);
  522.       d[x+20] = ((byte1 & 0x02) ? ink_yu1 : paper_yu1) + ((byte2 & 0x02) ? ink_yu2 : paper_yu2);
  523.  
  524.       d[x+21] = ((byte1 & 0x01) ? ink_yv1 : paper_yv1) + ((byte2 & 0x01) ? ink_yv2 : paper_yv2);
  525.       d[x+22] = ((byte1 & 0x01) ? ink_yu1 : paper_yu1) + ((byte2 & 0x01) ? ink_yu2 : paper_yu2);
  526.       d[x+23] = ((byte1 & 0x01) ? ink_yv1 : paper_yv1) + ((byte2 & 0x01) ? ink_yv2 : paper_yv2);
  527.    }
  528. }
  529.  
  530. void line8(unsigned char *dst, unsigned char *src, unsigned *tab)
  531. {
  532.    for (unsigned x = 0; x < temp.scx; x += 32) {
  533.       unsigned src0 = *(unsigned*)src, attr = (src0 >> 4) & 0xFF0;
  534.       *(unsigned*)(dst+x)    = tab[((src0 >> 4)  & 0xF) + attr];
  535.       *(unsigned*)(dst+x+4)  = tab[((src0 >> 0)  & 0xF) + attr];
  536.       attr = (src0 >> 20) & 0xFF0;
  537.       *(unsigned*)(dst+x+8)  = tab[((src0 >> 20) & 0xF) + attr];
  538.       *(unsigned*)(dst+x+12) = tab[((src0 >> 16) & 0xF) + attr];
  539.       src0 = *(unsigned*)(src+4), attr = (src0 >> 4) & 0xFF0;
  540.       *(unsigned*)(dst+x+16) = tab[((src0 >> 4)  & 0xF) + attr];
  541.       *(unsigned*)(dst+x+20) = tab[((src0 >> 0)  & 0xF) + attr];
  542.       attr = (src0 >> 20) & 0xFF0;
  543.       *(unsigned*)(dst+x+24) = tab[((src0 >> 20) & 0xF) + attr];
  544.       *(unsigned*)(dst+x+28) = tab[((src0 >> 16) & 0xF) + attr];
  545.       src += 8;
  546.    }
  547. }
  548.  
  549. void line8_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  550. {
  551.    for (unsigned x = 0; x < temp.scx; x += 32) {
  552.       unsigned s = *(unsigned*)src, attr = (s >> 4) & 0xFF0;
  553.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 4) & 0xFF0;
  554.       *(unsigned*)(dst+x)    = (tab[((s >> 4)  & 0xF) + attr] & 0x0F0F0F0F) +
  555.                                (tab[((r >> 4)  & 0xF) + atr2] & 0xF0F0F0F0);
  556.       *(unsigned*)(dst+x+4)  = (tab[((s >> 0)  & 0xF) + attr] & 0x0F0F0F0F) +
  557.                                (tab[((r >> 0)  & 0xF) + atr2] & 0xF0F0F0F0);
  558.       attr = (s >> 20) & 0xFF0; atr2 = (r >> 20) & 0xFF0;
  559.       *(unsigned*)(dst+x+8)  = (tab[((s >> 20) & 0xF) + attr] & 0x0F0F0F0F) +
  560.                                (tab[((r >> 20) & 0xF) + atr2] & 0xF0F0F0F0);
  561.       *(unsigned*)(dst+x+12) = (tab[((s >> 16) & 0xF) + attr] & 0x0F0F0F0F) +
  562.                                (tab[((r >> 16) & 0xF) + atr2] & 0xF0F0F0F0);
  563.       s = *(unsigned*)(src+4), attr = (s >> 4) & 0xFF0;
  564.       r = *(unsigned*)(src+rb2_offs+4), atr2 = (r >> 4) & 0xFF0;
  565.       *(unsigned*)(dst+x+16) = (tab[((s >> 4)  & 0xF) + attr] & 0x0F0F0F0F) +
  566.                                (tab[((r >> 4)  & 0xF) + atr2] & 0xF0F0F0F0);
  567.       *(unsigned*)(dst+x+20) = (tab[((s >> 0)  & 0xF) + attr] & 0x0F0F0F0F) +
  568.                                (tab[((r >> 0)  & 0xF) + atr2] & 0xF0F0F0F0);
  569.       attr = (s >> 20) & 0xFF0; atr2 = (r >> 20) & 0xFF0;
  570.       *(unsigned*)(dst+x+24) = (tab[((s >> 20) & 0xF) + attr] & 0x0F0F0F0F) +
  571.                                (tab[((r >> 20) & 0xF) + atr2] & 0xF0F0F0F0);
  572.       *(unsigned*)(dst+x+28) = (tab[((s >> 16) & 0xF) + attr] & 0x0F0F0F0F) +
  573.                                (tab[((r >> 16) & 0xF) + atr2] & 0xF0F0F0F0);
  574.       src += 8;
  575.    }
  576. }
  577.  
  578. void line8d(unsigned char *dst, unsigned char *src, unsigned *tab)
  579. {
  580.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  581.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  582.       *(unsigned*)(dst+x)   = tab[((s >> 6) & 3) + attr];
  583.       *(unsigned*)(dst+x+4) = tab[((s >> 4) & 3) + attr];
  584.       *(unsigned*)(dst+x+8) = tab[((s >> 2) & 3) + attr];
  585.       *(unsigned*)(dst+x+12)= tab[((s >> 0) & 3) + attr];
  586.       attr = (s >> 22) & 0x3FC;
  587.       *(unsigned*)(dst+x+16)= tab[((s >>22) & 3) + attr];
  588.       *(unsigned*)(dst+x+20)= tab[((s >>20) & 3) + attr];
  589.       *(unsigned*)(dst+x+24)= tab[((s >>18) & 3) + attr];
  590.       *(unsigned*)(dst+x+28)= tab[((s >>16) & 3) + attr];
  591.       src += 4;
  592.    }
  593. }
  594.  
  595.  
  596. void line8t(unsigned char *dst, unsigned char *src, unsigned *tab)
  597. {
  598.    for (unsigned x = 0; x < temp.scx*3; x += 24)
  599.    {
  600.       unsigned char byte = *src++;
  601.       unsigned *t = tab + *src++;
  602.       dst[x+0]  = dst[x+1]  = dst[x+2]  = t[(byte << 1) & 0x100];
  603.       dst[x+3]  = dst[x+4]  = dst[x+5]  = t[(byte << 2) & 0x100];
  604.       dst[x+6]  = dst[x+7]  = dst[x+8]  = t[(byte << 3) & 0x100];
  605.       dst[x+9]  = dst[x+10] = dst[x+11] = t[(byte << 4) & 0x100];
  606.       dst[x+12] = dst[x+13] = dst[x+14] = t[(byte << 5) & 0x100];
  607.       dst[x+15] = dst[x+16] = dst[x+17] = t[(byte << 6) & 0x100];
  608.       dst[x+18] = dst[x+19] = dst[x+20] = t[(byte << 7) & 0x100];
  609.       dst[x+21] = dst[x+22] = dst[x+23] = t[(byte << 8) & 0x100];
  610.    }
  611. }
  612.  
  613. void line8q(unsigned char *dst, unsigned char *src, unsigned *tab)
  614. {
  615.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  616.       unsigned char byte = *src++;
  617.       unsigned *t = tab + *src++;
  618.       *(unsigned*)(dst+x+0x00) = t[(byte << 1) & 0x100];
  619.       *(unsigned*)(dst+x+0x04) = t[(byte << 2) & 0x100];
  620.       *(unsigned*)(dst+x+0x08) = t[(byte << 3) & 0x100];
  621.       *(unsigned*)(dst+x+0x0C) = t[(byte << 4) & 0x100];
  622.       *(unsigned*)(dst+x+0x10) = t[(byte << 5) & 0x100];
  623.       *(unsigned*)(dst+x+0x14) = t[(byte << 6) & 0x100];
  624.       *(unsigned*)(dst+x+0x18) = t[(byte << 7) & 0x100];
  625.       *(unsigned*)(dst+x+0x1C) = t[(byte << 8) & 0x100];
  626.    }
  627. }
  628.  
  629. void line8d_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  630. {
  631.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  632.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  633.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 6) & 0x3FC;
  634.       *(unsigned*)(dst+x)   = (tab[((s >> 6) & 3) + attr] & 0x0F0F0F0F) +
  635.                               (tab[((r >> 6) & 3) + atr2] & 0xF0F0F0F0);
  636.       *(unsigned*)(dst+x+4) = (tab[((s >> 4) & 3) + attr] & 0x0F0F0F0F) +
  637.                               (tab[((r >> 4) & 3) + atr2] & 0xF0F0F0F0);
  638.       *(unsigned*)(dst+x+8) = (tab[((s >> 2) & 3) + attr] & 0x0F0F0F0F) +
  639.                               (tab[((r >> 2) & 3) + atr2] & 0xF0F0F0F0);
  640.       *(unsigned*)(dst+x+12)= (tab[((s >> 0) & 3) + attr] & 0x0F0F0F0F) +
  641.                               (tab[((r >> 0) & 3) + atr2] & 0xF0F0F0F0);
  642.       attr = (s >> 22) & 0x3FC; atr2 = (r >> 22) & 0x3FC;
  643.       *(unsigned*)(dst+x+16)= (tab[((s >>22) & 3) + attr] & 0x0F0F0F0F) +
  644.                               (tab[((r >>22) & 3) + atr2] & 0xF0F0F0F0);
  645.       *(unsigned*)(dst+x+20)= (tab[((s >>20) & 3) + attr] & 0x0F0F0F0F) +
  646.                               (tab[((r >>20) & 3) + atr2] & 0xF0F0F0F0);
  647.       *(unsigned*)(dst+x+24)= (tab[((s >>18) & 3) + attr] & 0x0F0F0F0F) +
  648.                               (tab[((r >>18) & 3) + atr2] & 0xF0F0F0F0);
  649.       *(unsigned*)(dst+x+28)= (tab[((s >>16) & 3) + attr] & 0x0F0F0F0F) +
  650.                               (tab[((r >>16) & 3) + atr2] & 0xF0F0F0F0);
  651.       src += 4;
  652.    }
  653. }
  654.  
  655. void line8t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  656. {
  657.    for (unsigned x = 0, i = 0; x < temp.scx*3; x += 24, i += 2)
  658.    {
  659.       u32 byte1 = src[i+0];
  660.       u32 byte2 = src[i+rb2_offs+0];
  661.       unsigned *t1 = tab + src[i+1];
  662.       unsigned *t2 = tab + src[i+rb2_offs+1];
  663.       u8 ink1 = u8(t1[0x100] & 0x0F);
  664.       u8 ink2 = u8(t2[0x100] & 0xF0);
  665.       u8 paper1 = u8(t1[0] & 0x0F);
  666.       u8 paper2 = u8(t2[0] & 0xF0);
  667.  
  668.       dst[x+0]  = dst[x+1]  = dst[x+2]  = ((byte1 & 0x80) ? ink1 : paper1) + ((byte2 & 0x80) ? ink2 : paper2);
  669.       dst[x+3]  = dst[x+4]  = dst[x+5]  = ((byte1 & 0x40) ? ink1 : paper1) + ((byte2 & 0x40) ? ink2 : paper2);
  670.       dst[x+6]  = dst[x+7]  = dst[x+8]  = ((byte1 & 0x20) ? ink1 : paper1) + ((byte2 & 0x20) ? ink2 : paper2);
  671.       dst[x+9]  = dst[x+10] = dst[x+11] = ((byte1 & 0x10) ? ink1 : paper1) + ((byte2 & 0x10) ? ink2 : paper2);
  672.       dst[x+12] = dst[x+13] = dst[x+14] = ((byte1 & 0x08) ? ink1 : paper1) + ((byte2 & 0x08) ? ink2 : paper2);
  673.       dst[x+15] = dst[x+16] = dst[x+17] = ((byte1 & 0x04) ? ink1 : paper1) + ((byte2 & 0x04) ? ink2 : paper2);
  674.       dst[x+18] = dst[x+19] = dst[x+20] = ((byte1 & 0x02) ? ink1 : paper1) + ((byte2 & 0x02) ? ink2 : paper2);
  675.       dst[x+21] = dst[x+22] = dst[x+23] = ((byte1 & 0x01) ? ink1 : paper1) + ((byte2 & 0x01) ? ink2 : paper2);
  676.    }
  677. }
  678.  
  679. void line8q_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  680. {
  681.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  682.       unsigned char byte1 = src[0], byte2 = src[rb2_offs+0];
  683.       unsigned *t1 = tab + src[1], *t2 = tab + src[rb2_offs+1];
  684.       src += 2;
  685.  
  686.       *(unsigned*)(dst+x+0x00) = (t1[(byte1 << 1) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 1) & 0x100] & 0xF0F0F0F0);
  687.       *(unsigned*)(dst+x+0x04) = (t1[(byte1 << 2) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 2) & 0x100] & 0xF0F0F0F0);
  688.       *(unsigned*)(dst+x+0x08) = (t1[(byte1 << 3) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 3) & 0x100] & 0xF0F0F0F0);
  689.       *(unsigned*)(dst+x+0x0C) = (t1[(byte1 << 4) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 4) & 0x100] & 0xF0F0F0F0);
  690.       *(unsigned*)(dst+x+0x10) = (t1[(byte1 << 5) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 5) & 0x100] & 0xF0F0F0F0);
  691.       *(unsigned*)(dst+x+0x14) = (t1[(byte1 << 6) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 6) & 0x100] & 0xF0F0F0F0);
  692.       *(unsigned*)(dst+x+0x18) = (t1[(byte1 << 7) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 7) & 0x100] & 0xF0F0F0F0);
  693.       *(unsigned*)(dst+x+0x1C) = (t1[(byte1 << 8) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 8) & 0x100] & 0xF0F0F0F0);
  694.    }
  695. }
  696.  
  697.  
  698. void rend_copy32_nf(unsigned char *dst, unsigned pitch)
  699. {
  700.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  701.    for (unsigned y = 0; y < temp.scy; y++) {
  702.       line32_nf(dst, src, t.sctab32_nf[0]);
  703.       dst += pitch; src += delta;
  704.    }
  705. }
  706.  
  707. void rend_copy32(unsigned char *dst, unsigned pitch)
  708. {
  709.    unsigned char *src = rbuf;
  710.    unsigned delta = temp.scx / 4;
  711.    for (unsigned y = 0; y < temp.scy; y++)
  712.    {
  713.       line32(dst, src, t.sctab32[0]);
  714.       dst += pitch;
  715.       src += delta;
  716.    }
  717. }
  718.  
  719. void rend_copy32d1_nf(unsigned char *dst, unsigned pitch)
  720. {
  721.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  722.    for (unsigned y = 0; y < temp.scy; y++) {
  723.       line32d_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  724.       src += delta;
  725.    }
  726. }
  727.  
  728. void rend_copy32d_nf(unsigned char *dst, unsigned pitch)
  729. {
  730.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  731.    if (conf.alt_nf) {
  732.       int offset = rb2_offs;
  733.       if (comp.frame_counter & 1) src += rb2_offs, offset = -offset;
  734.       for (unsigned y = 0; y < temp.scy; y++) {
  735.          line32d(dst, src, t.sctab32[0]); dst += pitch;
  736.          line32d(dst, src+offset, t.sctab32[0]); dst += pitch;
  737.          src += delta;
  738.       }
  739.    } else {
  740.       for (unsigned y = 0; y < temp.scy; y++) {
  741.          line32d_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  742.          line32d_nf(dst, src, t.sctab32_nf[1]); dst += pitch;
  743.          src += delta;
  744.       }
  745.    }
  746. }
  747.  
  748. void rend_copy32t_nf(unsigned char *dst, unsigned pitch)
  749. {
  750.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  751.    for (unsigned y = 0; y < temp.scy; y++) {
  752.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  753.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  754.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  755.       src += delta;
  756.    }
  757. }
  758.  
  759. void rend_copy32q_nf(unsigned char *dst, unsigned pitch)
  760. {
  761.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  762.    for (unsigned y = 0; y < temp.scy; y++) {
  763. #ifdef QUAD_BUFFER
  764.       unsigned char buffer[MAX_WIDTH*4*sizeof(DWORD)];
  765.       line32q_nf(buffer, src, t.sctab32_nf[0]);
  766.       for (int i = 0; i < 4; i++) {
  767.          memcpy(dst, buffer, temp.scx*16);
  768.          dst += pitch;
  769.       }
  770. #else
  771.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  772.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  773.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  774.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  775. #endif
  776.       src += delta;
  777.    }
  778. }
  779.  
  780. void rend_copy32d1(unsigned char *dst, unsigned pitch)
  781. {
  782.    unsigned char *src = rbuf;
  783.    unsigned delta = temp.scx/4;
  784.    for (unsigned y = 0; y < temp.scy; y++)
  785.    {
  786.       line32d(dst, src, t.sctab32[0]); dst += pitch;
  787.       src += delta;
  788.    }
  789. }
  790.  
  791. void rend_copy32d(unsigned char *dst, unsigned pitch)
  792. {
  793.    unsigned char *src = rbuf;
  794.    unsigned delta = temp.scx / 4;
  795.    for (unsigned y = 0; y < temp.scy; y++)
  796.    {
  797.       line32d(dst, src, t.sctab32[0]); dst += pitch; // ╫хЄэ√х ёЄЁюъш
  798.       line32d(dst, src, t.sctab32[1]); dst += pitch; // ═хўхЄэ√х ёЄЁюъш
  799.       src += delta;
  800.    }
  801. }
  802.  
  803. void rend_copy32t(unsigned char *dst, unsigned pitch)
  804. {
  805.    unsigned char *src = rbuf;
  806.    unsigned delta = temp.scx / 4;
  807.    for (unsigned y = 0; y < temp.scy; y++)
  808.    {
  809.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  810.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  811.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  812.       src += delta;
  813.    }
  814. }
  815.  
  816. void rend_copy32q(unsigned char *dst, unsigned pitch)
  817. {
  818.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  819.    for (unsigned y = 0; y < temp.scy; y++) {
  820. #ifdef QUAD_BUFFER
  821.       unsigned char buffer[MAX_WIDTH*4*sizeof(DWORD)];
  822.       line32q(buffer, src, t.sctab32[0]);
  823.       for (int i = 0; i < 4; i++) {
  824.          memcpy(dst, buffer, temp.scx*16);
  825.          dst += pitch;
  826.       }
  827. #else
  828.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  829.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  830.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  831.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  832. #endif
  833.       src += delta;
  834.    }
  835. }
  836.  
  837. void rend_copy16(unsigned char *dst, unsigned pitch)
  838. {
  839.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  840.    for (unsigned y = 0; y < temp.scy; y++) {
  841.       line16(dst, src, t.sctab16[0]);
  842.       dst += pitch, src += delta;
  843.    }
  844. }
  845.  
  846. void rend_copy16_nf(unsigned char *dst, unsigned pitch)
  847. {
  848.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  849.    for (unsigned y = 0; y < temp.scy; y++) {
  850.       line16_nf(dst, src, t.sctab16_nf[0]);
  851.       dst += pitch, src += delta;
  852.    }
  853. }
  854.  
  855. void rend_copy16d1(unsigned char *dst, unsigned pitch)
  856. {
  857.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  858.    for (unsigned y = 0; y < temp.scy; y++) {
  859.       line16d(dst, src, t.sctab16d[0]); dst += pitch;
  860.       src += delta;
  861.    }
  862. }
  863.  
  864. void rend_copy16d(unsigned char *dst, unsigned pitch)
  865. {
  866.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  867.    for (unsigned y = 0; y < temp.scy; y++) {
  868.       line16d(dst, src, t.sctab16d[0]); dst += pitch;
  869.       line16d(dst, src, t.sctab16d[1]); dst += pitch;
  870.       src += delta;
  871.    }
  872. }
  873.  
  874. void rend_copy16t(unsigned char *dst, unsigned pitch)
  875. {
  876.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  877.    for (unsigned y = 0; y < temp.scy; y++) {
  878.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  879.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  880.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  881.       src += delta;
  882.    }
  883. }
  884.  
  885. void rend_copy16q(unsigned char *dst, unsigned pitch)
  886. {
  887.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  888.    for (unsigned y = 0; y < temp.scy; y++) {
  889.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  890.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  891.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  892.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  893.       src += delta;
  894.    }
  895. }
  896.  
  897. void rend_copy16d1_nf(unsigned char *dst, unsigned pitch)
  898. {
  899.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  900.    for (unsigned y = 0; y < temp.scy; y++) {
  901.       line16d_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  902.       src += delta;
  903.    }
  904. }
  905.  
  906. void rend_copy16d_nf(unsigned char *dst, unsigned pitch)
  907. {
  908.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  909.    if (conf.alt_nf) {
  910.       int offset = rb2_offs;
  911.       if (comp.frame_counter & 1) src += rb2_offs, offset = -offset;
  912.       for (unsigned y = 0; y < temp.scy; y++) {
  913.          line16d(dst, src, t.sctab16d[0]); dst += pitch;
  914.          line16d(dst, src+offset, t.sctab16d[0]); dst += pitch;
  915.          src += delta;
  916.       }
  917.    } else {
  918.       unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  919.       for (unsigned y = 0; y < temp.scy; y++) {
  920.          line16d_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  921.          line16d_nf(dst, src, t.sctab16d_nf[1]); dst += pitch;
  922.          src += delta;
  923.       }
  924.    }
  925. }
  926.  
  927. void rend_copy16t_nf(unsigned char *dst, unsigned pitch)
  928. {
  929.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  930.    for (unsigned y = 0; y < temp.scy; y++) {
  931.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  932.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  933.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  934.       src += delta;
  935.    }
  936. }
  937.  
  938. void rend_copy16q_nf(unsigned char *dst, unsigned pitch)
  939. {
  940.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  941.    for (unsigned y = 0; y < temp.scy; y++) {
  942.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  943.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  944.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  945.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  946.       src += delta;
  947.    }
  948. }
  949.  
  950. void __fastcall rend_copy8(unsigned char *dst, unsigned pitch)
  951. {
  952.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  953.    for (unsigned y = 0; y < temp.scy; y++) {
  954.       line8(dst, src, t.sctab8[0]);
  955.       dst += pitch, src += delta;
  956.    }
  957. }
  958.  
  959. void __fastcall rend_copy8_nf(unsigned char *dst, unsigned pitch)
  960. {
  961.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  962.    for (unsigned y = 0; y < temp.scy; y++) {
  963.       line8_nf(dst, src, t.sctab8[0]);
  964.       dst += pitch, src += delta;
  965.    }
  966. }
  967.  
  968. void rend_copy8d1(unsigned char *dst, unsigned pitch)
  969. {
  970.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  971.    for (unsigned y = 0; y < temp.scy; y++) {
  972.       line8d(dst, src, t.sctab8d[0]); dst += pitch;
  973.       src += delta;
  974.    }
  975. }
  976.  
  977. void rend_copy8d(unsigned char *dst, unsigned pitch)
  978. {
  979.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  980.    for (unsigned y = 0; y < temp.scy; y++) {
  981.       line8d(dst, src, t.sctab8d[0]); dst += pitch;
  982.       line8d(dst, src, t.sctab8d[1]); dst += pitch;
  983.       src += delta;
  984.    }
  985. }
  986.  
  987. void rend_copy8t(unsigned char *dst, unsigned pitch)
  988. {
  989.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  990.    for (unsigned y = 0; y < temp.scy; y++) {
  991.       line8t(dst, src, t.sctab8q); dst += pitch;
  992.       line8t(dst, src, t.sctab8q); dst += pitch;
  993.       line8t(dst, src, t.sctab8q); dst += pitch;
  994.       src += delta;
  995.    }
  996. }
  997.  
  998. void rend_copy8q(unsigned char *dst, unsigned pitch)
  999. {
  1000.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1001.    for (unsigned y = 0; y < temp.scy; y++) {
  1002.       line8q(dst, src, t.sctab8q); dst += pitch;
  1003.       line8q(dst, src, t.sctab8q); dst += pitch;
  1004.       line8q(dst, src, t.sctab8q); dst += pitch;
  1005.       line8q(dst, src, t.sctab8q); dst += pitch;
  1006.       src += delta;
  1007.    }
  1008. }
  1009.  
  1010. void rend_copy8d1_nf(unsigned char *dst, unsigned pitch)
  1011. {
  1012.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1013.    for (unsigned y = 0; y < temp.scy; y++) {
  1014.       line8d_nf(dst, src, t.sctab8d[0]); dst += pitch;
  1015.       src += delta;
  1016.    }
  1017. }
  1018.  
  1019. void rend_copy8d_nf(unsigned char *dst, unsigned pitch)
  1020. {
  1021.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1022.    if (conf.alt_nf) {
  1023.       int offset = rb2_offs;
  1024.       if (comp.frame_counter & 1) src += rb2_offs, offset = -offset;
  1025.       for (unsigned y = 0; y < temp.scy; y++) {
  1026.          line8d(dst, src, t.sctab8d[0]); dst += pitch;
  1027.          line8d(dst, src+offset, t.sctab8d[0]); dst += pitch;
  1028.          src += delta;
  1029.       }
  1030.    } else {
  1031.       for (unsigned y = 0; y < temp.scy; y++) {
  1032.          line8d_nf(dst, src, t.sctab8d[0]); dst += pitch;
  1033.          line8d_nf(dst, src, t.sctab8d[1]); dst += pitch;
  1034.          src += delta;
  1035.       }
  1036.    }
  1037. }
  1038.  
  1039. void rend_copy8t_nf(unsigned char *dst, unsigned pitch)
  1040. {
  1041.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1042.    for (unsigned y = 0; y < temp.scy; y++) {
  1043.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1044.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1045.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1046.       src += delta;
  1047.    }
  1048. }
  1049.  
  1050. void rend_copy8q_nf(unsigned char *dst, unsigned pitch)
  1051. {
  1052.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1053.    for (unsigned y = 0; y < temp.scy; y++) {
  1054.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1055.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1056.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1057.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1058.       src += delta;
  1059.    }
  1060. }
  1061.