Subversion Repositories pentevo

Rev

Rev 883 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed | ?url?

  1. #include "std.h"
  2.  
  3. #include "emul.h"
  4. #include "vars.h"
  5. #include "draw.h"
  6. #include "dxrcopy.h"
  7.  
  8. // #define QUAD_BUFFER  // tests show that this variant is slower, even in noflic mode
  9.  
  10. void line32_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  11. {
  12.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  13.       unsigned char byte = *src;
  14.       unsigned *t1 = tab + src[1];
  15.       unsigned char byt1 = src[rb2_offs];
  16.       unsigned *t2 = tab + src[rb2_offs+1];
  17.       src += 2;
  18.  
  19.       *(unsigned*)(dst+x)    = t1[(byte << 1) & 0x100] +
  20.                                t2[(byt1 << 1) & 0x100];
  21.       *(unsigned*)(dst+x+4)  = t1[(byte << 2) & 0x100] +
  22.                                t2[(byt1 << 2) & 0x100];
  23.       *(unsigned*)(dst+x+8)  = t1[(byte << 3) & 0x100] +
  24.                                t2[(byt1 << 3) & 0x100];
  25.       *(unsigned*)(dst+x+12) = t1[(byte << 4) & 0x100] +
  26.                                t2[(byt1 << 4) & 0x100];
  27.       *(unsigned*)(dst+x+16) = t1[(byte << 5) & 0x100] +
  28.                                t2[(byt1 << 5) & 0x100];
  29.       *(unsigned*)(dst+x+20) = t1[(byte << 6) & 0x100] +
  30.                                t2[(byt1 << 6) & 0x100];
  31.       *(unsigned*)(dst+x+24) = t1[(byte << 7) & 0x100] +
  32.                                t2[(byt1 << 7) & 0x100];
  33.       *(unsigned*)(dst+x+28) = t1[(byte << 8) & 0x100] +
  34.                                t2[(byt1 << 8) & 0x100];
  35.    }
  36. }
  37.  
  38. static void line32d_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  39. {
  40.    for (unsigned x = 0; x < temp.scx*8; x += 64) {
  41.       unsigned char byte = *src;
  42.       unsigned *t1 = tab + src[1];
  43.       unsigned char byt1 = src[rb2_offs];
  44.       unsigned *t2 = tab + src[rb2_offs+1];
  45.       src += 2;
  46.  
  47.       *(unsigned*)(dst+x)    =
  48.       *(unsigned*)(dst+x+4)  =
  49.                                t1[(byte << 1) & 0x100] +
  50.                                t2[(byt1 << 1) & 0x100];
  51.       *(unsigned*)(dst+x+8)  =
  52.       *(unsigned*)(dst+x+12) =
  53.                                t1[(byte << 2) & 0x100] +
  54.                                t2[(byt1 << 2) & 0x100];
  55.       *(unsigned*)(dst+x+16)  =
  56.       *(unsigned*)(dst+x+20)  =
  57.                                t1[(byte << 3) & 0x100] +
  58.                                t2[(byt1 << 3) & 0x100];
  59.       *(unsigned*)(dst+x+24) =
  60.       *(unsigned*)(dst+x+28) =
  61.                                t1[(byte << 4) & 0x100] +
  62.                                t2[(byt1 << 4) & 0x100];
  63.       *(unsigned*)(dst+x+32) =
  64.       *(unsigned*)(dst+x+36) =
  65.                                t1[(byte << 5) & 0x100] +
  66.                                t2[(byt1 << 5) & 0x100];
  67.       *(unsigned*)(dst+x+40) =
  68.       *(unsigned*)(dst+x+44) =
  69.                                t1[(byte << 6) & 0x100] +
  70.                                t2[(byt1 << 6) & 0x100];
  71.       *(unsigned*)(dst+x+48) =
  72.       *(unsigned*)(dst+x+52) =
  73.                                t1[(byte << 7) & 0x100] +
  74.                                t2[(byt1 << 7) & 0x100];
  75.       *(unsigned*)(dst+x+56) =
  76.       *(unsigned*)(dst+x+60) =
  77.                                t1[(byte << 8) & 0x100] +
  78.                                t2[(byt1 << 8) & 0x100];
  79.    }
  80. }
  81.  
  82. static void line32t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  83. {
  84.    u32 *d = (u32 *)dst;
  85.    for (unsigned x = 0,  i = 0; x < temp.scx*3; x += 24, i += 2)
  86.    {
  87.       u8 byte1 = src[i+0];
  88.       unsigned *t1 = tab + src[i+1];
  89.       u8 byte2 = src[i+rb2_offs];
  90.       unsigned *t2 = tab + src[i+rb2_offs+1];
  91.  
  92.       u32 paper1 = t1[0];
  93.       u32 ink1 = t1[0x100];
  94.  
  95.       u32 paper2 = t2[0];
  96.       u32 ink2 = t2[0x100];
  97.  
  98.       d[x+0]  =
  99.       d[x+1]  =
  100.       d[x+2]  = ((byte1 & 0x80) ? ink1 : paper1) + ((byte2 & 0x80) ? ink2 : paper2);
  101.  
  102.       d[x+3]  =
  103.       d[x+4]  =
  104.       d[x+5]  = ((byte1 & 0x40) ? ink1 : paper1) + ((byte2 & 0x40) ? ink2 : paper2);
  105.  
  106.       d[x+6]  =
  107.       d[x+7]  =
  108.       d[x+8]  = ((byte1 & 0x20) ? ink1 : paper1) + ((byte2 & 0x20) ? ink2 : paper2);
  109.  
  110.       d[x+9]  =
  111.       d[x+10] =
  112.       d[x+11] = ((byte1 & 0x10) ? ink1 : paper1) + ((byte2 & 0x10) ? ink2 : paper2);
  113.  
  114.       d[x+12] =
  115.       d[x+13] =
  116.       d[x+14] = ((byte1 & 0x08) ? ink1 : paper1) + ((byte2 & 0x08) ? ink2 : paper2);
  117.  
  118.       d[x+15] =
  119.       d[x+16] =
  120.       d[x+17] = ((byte1 & 0x04) ? ink1 : paper1) + ((byte2 & 0x04) ? ink2 : paper2);
  121.  
  122.       d[x+18] =
  123.       d[x+19] =
  124.       d[x+20] = ((byte1 & 0x02) ? ink1 : paper1) + ((byte2 & 0x02) ? ink2 : paper2);
  125.  
  126.       d[x+21] =
  127.       d[x+22] =
  128.       d[x+23] = ((byte1 & 0x01) ? ink1 : paper1) + ((byte2 & 0x01) ? ink2 : paper2);
  129.    }
  130. }
  131.  
  132. static void line32q_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  133. {
  134.    for (unsigned x = 0; x < temp.scx*16; x += 128) {
  135.       unsigned char byte = *src;
  136.       unsigned *t1 = tab + src[1];
  137.       unsigned char byt1 = src[rb2_offs];
  138.       unsigned *t2 = tab + src[rb2_offs+1];
  139.       src += 2;
  140.  
  141.       *(unsigned*)(dst+x+0x00) =
  142.       *(unsigned*)(dst+x+0x04) =
  143.       *(unsigned*)(dst+x+0x08) =
  144.       *(unsigned*)(dst+x+0x0C) =
  145.                                t1[(byte << 1) & 0x100] +
  146.                                t2[(byt1 << 1) & 0x100];
  147.       *(unsigned*)(dst+x+0x10) =
  148.       *(unsigned*)(dst+x+0x14) =
  149.       *(unsigned*)(dst+x+0x18) =
  150.       *(unsigned*)(dst+x+0x1C) =
  151.                                t1[(byte << 2) & 0x100] +
  152.                                t2[(byt1 << 2) & 0x100];
  153.       *(unsigned*)(dst+x+0x20) =
  154.       *(unsigned*)(dst+x+0x24) =
  155.       *(unsigned*)(dst+x+0x28) =
  156.       *(unsigned*)(dst+x+0x2C) =
  157.                                t1[(byte << 3) & 0x100] +
  158.                                t2[(byt1 << 3) & 0x100];
  159.       *(unsigned*)(dst+x+0x30) =
  160.       *(unsigned*)(dst+x+0x34) =
  161.       *(unsigned*)(dst+x+0x38) =
  162.       *(unsigned*)(dst+x+0x3C) =
  163.                                t1[(byte << 4) & 0x100] +
  164.                                t2[(byt1 << 4) & 0x100];
  165.       *(unsigned*)(dst+x+0x40) =
  166.       *(unsigned*)(dst+x+0x44) =
  167.       *(unsigned*)(dst+x+0x48) =
  168.       *(unsigned*)(dst+x+0x4C) =
  169.                                t1[(byte << 5) & 0x100] +
  170.                                t2[(byt1 << 5) & 0x100];
  171.       *(unsigned*)(dst+x+0x50) =
  172.       *(unsigned*)(dst+x+0x54) =
  173.       *(unsigned*)(dst+x+0x58) =
  174.       *(unsigned*)(dst+x+0x5C) =
  175.                                t1[(byte << 6) & 0x100] +
  176.                                t2[(byt1 << 6) & 0x100];
  177.       *(unsigned*)(dst+x+0x60) =
  178.       *(unsigned*)(dst+x+0x64) =
  179.       *(unsigned*)(dst+x+0x68) =
  180.       *(unsigned*)(dst+x+0x6C) =
  181.                                t1[(byte << 7) & 0x100] +
  182.                                t2[(byt1 << 7) & 0x100];
  183.       *(unsigned*)(dst+x+0x70) =
  184.       *(unsigned*)(dst+x+0x74) =
  185.       *(unsigned*)(dst+x+0x78) =
  186.       *(unsigned*)(dst+x+0x7C) =
  187.                                t1[(byte << 8) & 0x100] +
  188.                                t2[(byt1 << 8) & 0x100];
  189.    }
  190. }
  191.  
  192. #ifdef MOD_SSE2
  193. void line32(unsigned char *dst, unsigned char *src, unsigned *tab)
  194. {
  195.    __m128i *d = (__m128i *)dst;
  196.    __m128i m1, m2;
  197.    m1 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
  198.    m2 = _mm_set_epi32(0x1, 0x2, 0x4, 0x8);
  199.  
  200.    for (unsigned x = 0,  i = 0; x < temp.scx / 4; x += 2,  i += 2)
  201.    {
  202.       unsigned byte = src[i];
  203.       unsigned attr = src[i+1];
  204.       unsigned ink = tab[attr + 0x100];
  205.       unsigned paper = tab[attr];
  206.  
  207.       __m128i b, b1, b2;
  208.       __m128i r1, r2;
  209.       __m128i iv, pv;
  210.       __m128i im1, pm1, im2, pm2;
  211.       __m128i vr1, vr2;
  212.  
  213.       b = _mm_set1_epi32(int(byte));
  214.       iv = _mm_set1_epi32(int(ink));
  215.       pv = _mm_set1_epi32(int(paper));
  216.  
  217.       b1 = _mm_and_si128(b, m1);
  218.       r1 = _mm_cmpeq_epi32(b1, m1);
  219.       im1 = _mm_and_si128(r1, iv);
  220.       pm1 = _mm_andnot_si128(r1, pv);
  221.       vr1 = _mm_or_si128(im1, pm1);
  222.       _mm_store_si128(&d[x], vr1);
  223.  
  224.       b2 = _mm_and_si128(b, m2);
  225.       r2 = _mm_cmpeq_epi32(b2, m2);
  226.       im2 = _mm_and_si128(r2, iv);
  227.       pm2 = _mm_andnot_si128(r2, pv);
  228.       vr2 = _mm_or_si128(im2, pm2);
  229.       _mm_store_si128(&d[x+1], vr2);
  230.    }
  231. }
  232. #else
  233. void line32(unsigned char *dst, unsigned char *src, unsigned *tab)
  234. {
  235.    unsigned *d = (unsigned *)dst;
  236.    for (unsigned x = 0,  i = 0; x < temp.scx; x += 8,  i += 2)
  237.    {
  238.       unsigned byte = src[i];
  239.       unsigned attr = src[i+1];
  240.       unsigned ink = tab[attr + 0x100];
  241.       unsigned paper = tab[attr];
  242.  
  243.       d[x]   = (byte & 0x80) ? ink : paper; // 7
  244.       d[x+1] = (byte & 0x40) ? ink : paper; // 6
  245.       d[x+2] = (byte & 0x20) ? ink : paper; // 5
  246.       d[x+3] = (byte & 0x10) ? ink : paper; // 4
  247.  
  248.       d[x+4] = (byte & 0x08) ? ink : paper; // 3
  249.       d[x+5] = (byte & 0x04) ? ink : paper; // 2
  250.       d[x+6] = (byte & 0x02) ? ink : paper; // 1
  251.       d[x+7] = (byte & 0x01) ? ink : paper; // 0
  252.    }
  253. }
  254. #endif
  255.  
  256. #ifdef MOD_SSE2
  257. void line32d(unsigned char *dst, unsigned char *src, unsigned *tab)
  258. {
  259.    __m128i *d = (__m128i *)dst;
  260.    __m128i m1, m2;
  261.    m1 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
  262.    m2 = _mm_set_epi32(0x1, 0x2, 0x4, 0x8);
  263.  
  264.    for (unsigned x = 0,  i = 0; x < temp.scx / 2; x += 4,  i += 2)
  265.    {
  266.       unsigned byte = src[i];
  267.       unsigned attr = src[i+1];
  268.       unsigned ink = tab[attr + 0x100];
  269.       unsigned paper = tab[attr];
  270.  
  271.       __m128i b, b1, b2;
  272.       __m128i r1, r2;
  273.       __m128i iv, pv;
  274.       __m128i im1, pm1, im2, pm2;
  275.       __m128i vr1, vr2;
  276.       __m128i l1, l2;
  277.       __m128i h1, h2;
  278.  
  279.       b = _mm_set1_epi32(int(byte));
  280.       iv = _mm_set1_epi32(int(ink));
  281.       pv = _mm_set1_epi32(int(paper));
  282.  
  283.       b1 = _mm_and_si128(b, m1);
  284.       r1 = _mm_cmpeq_epi32(b1, m1);
  285.       im1 = _mm_and_si128(r1, iv);
  286.       pm1 = _mm_andnot_si128(r1, pv);
  287.       vr1 = _mm_or_si128(im1, pm1);
  288.  
  289.       l1 = _mm_unpacklo_epi32(vr1, vr1);
  290.       _mm_store_si128(&d[x], l1);
  291.       h1 = _mm_unpackhi_epi32(vr1, vr1);
  292.       _mm_store_si128(&d[x+1], h1);
  293.  
  294.       b2 = _mm_and_si128(b, m2);
  295.       r2 = _mm_cmpeq_epi32(b2, m2);
  296.       im2 = _mm_and_si128(r2, iv);
  297.       pm2 = _mm_andnot_si128(r2, pv);
  298.       vr2 = _mm_or_si128(im2, pm2);
  299.  
  300.       l2 = _mm_unpacklo_epi32(vr2, vr2);
  301.       _mm_store_si128(&d[x+2], l2);
  302.       h2 = _mm_unpackhi_epi32(vr2, vr2);
  303.       _mm_store_si128(&d[x+3], h2);
  304.    }
  305. }
  306. #else
// Scalar build of line32d: 32 bits per pixel, every pixel doubled horizontally.
// Each (bitmap, attribute) pair yields eight pixels -> sixteen dwords. A pixel
// is tab[attr + 0x100] (ink) when its bitmap bit is set and tab[attr] (paper)
// otherwise, MSB first.
//   dst - output row, viewed as 32-bit pixels
//   src - source row of (bitmap, attribute) byte pairs
//   tab - lookup table indexed by attribute
void line32d(unsigned char *dst, unsigned char *src, unsigned *tab)
{
   unsigned *d = (unsigned *)dst;
   for (unsigned x = 0, i = 0; x < temp.scx * 2; x += 16, i+= 2)
   {
      // [vv] This order of writing lets icl generate cmovcc instead of jcc
      // (translated from the original Russian note; keep the statement shape as is).
      unsigned char byte = src[i];
      unsigned char attr = src[i+1];
      unsigned ink = tab[attr + 0x100];
      unsigned paper = tab[attr];

      d[x]    = d[x+1]  = (byte & 0x80) ? ink : paper; // 7
      d[x+2]  = d[x+3]  = (byte & 0x40) ? ink : paper; // 6
      d[x+4]  = d[x+5]  = (byte & 0x20) ? ink : paper; // 5
      d[x+6]  = d[x+7]  = (byte & 0x10) ? ink : paper; // 4
      d[x+8]  = d[x+9]  = (byte & 0x08) ? ink : paper; // 3
      d[x+10] = d[x+11] = (byte & 0x04) ? ink : paper; // 2
      d[x+12] = d[x+13] = (byte & 0x02) ? ink : paper; // 1
      d[x+14] = d[x+15] = (byte & 0x01) ? ink : paper; // 0
   }
}
  328. #endif
  329.  
  330. static void line32t(unsigned char *dst, const unsigned char *src, const unsigned *tab)
  331. {
  332.    unsigned *d = (unsigned *)dst;
  333.    for (unsigned x = 0, i = 0; x < temp.scx * 3; x += 3*8,  i += 2)
  334.    {
  335.       unsigned char byte = src[i];
  336.       unsigned attr = src[i + 1];
  337.       unsigned ink = tab[attr + 0x100];
  338.       unsigned paper = tab[attr];
  339.  
  340.       d[x]      = d[x + 1]  = d[x + 2]  = (byte & 0x80) ? ink : paper;
  341.       d[x + 3]  = d[x + 4]  = d[x + 5]  = (byte & 0x40) ? ink : paper;
  342.       d[x + 6]  = d[x + 7]  = d[x + 8]  = (byte & 0x20) ? ink : paper;
  343.       d[x + 9]  = d[x + 10] = d[x + 11] = (byte & 0x10) ? ink : paper;
  344.       d[x + 12] = d[x + 13] = d[x + 14] = (byte & 0x08) ? ink : paper;
  345.       d[x + 15] = d[x + 16] = d[x + 17] = (byte & 0x04) ? ink : paper;
  346.       d[x + 18] = d[x + 19] = d[x + 20] = (byte & 0x02) ? ink : paper;
  347.       d[x + 21] = d[x + 22] = d[x + 23] = (byte & 0x01) ? ink : paper;
  348.    }
  349. }
  350.  
  351. static void line32q(unsigned char *dst, unsigned char *src, unsigned *tab)
  352. {
  353.    for (unsigned x = 0; x < temp.scx*16; x += 128) {
  354.       unsigned char byte = *src++;
  355.       unsigned *t = tab + *src++;
  356.       *(unsigned*)(dst+x+0x00) =
  357.       *(unsigned*)(dst+x+0x04) =
  358.       *(unsigned*)(dst+x+0x08) =
  359.       *(unsigned*)(dst+x+0x0C) =
  360.                                t[(byte << 1) & 0x100];
  361.       *(unsigned*)(dst+x+0x10) =
  362.       *(unsigned*)(dst+x+0x14) =
  363.       *(unsigned*)(dst+x+0x18) =
  364.       *(unsigned*)(dst+x+0x1C) =
  365.                                t[(byte << 2) & 0x100];
  366.       *(unsigned*)(dst+x+0x20) =
  367.       *(unsigned*)(dst+x+0x24) =
  368.       *(unsigned*)(dst+x+0x28) =
  369.       *(unsigned*)(dst+x+0x2C) =
  370.                                t[(byte << 3) & 0x100];
  371.       *(unsigned*)(dst+x+0x30) =
  372.       *(unsigned*)(dst+x+0x34) =
  373.       *(unsigned*)(dst+x+0x38) =
  374.       *(unsigned*)(dst+x+0x3C) =
  375.                                t[(byte << 4) & 0x100];
  376.       *(unsigned*)(dst+x+0x40) =
  377.       *(unsigned*)(dst+x+0x44) =
  378.       *(unsigned*)(dst+x+0x48) =
  379.       *(unsigned*)(dst+x+0x4C) =
  380.                                t[(byte << 5) & 0x100];
  381.       *(unsigned*)(dst+x+0x50) =
  382.       *(unsigned*)(dst+x+0x54) =
  383.       *(unsigned*)(dst+x+0x58) =
  384.       *(unsigned*)(dst+x+0x5C) =
  385.                                t[(byte << 6) & 0x100];
  386.       *(unsigned*)(dst+x+0x60) =
  387.       *(unsigned*)(dst+x+0x64) =
  388.       *(unsigned*)(dst+x+0x68) =
  389.       *(unsigned*)(dst+x+0x6C) =
  390.                                t[(byte << 7) & 0x100];
  391.       *(unsigned*)(dst+x+0x70) =
  392.       *(unsigned*)(dst+x+0x74) =
  393.       *(unsigned*)(dst+x+0x78) =
  394.       *(unsigned*)(dst+x+0x7C) =
  395.                                t[(byte << 8) & 0x100];
  396.    }
  397. }
  398.  
  399. void line16_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  400. {
  401.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  402.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  403.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 6) & 0x3FC;
  404.       *(unsigned*)(dst+x)   = (tab[((s >> 6) & 3) + attr]) +
  405.                               (tab[((r >> 6) & 3) + atr2]);
  406.       *(unsigned*)(dst+x+4) = (tab[((s >> 4) & 3) + attr]) +
  407.                               (tab[((r >> 4) & 3) + atr2]);
  408.       *(unsigned*)(dst+x+8) = (tab[((s >> 2) & 3) + attr]) +
  409.                               (tab[((r >> 2) & 3) + atr2]);
  410.       *(unsigned*)(dst+x+12)= (tab[((s >> 0) & 3) + attr]) +
  411.                               (tab[((r >> 0) & 3) + atr2]);
  412.       attr = (s >> 22) & 0x3FC; atr2 = (r >> 22) & 0x3FC;
  413.       *(unsigned*)(dst+x+16)= (tab[((s >>22) & 3) + attr]) +
  414.                               (tab[((r >>22) & 3) + atr2]);
  415.       *(unsigned*)(dst+x+20)= (tab[((s >>20) & 3) + attr]) +
  416.                               (tab[((r >>20) & 3) + atr2]);
  417.       *(unsigned*)(dst+x+24)= (tab[((s >>18) & 3) + attr]) +
  418.                               (tab[((r >>18) & 3) + atr2]);
  419.       *(unsigned*)(dst+x+28)= (tab[((s >>16) & 3) + attr]) +
  420.                               (tab[((r >>16) & 3) + atr2]);
  421.       src += 4;
  422.    }
  423. }
  424.  
// The doubled 16-bit no-flicker renderer is the 32-bit no-flicker routine
// under another name.
// NOTE(review): presumably the table passed for this mode holds two identical
// 16-bit pixels per entry - confirm at the call sites.
#define line16d_nf line32_nf

// The quadrupled 16-bit renderers reuse the doubled 32-bit routines in the
// same way (same hedge as above applies).
#define line16q line32d
#define line16q_nf line32d_nf
  429.  
  430. static void line16t(unsigned char *dst, unsigned char *src, unsigned *tab)
  431. {
  432.    u16 *d = (u16 *)dst;
  433.    for (unsigned x = 0; x < temp.scx*3; x += 24)
  434.    {
  435.       unsigned char byte = *src++;
  436.       unsigned *t = tab + *src++;
  437.       u16 paper_yu = u16(t[0]);
  438.       u16 paper_yv = u16(t[0] >> 16);
  439.       u16 ink_yu = u16(t[0x100]);
  440.       u16 ink_yv = u16(t[0x100] >> 16);
  441.  
  442.       d[x+0]  = (byte & 0x80) ? ink_yu : paper_yu;
  443.       d[x+1]  = (byte & 0x80) ? ink_yv : paper_yv;
  444.       d[x+2]  = (byte & 0x80) ? ink_yu : paper_yu;
  445.  
  446.       d[x+3]  = (byte & 0x40) ? ink_yv : paper_yv;
  447.       d[x+4]  = (byte & 0x40) ? ink_yu : paper_yu;
  448.       d[x+5]  = (byte & 0x40) ? ink_yv : paper_yv;
  449.  
  450.       d[x+6]  = (byte & 0x20) ? ink_yu : paper_yu;
  451.       d[x+7]  = (byte & 0x20) ? ink_yv : paper_yv;
  452.       d[x+8]  = (byte & 0x20) ? ink_yu : paper_yu;
  453.  
  454.       d[x+9]  = (byte & 0x10) ? ink_yv : paper_yv;
  455.       d[x+10] = (byte & 0x10) ? ink_yu : paper_yu;
  456.       d[x+11] = (byte & 0x10) ? ink_yv : paper_yv;
  457.  
  458.       d[x+12] = (byte & 0x08) ? ink_yu : paper_yu;
  459.       d[x+13] = (byte & 0x08) ? ink_yv : paper_yv;
  460.       d[x+14] = (byte & 0x08) ? ink_yu : paper_yu;
  461.  
  462.       d[x+15] = (byte & 0x04) ? ink_yv : paper_yv;
  463.       d[x+16] = (byte & 0x04) ? ink_yu : paper_yu;
  464.       d[x+17] = (byte & 0x04) ? ink_yv : paper_yv;
  465.  
  466.       d[x+18] = (byte & 0x02) ? ink_yu : paper_yu;
  467.       d[x+19] = (byte & 0x02) ? ink_yv : paper_yv;
  468.       d[x+20] = (byte & 0x02) ? ink_yu : paper_yu;
  469.  
  470.       d[x+21] = (byte & 0x01) ? ink_yv : paper_yv;
  471.       d[x+22] = (byte & 0x01) ? ink_yu : paper_yu;
  472.       d[x+23] = (byte & 0x01) ? ink_yv : paper_yv;
  473.    }
  474. }
  475.  
  476. static void line16t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  477. {
  478.    u16 *d = (u16 *)dst;
  479.    for (unsigned x = 0,  i = 0; x < temp.scx*3; x += 24, i += 2)
  480.    {
  481.       u8 byte1 = src[i+0];
  482.       unsigned *t1 = tab + src[i+1];
  483.       u8 byte2 = src[i+rb2_offs];
  484.       unsigned *t2 = tab + src[i+rb2_offs+1];
  485.  
  486.       u16 paper_yu1 = u16(t1[0]);
  487.       u16 paper_yv1 = u16(t1[0] >> 16);
  488.       u16 ink_yu1 = u16(t1[0x100]);
  489.       u16 ink_yv1 = u16(t1[0x100] >> 16);
  490.  
  491.       u16 paper_yu2 = u16(t2[0]);
  492.       u16 paper_yv2 = u16(t2[0] >> 16);
  493.       u16 ink_yu2 = u16(t2[0x100]);
  494.       u16 ink_yv2 = u16(t2[0x100] >> 16);
  495.  
  496.       d[x+0]  = ((byte1 & 0x80) ? ink_yu1 : paper_yu1) + ((byte2 & 0x80) ? ink_yu2 : paper_yu2);
  497.       d[x+1]  = ((byte1 & 0x80) ? ink_yv1 : paper_yv1) + ((byte2 & 0x80) ? ink_yv2 : paper_yv2);
  498.       d[x+2]  = ((byte1 & 0x80) ? ink_yu1 : paper_yu1) + ((byte2 & 0x80) ? ink_yu2 : paper_yu2);
  499.  
  500.       d[x+3]  = ((byte1 & 0x40) ? ink_yv1 : paper_yv1) + ((byte2 & 0x40) ? ink_yv2 : paper_yv2);
  501.       d[x+4]  = ((byte1 & 0x40) ? ink_yu1 : paper_yu1) + ((byte2 & 0x40) ? ink_yu2 : paper_yu2);
  502.       d[x+5]  = ((byte1 & 0x40) ? ink_yv1 : paper_yv1) + ((byte2 & 0x40) ? ink_yv2 : paper_yv2);
  503.  
  504.       d[x+6]  = ((byte1 & 0x20) ? ink_yu1 : paper_yu1) + ((byte2 & 0x20) ? ink_yu2 : paper_yu2);
  505.       d[x+7]  = ((byte1 & 0x20) ? ink_yv1 : paper_yv1) + ((byte2 & 0x20) ? ink_yv2 : paper_yv2);
  506.       d[x+8]  = ((byte1 & 0x20) ? ink_yu1 : paper_yu1) + ((byte2 & 0x20) ? ink_yu2 : paper_yu2);
  507.  
  508.       d[x+9]  = ((byte1 & 0x10) ? ink_yv1 : paper_yv1) + ((byte2 & 0x10) ? ink_yv2 : paper_yv2);
  509.       d[x+10] = ((byte1 & 0x10) ? ink_yu1 : paper_yu1) + ((byte2 & 0x10) ? ink_yu2 : paper_yu2);
  510.       d[x+11] = ((byte1 & 0x10) ? ink_yv1 : paper_yv1) + ((byte2 & 0x10) ? ink_yv2 : paper_yv2);
  511.  
  512.       d[x+12] = ((byte1 & 0x08) ? ink_yu1 : paper_yu1) + ((byte2 & 0x08) ? ink_yu2 : paper_yu2);
  513.       d[x+13] = ((byte1 & 0x08) ? ink_yv1 : paper_yv1) + ((byte2 & 0x08) ? ink_yv2 : paper_yv2);
  514.       d[x+14] = ((byte1 & 0x08) ? ink_yu1 : paper_yu1) + ((byte2 & 0x08) ? ink_yu2 : paper_yu2);
  515.  
  516.       d[x+15] = ((byte1 & 0x04) ? ink_yv1 : paper_yv1) + ((byte2 & 0x04) ? ink_yv2 : paper_yv2);
  517.       d[x+16] = ((byte1 & 0x04) ? ink_yu1 : paper_yu1) + ((byte2 & 0x04) ? ink_yu2 : paper_yu2);
  518.       d[x+17] = ((byte1 & 0x04) ? ink_yv1 : paper_yv1) + ((byte2 & 0x04) ? ink_yv2 : paper_yv2);
  519.  
  520.       d[x+18] = ((byte1 & 0x02) ? ink_yu1 : paper_yu1) + ((byte2 & 0x02) ? ink_yu2 : paper_yu2);
  521.       d[x+19] = ((byte1 & 0x02) ? ink_yv1 : paper_yv1) + ((byte2 & 0x02) ? ink_yv2 : paper_yv2);
  522.       d[x+20] = ((byte1 & 0x02) ? ink_yu1 : paper_yu1) + ((byte2 & 0x02) ? ink_yu2 : paper_yu2);
  523.  
  524.       d[x+21] = ((byte1 & 0x01) ? ink_yv1 : paper_yv1) + ((byte2 & 0x01) ? ink_yv2 : paper_yv2);
  525.       d[x+22] = ((byte1 & 0x01) ? ink_yu1 : paper_yu1) + ((byte2 & 0x01) ? ink_yu2 : paper_yu2);
  526.       d[x+23] = ((byte1 & 0x01) ? ink_yv1 : paper_yv1) + ((byte2 & 0x01) ? ink_yv2 : paper_yv2);
  527.    }
  528. }
  529.  
  530. void line8(unsigned char *dst, unsigned char *src, unsigned *tab)
  531. {
  532.    for (unsigned x = 0; x < temp.scx; x += 32) {
  533.       unsigned src0 = *(unsigned*)src, attr = (src0 >> 4) & 0xFF0;
  534.       *(unsigned*)(dst+x)    = tab[((src0 >> 4)  & 0xF) + attr];
  535.       *(unsigned*)(dst+x+4)  = tab[((src0 >> 0)  & 0xF) + attr];
  536.       attr = (src0 >> 20) & 0xFF0;
  537.       *(unsigned*)(dst+x+8)  = tab[((src0 >> 20) & 0xF) + attr];
  538.       *(unsigned*)(dst+x+12) = tab[((src0 >> 16) & 0xF) + attr];
  539.       src0 = *(unsigned*)(src + 4);
  540.       attr = (src0 >> 4) & 0xFF0;
  541.       *(unsigned*)(dst+x+16) = tab[((src0 >> 4)  & 0xF) + attr];
  542.       *(unsigned*)(dst+x+20) = tab[((src0 >> 0)  & 0xF) + attr];
  543.       attr = (src0 >> 20) & 0xFF0;
  544.       *(unsigned*)(dst+x+24) = tab[((src0 >> 20) & 0xF) + attr];
  545.       *(unsigned*)(dst+x+28) = tab[((src0 >> 16) & 0xF) + attr];
  546.       src += 8;
  547.    }
  548. }
  549.  
  550. void line8_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  551. {
  552.    for (unsigned x = 0; x < temp.scx; x += 32) {
  553.       unsigned s = *(unsigned*)src, attr = (s >> 4) & 0xFF0;
  554.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 4) & 0xFF0;
  555.       *(unsigned*)(dst+x)    = (tab[((s >> 4)  & 0xF) + attr] & 0x0F0F0F0F) +
  556.                                (tab[((r >> 4)  & 0xF) + atr2] & 0xF0F0F0F0);
  557.       *(unsigned*)(dst+x+4)  = (tab[((s >> 0)  & 0xF) + attr] & 0x0F0F0F0F) +
  558.                                (tab[((r >> 0)  & 0xF) + atr2] & 0xF0F0F0F0);
  559.       attr = (s >> 20) & 0xFF0; atr2 = (r >> 20) & 0xFF0;
  560.       *(unsigned*)(dst+x+8)  = (tab[((s >> 20) & 0xF) + attr] & 0x0F0F0F0F) +
  561.                                (tab[((r >> 20) & 0xF) + atr2] & 0xF0F0F0F0);
  562.       *(unsigned*)(dst+x+12) = (tab[((s >> 16) & 0xF) + attr] & 0x0F0F0F0F) +
  563.                                (tab[((r >> 16) & 0xF) + atr2] & 0xF0F0F0F0);
  564.       s = *(unsigned*)(src + 4);
  565.       attr = (s >> 4) & 0xFF0;
  566.       r = *(unsigned*)(src + rb2_offs + 4);
  567.       atr2 = (r >> 4) & 0xFF0;
  568.       *(unsigned*)(dst+x+16) = (tab[((s >> 4)  & 0xF) + attr] & 0x0F0F0F0F) +
  569.                                (tab[((r >> 4)  & 0xF) + atr2] & 0xF0F0F0F0);
  570.       *(unsigned*)(dst+x+20) = (tab[((s >> 0)  & 0xF) + attr] & 0x0F0F0F0F) +
  571.                                (tab[((r >> 0)  & 0xF) + atr2] & 0xF0F0F0F0);
  572.       attr = (s >> 20) & 0xFF0; atr2 = (r >> 20) & 0xFF0;
  573.       *(unsigned*)(dst+x+24) = (tab[((s >> 20) & 0xF) + attr] & 0x0F0F0F0F) +
  574.                                (tab[((r >> 20) & 0xF) + atr2] & 0xF0F0F0F0);
  575.       *(unsigned*)(dst+x+28) = (tab[((s >> 16) & 0xF) + attr] & 0x0F0F0F0F) +
  576.                                (tab[((r >> 16) & 0xF) + atr2] & 0xF0F0F0F0);
  577.       src += 8;
  578.    }
  579. }
  580.  
  581. void line8d(unsigned char *dst, unsigned char *src, unsigned *tab)
  582. {
  583.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  584.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  585.       *(unsigned*)(dst+x)   = tab[((s >> 6) & 3) + attr];
  586.       *(unsigned*)(dst+x+4) = tab[((s >> 4) & 3) + attr];
  587.       *(unsigned*)(dst+x+8) = tab[((s >> 2) & 3) + attr];
  588.       *(unsigned*)(dst+x+12)= tab[((s >> 0) & 3) + attr];
  589.       attr = (s >> 22) & 0x3FC;
  590.       *(unsigned*)(dst+x+16)= tab[((s >>22) & 3) + attr];
  591.       *(unsigned*)(dst+x+20)= tab[((s >>20) & 3) + attr];
  592.       *(unsigned*)(dst+x+24)= tab[((s >>18) & 3) + attr];
  593.       *(unsigned*)(dst+x+28)= tab[((s >>16) & 3) + attr];
  594.       src += 4;
  595.    }
  596. }
  597.  
  598.  
  599. void line8t(unsigned char *dst, unsigned char *src, unsigned *tab)
  600. {
  601.    for (unsigned x = 0; x < temp.scx*3; x += 24)
  602.    {
  603.       unsigned char byte = *src++;
  604.       unsigned *t = tab + *src++;
  605.       dst[x+0]  = dst[x+1]  = dst[x+2]  = u8(t[(byte << 1) & 0x100]);
  606.       dst[x+3]  = dst[x+4]  = dst[x+5]  = u8(t[(byte << 2) & 0x100]);
  607.       dst[x+6]  = dst[x+7]  = dst[x+8]  = u8(t[(byte << 3) & 0x100]);
  608.       dst[x+9]  = dst[x+10] = dst[x+11] = u8(t[(byte << 4) & 0x100]);
  609.       dst[x+12] = dst[x+13] = dst[x+14] = u8(t[(byte << 5) & 0x100]);
  610.       dst[x+15] = dst[x+16] = dst[x+17] = u8(t[(byte << 6) & 0x100]);
  611.       dst[x+18] = dst[x+19] = dst[x+20] = u8(t[(byte << 7) & 0x100]);
  612.       dst[x+21] = dst[x+22] = dst[x+23] = u8(t[(byte << 8) & 0x100]);
  613.    }
  614. }
  615.  
  616. static void line8q(unsigned char *dst, unsigned char *src, unsigned *tab)
  617. {
  618.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  619.       unsigned char byte = *src++;
  620.       unsigned *t = tab + *src++;
  621.       *(unsigned*)(dst+x+0x00) = t[(byte << 1) & 0x100];
  622.       *(unsigned*)(dst+x+0x04) = t[(byte << 2) & 0x100];
  623.       *(unsigned*)(dst+x+0x08) = t[(byte << 3) & 0x100];
  624.       *(unsigned*)(dst+x+0x0C) = t[(byte << 4) & 0x100];
  625.       *(unsigned*)(dst+x+0x10) = t[(byte << 5) & 0x100];
  626.       *(unsigned*)(dst+x+0x14) = t[(byte << 6) & 0x100];
  627.       *(unsigned*)(dst+x+0x18) = t[(byte << 7) & 0x100];
  628.       *(unsigned*)(dst+x+0x1C) = t[(byte << 8) & 0x100];
  629.    }
  630. }
  631.  
  632. static void line8d_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  633. {
  634.    for (unsigned x = 0; x < temp.scx*2; x += 32) {
  635.       unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
  636.       unsigned r = *(unsigned*)(src + rb2_offs), atr2 = (r >> 6) & 0x3FC;
  637.       *(unsigned*)(dst+x)   = (tab[((s >> 6) & 3) + attr] & 0x0F0F0F0F) +
  638.                               (tab[((r >> 6) & 3) + atr2] & 0xF0F0F0F0);
  639.       *(unsigned*)(dst+x+4) = (tab[((s >> 4) & 3) + attr] & 0x0F0F0F0F) +
  640.                               (tab[((r >> 4) & 3) + atr2] & 0xF0F0F0F0);
  641.       *(unsigned*)(dst+x+8) = (tab[((s >> 2) & 3) + attr] & 0x0F0F0F0F) +
  642.                               (tab[((r >> 2) & 3) + atr2] & 0xF0F0F0F0);
  643.       *(unsigned*)(dst+x+12)= (tab[((s >> 0) & 3) + attr] & 0x0F0F0F0F) +
  644.                               (tab[((r >> 0) & 3) + atr2] & 0xF0F0F0F0);
  645.       attr = (s >> 22) & 0x3FC; atr2 = (r >> 22) & 0x3FC;
  646.       *(unsigned*)(dst+x+16)= (tab[((s >>22) & 3) + attr] & 0x0F0F0F0F) +
  647.                               (tab[((r >>22) & 3) + atr2] & 0xF0F0F0F0);
  648.       *(unsigned*)(dst+x+20)= (tab[((s >>20) & 3) + attr] & 0x0F0F0F0F) +
  649.                               (tab[((r >>20) & 3) + atr2] & 0xF0F0F0F0);
  650.       *(unsigned*)(dst+x+24)= (tab[((s >>18) & 3) + attr] & 0x0F0F0F0F) +
  651.                               (tab[((r >>18) & 3) + atr2] & 0xF0F0F0F0);
  652.       *(unsigned*)(dst+x+28)= (tab[((s >>16) & 3) + attr] & 0x0F0F0F0F) +
  653.                               (tab[((r >>16) & 3) + atr2] & 0xF0F0F0F0);
  654.       src += 4;
  655.    }
  656. }
  657.  
  658. static void line8t_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  659. {
  660.    for (unsigned x = 0, i = 0; x < temp.scx*3; x += 24, i += 2)
  661.    {
  662.       u32 byte1 = src[i+0];
  663.       u32 byte2 = src[i+rb2_offs+0];
  664.       unsigned *t1 = tab + src[i+1];
  665.       unsigned *t2 = tab + src[i+rb2_offs+1];
  666.       u8 ink1 = u8(t1[0x100] & 0x0F);
  667.       u8 ink2 = u8(t2[0x100] & 0xF0);
  668.       u8 paper1 = u8(t1[0] & 0x0F);
  669.       u8 paper2 = u8(t2[0] & 0xF0);
  670.  
  671.       dst[x+0]  = dst[x+1]  = dst[x+2]  = ((byte1 & 0x80) ? ink1 : paper1) + ((byte2 & 0x80) ? ink2 : paper2);
  672.       dst[x+3]  = dst[x+4]  = dst[x+5]  = ((byte1 & 0x40) ? ink1 : paper1) + ((byte2 & 0x40) ? ink2 : paper2);
  673.       dst[x+6]  = dst[x+7]  = dst[x+8]  = ((byte1 & 0x20) ? ink1 : paper1) + ((byte2 & 0x20) ? ink2 : paper2);
  674.       dst[x+9]  = dst[x+10] = dst[x+11] = ((byte1 & 0x10) ? ink1 : paper1) + ((byte2 & 0x10) ? ink2 : paper2);
  675.       dst[x+12] = dst[x+13] = dst[x+14] = ((byte1 & 0x08) ? ink1 : paper1) + ((byte2 & 0x08) ? ink2 : paper2);
  676.       dst[x+15] = dst[x+16] = dst[x+17] = ((byte1 & 0x04) ? ink1 : paper1) + ((byte2 & 0x04) ? ink2 : paper2);
  677.       dst[x+18] = dst[x+19] = dst[x+20] = ((byte1 & 0x02) ? ink1 : paper1) + ((byte2 & 0x02) ? ink2 : paper2);
  678.       dst[x+21] = dst[x+22] = dst[x+23] = ((byte1 & 0x01) ? ink1 : paper1) + ((byte2 & 0x01) ? ink2 : paper2);
  679.    }
  680. }
  681.  
  682. static void line8q_nf(unsigned char *dst, unsigned char *src, unsigned *tab)
  683. {
  684.    for (unsigned x = 0; x < temp.scx*4; x += 32) {
  685.       unsigned char byte1 = src[0], byte2 = src[rb2_offs+0];
  686.       unsigned *t1 = tab + src[1], *t2 = tab + src[rb2_offs+1];
  687.       src += 2;
  688.  
  689.       *(unsigned*)(dst+x+0x00) = (t1[(byte1 << 1) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 1) & 0x100] & 0xF0F0F0F0);
  690.       *(unsigned*)(dst+x+0x04) = (t1[(byte1 << 2) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 2) & 0x100] & 0xF0F0F0F0);
  691.       *(unsigned*)(dst+x+0x08) = (t1[(byte1 << 3) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 3) & 0x100] & 0xF0F0F0F0);
  692.       *(unsigned*)(dst+x+0x0C) = (t1[(byte1 << 4) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 4) & 0x100] & 0xF0F0F0F0);
  693.       *(unsigned*)(dst+x+0x10) = (t1[(byte1 << 5) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 5) & 0x100] & 0xF0F0F0F0);
  694.       *(unsigned*)(dst+x+0x14) = (t1[(byte1 << 6) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 6) & 0x100] & 0xF0F0F0F0);
  695.       *(unsigned*)(dst+x+0x18) = (t1[(byte1 << 7) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 7) & 0x100] & 0xF0F0F0F0);
  696.       *(unsigned*)(dst+x+0x1C) = (t1[(byte1 << 8) & 0x100] & 0x0F0F0F0F) + (t2[(byte2 << 8) & 0x100] & 0xF0F0F0F0);
  697.    }
  698. }
  699.  
  700.  
  701. void rend_copy32_nf(unsigned char *dst, unsigned pitch)
  702. {
  703.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  704.    for (unsigned y = 0; y < temp.scy; y++) {
  705.       line32_nf(dst, src, t.sctab32_nf[0]);
  706.       dst += pitch; src += delta;
  707.    }
  708. }
  709.  
  710. void rend_copy32(unsigned char *dst, unsigned pitch)
  711. {
  712.    unsigned char *src = rbuf;
  713.    unsigned delta = temp.scx / 4;
  714.    for (unsigned y = 0; y < temp.scy; y++)
  715.    {
  716.       line32(dst, src, t.sctab32[0]);
  717.       dst += pitch;
  718.       src += delta;
  719.    }
  720. }
  721.  
  722. void rend_copy32d1_nf(unsigned char *dst, unsigned pitch)
  723. {
  724.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  725.    for (unsigned y = 0; y < temp.scy; y++) {
  726.       line32d_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  727.       src += delta;
  728.    }
  729. }
  730.  
  731. void rend_copy32d_nf(unsigned char *dst, unsigned pitch)
  732. {
  733.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  734.    if (conf.alt_nf) {
  735.       int offset = rb2_offs;
  736.       if(comp.frame_counter & 1)
  737.       {
  738.           src += rb2_offs;
  739.           offset = -offset;
  740.       }
  741.       for (unsigned y = 0; y < temp.scy; y++) {
  742.          line32d(dst, src, t.sctab32[0]); dst += pitch;
  743.          line32d(dst, src+offset, t.sctab32[0]); dst += pitch;
  744.          src += delta;
  745.       }
  746.    } else {
  747.       for (unsigned y = 0; y < temp.scy; y++) {
  748.          line32d_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  749.          line32d_nf(dst, src, t.sctab32_nf[1]); dst += pitch;
  750.          src += delta;
  751.       }
  752.    }
  753. }
  754.  
  755. void rend_copy32t_nf(unsigned char *dst, unsigned pitch)
  756. {
  757.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  758.    for (unsigned y = 0; y < temp.scy; y++) {
  759.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  760.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  761.       line32t_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  762.       src += delta;
  763.    }
  764. }
  765.  
  766. void rend_copy32q_nf(unsigned char *dst, unsigned pitch)
  767. {
  768.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  769.    for (unsigned y = 0; y < temp.scy; y++) {
  770. #ifdef QUAD_BUFFER
  771.       unsigned char buffer[MAX_WIDTH*4*sizeof(DWORD)];
  772.       line32q_nf(buffer, src, t.sctab32_nf[0]);
  773.       for (int i = 0; i < 4; i++) {
  774.          memcpy(dst, buffer, temp.scx*16);
  775.          dst += pitch;
  776.       }
  777. #else
  778.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  779.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  780.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  781.       line32q_nf(dst, src, t.sctab32_nf[0]); dst += pitch;
  782. #endif
  783.       src += delta;
  784.    }
  785. }
  786.  
  787. void rend_copy32d1(unsigned char *dst, unsigned pitch)
  788. {
  789.    unsigned char *src = rbuf;
  790.    unsigned delta = temp.scx/4;
  791.    for (unsigned y = 0; y < temp.scy; y++)
  792.    {
  793.       line32d(dst, src, t.sctab32[0]); dst += pitch;
  794.       src += delta;
  795.    }
  796. }
  797.  
  798. void rend_copy32d(unsigned char *dst, unsigned pitch)
  799. {
  800.    unsigned char *src = rbuf;
  801.    unsigned delta = temp.scx / 4;
  802.    for (unsigned y = 0; y < temp.scy; y++)
  803.    {
  804.       line32d(dst, src, t.sctab32[0]); dst += pitch; // ╫хЄэ√х ёЄЁюъш
  805.       line32d(dst, src, t.sctab32[1]); dst += pitch; // ═хўхЄэ√х ёЄЁюъш
  806.       src += delta;
  807.    }
  808. }
  809.  
  810. void rend_copy32t(unsigned char *dst, unsigned pitch)
  811. {
  812.    unsigned char *src = rbuf;
  813.    unsigned delta = temp.scx / 4;
  814.    for (unsigned y = 0; y < temp.scy; y++)
  815.    {
  816.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  817.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  818.       line32t(dst, src, t.sctab32[0]); dst += pitch;
  819.       src += delta;
  820.    }
  821. }
  822.  
  823. void rend_copy32q(unsigned char *dst, unsigned pitch)
  824. {
  825.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  826.    for (unsigned y = 0; y < temp.scy; y++) {
  827. #ifdef QUAD_BUFFER
  828.       unsigned char buffer[MAX_WIDTH*4*sizeof(DWORD)];
  829.       line32q(buffer, src, t.sctab32[0]);
  830.       for (int i = 0; i < 4; i++) {
  831.          memcpy(dst, buffer, temp.scx*16);
  832.          dst += pitch;
  833.       }
  834. #else
  835.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  836.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  837.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  838.       line32q(dst, src, t.sctab32[0]); dst += pitch;
  839. #endif
  840.       src += delta;
  841.    }
  842. }
  843.  
  844. void rend_copy16(unsigned char *dst, unsigned pitch)
  845. {
  846.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  847.    for (unsigned y = 0; y < temp.scy; y++) {
  848.       line16(dst, src, t.sctab16[0]);
  849.       dst += pitch;
  850.       src += delta;
  851.    }
  852. }
  853.  
  854. void rend_copy16_nf(unsigned char *dst, unsigned pitch)
  855. {
  856.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  857.    for (unsigned y = 0; y < temp.scy; y++) {
  858.       line16_nf(dst, src, t.sctab16_nf[0]);
  859.       dst += pitch;
  860.       src += delta;
  861.    }
  862. }
  863.  
  864. void rend_copy16d1(unsigned char *dst, unsigned pitch)
  865. {
  866.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  867.    for (unsigned y = 0; y < temp.scy; y++) {
  868.       line16d(dst, src, t.sctab16d[0]); dst += pitch;
  869.       src += delta;
  870.    }
  871. }
  872.  
  873. void rend_copy16d(unsigned char *dst, unsigned pitch)
  874. {
  875.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  876.    for (unsigned y = 0; y < temp.scy; y++) {
  877.       line16d(dst, src, t.sctab16d[0]); dst += pitch;
  878.       line16d(dst, src, t.sctab16d[1]); dst += pitch;
  879.       src += delta;
  880.    }
  881. }
  882.  
  883. void rend_copy16t(unsigned char *dst, unsigned pitch)
  884. {
  885.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  886.    for (unsigned y = 0; y < temp.scy; y++) {
  887.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  888.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  889.       line16t(dst, src, t.sctab16d[0]); dst += pitch;
  890.       src += delta;
  891.    }
  892. }
  893.  
  894. void rend_copy16q(unsigned char *dst, unsigned pitch)
  895. {
  896.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  897.    for (unsigned y = 0; y < temp.scy; y++) {
  898.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  899.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  900.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  901.       line16q(dst, src, t.sctab16d[0]); dst += pitch;
  902.       src += delta;
  903.    }
  904. }
  905.  
  906. void rend_copy16d1_nf(unsigned char *dst, unsigned pitch)
  907. {
  908.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  909.    for (unsigned y = 0; y < temp.scy; y++) {
  910.       line16d_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  911.       src += delta;
  912.    }
  913. }
  914.  
  915. void rend_copy16d_nf(unsigned char *dst, unsigned pitch)
  916. {
  917.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  918.    if (conf.alt_nf) {
  919.       int offset = rb2_offs;
  920.       if(comp.frame_counter & 1)
  921.       {
  922.           src += rb2_offs;
  923.           offset = -offset;
  924.       }
  925.       for (unsigned y = 0; y < temp.scy; y++) {
  926.          line16d(dst, src, t.sctab16d[0]); dst += pitch;
  927.          line16d(dst, src+offset, t.sctab16d[0]); dst += pitch;
  928.          src += delta;
  929.       }
  930.    } else {
  931.       unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  932.       for (unsigned y = 0; y < temp.scy; y++) {
  933.          line16d_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  934.          line16d_nf(dst, src, t.sctab16d_nf[1]); dst += pitch;
  935.          src += delta;
  936.       }
  937.    }
  938. }
  939.  
  940. void rend_copy16t_nf(unsigned char *dst, unsigned pitch)
  941. {
  942.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  943.    for (unsigned y = 0; y < temp.scy; y++) {
  944.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  945.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  946.       line16t_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  947.       src += delta;
  948.    }
  949. }
  950.  
  951. void rend_copy16q_nf(unsigned char *dst, unsigned pitch)
  952. {
  953.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  954.    for (unsigned y = 0; y < temp.scy; y++) {
  955.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  956.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  957.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  958.       line16q_nf(dst, src, t.sctab16d_nf[0]); dst += pitch;
  959.       src += delta;
  960.    }
  961. }
  962.  
  963. void __fastcall rend_copy8(unsigned char *dst, unsigned pitch)
  964. {
  965.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  966.    for (unsigned y = 0; y < temp.scy; y++) {
  967.       line8(dst, src, t.sctab8[0]);
  968.       dst += pitch;
  969.       src += delta;
  970.    }
  971. }
  972.  
  973. void __fastcall rend_copy8_nf(unsigned char *dst, unsigned pitch)
  974. {
  975.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  976.    for (unsigned y = 0; y < temp.scy; y++) {
  977.       line8_nf(dst, src, t.sctab8[0]);
  978.       dst += pitch;
  979.       src += delta;
  980.    }
  981. }
  982.  
  983. void rend_copy8d1(unsigned char *dst, unsigned pitch)
  984. {
  985.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  986.    for (unsigned y = 0; y < temp.scy; y++) {
  987.       line8d(dst, src, t.sctab8d[0]); dst += pitch;
  988.       src += delta;
  989.    }
  990. }
  991.  
  992. void rend_copy8d(unsigned char *dst, unsigned pitch)
  993. {
  994.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  995.    for (unsigned y = 0; y < temp.scy; y++) {
  996.       line8d(dst, src, t.sctab8d[0]); dst += pitch;
  997.       line8d(dst, src, t.sctab8d[1]); dst += pitch;
  998.       src += delta;
  999.    }
  1000. }
  1001.  
  1002. void rend_copy8t(unsigned char *dst, unsigned pitch)
  1003. {
  1004.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1005.    for (unsigned y = 0; y < temp.scy; y++) {
  1006.       line8t(dst, src, t.sctab8q); dst += pitch;
  1007.       line8t(dst, src, t.sctab8q); dst += pitch;
  1008.       line8t(dst, src, t.sctab8q); dst += pitch;
  1009.       src += delta;
  1010.    }
  1011. }
  1012.  
  1013. void rend_copy8q(unsigned char *dst, unsigned pitch)
  1014. {
  1015.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1016.    for (unsigned y = 0; y < temp.scy; y++) {
  1017.       line8q(dst, src, t.sctab8q); dst += pitch;
  1018.       line8q(dst, src, t.sctab8q); dst += pitch;
  1019.       line8q(dst, src, t.sctab8q); dst += pitch;
  1020.       line8q(dst, src, t.sctab8q); dst += pitch;
  1021.       src += delta;
  1022.    }
  1023. }
  1024.  
  1025. void rend_copy8d1_nf(unsigned char *dst, unsigned pitch)
  1026. {
  1027.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1028.    for (unsigned y = 0; y < temp.scy; y++) {
  1029.       line8d_nf(dst, src, t.sctab8d[0]); dst += pitch;
  1030.       src += delta;
  1031.    }
  1032. }
  1033.  
  1034. void rend_copy8d_nf(unsigned char *dst, unsigned pitch)
  1035. {
  1036.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1037.    if (conf.alt_nf) {
  1038.       int offset = rb2_offs;
  1039.       if(comp.frame_counter & 1)
  1040.       {
  1041.           src += rb2_offs;
  1042.           offset = -offset;
  1043.       }
  1044.       for (unsigned y = 0; y < temp.scy; y++) {
  1045.          line8d(dst, src, t.sctab8d[0]); dst += pitch;
  1046.          line8d(dst, src+offset, t.sctab8d[0]); dst += pitch;
  1047.          src += delta;
  1048.       }
  1049.    } else {
  1050.       for (unsigned y = 0; y < temp.scy; y++) {
  1051.          line8d_nf(dst, src, t.sctab8d[0]); dst += pitch;
  1052.          line8d_nf(dst, src, t.sctab8d[1]); dst += pitch;
  1053.          src += delta;
  1054.       }
  1055.    }
  1056. }
  1057.  
  1058. void rend_copy8t_nf(unsigned char *dst, unsigned pitch)
  1059. {
  1060.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1061.    for (unsigned y = 0; y < temp.scy; y++) {
  1062.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1063.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1064.       line8t_nf(dst, src, t.sctab8q); dst += pitch;
  1065.       src += delta;
  1066.    }
  1067. }
  1068.  
  1069. void rend_copy8q_nf(unsigned char *dst, unsigned pitch)
  1070. {
  1071.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  1072.    for (unsigned y = 0; y < temp.scy; y++) {
  1073.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1074.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1075.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1076.       line8q_nf(dst, src, t.sctab8q); dst += pitch;
  1077.       src += delta;
  1078.    }
  1079. }
  1080.