Subversion Repositories pentevo

Rev

Rev 796 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed | ?url?

  1. #include "std.h"
  2.  
  3. #include "emul.h"
  4. #include "vars.h"
  5. #include "dxrend.h"
  6. #include "dxrcopy.h"
  7. #include "dxr_512.h"
  8. #include "dxr_4bpp.h"
  9. #include "dxr_prof.h"
  10. #include "dxr_atm.h"
  11. #include "draw.h"
  12. #include "util.h"
  13.  
  14. void rend_small(unsigned char *dst, unsigned pitch)
  15. {
  16.     if(temp.comp_pal_changed)
  17.     {
  18.         pixel_tables();
  19.         temp.comp_pal_changed = 0;
  20.     }
  21.  
  22.     if (temp.obpp == 8)  { rend_copy8 (dst, pitch); return; }
  23.     if (temp.obpp == 16) { rend_copy16(dst, pitch); return; }
  24.     if (temp.obpp == 32) { rend_copy32(dst, pitch); return; }
  25. }
  26.  
  27. void __fastcall render_small(unsigned char *dst, unsigned pitch)
  28. {
  29.    if (conf.noflic)
  30.    {
  31.       if (temp.obpp == 8)  { rend_copy8_nf (dst, pitch); }
  32.       if (temp.obpp == 16) { rend_copy16_nf(dst, pitch); }
  33.       if (temp.obpp == 32) { rend_copy32_nf(dst, pitch); }
  34.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  35.       return;
  36.    }
  37.  
  38.    if (comp.pEFF7 & EFF7_4BPP)
  39.    {
  40.        rend_p4bpp_small(dst, pitch);
  41.        return;
  42.    }
  43.  
  44.    if (conf.mem_model == MM_ATM450)
  45.    {
  46.        rend_atm_1_small(dst, pitch);
  47.        return;
  48.    }
  49.  
  50.    if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3)
  51.    {
  52.        rend_atm_2_small(dst, pitch);
  53.        return;
  54.    }
  55.    rend_small(dst, pitch);
  56. }
  57.  
  58. void rend_dbl(unsigned char *dst, unsigned pitch)
  59. {
  60.     if(temp.comp_pal_changed)
  61.     {
  62.         pixel_tables();
  63.         temp.comp_pal_changed = 0;
  64.     }
  65.  
  66.    if(temp.oy > temp.scy && conf.fast_sl)
  67.        pitch *= 2;
  68.  
  69.    if (conf.noflic)
  70.    {
  71.       if (temp.obpp == 8)
  72.       {
  73.           if (conf.fast_sl)
  74.               rend_copy8d1_nf (dst, pitch);
  75.           else
  76.               rend_copy8d_nf (dst, pitch);
  77.       }
  78.       else if (temp.obpp == 16)
  79.       {
  80.           if (conf.fast_sl)
  81.               rend_copy16d1_nf(dst, pitch);
  82.           else
  83.               rend_copy16d_nf(dst, pitch);
  84.       }
  85.       else if (temp.obpp == 32)
  86.       {
  87.           if (conf.fast_sl)
  88.               rend_copy32d1_nf(dst, pitch);
  89.           else
  90.               rend_copy32d_nf(dst, pitch);
  91.       }
  92.  
  93.       memcpy(rbuf_s, rbuf, temp.scy * temp.scx / 4);
  94.    }
  95.    else
  96.    {
  97.       if (temp.obpp == 8)
  98.       {
  99.           if (conf.fast_sl)
  100.               rend_copy8d1 (dst, pitch);
  101.           else
  102.               rend_copy8d (dst, pitch);
  103.           return;
  104.       }
  105.       if (temp.obpp == 16)
  106.       {
  107.           if (conf.fast_sl)
  108.               rend_copy16d1(dst, pitch);
  109.           else
  110.               rend_copy16d(dst, pitch);
  111.           return;
  112.       }
  113.       if (temp.obpp == 32)
  114.       {
  115.           if (conf.fast_sl)
  116.               rend_copy32d1(dst, pitch);
  117.           else
  118.               rend_copy32d(dst, pitch);
  119.           return;
  120.       }
  121.    }
  122. }
  123.  
  124. void __fastcall render_dbl(unsigned char *dst, unsigned pitch)
  125. {
  126.    #ifdef MOD_VID_VD
  127.    if ((comp.pVD & 8) && temp.obpp == 8)
  128.    {
  129.        rend_vd8dbl(dst, pitch);
  130.        return;
  131.    }
  132.    #endif
  133.  
  134.    // todo: add ini option to show zx-screen with palette or with MC
  135.    if (comp.pEFF7 & EFF7_512)
  136.    {
  137.        rend_512(dst, pitch);
  138.        return;
  139.    }
  140.    if (comp.pEFF7 & EFF7_4BPP)
  141.    {
  142.        rend_p4bpp(dst, pitch);
  143.        return;
  144.    }
  145.    if ((comp.pDFFD & 0x80) && conf.mem_model == MM_PROFI)
  146.    {
  147.        rend_profi(dst, pitch);
  148.        return;
  149.    }
  150.    if (conf.mem_model == MM_ATM450)
  151.    {
  152.        rend_atm_1(dst, pitch);
  153.        return;
  154.    }
  155.    if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3)
  156.    {
  157.        rend_atm_2(dst, pitch);
  158.        return;
  159.    }
  160.  
  161.    rend_dbl(dst, pitch);
  162. }
  163.  
  164. void __fastcall render_3x(unsigned char *dst, unsigned pitch)
  165. {
  166.     if(temp.comp_pal_changed)
  167.     {
  168.         pixel_tables();
  169.         temp.comp_pal_changed = 0;
  170.     }
  171.  
  172.    if (conf.noflic) {
  173.       if (temp.obpp == 8)  rend_copy8t_nf (dst, pitch);
  174.       if (temp.obpp == 16) rend_copy16t_nf(dst, pitch);
  175.       if (temp.obpp == 32) rend_copy32t_nf(dst, pitch);
  176.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  177.    }
  178.    else
  179.    {
  180.       if (temp.obpp == 8)  { rend_copy8t (dst, pitch); return; }
  181.       if (temp.obpp == 16) { rend_copy16t(dst, pitch); return; }
  182.       if (temp.obpp == 32) { rend_copy32t(dst, pitch); return; }
  183.    }
  184. }
  185.  
  186. void __fastcall render_quad(unsigned char *dst, unsigned pitch)
  187. {
  188.     if(temp.comp_pal_changed)
  189.     {
  190.         pixel_tables();
  191.         temp.comp_pal_changed = 0;
  192.     }
  193.  
  194.     if(conf.noflic)
  195.     {
  196.       if (temp.obpp == 8)  rend_copy8q_nf (dst, pitch);
  197.       if (temp.obpp == 16) rend_copy16q_nf(dst, pitch);
  198.       if (temp.obpp == 32) rend_copy32q_nf(dst, pitch);
  199.       memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  200.    } else {
  201.       if (temp.obpp == 8)  { rend_copy8q (dst, pitch); return; }
  202.       if (temp.obpp == 16) { rend_copy16q(dst, pitch); return; }
  203.       if (temp.obpp == 32) { rend_copy32q(dst, pitch); return; }
  204.    }
  205. }
  206.  
  207.  
  208. void __fastcall render_scale(unsigned char *dst, unsigned pitch)
  209. {
  210.    unsigned char *src = rbuf;
  211.    unsigned dx = temp.scx / 4;
  212.    unsigned char buf[MAX_WIDTH*2];
  213.    unsigned x; //Alone Coder 0.36.7
  214.    for (unsigned y = 0; y < temp.scy-1; y++)
  215.    {
  216.       for (x = 0; x < dx; x += 2)
  217.       {
  218.          unsigned xx = (t.dbl[src[x]] << 16) + t.dbl[src[x+2]];
  219.          unsigned yy = (t.dbl[src[x+dx]] << 16) + t.dbl[src[x+dx+2]];
  220.          unsigned x1 = xx | (yy & ((xx>>1) | (xx<<1)));
  221.          unsigned *tab0 = t.sctab8[0] + (src[x+1] << 4);
  222.          *(unsigned*)(dst+x*8+ 0)   = tab0[(x1>>28) & 0x0F];
  223.          *(unsigned*)(dst+x*8+ 4)   = tab0[(x1>>24) & 0x0F];
  224.          *(unsigned*)(dst+x*8+ 8)   = tab0[(x1>>20) & 0x0F];
  225.          *(unsigned*)(dst+x*8+12)   = tab0[(x1>>16) & 0x0F];
  226.          unsigned *tab1 = t.sctab8[0] + src[x+3];
  227.          *(unsigned*)(dst+x*8+16)   = tab1[(x1>>12) & 0x0F];
  228.          *(unsigned*)(dst+x*8+20)   = tab1[(x1>> 8) & 0x0F];
  229.          *(unsigned*)(dst+x*8+24)   = tab1[(x1>> 4) & 0x0F];
  230.          *(unsigned*)(dst+x*8+28)   = tab1[(x1>> 0) & 0x0F];
  231.          x1 = yy | (xx & ((yy>>1) | (yy<<1)));
  232.          *(unsigned*)(buf+x*8+ 0)   = tab0[(x1>>28) & 0x0F];
  233.          *(unsigned*)(buf+x*8+ 4)   = tab0[(x1>>24) & 0x0F];
  234.          *(unsigned*)(buf+x*8+ 8)   = tab0[(x1>>20) & 0x0F];
  235.          *(unsigned*)(buf+x*8+12)   = tab0[(x1>>16) & 0x0F];
  236.          *(unsigned*)(buf+x*8+16)   = tab1[(x1>>12) & 0x0F];
  237.          *(unsigned*)(buf+x*8+20)   = tab1[(x1>> 8) & 0x0F];
  238.          *(unsigned*)(buf+x*8+24)   = tab1[(x1>> 4) & 0x0F];
  239.          *(unsigned*)(buf+x*8+28)   = tab1[(x1>> 0) & 0x0F];
  240.       }
  241.       dst += pitch;
  242.       for (x = 0; x < temp.ox; x += 4)
  243.           *(unsigned*)(dst+x) = *(unsigned*)(buf+x);
  244.       src += dx; dst += pitch;
  245.    }
  246. }
  247.  
  248. //static u64 mask49 = 0x4949494949494949ULL;
  249. //static u64 mask92 = 0x9292929292929292ULL;
  250.  
  251. static void /*__declspec(naked)*/ __fastcall _bil_line1(unsigned char *dst, unsigned char *src)
  252. {
  253.     for (unsigned i = 0; i < temp.scx; i += 2)
  254.     {
  255.        dst[i] = src[i];
  256.        dst[i+1] = ((src[i] + src[i+1]) >> 1);
  257.     }
  258. /*
  259.    __asm {
  260.  
  261.       push ebx
  262.       push edi
  263.       push ebp
  264.  
  265.       mov  ebp, [temp.scx]
  266.       xor  eax, eax
  267.       xor  ebx, ebx // ebx - prev. pixel
  268.       shr ebp,1
  269.  
  270. l1:
  271.       mov  al, [edx]
  272.       xadd eax, ebx
  273.       shr  eax, 1
  274.       mov  [ecx+1], bl
  275.       mov  [ecx], al
  276.       mov  al, [edx+1]
  277.       add  ecx, 4
  278.       xadd eax, ebx
  279.       add  edx, 2
  280.       shr  eax, 1
  281.       mov  [ecx-1], bl
  282.       dec  ebp
  283.       mov  [ecx-2], al
  284.       jnz l1
  285.  
  286.       pop ebp
  287.       pop edi
  288.       pop ebx
  289.       retn
  290.    }
  291. */
  292. }
  293.  
  294. static void /*__declspec(naked)*/ __fastcall _bil_line2(unsigned char *dst, unsigned char *s1)
  295. {
  296.       u32 *s = (u32 *)s1;
  297.       u32 *d = (u32 *)dst;
  298.  
  299.       for (unsigned j = 0; j < temp.ox/4; j++)
  300.       {
  301.           u32 a = s[j];
  302.           u32 b = s[j+2*MAX_WIDTH/4];
  303.           u32 x = a & b;
  304.           u32 y = (a ^ b) >> 1;
  305.           u32 z = a | b;
  306.           u32 n = x << 1;
  307.           u32 v1 = x ^ y;
  308.           v1 &= 0x49494949;
  309.           u32 v2 = z & n;
  310.           v2 |= x;
  311.           v2 &= 0x92929292;
  312.  
  313.           d[j] = v1 | v2;
  314.       }
  315.  
  316. /*
  317.    __asm {
  318.  
  319.       mov  eax, [temp.ox]
  320.       movq mm2, [mask49]
  321.       movq mm3, [mask92]
  322.       shr  eax, 3
  323.  
  324. m2:   movq  mm0, [edx]
  325.       movq  mm1, [edx+MAX_WIDTH*2]
  326.       movq  mm4, mm0
  327.       movq  mm5, mm0
  328.       pand  mm4, mm1    // mm4 = a & b
  329.       pxor  mm5, mm1    // mm5 = a ^ b
  330.       movq  mm6, mm0
  331.       psrlq mm5, 1      // mm5 = (a ^ b) >> 1
  332.       por   mm6, mm1    // mm6 = a | b
  333.       movq  mm7, mm4
  334.       pxor  mm5, mm4    // mm5 = (a & b) ^ ((a ^ b) >> 1)
  335.       psllq mm7, 1      // mm7 = (a & b) << 1
  336.       pand  mm5, mm2    // mm5 = 0x49494949 & ((a & b) ^ ((a ^ b) >> 1))
  337.       pand  mm7, mm6    // mm7 = (a|b) & ((a & b) << 1)
  338.       por   mm7, mm4    // mm7 = (a&b) | ((a|b)&((a&b)<<1))
  339.       add   ecx, 8
  340.       pand  mm7, mm3    // mm7 &= 0x92929292
  341.       add  edx, 8
  342.       por   mm7, mm5
  343.       dec  eax
  344.       movq [ecx-8], mm7
  345.       jnz  m2
  346.  
  347.       retn
  348.    }
  349. */
  350. }
  351.  
  352. void __fastcall render_bil(unsigned char *dst, unsigned pitch)
  353. {
  354.    render_small(snbuf, MAX_WIDTH);
  355.  
  356.    unsigned char *src = snbuf;
  357.    unsigned char ATTR_ALIGN(16) l1[MAX_WIDTH*4];
  358.    #define l2 (l1+MAX_WIDTH*2)
  359.    _bil_line1(l1, src); src += MAX_WIDTH;
  360.    memcpy(dst, l1, temp.ox);
  361.    dst += pitch;
  362.  
  363.    for (unsigned i = temp.scy/2-1; i; i--)
  364.    {
  365.       _bil_line1(l2, src); src += MAX_WIDTH;
  366.       _bil_line2(dst, l1); dst += pitch;
  367.       memcpy(dst, l2, temp.ox);
  368.       dst += pitch;
  369.  
  370.       _bil_line1(l1, src); src += MAX_WIDTH;
  371.       _bil_line2(dst, l1); dst += pitch;
  372.       memcpy(dst, l1, temp.ox);
  373.       dst += pitch;
  374.    }
  375.    _bil_line1(l2, src); src += MAX_WIDTH;
  376.    _bil_line2(dst, l1); dst += pitch;
  377.    memcpy(dst, l2, temp.ox);
  378.    dst += pitch;
  379.    memcpy(dst, l2, temp.ox);
  380.    #undef l2
  381.  
  382. //   _mm_empty();
  383. }
  384.  
  385. void __fastcall render_tv(unsigned char *dst, unsigned pitch)
  386. {
  387. // ripped from ccs and *highly* simplified and optimized
  388.  
  389.    unsigned char midbuf[MAX_WIDTH*2];
  390.    unsigned char line[MAX_WIDTH*2+4*2], line2[MAX_WIDTH*2];
  391.  
  392.    unsigned j; //Alone Coder 0.36.7
  393.    for (/*unsigned*/ j = 0; j < MAX_WIDTH/2; j++)
  394.       *(unsigned*)(midbuf+j*4) = WORD4(0,0x80,0,0x80);
  395.  
  396.    unsigned char *src = rbuf; unsigned delta = temp.scx/4;
  397.  
  398.    for (unsigned i = temp.scy; i; i--) {
  399.       *(unsigned*)line = *(unsigned*)(line+4) = WORD4(0,0x80,0,0x80);
  400.  
  401.       if (conf.noflic) line16_nf(line+8, src, t.sctab16_nf[0]);
  402.       else line16(line+8, src, t.sctab16[0]);
  403.  
  404.       src += delta;
  405.  
  406.       for (j = 0; j < temp.scx; j++) {
  407.  
  408.          unsigned Y = line[j*2+8]*9+
  409.                       line[j*2-2+8]*4+
  410.                       line[j*2-4+8]*2+
  411.                       line[j*2-8+8];
  412. /*
  413.          unsigned U = line[j*2+8+1]*12 +
  414.                       line[j*2-2+8+1]*2+
  415.                       line[j*2-4+8+1]+
  416.                       line[j*2-8+8+1];
  417. */
  418.          line2[j*2] = u8(Y>>4);
  419. //         line2[j*2+1] = U>>4;
  420.          line2[j*2+1] = line[j*2+9];
  421.       }
  422.       // there must be only fixed length fader buffer
  423.       for (j = 0; j < temp.scx/2; j++) {
  424.          *(unsigned*)(midbuf+j*4) = *(unsigned*)(dst + j*4) =
  425.          ((*(unsigned*)(midbuf+j*4) & 0xFEFEFEFE)/2 + (*(unsigned*)(line2+j*4) & 0xFEFEFEFE)/2);
  426.       }
  427.       dst += pitch;
  428.    }
  429.    if (conf.noflic) memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4);
  430. }
  431.  
  432.