Subversion Repositories pentevo

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
716 lvd 1
#include "std.h"
2
 
3
#include "emul.h"
4
#include "vars.h"
5
#include "draw.h"
6
#include "dxr_rsm.h"
7
 
8
RSM_DATA rsm;
9
 
10
void RSM_DATA::prepare_line_32(unsigned char *src0)
11
{
12
   unsigned i;
13
   for (i = 0; i < line_size_d; i++)
14
       line_buffer_d[i] = bias;
15
 
16
   unsigned line_size = line_size_d / 2, frame = 0;
17
   __m64 *tab = colortab;
18
   for (;;)
19
   {
20
      unsigned char *src = src0;
21
      for (i = 0; i < line_size; )
22
      {
23
         unsigned s = *(unsigned*)src, attr = (s >> 6) & 0x3FC;
24
         line_buffer[i+0] = _mm_add_pi8(line_buffer[i+0], tab[((s >> 6) & 3) + attr]);
25
         line_buffer[i+1] = _mm_add_pi8(line_buffer[i+1], tab[((s >> 4) & 3) + attr]);
26
         line_buffer[i+2] = _mm_add_pi8(line_buffer[i+2], tab[((s >> 2) & 3) + attr]);
27
         line_buffer[i+3] = _mm_add_pi8(line_buffer[i+3], tab[((s >> 0) & 3) + attr]);
28
         attr = (s >> 22) & 0x3FC;
29
         line_buffer[i+4] = _mm_add_pi8(line_buffer[i+4], tab[((s >>22) & 3) + attr]);
30
         line_buffer[i+5] = _mm_add_pi8(line_buffer[i+5], tab[((s >>20) & 3) + attr]);
31
         line_buffer[i+6] = _mm_add_pi8(line_buffer[i+6], tab[((s >>18) & 3) + attr]);
32
         line_buffer[i+7] = _mm_add_pi8(line_buffer[i+7], tab[((s >>16) & 3) + attr]);
33
         i += 8, src += 4;
34
      }
35
      if (++frame == mix_frames)
36
          break;
37
      src0 += rb2_offs;
38
      if (src0 >= rbuf_s + rb2_offs * mix_frames)
39
          src0 -= rb2_offs * mix_frames;
40
      tab += 0x100*4;
41
   }
42
}
43
 
44
void RSM_DATA::prepare_line_16(unsigned char *src0)
45
{
46
   unsigned i;
47
   for (i = 0; i < line_size_d; i++)
48
       line_buffer_d[i] = bias;
49
 
50
   unsigned line_size = line_size_d / 2, frame = 0;
51
   __m64 *tab = colortab;
52
   for (;;) {
53
      unsigned char *src = src0;
54
      for (i = 0; i < line_size; ) {
55
         unsigned s = *(unsigned*)src, attr = (s >> 4) & 0xFF0;
56
         line_buffer[i+0] = _mm_add_pi8(line_buffer[i+0], tab[((s >> 4)  & 0xF) + attr]);
57
         line_buffer[i+1] = _mm_add_pi8(line_buffer[i+1], tab[((s >> 0)  & 0xF) + attr]);
58
         attr = (s >> 20) & 0xFF0;
59
         line_buffer[i+2] = _mm_add_pi8(line_buffer[i+2], tab[((s >> 20) & 0xF) + attr]);
60
         line_buffer[i+3] = _mm_add_pi8(line_buffer[i+3], tab[((s >> 16) & 0xF) + attr]);
61
         src += 4; i += 4;
62
      }
63
      if (++frame == mix_frames) break;
64
      src0 += rb2_offs;
65
      if (src0 >= rbuf_s + rb2_offs * mix_frames) src0 -= rb2_offs * mix_frames;
66
      tab += 0x100*16;
67
   }
68
}
69
 
70
void RSM_DATA::prepare_line_8(unsigned char *src0)
71
{
72
   unsigned i;
73
   for (i = 0; i < line_size_d; i++)
74
       line_buffer_d[i] = bias;
75
 
76
   unsigned frame = 0;
77
   DWORD *tab = (DWORD*)colortab;
78
   for (;;) {
79
      unsigned char *src = src0;
80
      for (i = 0; i < line_size_d; ) {
81
         unsigned s = *(unsigned*)src, attr = (s >> 4) & 0xFF0;
82
         line_buffer_d[i+0] += tab[((s >> 4)  & 0xF) + attr];
83
         line_buffer_d[i+1] += tab[((s >> 0)  & 0xF) + attr];
84
         attr = (s >> 20) & 0xFF0;
85
         line_buffer_d[i+2] += tab[((s >> 20) & 0xF) + attr];
86
         line_buffer_d[i+3] += tab[((s >> 16) & 0xF) + attr];
87
         s = *(unsigned*)(src+4), attr = (s >> 4) & 0xFF0;
88
         line_buffer_d[i+4] += tab[((s >> 4)  & 0xF) + attr];
89
         line_buffer_d[i+5] += tab[((s >> 0)  & 0xF) + attr];
90
         attr = (s >> 20) & 0xFF0;
91
         line_buffer_d[i+6] += tab[((s >> 20) & 0xF) + attr];
92
         line_buffer_d[i+7] += tab[((s >> 16) & 0xF) + attr];
93
         src += 8, i += 8;
94
      }
95
      if (++frame == mix_frames) break;
96
      src0 += rb2_offs;
97
      if (src0 >= rbuf_s + rb2_offs * mix_frames) src0 -= rb2_offs * mix_frames;
98
      tab += 0x100*16;
99
   }
100
}
101
 
102
void rend_rsm_8(unsigned char *dst, unsigned pitch, unsigned char *src)
103
{
104
   unsigned delta = temp.scx/4;
105
   for (unsigned y = 0; y < temp.scy; y++) {
106
      rsm.prepare_line_8(src);
107
      for (unsigned i = 0; i < rsm.line_size_d; i++)
108
         *(unsigned*)(dst + i*4) = rsm.line_buffer_d[i];
109
      dst += pitch; src += delta;
110
   }
111
}
112
 
113
void rend_rsm_16(unsigned char *dst, unsigned pitch, unsigned char *src)
114
{
115
   unsigned delta = temp.scx/4;
116
   for (unsigned y = 0; y < temp.scy; y++) {
117
      rsm.prepare_line_32(src);
118
      // pack truecolor pixel to 16 bit
119
      if (temp.hi15 == 0)
120
         for (unsigned i = 0; i < rsm.line_size_d; i+=2) {
121
            unsigned c1 = rsm.line_buffer_d[i];
122
            unsigned c2 = rsm.line_buffer_d[i+1];
123
            *(unsigned*)(dst + i*2) =
124
              ((c1 >> 3) & 0x1F) + ((c1 >> 5) & 0x07E0) + ((c1 >> 8) & 0xF800) +
125
              ((c2 << 13) & 0x1F0000) + ((c2 << 11) & 0x07E00000) + ((c2 << 8) & 0xF8000000);
126
         }
127
      else /* if (temp.hi15 == 1) */
128
         for (unsigned i = 0; i < rsm.line_size_d; i+=2) {
129
            unsigned c1 = rsm.line_buffer_d[i];
130
            unsigned c2 = rsm.line_buffer_d[i+1];
131
            *(unsigned*)(dst + i*2) =
132
              ((c1 >> 3) & 0x1F) + ((c1 >> 6) & 0x03E0) + ((c1 >> 9) & 0x7C00) +
133
              ((c2 << 13) & 0x1F0000) + ((c2 << 10) & 0x03E00000) + ((c2 << 7) & 0x7C000000);
134
         }
135
 
136
      dst += pitch; src += delta;
137
   }
138
}
139
 
140
void rend_rsm_16o(unsigned char *dst, unsigned pitch, unsigned char *src)
141
{
142
   unsigned delta = temp.scx/4;
143
   for (unsigned y = 0; y < temp.scy; y++) {
144
      rsm.prepare_line_16(src);
145
      for (unsigned i = 0; i < rsm.line_size_d; i++)
146
         *(unsigned*)(dst + i*4) = rsm.line_buffer_d[i];
147
 
148
      dst += pitch; src += delta;
149
   }
150
}
151
 
152
void rend_rsm_32(unsigned char *dst, unsigned pitch, unsigned char *src)
153
{
154
   unsigned delta = temp.scx/4;
155
   for (unsigned y = 0; y < temp.scy; y++) {
156
      rsm.prepare_line_32(src);
157
      for (unsigned i = 0; i < rsm.line_size_d; i++)
158
         *(unsigned*)(dst + i*4) = rsm.line_buffer_d[i];
159
      dst += pitch; src += delta;
160
   }
161
}
162
 
163
void __fastcall render_rsm(unsigned char *dst, unsigned pitch)
164
{
165
   rsm.colortab = (__m64*)((int)rsm.tables + rsm.frame * rsm.frame_table_size);
166
   unsigned char *src = rbuf_s + rb2_offs * rsm.rbuf_dst;
167
 
168
   if (temp.obpp ==  8) rend_rsm_8 (dst, pitch, src);
169
   if (temp.obpp == 16) { if (rsm.mode == 0) rend_rsm_16(dst, pitch, src); else rend_rsm_16o(dst, pitch, src); }
170
   if (temp.obpp == 32) rend_rsm_32(dst, pitch, src);
171
 
172
   _mm_empty(); // EMMS
173
}
174
 
175
unsigned gcd(unsigned x, unsigned y)
176
{
177
   while (x != y) if (x > y) x -= y; else y -= x;
178
   return x;
179
}
180
 
181
unsigned lcm(unsigned x, unsigned y)
182
{
183
   return x*y / gcd(x,y);
184
}
185
 
186
void calc_rsm_tables()
187
{
188
   rsm.rbuf_dst = rsm.frame = 0;
189
 
190
   if (renders[conf.render].func != render_rsm) {
191
      rsm.mix_frames = rsm.period = 1;
192
      static unsigned char one = 1;
193
      rsm.needframes = &one; // rsm.needframes[0]=1
194
      return;
195
   }
196
 
197
   rsm.mode = (temp.obpp == 8)? 2 : 0;
198
   if (temp.obpp == 16 && temp.hi15 == 2) rsm.mode = 1;
199
 
200
   rsm.line_size_d = (temp.scx >> rsm.mode);
201
 
202
   unsigned fmax = lcm(conf.intfq, temp.ofq);
203
   rsm.period = fmax / conf.intfq;
204
   unsigned step = fmax / temp.ofq;
205
 
206
   rsm.mix_frames = (conf.rsm.mode == RSM_SIMPLE)? 2 : conf.rsm.mix_frames;
207
 
208
   rsm.frame_table_size = rsm.mix_frames * 0x100;
209
   if (rsm.mode == 0) rsm.frame_table_size *= 4*sizeof(__m64);
210
   if (rsm.mode == 1) rsm.frame_table_size *= 16*sizeof(__m64);
211
   if (rsm.mode == 2) rsm.frame_table_size *= 16*sizeof(DWORD);
212
 
213
   rsm.data = (unsigned char*)realloc(rsm.data, rsm.period * (rsm.frame_table_size + 1));
214
   rsm.tables = (__m64*)rsm.data;
215
   rsm.needframes = rsm.data + rsm.frame_table_size * rsm.period;
216
 
217
   double *weights = (double*)alloca(rsm.period * rsm.mix_frames * sizeof(double));
218
   double *dst = weights;
219
 
220
   unsigned low_bias = 0, dynamic_range = 0xFF;
221
 
222
   if (conf.rsm.mode != RSM_SIMPLE) {
223
      unsigned fsize = rsm.period * rsm.mix_frames;
224
      double *flt = (double*)alloca((fsize+1) * sizeof(double));
225
 
226
      double cutoff = 0.9;
227
      if (conf.rsm.mode == RSM_FIR1) cutoff = 0.5;
228
      if (conf.rsm.mode == RSM_FIR2) cutoff = 0.33333333;
229
 
230
      double pi = 4*atan(1.0);
231
      cutoff *= 1 / (double)rsm.period; // cutoff scale = inftq/maxfq = 1/rsm.period
232
      double c1 = 0.54 / pi, c2 = 0.46 / pi;
233
      for (unsigned i = 0; i <= fsize; i++) {
234
         if (i == fsize/2) flt[i] = cutoff;
235
         else flt[i] = sin(pi*cutoff*((double)i - fsize/2)) * (c1 - c2*cos(2*pi*(double)i/fsize)) / ((double)i - fsize/2);
236
      }
237
 
238
      double low_b = 0, high_b = 0;
239
      for (unsigned frame = 0; frame < rsm.period; frame++)
240
      {
241
         unsigned pos = frame * step, srcframe = pos / rsm.period;
242
         if (frame) srcframe++; // (pos % rsm.period) != 0
243
 
244
         unsigned nextpos = pos + step, nextframe = nextpos / rsm.period;
245
         if (frame+1 != rsm.period) nextframe++; // (nextpos % rsm.period) != 0
246
 
247
         rsm.needframes[frame] = (nextframe - srcframe);
248
         double low = 0, high = 0;
249
         unsigned offset = (srcframe * rsm.period) - pos;
250
         for (unsigned ch = 0; ch < rsm.mix_frames; ch++) {
251
            double weight = flt[offset] * rsm.period;
252
            if (weight < 0) low += weight; else high += weight;
253
            *dst++ = weight;
254
            offset += rsm.period;
255
         }
256
         if (low < low_b) low_b = low;
257
         if (high > high_b) high_b = high;
258
      }
259
      low_bias = (unsigned)((-low_b)*0xFF);
260
      dynamic_range = (0xFF - low_bias);
261
   } else { // RSM_SIMPLE
262
 
263
      double div = 1 / (double)step;
264
      for (unsigned frame = 0; frame < rsm.period; frame++)
265
      {
266
         unsigned pos = frame * step, srcframe = pos / rsm.period;
267
         unsigned nextpos = pos + step, nextframe = nextpos / rsm.period;
268
         unsigned offset = (srcframe == nextframe)? step : (nextpos - nextframe*rsm.period);
269
         rsm.needframes[frame] = (nextframe - srcframe);
270
         *dst++ = offset * div;
271
         *dst++ = (step - offset) * div;
272
      }
273
   }
274
 
275
   rsm.bias = 0x01010101 * low_bias;
276
 
277
   unsigned char *dst32 = (unsigned char*)rsm.tables;
278
   for (unsigned frame = 0; frame < rsm.period; frame++)
279
      for (unsigned ch = 0; ch < rsm.mix_frames; ch++)
280
      {
281
         unsigned char *start_frame = dst32;
282
         switch (rsm.mode)
283
         {
284
            case 0: // rgb 16/32bit - reorder table, MMX processes 2 truecolor pixels at one op
285
               for (unsigned a = 0; a < 0x100; a++)
286
                  for (unsigned pix = 0; pix < 4; pix++) {
287
                     *(DWORD*)(dst32+0) = t.sctab32[0][(pix >>1)*0x100 + a];
288
                     *(DWORD*)(dst32+4) = t.sctab32[0][(pix & 1)*0x100 + a];
289
                     dst32 += 8;
290
                  }
291
               break;
292
            case 1: // YUY2 overlay - reorder table, MMX processes 4 overlay pixels at one op
293
               for (unsigned a = 0; a < 0x100; a++)
294
                  for (unsigned pix = 0; pix < 16; pix++) {
295
                     *(DWORD*)(dst32+0) = t.sctab16[0][a*4 + (pix >> 2)];
296
                     *(DWORD*)(dst32+4) = t.sctab16[0][a*4 + (pix &  3)];
297
                     dst32 += 8;
298
                  }
299
               break;
300
            case 2:
301
               memcpy(dst32, t.sctab8[0], 0x100*16*sizeof(DWORD)), dst32 += 0x100*16*sizeof(DWORD);
302
               break;
303
            default:
304
               __assume(0);
305
         }
306
 
307
         double scale = *weights++ * dynamic_range * (1/256.0);
308
         for (unsigned char *ptr = start_frame; ptr < dst32; ptr++) {
309
            double color = scale * *ptr;
310
            if (color > 255) color = 255;
311
            if (color < 0) color += 256; // color = 0;
312
            *ptr = (unsigned char)color;
313
         }
314
      }
315
}