Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
716 | lvd | 1 | #include "std.h" |
2 | |||
3 | #include "emul.h" |
||
4 | #include "vars.h" |
||
5 | #include "dxrend.h" |
||
6 | #include "dxrcopy.h" |
||
7 | #include "dxr_512.h" |
||
8 | #include "dxr_4bpp.h" |
||
9 | #include "dxr_prof.h" |
||
10 | #include "dxr_atm.h" |
||
11 | #include "draw.h" |
||
12 | #include "util.h" |
||
13 | |||
14 | void rend_small(unsigned char *dst, unsigned pitch) |
||
15 | { |
||
16 | if (temp.obpp == 8) { rend_copy8 (dst, pitch); return; } |
||
17 | if (temp.obpp == 16) { rend_copy16(dst, pitch); return; } |
||
18 | if (temp.obpp == 32) { rend_copy32(dst, pitch); return; } |
||
19 | } |
||
20 | |||
21 | void __fastcall render_small(unsigned char *dst, unsigned pitch) |
||
22 | { |
||
23 | if (conf.noflic) |
||
24 | { |
||
25 | if (temp.obpp == 8) { rend_copy8_nf (dst, pitch); } |
||
26 | if (temp.obpp == 16) { rend_copy16_nf(dst, pitch); } |
||
27 | if (temp.obpp == 32) { rend_copy32_nf(dst, pitch); } |
||
28 | memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4); |
||
29 | return; |
||
30 | } |
||
31 | |||
32 | if (comp.pEFF7 & EFF7_4BPP) |
||
33 | { |
||
34 | rend_p4bpp_small(dst, pitch); |
||
35 | return; |
||
36 | } |
||
37 | |||
38 | if (conf.mem_model == MM_ATM450) |
||
39 | { |
||
40 | rend_atm_1_small(dst, pitch); |
||
41 | return; |
||
42 | } |
||
43 | |||
44 | if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3) |
||
45 | { |
||
46 | rend_atm_2_small(dst, pitch); |
||
47 | return; |
||
48 | } |
||
49 | rend_small(dst, pitch); |
||
50 | } |
||
51 | |||
52 | void rend_dbl(unsigned char *dst, unsigned pitch) |
||
53 | { |
||
54 | if (temp.oy > temp.scy && conf.fast_sl) |
||
55 | pitch *= 2; |
||
56 | |||
57 | if (conf.noflic) |
||
58 | { |
||
59 | if (temp.obpp == 8) |
||
60 | { |
||
61 | if (conf.fast_sl) |
||
62 | rend_copy8d1_nf (dst, pitch); |
||
63 | else |
||
64 | rend_copy8d_nf (dst, pitch); |
||
65 | } |
||
66 | else if (temp.obpp == 16) |
||
67 | { |
||
68 | if (conf.fast_sl) |
||
69 | rend_copy16d1_nf(dst, pitch); |
||
70 | else |
||
71 | rend_copy16d_nf(dst, pitch); |
||
72 | } |
||
73 | else if (temp.obpp == 32) |
||
74 | { |
||
75 | if (conf.fast_sl) |
||
76 | rend_copy32d1_nf(dst, pitch); |
||
77 | else |
||
78 | rend_copy32d_nf(dst, pitch); |
||
79 | } |
||
80 | |||
81 | memcpy(rbuf_s, rbuf, temp.scy * temp.scx / 4); |
||
82 | } |
||
83 | else |
||
84 | { |
||
85 | if (temp.obpp == 8) |
||
86 | { |
||
87 | if (conf.fast_sl) |
||
88 | rend_copy8d1 (dst, pitch); |
||
89 | else |
||
90 | rend_copy8d (dst, pitch); |
||
91 | return; |
||
92 | } |
||
93 | if (temp.obpp == 16) |
||
94 | { |
||
95 | if (conf.fast_sl) |
||
96 | rend_copy16d1(dst, pitch); |
||
97 | else |
||
98 | rend_copy16d(dst, pitch); |
||
99 | return; |
||
100 | } |
||
101 | if (temp.obpp == 32) |
||
102 | { |
||
103 | if (conf.fast_sl) |
||
104 | rend_copy32d1(dst, pitch); |
||
105 | else |
||
106 | rend_copy32d(dst, pitch); |
||
107 | return; |
||
108 | } |
||
109 | } |
||
110 | } |
||
111 | |||
112 | void __fastcall render_dbl(unsigned char *dst, unsigned pitch) |
||
113 | { |
||
114 | #ifdef MOD_VID_VD |
||
115 | if ((comp.pVD & 8) && temp.obpp == 8) |
||
116 | { |
||
117 | rend_vd8dbl(dst, pitch); |
||
118 | return; |
||
119 | } |
||
120 | #endif |
||
121 | |||
122 | // todo: add ini option to show zx-screen with palette or with MC |
||
123 | if (comp.pEFF7 & EFF7_512) |
||
124 | { |
||
125 | rend_512(dst, pitch); |
||
126 | return; |
||
127 | } |
||
128 | if (comp.pEFF7 & EFF7_4BPP) |
||
129 | { |
||
130 | rend_p4bpp(dst, pitch); |
||
131 | return; |
||
132 | } |
||
133 | if ((comp.pDFFD & 0x80) && conf.mem_model == MM_PROFI) |
||
134 | { |
||
135 | rend_profi(dst, pitch); |
||
136 | return; |
||
137 | } |
||
138 | if (conf.mem_model == MM_ATM450) |
||
139 | { |
||
140 | rend_atm_1(dst, pitch); |
||
141 | return; |
||
142 | } |
||
143 | if (conf.mem_model == MM_ATM710 || conf.mem_model == MM_ATM3) |
||
144 | { |
||
145 | rend_atm_2(dst, pitch); |
||
146 | return; |
||
147 | } |
||
148 | |||
149 | rend_dbl(dst, pitch); |
||
150 | } |
||
151 | |||
152 | void __fastcall render_3x(unsigned char *dst, unsigned pitch) |
||
153 | { |
||
154 | if (conf.noflic) { |
||
155 | if (temp.obpp == 8) rend_copy8t_nf (dst, pitch); |
||
156 | if (temp.obpp == 16) rend_copy16t_nf(dst, pitch); |
||
157 | if (temp.obpp == 32) rend_copy32t_nf(dst, pitch); |
||
158 | memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4); |
||
159 | } |
||
160 | else |
||
161 | { |
||
162 | if (temp.obpp == 8) { rend_copy8t (dst, pitch); return; } |
||
163 | if (temp.obpp == 16) { rend_copy16t(dst, pitch); return; } |
||
164 | if (temp.obpp == 32) { rend_copy32t(dst, pitch); return; } |
||
165 | } |
||
166 | } |
||
167 | |||
168 | void __fastcall render_quad(unsigned char *dst, unsigned pitch) |
||
169 | { |
||
170 | if (conf.noflic) { |
||
171 | if (temp.obpp == 8) rend_copy8q_nf (dst, pitch); |
||
172 | if (temp.obpp == 16) rend_copy16q_nf(dst, pitch); |
||
173 | if (temp.obpp == 32) rend_copy32q_nf(dst, pitch); |
||
174 | memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4); |
||
175 | } else { |
||
176 | if (temp.obpp == 8) { rend_copy8q (dst, pitch); return; } |
||
177 | if (temp.obpp == 16) { rend_copy16q(dst, pitch); return; } |
||
178 | if (temp.obpp == 32) { rend_copy32q(dst, pitch); return; } |
||
179 | } |
||
180 | } |
||
181 | |||
182 | |||
183 | void __fastcall render_scale(unsigned char *dst, unsigned pitch) |
||
184 | { |
||
185 | unsigned char *src = rbuf; |
||
186 | unsigned dx = temp.scx / 4; |
||
187 | unsigned char buf[MAX_WIDTH*2]; |
||
188 | unsigned x; //Alone Coder 0.36.7 |
||
189 | for (unsigned y = 0; y < temp.scy-1; y++) |
||
190 | { |
||
191 | for (x = 0; x < dx; x += 2) |
||
192 | { |
||
193 | unsigned xx = (t.dbl[src[x]] << 16) + t.dbl[src[x+2]]; |
||
194 | unsigned yy = (t.dbl[src[x+dx]] << 16) + t.dbl[src[x+dx+2]]; |
||
195 | unsigned x1 = xx | (yy & ((xx>>1) | (xx<<1))); |
||
196 | unsigned *tab0 = t.sctab8[0] + (src[x+1] << 4); |
||
197 | *(unsigned*)(dst+x*8+ 0) = tab0[(x1>>28) & 0x0F]; |
||
198 | *(unsigned*)(dst+x*8+ 4) = tab0[(x1>>24) & 0x0F]; |
||
199 | *(unsigned*)(dst+x*8+ 8) = tab0[(x1>>20) & 0x0F]; |
||
200 | *(unsigned*)(dst+x*8+12) = tab0[(x1>>16) & 0x0F]; |
||
201 | unsigned *tab1 = t.sctab8[0] + src[x+3]; |
||
202 | *(unsigned*)(dst+x*8+16) = tab1[(x1>>12) & 0x0F]; |
||
203 | *(unsigned*)(dst+x*8+20) = tab1[(x1>> 8) & 0x0F]; |
||
204 | *(unsigned*)(dst+x*8+24) = tab1[(x1>> 4) & 0x0F]; |
||
205 | *(unsigned*)(dst+x*8+28) = tab1[(x1>> 0) & 0x0F]; |
||
206 | x1 = yy | (xx & ((yy>>1) | (yy<<1))); |
||
207 | *(unsigned*)(buf+x*8+ 0) = tab0[(x1>>28) & 0x0F]; |
||
208 | *(unsigned*)(buf+x*8+ 4) = tab0[(x1>>24) & 0x0F]; |
||
209 | *(unsigned*)(buf+x*8+ 8) = tab0[(x1>>20) & 0x0F]; |
||
210 | *(unsigned*)(buf+x*8+12) = tab0[(x1>>16) & 0x0F]; |
||
211 | *(unsigned*)(buf+x*8+16) = tab1[(x1>>12) & 0x0F]; |
||
212 | *(unsigned*)(buf+x*8+20) = tab1[(x1>> 8) & 0x0F]; |
||
213 | *(unsigned*)(buf+x*8+24) = tab1[(x1>> 4) & 0x0F]; |
||
214 | *(unsigned*)(buf+x*8+28) = tab1[(x1>> 0) & 0x0F]; |
||
215 | } |
||
216 | dst += pitch; |
||
217 | for (x = 0; x < temp.ox; x += 4) |
||
218 | *(unsigned*)(dst+x) = *(unsigned*)(buf+x); |
||
219 | src += dx; dst += pitch; |
||
220 | } |
||
221 | } |
||
222 | |||
223 | static u64 mask49 = 0x4949494949494949ULL; |
||
224 | static u64 mask92 = 0x9292929292929292ULL; |
||
225 | |||
226 | static void /*__declspec(naked)*/ __fastcall _bil_line1(unsigned char *dst, unsigned char *src) |
||
227 | { |
||
228 | for (unsigned i = 0; i < temp.scx; i += 2) |
||
229 | { |
||
230 | dst[i] = src[i]; |
||
231 | dst[i+1] = ((src[i] + src[i+1]) >> 1); |
||
232 | } |
||
233 | /* |
||
234 | __asm { |
||
235 | |||
236 | push ebx |
||
237 | push edi |
||
238 | push ebp |
||
239 | |||
240 | mov ebp, [temp.scx] |
||
241 | xor eax, eax |
||
242 | xor ebx, ebx // ebx - prev. pixel |
||
243 | shr ebp,1 |
||
244 | |||
245 | l1: |
||
246 | mov al, [edx] |
||
247 | xadd eax, ebx |
||
248 | shr eax, 1 |
||
249 | mov [ecx+1], bl |
||
250 | mov [ecx], al |
||
251 | mov al, [edx+1] |
||
252 | add ecx, 4 |
||
253 | xadd eax, ebx |
||
254 | add edx, 2 |
||
255 | shr eax, 1 |
||
256 | mov [ecx-1], bl |
||
257 | dec ebp |
||
258 | mov [ecx-2], al |
||
259 | jnz l1 |
||
260 | |||
261 | pop ebp |
||
262 | pop edi |
||
263 | pop ebx |
||
264 | retn |
||
265 | } |
||
266 | */ |
||
267 | } |
||
268 | |||
269 | static void /*__declspec(naked)*/ __fastcall _bil_line2(unsigned char *dst, unsigned char *s1) |
||
270 | { |
||
271 | u32 *s = (u32 *)s1; |
||
272 | u32 *d = (u32 *)dst; |
||
273 | |||
274 | for (unsigned j = 0; j < temp.ox/4; j++) |
||
275 | { |
||
276 | u32 a = s[j]; |
||
277 | u32 b = s[j+2*MAX_WIDTH/4]; |
||
278 | u32 x = a & b; |
||
279 | u32 y = (a ^ b) >> 1; |
||
280 | u32 z = a | b; |
||
281 | u32 n = x << 1; |
||
282 | u32 v1 = x ^ y; |
||
283 | v1 &= 0x49494949; |
||
284 | u32 v2 = z & n; |
||
285 | v2 |= x; |
||
286 | v2 &= 0x92929292; |
||
287 | |||
288 | d[j] = v1 | v2; |
||
289 | } |
||
290 | |||
291 | /* |
||
292 | __asm { |
||
293 | |||
294 | mov eax, [temp.ox] |
||
295 | movq mm2, [mask49] |
||
296 | movq mm3, [mask92] |
||
297 | shr eax, 3 |
||
298 | |||
299 | m2: movq mm0, [edx] |
||
300 | movq mm1, [edx+MAX_WIDTH*2] |
||
301 | movq mm4, mm0 |
||
302 | movq mm5, mm0 |
||
303 | pand mm4, mm1 // mm4 = a & b |
||
304 | pxor mm5, mm1 // mm5 = a ^ b |
||
305 | movq mm6, mm0 |
||
306 | psrlq mm5, 1 // mm5 = (a ^ b) >> 1 |
||
307 | por mm6, mm1 // mm6 = a | b |
||
308 | movq mm7, mm4 |
||
309 | pxor mm5, mm4 // mm5 = (a & b) ^ ((a ^ b) >> 1) |
||
310 | psllq mm7, 1 // mm7 = (a & b) << 1 |
||
311 | pand mm5, mm2 // mm5 = 0x49494949 & ((a & b) ^ ((a ^ b) >> 1)) |
||
312 | pand mm7, mm6 // mm7 = (a|b) & ((a & b) << 1) |
||
313 | por mm7, mm4 // mm7 = (a&b) | ((a|b)&((a&b)<<1)) |
||
314 | add ecx, 8 |
||
315 | pand mm7, mm3 // mm7 &= 0x92929292 |
||
316 | add edx, 8 |
||
317 | por mm7, mm5 |
||
318 | dec eax |
||
319 | movq [ecx-8], mm7 |
||
320 | jnz m2 |
||
321 | |||
322 | retn |
||
323 | } |
||
324 | */ |
||
325 | } |
||
326 | |||
327 | void __fastcall render_bil(unsigned char *dst, unsigned pitch) |
||
328 | { |
||
329 | render_small(snbuf, MAX_WIDTH); |
||
330 | |||
331 | unsigned char *src = snbuf; |
||
332 | unsigned char ATTR_ALIGN(16) l1[MAX_WIDTH*4]; |
||
333 | #define l2 (l1+MAX_WIDTH*2) |
||
334 | _bil_line1(l1, src); src += MAX_WIDTH; |
||
335 | memcpy(dst, l1, temp.ox); |
||
336 | dst += pitch; |
||
337 | |||
338 | for (unsigned i = temp.scy/2-1; i; i--) |
||
339 | { |
||
340 | _bil_line1(l2, src); src += MAX_WIDTH; |
||
341 | _bil_line2(dst, l1); dst += pitch; |
||
342 | memcpy(dst, l2, temp.ox); |
||
343 | dst += pitch; |
||
344 | |||
345 | _bil_line1(l1, src); src += MAX_WIDTH; |
||
346 | _bil_line2(dst, l1); dst += pitch; |
||
347 | memcpy(dst, l1, temp.ox); |
||
348 | dst += pitch; |
||
349 | } |
||
350 | _bil_line1(l2, src); src += MAX_WIDTH; |
||
351 | _bil_line2(dst, l1); dst += pitch; |
||
352 | memcpy(dst, l2, temp.ox); |
||
353 | dst += pitch; |
||
354 | memcpy(dst, l2, temp.ox); |
||
355 | #undef l2 |
||
356 | |||
357 | // _mm_empty(); |
||
358 | } |
||
359 | |||
360 | void __fastcall render_tv(unsigned char *dst, unsigned pitch) |
||
361 | { |
||
362 | // ripped from ccs and *highly* simplified and optimized |
||
363 | |||
364 | unsigned char midbuf[MAX_WIDTH*2]; |
||
365 | unsigned char line[MAX_WIDTH*2+4*2], line2[MAX_WIDTH*2]; |
||
366 | |||
367 | unsigned j; //Alone Coder 0.36.7 |
||
368 | for (/*unsigned*/ j = 0; j < MAX_WIDTH/2; j++) |
||
369 | *(unsigned*)(midbuf+j*4) = WORD4(0,0x80,0,0x80); |
||
370 | |||
371 | unsigned char *src = rbuf; unsigned delta = temp.scx/4; |
||
372 | |||
373 | for (unsigned i = temp.scy; i; i--) { |
||
374 | *(unsigned*)line = *(unsigned*)(line+4) = WORD4(0,0x80,0,0x80); |
||
375 | |||
376 | if (conf.noflic) line16_nf(line+8, src, t.sctab16_nf[0]); |
||
377 | else line16(line+8, src, t.sctab16[0]); |
||
378 | |||
379 | src += delta; |
||
380 | |||
381 | for (j = 0; j < temp.scx; j++) { |
||
382 | |||
383 | unsigned Y = line[j*2+8]*9+ |
||
384 | line[j*2-2+8]*4+ |
||
385 | line[j*2-4+8]*2+ |
||
386 | line[j*2-8+8]; |
||
387 | /* |
||
388 | unsigned U = line[j*2+8+1]*12 + |
||
389 | line[j*2-2+8+1]*2+ |
||
390 | line[j*2-4+8+1]+ |
||
391 | line[j*2-8+8+1]; |
||
392 | */ |
||
393 | line2[j*2] = Y>>4; |
||
394 | // line2[j*2+1] = U>>4; |
||
395 | line2[j*2+1] = line[j*2+9]; |
||
396 | } |
||
397 | // there must be only fixed length fader buffer |
||
398 | for (j = 0; j < temp.scx/2; j++) { |
||
399 | *(unsigned*)(midbuf+j*4) = *(unsigned*)(dst + j*4) = |
||
400 | ((*(unsigned*)(midbuf+j*4) & 0xFEFEFEFE)/2 + (*(unsigned*)(line2+j*4) & 0xFEFEFEFE)/2); |
||
401 | } |
||
402 | dst += pitch; |
||
403 | } |
||
404 | if (conf.noflic) memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4); |
||
405 | } |
||
406 |