Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
716 | lvd | 1 | // http://scale2x.sourceforge.net/algorithm.html |
2 | |||
3 | #include "std.h" |
||
4 | |||
5 | #include "emul.h" |
||
6 | #include "vars.h" |
||
7 | #include "draw.h" |
||
8 | #include "dxrend.h" |
||
9 | #include "dxrcopy.h" |
||
10 | #include "dxrframe.h" |
||
11 | #include "dxr_advm.h" |
||
12 | |||
13 | inline void line_8_any(unsigned char *dst, unsigned char *src) |
||
14 | { |
||
15 | if (conf.noflic) line8_nf(dst, src, t.sctab8[0]); |
||
16 | else line8(dst, src, t.sctab8[0]); |
||
17 | } |
||
18 | |||
19 | inline void line_32_any(unsigned char *dst, unsigned char *src) |
||
20 | { |
||
21 | if (conf.noflic) line32_nf(dst, src, t.sctab32[0]); |
||
22 | else line32(dst, src, t.sctab32[0]); |
||
23 | } |
||
24 | |||
25 | #if 1 // switch between vectorized and branched code |
||
26 | |||
27 | #ifdef MOD_SSE2 |
||
28 | |||
29 | void lines_scale2(const unsigned char *src, unsigned y, unsigned char *dst1, unsigned char *dst2, unsigned nPix) |
||
30 | { |
||
31 | const unsigned char |
||
32 | *u = src + ((y-1) & 7)*sc2lines_width, |
||
33 | *m = src + ((y+0) & 7)*sc2lines_width, |
||
34 | *l = src + ((y+1) & 7)*sc2lines_width; |
||
35 | |||
36 | for (unsigned i = 0; i < nPix; i += 8) { |
||
37 | |||
38 | __m64 uu = *(__m64*)(u+i); |
||
39 | __m64 ll = *(__m64*)(l+i); |
||
40 | __m64 cmp = _mm_cmpeq_pi8(uu,ll); |
||
41 | |||
42 | if (_mm_movemask_pi8(cmp) != 0xFF) { |
||
43 | |||
44 | __m128i mm = _mm_loadu_si128((__m128i*)(m+i-4)); |
||
45 | __m128i uu = _mm_loadu_si128((__m128i*)(u+i-4)); |
||
46 | __m128i ll = _mm_loadu_si128((__m128i*)(l+i-4)); |
||
47 | |||
48 | __m128i md = _mm_slli_si128(mm,1); |
||
49 | __m128i mf = _mm_srli_si128(mm,1); |
||
50 | __m128i maskall = _mm_or_si128(_mm_cmpeq_epi8(md,mf), _mm_cmpeq_epi8(uu,ll)); |
||
51 | |||
52 | __m128i e0, e1, v1, v2, v3; |
||
53 | |||
54 | e0 = _mm_cmpeq_epi8(md,uu); |
||
55 | e0 = _mm_andnot_si128(maskall, e0); |
||
56 | e0 = _mm_srli_si128(e0,4); |
||
57 | e0 = _mm_unpacklo_epi8(e0, _mm_setzero_si128()); |
||
58 | |||
59 | e1 = _mm_cmpeq_epi8(mf,uu); |
||
60 | e1 = _mm_andnot_si128(maskall, e1); |
||
61 | e1 = _mm_srli_si128(e1,4); |
||
62 | e1 = _mm_unpacklo_epi8(_mm_setzero_si128(), e1); |
||
63 | |||
64 | e0 = _mm_or_si128(e0, e1); |
||
65 | |||
66 | v1 = _mm_srli_si128(mm,4); |
||
67 | v1 = _mm_unpacklo_epi8(v1,v1); |
||
68 | v2 = _mm_srli_si128(uu,4); |
||
69 | v2 = _mm_unpacklo_epi8(v2,v2); |
||
70 | |||
71 | _mm_store_si128((__m128i*)(dst1 + 2*i), _mm_or_si128( _mm_and_si128(e0,v2), _mm_andnot_si128(e0,v1) ) ); |
||
72 | |||
73 | e0 = _mm_cmpeq_epi8(md,ll); |
||
74 | e0 = _mm_andnot_si128(maskall, e0); |
||
75 | e0 = _mm_srli_si128(e0,4); |
||
76 | e0 = _mm_unpacklo_epi8(e0, _mm_setzero_si128()); |
||
77 | |||
78 | e1 = _mm_cmpeq_epi8(mf,ll); |
||
79 | e1 = _mm_andnot_si128(maskall, e1); |
||
80 | e1 = _mm_srli_si128(e1,4); |
||
81 | e1 = _mm_unpacklo_epi8(_mm_setzero_si128(), e1); |
||
82 | |||
83 | e0 = _mm_or_si128(e0, e1); |
||
84 | |||
85 | v3 = _mm_srli_si128(ll,4); |
||
86 | v3 = _mm_unpacklo_epi8(v3,v3); |
||
87 | |||
88 | _mm_store_si128((__m128i*)(dst2 + 2*i), _mm_or_si128( _mm_and_si128(e0,v3), _mm_andnot_si128(e0,v1) ) ); |
||
89 | |||
90 | } else { |
||
91 | |||
92 | __m64 v0 = *(__m64*)(m+i); |
||
93 | __m128i v1 = _mm_movpi64_epi64(v0); |
||
94 | v1 = _mm_unpacklo_epi8(v1,v1); |
||
95 | _mm_store_si128((__m128i*)(dst1 + 2*i), v1); |
||
96 | _mm_store_si128((__m128i*)(dst2 + 2*i), v1); |
||
97 | } |
||
98 | } |
||
99 | } |
||
100 | |||
101 | #else // MMX vectorized |
||
102 | |||
103 | void lines_scale2(const unsigned char *src, unsigned y, unsigned char *dst1, unsigned char *dst2, unsigned nPix) |
||
104 | { |
||
105 | const unsigned char |
||
106 | *u = src + ((y-1) & 7)*sc2lines_width, |
||
107 | *m = src + ((y+0) & 7)*sc2lines_width, |
||
108 | *l = src + ((y+1) & 7)*sc2lines_width; |
||
109 | |||
110 | for (unsigned i = 0; i < nPix; i += 4) { |
||
111 | |||
112 | if (*(unsigned*)(u+i) ^ *(unsigned*)(l+i)) { |
||
113 | |||
114 | __m64 mm = *(__m64*)(m+i-2); |
||
115 | __m64 uu = *(__m64*)(u+i-2); |
||
116 | __m64 ll = *(__m64*)(l+i-2); |
||
117 | __m64 md = _mm_slli_si64(mm,8); |
||
118 | __m64 mf = _mm_srli_si64(mm,8); |
||
119 | __m64 maskall = _mm_or_si64(_mm_cmpeq_pi8(md,mf), _mm_cmpeq_pi8(uu,ll)); |
||
120 | |||
121 | __m64 e0, e1, v1, v2; |
||
122 | |||
123 | e0 = _mm_cmpeq_pi8(md,uu); |
||
124 | e0 = _mm_andnot_si64(maskall, e0); |
||
125 | e0 = _mm_srli_si64(e0,16); |
||
126 | e0 = _mm_unpacklo_pi8(e0, _mm_setzero_si64()); |
||
127 | |||
128 | e1 = _mm_cmpeq_pi8(mf,uu); |
||
129 | e1 = _mm_andnot_si64(maskall, e1); |
||
130 | e1 = _mm_srli_si64(e1,16); |
||
131 | e1 = _mm_unpacklo_pi8(_mm_setzero_si64(), e1); |
||
132 | |||
133 | e0 = _mm_or_si64(e0, e1); |
||
134 | |||
135 | v1 = _m_from_int(*(unsigned*)(m+i)); |
||
136 | v2 = _m_from_int(*(unsigned*)(u+i)); |
||
137 | v1 = _mm_unpacklo_pi8(v1,v1); |
||
138 | v2 = _mm_unpacklo_pi8(v2,v2); |
||
139 | |||
140 | *(__m64*)(dst1 + 2*i) = _mm_or_si64( _mm_and_si64(e0,v2), _mm_andnot_si64(e0,v1) ); |
||
141 | |||
142 | e0 = _mm_cmpeq_pi8(md,ll); |
||
143 | e0 = _mm_andnot_si64(maskall, e0); |
||
144 | e0 = _mm_srli_si64(e0,16); |
||
145 | e0 = _mm_unpacklo_pi8(e0, _mm_setzero_si64()); |
||
146 | |||
147 | e1 = _mm_cmpeq_pi8(mf,ll); |
||
148 | e1 = _mm_andnot_si64(maskall, e1); |
||
149 | e1 = _mm_srli_si64(e1,16); |
||
150 | e1 = _mm_unpacklo_pi8(_mm_setzero_si64(), e1); |
||
151 | |||
152 | e0 = _mm_or_si64(e0, e1); |
||
153 | |||
154 | v1 = _m_from_int(*(unsigned*)(m+i)); |
||
155 | v2 = _m_from_int(*(unsigned*)(l+i)); |
||
156 | v1 = _mm_unpacklo_pi8(v1,v1); |
||
157 | v2 = _mm_unpacklo_pi8(v2,v2); |
||
158 | |||
159 | *(__m64*)(dst2 + 2*i) = _mm_or_si64( _mm_and_si64(e0,v2), _mm_andnot_si64(e0,v1) ); |
||
160 | |||
161 | } else { |
||
162 | |||
163 | __m64 v1 = _m_from_int(*(unsigned*)(m+i)); |
||
164 | v1 = _mm_unpacklo_pi8(v1,v1); |
||
165 | *(__m64*)(dst1 + 2*i) = v1; |
||
166 | *(__m64*)(dst2 + 2*i) = v1; |
||
167 | |||
168 | } |
||
169 | |||
170 | } |
||
171 | } |
||
172 | |||
173 | #endif // SSE2 |
||
174 | |||
175 | #else // MMX branched |
||
176 | // src dst |
||
177 | // ABC e0e1 |
||
178 | // DEF e2e3 |
||
179 | // GHI |
||
180 | |||
181 | /* |
||
182 | if(B != H && D != F) |
||
183 | { E0 = E; |
||
184 | E0 = D == B ? D : E; E1 = E; |
||
185 | E1 = B == F ? F : E; E2 = E; |
||
186 | E2 = D == H ? D : E; E3 = E; |
||
187 | E3 = H == F ? F : E; if(B != H) continue; |
||
188 | } => if(D != F) |
||
189 | else { |
||
190 | { E0 = D == B ? D : E; |
||
191 | E0 = E; E1 = B == F ? F : E; |
||
192 | E1 = E; E2 = D == H ? D : E; |
||
193 | E2 = E; E3 = H == F ? F : E; |
||
194 | E3 = E; } |
||
195 | } |
||
196 | */ |
||
197 | |||
198 | void lines_scale2(const unsigned char *src, unsigned y, unsigned char *dst1, unsigned char *dst2, unsigned nPix) |
||
199 | { |
||
200 | const unsigned char |
||
201 | *u = src + ((y-1) & 7)*sc2lines_width, |
||
202 | *m = src + ((y+0) & 7)*sc2lines_width, |
||
203 | *l = src + ((y+1) & 7)*sc2lines_width; |
||
204 | |||
205 | // process 4pix per iteration |
||
206 | for (unsigned i = 0; i < nPix; i += 4) |
||
207 | { |
||
208 | unsigned dw = *(unsigned*)(m+i); |
||
209 | __m64 v1 = _mm_cvtsi32_si64(dw); // v1 = 0| 0| 0| 0|dw[3]|dw[2]|dw[1]|dw[0] |
||
210 | v1 = _mm_unpacklo_pi8(v1,v1); // v1 = dw[3]|dw[3]|dw[2]|dw[2]|dw[1]|dw[1]|dw[0]|dw[0] |
||
211 | *(__m64*)(dst1 + 2*i) = v1; // e0e1 = dw[3]|dw[3]|dw[2]|dw[2]|dw[1]|dw[1]|dw[0]|dw[0] |
||
212 | *(__m64*)(dst2 + 2*i) = v1; // e2e3 = dw[3]|dw[3]|dw[2]|dw[2]|dw[1]|dw[1]|dw[0]|dw[0] |
||
213 | |||
214 | dw = *(unsigned*)(u+i) ^ *(unsigned*)(l+i); |
||
215 | if (!dw) |
||
216 | continue; // u == l |
||
217 | |||
218 | #define process_pix(n) \ |
||
219 | if ((dw & (0xFF << (8*n))) && m[i+n-1] != m[i+n+1]) \ |
||
220 | { \ |
||
221 | if (u[i+n] == m[i+n-1]) \ |
||
222 | dst1[2*(i+n)] = u[i+n]; \ |
||
223 | if (u[i+n] == m[i+n+1]) \ |
||
224 | dst1[2*(i+n)+1] = u[i+n]; \ |
||
225 | if (l[i+n] == m[i+n-1]) \ |
||
226 | dst2[2*(i+n)] = l[i+n]; \ |
||
227 | if (l[i+n] == m[i+n+1]) \ |
||
228 | dst2[2*(i+n)+1] = l[i+n]; \ |
||
229 | } |
||
230 | |||
231 | process_pix(0); |
||
232 | process_pix(1); |
||
233 | process_pix(2); |
||
234 | process_pix(3); |
||
235 | #undef process_pix |
||
236 | } |
||
237 | } |
||
238 | |||
239 | #endif // MMX branched |
||
240 | |||
241 | void lines_scale2_32(const unsigned char *src, unsigned y, unsigned char *dst1, unsigned char *dst2, unsigned nPix) |
||
242 | { |
||
243 | const u32 *s = (u32 *)src; |
||
244 | const u32 *u = s + ((y-1) & 7)*sc2lines_width; |
||
245 | const u32 *m = s + ((y+0) & 7)*sc2lines_width; |
||
246 | const u32 *l = s + ((y+1) & 7)*sc2lines_width; |
||
247 | u32 *d1 = (u32 *)dst1; |
||
248 | u32 *d2 = (u32 *)dst2; |
||
249 | |||
250 | for (unsigned i = 0; i < nPix; i++) |
||
251 | { |
||
252 | d1[2*i] = d1[2*i+1] = d2[2*i] = d2[2*i+1] = m[i]; |
||
253 | |||
254 | if (u[i] != l[i] && m[i-1] != m[i+1]) |
||
255 | { |
||
256 | if (u[i] == m[i-1]) |
||
257 | d1[2*i] = u[i]; |
||
258 | if (u[i] == m[i+1]) |
||
259 | d1[2*i+1] = u[i]; |
||
260 | if (l[i] == m[i-1]) |
||
261 | d2[2*i] = l[i]; |
||
262 | if (l[i] == m[i+1]) |
||
263 | d2[2*i+1] = l[i]; |
||
264 | } |
||
265 | } |
||
266 | } |
||
267 | |||
268 | // 8bpp |
||
269 | void render_scale2(unsigned char *dst, unsigned pitch) |
||
270 | { |
||
271 | unsigned char *src = rbuf; unsigned delta = temp.scx/4; |
||
272 | line_8_any(t.scale2buf[0], src); |
||
273 | // assume 'above' screen line same as line 0 |
||
274 | memcpy(t.scale2buf[(0-1) & 7], t.scale2buf[0], temp.scx); |
||
275 | for (unsigned y = 0; y < temp.scy; y++) |
||
276 | { |
||
277 | src += delta; |
||
278 | line_8_any(t.scale2buf[(y+1) & 7], src); |
||
279 | lines_scale2(t.scale2buf[0], y, dst, dst+pitch, temp.scx); |
||
280 | dst += 2*pitch; |
||
281 | } |
||
282 | } |
||
283 | |||
284 | // 32bpp |
||
285 | void render_scale2_32(unsigned char *dst, unsigned pitch) |
||
286 | { |
||
287 | unsigned char *src = rbuf; |
||
288 | unsigned delta = temp.scx/4; |
||
289 | line_32_any((u8 *)t.scale2buf32[0], src); |
||
290 | |||
291 | // assume 'above' screen line same as line 0 |
||
292 | memcpy(t.scale2buf32[(0-1) & 7], t.scale2buf32[0], temp.scx); |
||
293 | for (unsigned y = 0; y < temp.scy; y++) |
||
294 | { |
||
295 | src += delta; |
||
296 | line_32_any((u8 *)t.scale2buf32[(y+1) & 7], src); |
||
297 | lines_scale2_32((u8 *)t.scale2buf32[0], y, dst, dst+pitch, temp.scx); |
||
298 | dst += 2*pitch; |
||
299 | } |
||
300 | } |
||
301 | |||
302 | // MMX-vectorized version is not ready yet :( |
||
303 | // 8bpp |
||
304 | void lines_scale3(unsigned y, unsigned char *dst, unsigned pitch) |
||
305 | { |
||
306 | |||
307 | const unsigned char |
||
308 | *u = t.scale2buf[(y-1) & 3], |
||
309 | *m = t.scale2buf[(y+0) & 3], |
||
310 | *l = t.scale2buf[(y+1) & 3]; |
||
311 | |||
312 | for (unsigned i = 0; i < temp.scx; i += 4) |
||
313 | { |
||
314 | unsigned char c; |
||
315 | |||
316 | c = m[i]; |
||
317 | dst[3*i+0+0*pitch+ 0] = dst[3*i+1+0*pitch+ 0] = dst[3*i+2+0*pitch+ 0] = c; |
||
318 | dst[3*i+0+1*pitch+ 0] = dst[3*i+1+1*pitch+ 0] = dst[3*i+2+1*pitch+ 0] = c; |
||
319 | dst[3*i+0+2*pitch+ 0] = dst[3*i+1+2*pitch+ 0] = dst[3*i+2+2*pitch+ 0] = c; |
||
320 | |||
321 | c = m[i+1]; |
||
322 | dst[3*i+0+0*pitch+ 3] = dst[3*i+1+0*pitch+ 3] = dst[3*i+2+0*pitch+ 3] = c; |
||
323 | dst[3*i+0+1*pitch+ 3] = dst[3*i+1+1*pitch+ 3] = dst[3*i+2+1*pitch+ 3] = c; |
||
324 | dst[3*i+0+2*pitch+ 3] = dst[3*i+1+2*pitch+ 3] = dst[3*i+2+2*pitch+ 3] = c; |
||
325 | |||
326 | c = m[i+2]; |
||
327 | dst[3*i+0+0*pitch+ 6] = dst[3*i+1+0*pitch+ 6] = dst[3*i+2+0*pitch+ 6] = c; |
||
328 | dst[3*i+0+1*pitch+ 6] = dst[3*i+1+1*pitch+ 6] = dst[3*i+2+1*pitch+ 6] = c; |
||
329 | dst[3*i+0+2*pitch+ 6] = dst[3*i+1+2*pitch+ 6] = dst[3*i+2+2*pitch+ 6] = c; |
||
330 | |||
331 | c = m[i+3]; |
||
332 | dst[3*i+0+0*pitch+ 9] = dst[3*i+1+0*pitch+ 9] = dst[3*i+2+0*pitch+ 9] = c; |
||
333 | dst[3*i+0+1*pitch+ 9] = dst[3*i+1+1*pitch+ 9] = dst[3*i+2+1*pitch+ 9] = c; |
||
334 | dst[3*i+0+2*pitch+ 9] = dst[3*i+1+2*pitch+ 9] = dst[3*i+2+2*pitch+ 9] = c; |
||
335 | |||
336 | unsigned dw = *(unsigned*)(u+i) ^ *(unsigned*)(l+i); |
||
337 | if (!dw) continue; |
||
338 | |||
339 | #define process_pix(n) \ |
||
340 | if ((dw & (0xFF << (8*n))) && m[i+n-1] != m[i+n+1]) \ |
||
341 | { \ |
||
342 | if (u[i+n] == m[i+n-1]) \ |
||
343 | dst[0*pitch+3*(i+n)] = u[i+n]; \ |
||
344 | if ((u[i+n] == m[i+n-1] && m[i+n] != u[i+n+1]) || (u[i+n] == m[i+n+1] && m[i+n] != u[i+n-1])) \ |
||
345 | dst[0*pitch+3*(i+n)+1] = u[i+n]; \ |
||
346 | if (u[i+n] == m[i+n+1]) \ |
||
347 | dst[0*pitch+3*(i+n)+2] = u[i+n]; \ |
||
348 | if ((u[i+n] == m[i+n-1] && m[i+n] != l[i+n-1]) || (l[i+n] == m[i+n-1] && m[i+n] != u[i+n-1])) \ |
||
349 | dst[1*pitch+3*(i+n)+0] = m[i+n-1]; \ |
||
350 | if ((u[i+n] == m[i+n+1] && m[i+n] != l[i+n+1]) || (l[i+n] == m[i+n+1] && m[i+n] != u[i+n+1])) \ |
||
351 | dst[1*pitch+3*(i+n)+2] = m[i+n+1]; \ |
||
352 | if (l[i+n] == m[i+n-1]) \ |
||
353 | dst[2*pitch+3*(i+n)] = l[i+n]; \ |
||
354 | if ((l[i+n] == m[i+n-1] && m[i+n] != l[i+n+1]) || (l[i+n] == m[i+n+1] && m[i+n] != l[i+n-1])) \ |
||
355 | dst[2*pitch+3*(i+n)+1] = l[i+n]; \ |
||
356 | if (l[i+n] == m[i+n+1]) \ |
||
357 | dst[2*pitch+3*(i+n)+2] = l[i+n]; \ |
||
358 | } |
||
359 | |||
360 | process_pix(0); |
||
361 | process_pix(1); |
||
362 | process_pix(2); |
||
363 | process_pix(3); |
||
364 | #undef process_pix |
||
365 | } |
||
366 | } |
||
367 | |||
368 | // 8bpp |
||
369 | void render_scale3(unsigned char *dst, unsigned pitch) |
||
370 | { |
||
371 | unsigned char *src = rbuf; unsigned delta = temp.scx/4; |
||
372 | line_8_any(t.scale2buf[0], src); |
||
373 | // assume 'above' screen line same as line 0 |
||
374 | memcpy(t.scale2buf[(0-1) & 3], t.scale2buf[0], temp.scx); |
||
375 | for (unsigned y = 0; y < temp.scy; y++) { |
||
376 | src += delta; |
||
377 | line_8_any(t.scale2buf[(y+1) & 3], src); |
||
378 | lines_scale3(y, dst, pitch); |
||
379 | dst += 3*pitch; |
||
380 | } |
||
381 | } |
||
382 | |||
383 | // 32bpp |
||
384 | void lines_scale3_32(unsigned y, unsigned char *dst, unsigned pitch) |
||
385 | { |
||
386 | const u32 *u = t.scale2buf32[(y-1) & 3]; |
||
387 | const u32 *m = t.scale2buf32[(y+0) & 3]; |
||
388 | const u32 *l = t.scale2buf32[(y+1) & 3]; |
||
389 | u32 *d = (u32 *)dst; |
||
390 | pitch /= sizeof(u32); |
||
391 | |||
392 | for (unsigned i = 0; i < temp.scx; i++) |
||
393 | { |
||
394 | d[0*pitch+3*i+0] = d[0*pitch+3*i+1] = d[0*pitch+3*i+2] = m[i]; |
||
395 | d[1*pitch+3*i+0] = d[1*pitch+3*i+1] = d[1*pitch+3*i+2] = m[i]; |
||
396 | d[2*pitch+3*i+0] = d[2*pitch+3*i+1] = d[2*pitch+3*i+2] = m[i]; |
||
397 | |||
398 | if (u[i] != l[i] && m[i-1] != m[i+1]) |
||
399 | { |
||
400 | if (u[i] == m[i-1]) |
||
401 | d[0*pitch+3*i+0] = u[i]; |
||
402 | if ((u[i] == m[i-1] && m[i] != u[i+1]) || (u[i] == m[i+1] && m[i] != u[i-1])) |
||
403 | d[0*pitch+3*i+1] = u[i]; |
||
404 | if (u[i] == m[i+1]) |
||
405 | d[0*pitch+3*i+2] = u[i]; |
||
406 | if ((u[i] == m[i-1] && m[i] != l[i-1]) || (l[i] == m[i-1] && m[i] != u[i-1])) |
||
407 | d[1*pitch+3*i+0] = m[i-1]; |
||
408 | if ((u[i] == m[i+1] && m[i] != l[i+1]) || (l[i] == m[i+1] && m[i] != u[i+1])) |
||
409 | d[1*pitch+3*i+2] = m[i+1]; |
||
410 | if (l[i] == m[i-1]) |
||
411 | d[2*pitch+3*i+0] = l[i]; |
||
412 | if ((l[i] == m[i-1] && m[i] != l[i+1]) || (l[i] == m[i+1] && m[i] != l[i-1])) |
||
413 | d[2*pitch+3*i+1] = l[i]; |
||
414 | if (l[i] == m[i+1]) |
||
415 | d[2*pitch+3*i+2] = l[i]; |
||
416 | } |
||
417 | } |
||
418 | } |
||
419 | |||
420 | // 32bpp |
||
421 | void render_scale3_32(unsigned char *dst, unsigned pitch) |
||
422 | { |
||
423 | unsigned char *src = rbuf; unsigned delta = temp.scx/4; |
||
424 | line_32_any((u8 *)t.scale2buf32[0], src); |
||
425 | // assume 'above' screen line same as line 0 |
||
426 | memcpy(t.scale2buf32[(0-1) & 3], t.scale2buf32[0], temp.scx); |
||
427 | for (unsigned y = 0; y < temp.scy; y++) |
||
428 | { |
||
429 | src += delta; |
||
430 | line_32_any((u8 *)t.scale2buf32[(y+1) & 3], src); |
||
431 | lines_scale3_32(y, dst, pitch); |
||
432 | dst += 3*pitch; |
||
433 | } |
||
434 | } |
||
435 | |||
436 | void render_scale4(unsigned char *dst, unsigned pitch) |
||
437 | { |
||
438 | unsigned char *src = rbuf; unsigned delta = temp.scx/4; |
||
439 | |||
440 | line_8_any(t.scale2buf[0], src); src += delta; |
||
441 | line_8_any(t.scale2buf[1], src); src += delta; |
||
442 | // assume 'above' screen line same as line 0 |
||
443 | memcpy(t.scale2buf[(0-1) & 7], t.scale2buf[0], temp.scx); |
||
444 | lines_scale2(t.scale2buf[0], 0, t.scale4buf[0], t.scale4buf[1], temp.scx); |
||
445 | |||
446 | for (unsigned y = 0; y < temp.scy; y++) { |
||
447 | |||
448 | line_8_any(t.scale2buf[(y+2) & 7], src); src += delta; |
||
449 | |||
450 | unsigned char *dst1 = t.scale4buf[(2*y+2) & 7]; |
||
451 | unsigned char *dst2 = t.scale4buf[(2*y+3) & 7]; |
||
452 | lines_scale2(t.scale2buf[0], y+1, dst1, dst2, temp.scx); |
||
453 | |||
454 | lines_scale2(t.scale4buf[0], 2*y, dst+0*pitch, dst+1*pitch, temp.scx*2); |
||
455 | lines_scale2(t.scale4buf[0], 2*y+1, dst+2*pitch, dst+3*pitch, temp.scx*2); |
||
456 | |||
457 | dst += 4*pitch; |
||
458 | } |
||
459 | } |
||
460 | |||
461 | void __fastcall render_advmame(unsigned char *dst, unsigned pitch) |
||
462 | { |
||
463 | switch (conf.videoscale) |
||
464 | { |
||
465 | case 2: |
||
466 | if(temp.obpp == 8) render_scale2(dst, pitch); |
||
467 | else if(temp.obpp == 32) render_scale2_32(dst, pitch); |
||
468 | break; |
||
469 | case 3: |
||
470 | if(temp.obpp == 8) render_scale3(dst, pitch); |
||
471 | else if(temp.obpp == 32) render_scale3_32(dst, pitch); |
||
472 | break; |
||
473 | case 4: render_scale4(dst, pitch); break; |
||
474 | default: render_small(dst, pitch); return; // skip noflic test |
||
475 | } |
||
476 | if (conf.noflic) |
||
477 | memcpy(rbuf_s, rbuf, temp.scy*temp.scx/4); |
||
478 | _mm_empty(); |
||
479 | } |