Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
969 | dimkam | 1 | #include <stdio.h> |
2 | #include <stdlib.h> |
||
3 | #include <string.h> |
||
4 | |||
5 | #include "mhmt-types.h" |
||
6 | |||
7 | #include "mhmt-lz.h" |
||
8 | #include "mhmt-tb.h" |
||
9 | #include "mhmt-globals.h" |
||
10 | |||
11 | |||
12 | |||
13 | // Universal search function |
||
14 | // |
||
15 | void make_lz_codes(OFFSET position, ULONG actual_len, UBYTE * hash, struct lzcode * codes) |
||
16 | { |
||
17 | OFFSET i; |
||
18 | ULONG codepos; |
||
19 | ULONG codelen; |
||
20 | ULONG was_match; |
||
21 | UBYTE curr_byte,next_byte; |
||
22 | struct tb_chain * curr_tb; |
||
23 | UWORD index; |
||
24 | ULONG max_lookback,max_length,max_tbdisp; |
||
25 | |||
26 | // copy-byte code is always present |
||
27 | codes[0].length = 1; |
||
28 | codes[0].disp = 0; |
||
29 | |||
30 | // start more filling of codes[] from that position |
||
31 | codepos = 1; |
||
32 | |||
33 | |||
34 | |||
35 | if( wrk.packtype==PK_HST ) // for hrust only, |
||
36 | { // add 12,14,16,...,40,42 bytes copies, if possible |
||
37 | for(codelen=12;codelen<=42;codelen+=2) |
||
38 | { |
||
39 | if( position > (OFFSET)(actual_len-codelen) ) |
||
40 | break; |
||
41 | |||
42 | codes[codepos].length = codelen; |
||
43 | codes[codepos].disp = 0; |
||
44 | codepos++; |
||
45 | } |
||
46 | } |
||
47 | |||
48 | |||
49 | |||
50 | |||
51 | curr_byte=wrk.indata[position]; |
||
52 | |||
53 | |||
54 | |||
55 | if( wrk.packtype!=PK_ZX7 ) |
||
56 | { |
||
57 | // check for one-byter (-1..-8) |
||
58 | // |
||
59 | i = (position > (8LL-wrk.prelen) ) ? position-8 : (0LL-wrk.prelen); |
||
60 | do |
||
61 | { |
||
62 | if( wrk.indata[i] == curr_byte ) |
||
63 | { |
||
64 | codes[codepos].length = 1; |
||
65 | codes[codepos].disp = -(LONG)(position-i); |
||
66 | codepos++; |
||
67 | break; |
||
68 | } |
||
69 | } while( (++i)<position ); |
||
70 | } |
||
71 | |||
72 | |||
73 | |||
74 | |||
75 | |||
76 | // for hrust, check 3-byte insertion code (-1..-79) |
||
77 | if( (wrk.packtype==PK_HST) && (position < (OFFSET)(actual_len-2)) ) |
||
78 | { |
||
79 | i = (position > (79LL-wrk.prelen) ) ? position-79 : (0LL-wrk.prelen); |
||
80 | do |
||
81 | { |
||
82 | if( (wrk.indata[i]==curr_byte) && (wrk.indata[i+2]==wrk.indata[position+2]) ) |
||
83 | { |
||
84 | codes[codepos].length = (-3); |
||
85 | codes[codepos].disp = -(LONG)(position-i); |
||
86 | codepos++; |
||
87 | break; |
||
88 | } |
||
89 | } while( (++i)<position ); |
||
90 | } |
||
91 | |||
92 | |||
93 | |||
94 | |||
95 | switch( wrk.packtype ) // set maximum lookback and length |
||
96 | { |
||
97 | case PK_MLZ: |
||
98 | max_lookback = (wrk.maxwin<4352) ? wrk.maxwin : 4352; |
||
99 | max_length = 255; |
||
100 | max_tbdisp = 256; |
||
101 | break; |
||
102 | case PK_HRM: |
||
103 | max_lookback = (wrk.maxwin<4096) ? wrk.maxwin : 4096; |
||
104 | max_length = 255; |
||
105 | max_tbdisp = 256; |
||
106 | break; |
||
107 | case PK_HST: |
||
108 | max_lookback = (wrk.maxwin<65536) ? wrk.maxwin : 65536; |
||
109 | max_length = 3839; |
||
110 | max_tbdisp = 768; |
||
111 | break; |
||
112 | case PK_ZX7: |
||
113 | max_lookback = (wrk.maxwin<2176) ? wrk.maxwin : 2176; |
||
114 | max_length = 65536; |
||
115 | max_tbdisp = max_lookback; |
||
116 | break; |
||
117 | default: |
||
118 | printf("mhmt-lz.c:make_lz_codes() - wrong packer type!\n"); |
||
119 | exit(1); |
||
120 | } |
||
121 | |||
122 | |||
123 | |||
124 | // check for two-byter (-1..-max_tbdisp) |
||
125 | // |
||
126 | curr_tb = NULL; |
||
127 | // |
||
128 | if( position<(OFFSET)(actual_len-1) ) // don't try two-byter if we are at the byte before last one |
||
129 | { |
||
130 | next_byte = wrk.indata[position+1]; |
||
131 | index=(curr_byte<<8) + next_byte; |
||
132 | curr_tb = tb_entry[index]; |
||
133 | |||
134 | // there are two-byters! |
||
135 | if( curr_tb ) |
||
136 | { |
||
137 | if( ((position-curr_tb->pos)<=(OFFSET)max_tbdisp) && ((position-curr_tb->pos)<=(OFFSET)max_lookback) ) |
||
138 | { |
||
139 | codes[codepos].length = 2; |
||
140 | codes[codepos].disp = -(LONG)(position - curr_tb->pos); |
||
141 | codepos++; |
||
142 | } |
||
143 | } |
||
144 | } |
||
145 | |||
146 | |||
147 | // at last, check for lengths=3..max_length up to max_lookback |
||
148 | if( curr_tb && ( (position-curr_tb->pos)<=(OFFSET)max_lookback ) && ( position<(OFFSET)(actual_len-2) ) ) // if we can proceed at all |
||
149 | { |
||
150 | was_match = 1; // there was match at codelen-1 |
||
151 | |||
152 | for( codelen=3; ( codelen<=max_length )&&( position<(OFFSET)(actual_len-codelen+1) ); /*nothing*/ ) |
||
153 | { |
||
154 | if( was_match ) // for codelen-1 |
||
155 | { |
||
156 | // codelen-1 bytes are matched, compare one more byte |
||
157 | if( wrk.indata[position+codelen-1] == wrk.indata[curr_tb->pos+codelen-1] ) |
||
158 | { |
||
159 | // add code to the table |
||
160 | codes[codepos].length = codelen; |
||
161 | codes[codepos].disp = -(LONG)(position - curr_tb->pos); |
||
162 | codepos++; |
||
163 | |||
164 | codelen++; // next time do comparision of greater size |
||
165 | } |
||
166 | else // last bytes do not match |
||
167 | { |
||
168 | |||
169 | MATCH_FAIL: // entrance for failed matches here: used 3-fold so we set "goto" here |
||
170 | |||
171 | // go for older twobyter |
||
172 | curr_tb = curr_tb->next; |
||
173 | |||
174 | // no more twobyters or they are too far - stop search at all |
||
175 | if( !curr_tb ) break; |
||
176 | if( (position - curr_tb->pos)>(OFFSET)max_lookback ) break; |
||
177 | |||
178 | // mark there was no matches |
||
179 | was_match = 0; |
||
180 | } |
||
181 | } |
||
182 | else // there were no matches for previous codelen |
||
183 | { |
||
184 | // next twobyter is already taken, but no comparision is done for codelen bytes |
||
185 | // first we check if we need to do such comparision at all by seeing to the hashes of the ends of strings |
||
186 | if( hash[position+codelen-1] == hash[curr_tb->pos+codelen-1] ) |
||
187 | { // hashes match, so try matching complete string |
||
188 | if( !memcmp( &wrk.indata[position], &wrk.indata[curr_tb->pos], codelen ) ) |
||
189 | { |
||
190 | was_match = 1; |
||
191 | codes[codepos].length = codelen; |
||
192 | codes[codepos].disp = -(LONG)(position - curr_tb->pos); |
||
193 | codepos++; |
||
194 | |||
195 | codelen++; |
||
196 | } |
||
197 | else |
||
198 | // no match of whole string |
||
199 | goto MATCH_FAIL; |
||
200 | } |
||
201 | else |
||
202 | // no match of hashes |
||
203 | goto MATCH_FAIL; |
||
204 | } |
||
205 | } |
||
206 | } |
||
207 | |||
208 | // here we assume to have found all possible matches. check for codes[] table overflow: |
||
209 | // there could be matches for length 1..3839, and there is copy-1-byte, 16 copymanybyters, 1 insertion match, total 3857 entries for hrust, 256 for megalz & hrum |
||
210 | // 65536 for zx7 |
||
211 | if( codepos > ( (wrk.packtype==PK_HST) ? 3857 : (wrk.packtype==PK_ZX7) ? 65536 : 256 ) ) // this should not happen! |
||
212 | { |
||
213 | printf("mhmt-lz.c:make_lz_codes() encountered too many entries in codes[] table. Fatal error.\n"); |
||
214 | exit(1); |
||
215 | } |
||
216 | |||
217 | // mark end-of-records in codes[] |
||
218 | codes[codepos].length = 0; |
||
219 | codes[codepos].disp = 0; |
||
220 | } |
||
221 | |||
222 | |||
223 | |||
224 | |||
225 | |||
226 | |||
227 | |||
228 | // returns price in bits or zero if error |
||
229 | // |
||
230 | ULONG get_lz_price_megalz(OFFSET position, struct lzcode * lzcode) |
||
231 | { |
||
232 | ULONG varbits,varlen; |
||
233 | LONG length,disp; |
||
234 | |||
235 | length = lzcode->length; |
||
236 | disp = lzcode->disp; |
||
237 | |||
238 | if( length==1 ) |
||
239 | { |
||
240 | if( disp==0 ) |
||
241 | return 9; |
||
242 | else if( (-8)<=disp && disp<=(-1) ) |
||
243 | return 6; |
||
244 | else |
||
245 | goto INVALID_CODE_MEGALZ; |
||
246 | } |
||
247 | else if( length==2 ) |
||
248 | { |
||
249 | if( (-256)<=disp && disp<=(-1) ) |
||
250 | return 11; |
||
251 | else |
||
252 | goto INVALID_CODE_MEGALZ; |
||
253 | } |
||
254 | else if( length==3 ) |
||
255 | { |
||
256 | if( (-256)<=disp && disp<=(-1) ) |
||
257 | return 12; |
||
258 | else if( (-4352)<=disp && disp<(-256) ) |
||
259 | return 16; |
||
260 | else |
||
261 | goto INVALID_CODE_MEGALZ; |
||
262 | } |
||
263 | else if( 4<=length && length<=255 ) |
||
264 | { |
||
265 | varlen = 0; |
||
266 | varbits = (length-2)>>1; |
||
267 | while( varbits ) |
||
268 | { |
||
269 | varbits >>= 1; |
||
270 | varlen+=2; |
||
271 | } |
||
272 | |||
273 | if( (-256)<=disp && disp<=(-1) ) |
||
274 | varlen += 9; |
||
275 | else if( (-4352)<=disp && disp<(-256) ) |
||
276 | varlen += 13; |
||
277 | else |
||
278 | goto INVALID_CODE_MEGALZ; |
||
279 | |||
280 | return varlen+3; |
||
281 | } |
||
282 | else |
||
283 | { |
||
284 | INVALID_CODE_MEGALZ: |
||
285 | printf("mhmt-lz.c:get_lz_price_megalz(): Found invalid code length=%d, displacement=%d\n",length, disp); |
||
286 | return 0; |
||
287 | } |
||
288 | } |
||
289 | |||
290 | |||
291 | ULONG get_lz_price_hrum(OFFSET position, struct lzcode * lzcode) |
||
292 | { |
||
293 | ULONG varlen; |
||
294 | LONG length,disp; |
||
295 | |||
296 | length = lzcode->length; |
||
297 | disp = lzcode->disp; |
||
298 | |||
299 | if( length==1 ) |
||
300 | { |
||
301 | if( disp==0 ) |
||
302 | return 9; |
||
303 | else if( (-8)<=disp && disp<=(-1) ) |
||
304 | return 6; |
||
305 | else |
||
306 | goto INVALID_CODE_HRUM; |
||
307 | } |
||
308 | else if( length==2 ) |
||
309 | { |
||
310 | if( (-256)<=disp && disp<=(-1) ) |
||
311 | return 11; |
||
312 | else |
||
313 | goto INVALID_CODE_HRUM; |
||
314 | } |
||
315 | else if (3<=length && length<=255) |
||
316 | { |
||
317 | varlen = 3; |
||
318 | |||
319 | if( 4<=length && length<=15 ) |
||
320 | { |
||
321 | varlen = 5; |
||
322 | if( length>=6 ) varlen += 2; |
||
323 | if( length>=9 ) varlen += 2; |
||
324 | if( length>=12) varlen += 2; |
||
325 | } |
||
326 | else if( 15<length && length<=255 ) |
||
327 | { |
||
328 | varlen = 13; |
||
329 | } |
||
330 | |||
331 | if( (-256)<=disp && disp<=(-1) ) |
||
332 | varlen += 9; |
||
333 | else if( (-4096)<=disp && disp<(-256) ) |
||
334 | varlen += 13; |
||
335 | else |
||
336 | goto INVALID_CODE_HRUM; |
||
337 | |||
338 | return varlen; |
||
339 | } |
||
340 | else |
||
341 | { |
||
342 | INVALID_CODE_HRUM: |
||
343 | printf("mhmt-lz.c:get_lz_price_hrum(): Found invalid code length=%d, displacement=%d\n",length, disp); |
||
344 | return 0; |
||
345 | } |
||
346 | } |
||
347 | |||
348 | |||
349 | |||
350 | |||
351 | |||
352 | |||
353 | |||
354 | |||
355 | |||
356 | |||
357 | |||
358 | ULONG get_lz_price_hrust(OFFSET position, struct lzcode * lzcode) |
||
359 | { |
||
360 | ULONG /*varbits,*/varlen; |
||
361 | LONG length,disp,tmp; |
||
362 | |||
363 | length = lzcode->length; |
||
364 | disp = lzcode->disp; |
||
365 | |||
366 | |||
367 | if( disp==0 ) |
||
368 | { |
||
369 | if( length==1 ) |
||
370 | { |
||
371 | return 9; // copy-1-byte |
||
372 | } |
||
373 | else if( (12<=length) && (length<=42) && ( !(length&1) ) ) |
||
374 | { |
||
375 | return 11 + 8*length; |
||
376 | } |
||
377 | else |
||
378 | goto INVALID_CODE_HRUST; |
||
379 | } |
||
380 | else if( length==(-3) ) // insertion match! |
||
381 | { |
||
382 | if( (-16)<=disp && disp<=(-1) ) |
||
383 | { |
||
384 | return 10+8; |
||
385 | } |
||
386 | else if( (-79)<=disp && disp<(-16) ) |
||
387 | { |
||
388 | return 5+8+8; |
||
389 | } |
||
390 | else |
||
391 | goto INVALID_CODE_HRUST; |
||
392 | } |
||
393 | else if( length==1 ) |
||
394 | { |
||
395 | if( (-8)<=disp && disp<=(-1) ) |
||
396 | return 6; |
||
397 | else |
||
398 | goto INVALID_CODE_HRUST; |
||
399 | } |
||
400 | else if( length==2 ) |
||
401 | { |
||
402 | if( (-32)<=disp && disp<=(-1) ) |
||
403 | { |
||
404 | return 10; |
||
405 | } |
||
406 | else if( (-768)<=disp && disp<(-32) ) |
||
407 | { |
||
408 | return 13; |
||
409 | } |
||
410 | else |
||
411 | goto INVALID_CODE_HRUST; |
||
412 | } |
||
413 | else if (3<=length && length<=3839 && (-65536)<=disp && disp<=(-1) ) |
||
414 | { |
||
415 | // first, calc influence of length |
||
416 | if( length<=15 ) // 3..15 |
||
417 | { |
||
418 | varlen = 3 + ( (length/3)<<1 ); |
||
419 | |||
420 | if( length==3 ) varlen = 3; |
||
421 | |||
422 | if( length==15 ) varlen = 11; |
||
423 | } |
||
424 | else if( length<=127 ) // 16..127 |
||
425 | { |
||
426 | varlen = 14; |
||
427 | } |
||
428 | else // 128..3839 |
||
429 | { |
||
430 | varlen = 14+8; |
||
431 | } |
||
432 | |||
433 | |||
434 | // add displacement length |
||
435 | if( (-32)<=disp ) // ffe0..ffff |
||
436 | { |
||
437 | varlen += 7; |
||
438 | } |
||
439 | else if( (-512)<=disp ) // fe00..ffdf |
||
440 | { |
||
441 | varlen += 10; |
||
442 | } |
||
443 | else // 0000(-65536)..fdff: -513:-1024, -1025:-2048, -2049:-4096, ... ,-32769:-65536 |
||
444 | { // bits: 12 13 14 18 |
||
445 | |||
446 | varlen += 12; |
||
447 | |||
448 | if( (position-wrk.prelen)>32768LL ) |
||
449 | { |
||
450 | varlen += 6; // 8bits |
||
451 | } |
||
452 | else |
||
453 | { |
||
454 | tmp = 1024; |
||
455 | |||
456 | while( (OFFSET)(position-wrk.prelen)>(OFFSET)tmp ) |
||
457 | { |
||
458 | varlen++; |
||
459 | |||
460 | tmp <<= 1; |
||
461 | } |
||
462 | } |
||
463 | } |
||
464 | |||
465 | return varlen; |
||
466 | } |
||
467 | else |
||
468 | { |
||
469 | INVALID_CODE_HRUST: |
||
470 | printf("mhmt-lz.c:get_lz_price_hrust(): Found invalid code length=%d, displacement=%d\n",length, disp); |
||
471 | return 0; |
||
472 | } |
||
473 | } |
||
474 | |||
475 | |||
476 | |||
477 | |||
478 | ULONG get_lz_price_zx7(OFFSET position, struct lzcode * lzcode) |
||
479 | { |
||
480 | ULONG varbits,varlen; |
||
481 | LONG length,disp; |
||
482 | |||
483 | length = lzcode->length; |
||
484 | disp = lzcode->disp; |
||
485 | |||
486 | if( length==1 ) |
||
487 | { |
||
488 | if( disp==0 ) |
||
489 | return 9; |
||
490 | else |
||
491 | goto INVALID_CODE_ZX7; |
||
492 | } |
||
493 | else if( 2<=length && length<=65536 ) |
||
494 | { |
||
495 | varbits = length-1; |
||
496 | varlen=0; |
||
497 | while( varbits ) |
||
498 | { |
||
499 | varbits>>=1; |
||
500 | varlen+=2; |
||
501 | } |
||
502 | |||
503 | if( (-128)<=disp && disp<=(-1) ) |
||
504 | varlen += 8; |
||
505 | else if( (-2176)<=disp && disp<(-128) ) |
||
506 | varlen += 12; |
||
507 | else |
||
508 | goto INVALID_CODE_ZX7; |
||
509 | |||
510 | return varlen; |
||
511 | } |
||
512 | else |
||
513 | { |
||
514 | INVALID_CODE_ZX7: |
||
515 | printf("mhmt-lz.c:get_lz_price_zx7(): Found invalid code length=%d, displacement=%d\n",length, disp); |
||
516 | return 0; |
||
517 | } |
||
518 | } |