1 /******************************************************************************
2 *
3 * Copyright (C) 2009, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * Author: Alexey Krivonogov ( gentee )
11 *
12 ******************************************************************************/
13
14 #include "match.h"
15 #include "lzge.h"
16 //#include "K:\Gentee\Gentee Language\VM Lib\memory.h"
17
18 //--------------------------------------------------------------------------
19
20 dword STDCALL match_new( psmatch match, pbyte start, pbyte end, dword level )
21 {
22 dword i;
23 dword hsize[] = { 16, 16, 19 };
24 dword wsize[] = { 15, 16, 15 };
25
26 match->hash2 = ( pdword )mem_allocz( ( 1 << 16 ) * sizeof( dword ));
27
28 wsize[2] = min( 21, lzge_bits( end - start ));
29
30 for ( i = 0; i < 3; i++ )
31 {
32 if ( i == 1 && wsize[ 1 ] > wsize[2] ) break;
33 if ( i == 2 && wsize[ 1 ] == wsize[2] ) break;
34
35 match->hash[ i ] = ( pdword )mem_allocz( ( 1 << hsize[ i ] ) * sizeof( dword ));
36 match->hsize[ i ] = 1 << hsize[i];
37 match->next[ i ] = ( pdword )mem_alloc( ( 1 << wsize[i] ) * sizeof( dword ));
38 match->size[ i ] = 1 << wsize[i];
39 match->top[ i ] = 0;
40 match->limit[i] = ( 1 << min( 14, level + 3 + 21 - wsize[2] ));
41 }
42 match->count = i;
43 // printf("Count=%i size=%i\n", match->count, 1<< wsize[2] );
44 match->start = start;
45 match->end = end;
46
47 return ( 1 << wsize[2] ) - 1;
48 }
49
50 //--------------------------------------------------------------------------
51
52 void STDCALL match_destroy( psmatch match )
53 {
54 dword i;
55
56 mem_free( match->hash2 );
57 for ( i = 0; i < match->count; i++ )
58 {
59 mem_free( match->hash[i] );
60 mem_free( match->next[i] );
61 }
62 }
63
64 //--------------------------------------------------------------------------
65
66 dword STACKAPI match_find( psmatch match, pbyte input, pdword retoff )
67 // Вызов должен быть со второго символа
68 {
69 dword off = input - match->start;
70 dword hash;
71 dword len = 1;
72 dword lastoff; // Последнее найденное смещение
73 dword i, limit;
74 dword cur, offset;
75 uint k;//, j;
76
77 if ( !off || input + 2 > match->end ) return len;
78
79 *retoff = 0;
80 // Ищем по длине 2
81 if ( cur = match->hash2[ *( pword )input ] )
82 {
83 offset = off - cur + 1;
84 if ( offset < 256 + MIN_OFFSET )
85 {
86 len = 2;
87 while ( input[ len ] == *( input - offset + len ) &&
88 input + len < match->end )
89 len++;
90 *retoff = offset;
91 }
92 }
93 hash = ((( dword )input[0] ) << 3 ) ^ input[ 1 ];
94 // Ищем по остальным длинам
95 for ( i = 0; i < match->count; i++ )
96 {
97 // if ( input + 3 + i > match->end ) break;
98 if ( input + len == match->end ||
99 input + 3 + i > match->end )
100 break;
101
102 limit = 0;
103 hash = ( hash << 3 ) ^ input[ 2 + i ];
104 hash &= match->hsize[i] - 1;
105 lastoff = 0;
106 cur = match->hash[ i ][ hash ];
107
108 while ( cur-- )
109 {
110 // Находим смещение
111 offset = match->top[i] + ( cur < match->top[i] ? 0 : match->size[i] ) -
112 cur + 2 + i;
113 if ( offset <= lastoff || offset >= match->size[i] ) break;
114 lastoff = offset;
115 if ( offset <= *retoff ) goto next;
116
117 if ( len > 2 + i ) // Уже есть найденные вхождения
118 {
119 for ( k = len; k >= 0; k-- )
120 {
121 if ( input[ k ] != *( input - offset + k ))
122 goto next;
123 }
124 len++;
125 }
126 else
127 {
128 for ( k = 0; k < 3 + i; k++ )
129 {
130 if ( input[ k ] != *( input - offset + k ))
131 goto next;
132 }
133 len = 3 + i;
134 }
135 // Теперь смотрим вправо
136 while ( input[ len ] == *( input - offset + len ) && input + len < match->end )
137 len++;
138 *retoff = offset;
139 next:
140 cur = match->next[i][ cur ];
141 if ( ++limit > match->limit[ i ] || input + len == match->end )
142 break;
143 }
144 }
145 if ( len >= match->size[ match->count - 1 ] - 1 )
146 len = match->size[ match->count - 1 ] - 1;
147 // if ( input + len > match->end )
148 // printf("Error off=%i len=%i\n ", *retoff, len );
149 // Обновляем хэш-таблицы
150 k = len;
151 while ( k )
152 {
153 match_update( match, input );
154
155 /* match->hash2[ *( pword )( input - 2 ) ] = input - match->start - 1;
156
157 // Ищем по остальным длинам
158 for ( i = 0; i < match->count; i++ )
159 {
160 if ( input - match->start < i + 2 ) break;
161 hash = *( input - 2 - i );
162 for ( j = 1 + i; j >= 0; j-- )
163 hash = ( hash << 3 ) ^ *( input - j );
164 hash &= match->hsize[i] - 1;
165
166 match->next[i][ match->top[i]] = match->hash[ i ][ hash ];
167 match->hash[i][ hash ] = match->top[i] + 1;
168 match->top[i]++;
169 if ( match->top[i] == match->size[i] )
170 match->top[i] = 0;
171 }*/
172 k--;
173 input++;
174 }
175 return len;
176 }
177
178 //--------------------------------------------------------------------------
179
180 dword STACKAPI match_update( psmatch match, pbyte input )
181 {
182 dword i;
183 int j;
184 dword hash;
185
186 // if ( input == match->start ) return 1;
187
188 match->hash2[ *( pword )( input - 2 ) ] = input - match->start - 1;
189
190 // Ищем по остальным длинам
191 for ( i = 0; i < match->count; i++ )
192 {
193 if ( input - match->start < (int)( i + 2 )) break;
194 hash = *( input - 2 - i );
195 for ( j = 1 + i; j >= 0; j-- )
196 hash = ( hash << 3 ) ^ *( input - j );
197 hash &= match->hsize[i] - 1;
198
199 match->next[i][ match->top[i]] = match->hash[ i ][ hash ];
200 match->hash[i][ hash ] = match->top[i] + 1;
201 match->top[i]++;
202
203 if ( match->top[i] == match->size[i] )
204 match->top[i] = 0;
205 }
206 return 1;
207 }
208
209 //--------------------------------------------------------------------------
210
Редактировать