1 /******************************************************************************
2 *
3 * Copyright (C) 2006, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * ID: test 06.10.06 0.0.A.
11 *
12 * Author: Alexey Krivonogov
13 *
14 ******************************************************************************/
15
16 #include "../os/user/defines.h"
17 #include "../common/gentee.h"
18 #include "windows.h"
19 #include "../compiler/lextbl.h"
20
21 extern memory _memory;
22
/*-----------------------------------------------------------------------------
* Identifiers used by tbl_gtdo, the table for processing GT data
* ( '#' macros, "&x..;" hex characters and "&#..;" parameters ).
*
* Two bit fields are used:
*   bits 16-23 - ids 0x01..0x0E; the records of tbl_gtdo are listed in this
*                order.
*   bits 24-31 - GTDO_AMPTRY ( it names the 15th record of tbl_gtdo ) and the
*                ids 1..5 that tbl_gtdo combines with LEXF_ITCMD.
* NOTE(review): the exact meaning of the two fields is defined by the lexer,
* which is not part of this file - confirm there before renumbering.
*/
enum
{
   GTDO_MAIN    = 0x01 << 16,
   GTDO_TEXT    = 0x02 << 16,  // Plain text. Has the LEXF_ITEM flag
   GTDO_SIGN    = 0x03 << 16,  // Service character '#'  LEXF_ITEM
   GTDO_NAME    = 0x04 << 16,  // Macro name after SIGN  LEXF_ITEM
   GTDO_AMP     = 0x05 << 16,  // Character '&'
   GTDO_ISHEX   = 0x06 << 16,  // Is it a hexadecimal value?
   GTDO_HEXOK   = 0x07 << 16,  // Waiting for the ';' of &xff;
   GTDO_ISPAR   = 0x08 << 16,  // Is it a parameter?
   GTDO_PAROK   = 0x09 << 16,  // Waiting for the ';' ( original comment is
                               // cut off here; presumably of a parameter )
   GTDO_LP      = 0x0A << 16,  // Left parenthesis after a macro  LEXF_ITEM
   GTDO_PARTEXT = 0x0B << 16,  // Ordinary text inside parentheses  LEXF_ITEM
   GTDO_SPACE   = 0x0C << 16,  // <= ' ' inside parentheses  LEXF_ITEM
   GTDO_DQ      = 0x0D << 16,  // Double-quoted text inside parentheses
   GTDO_Q       = 0x0E << 16,  // Single-quoted text inside parentheses
   GTDO_AMPTRY  = 0x0F << 24,

   GTDO_DOT     = 0x01 << 24,  // Dot in a macro name  LEXF_ITEM
   GTDO_HEX     = 0x02 << 24,  // Hexadecimal character value &xff;  LEXF_ITEM
   GTDO_PAR     = 0x03 << 24,  // Parameter number  LEXF_ITEM
   GTDO_COMMA   = 0x04 << 24,  // ',' in a parameter list  LEXF_ITEM
   GTDO_RP      = 0x05 << 24,  // Right parenthesis  LEXF_ITEM
};
47
/*-----------------------------------------------------------------------------
* Identifiers used by tbl_gt, the table for lexing GT markup.
*
* Ids 0x01..0x14 follow the order of the records in tbl_gt.  Most of them sit
* in bits 16-23; the GT_TRY* ids sit in bits 24-31, which lets tbl_gt OR them
* into the same word as another id ( e.g. GT_TRYBEGIN | GT_ISBEGIN ).
* NOTE(review): how the lexer interprets the two fields is not visible in
* this file - confirm there before renumbering.
*/
enum {
   GT_MAIN       = 0x01 << 16,
   GT_ISBEGIN    = 0x02 << 16,
   GT_TRYBEGIN   = 0x03 << 24,
   GT_COMMENT    = 0x04 << 16,  // Comment  LEXF_ITEM
   GT_VLINE      = 0x05 << 16,  // <|
   GT_STAR       = 0x06 << 16,  // Asterisk before the name, for compatibility
   GT_BEGIN      = 0x07 << 16,  // Start of a GT object. Has the LEXF_ITEM flag
   GT_ATTRIB     = 0x08 << 16,  // Attribute parsing
   GT_NAME       = 0x09 << 16,  // Identifier  LEXF_ITEM
   GT_EQUAL      = 0x0A << 16,  // Equals sign  LEXF_ITEM
   GT_STRATTR    = 0x0B << 16,  // Attribute value  LEXF_ITEM
   GT_STRDQ      = 0x0C << 16,  // Double-quoted string  LEXF_ITEM
   GT_STRQ       = 0x0D << 16,  // Single-quoted string  LEXF_ITEM
   GT_ISEND      = 0x0E << 16,  // Possible end of the object
   GT_TRYISEND   = 0x0F << 24,
   GT_ENDATTR    = 0x10 << 16,  // End of the attributes
   GT_DATA       = 0x11 << 16,  // Data
   GT_TRYENDDATA = 0x12 << 24,
   GT_TRYWHAT    = 0x13 << 24,
   GT_TRYDATABEG = 0x14 << 24,
   /* Ids of an earlier table layout, no longer defined:
      GT_ISATTREND,
      GT_ISEND,
      GT_ENDATTR,      // End of the attributes
      GT_DATA,         // Data of a GT object  LEXF_ITEM
      GT_SUBDATA,      // Possible end of data or start of child objects
      GT_SUBDATATRY,   // Where to go after a try
      GT_SUBDATA1,     // Possible end of data or start of child objects
      GT_SUBDATA1TRY,  // Where to go after a try
      GT_ISENDCMT,     // Check for the end of a comment
      GT_ISENDCMTTRY,
   */
   // End of a GT object  LEXF_ITEM.
   // Bit pattern 0x80000000.  The plain constant 0x80000000 does not fit in
   // int, which ISO C ( before C23 ) requires of an enumerator, so the value
   // is spelled as an explicit conversion; on 32-bit int this is INT_MIN and
   // converts back to 0x80000000 when stored in a uint table.
   GT_END        = ( int )( 0x80u << 24 )
};
82
/*-----------------------------------------------------------------------------
* tbl_gt - lexer table for GT markup.  main() in this file writes it to
* "gttbl.dat" as a raw array of 97 uint values.
*
* Layout, as far as it can be read from the initializer itself:
*   - the first element, 20, equals the number of records that follow;
*   - each record is: N, a default command, then N ( pattern, command ) pairs;
*   - the records are listed in the order of the GT_* ids 0x01..0x14
*     ( GT_MAIN ... GT_TRYDATABEG ), and together with the leading 20 they
*     fill all 97 elements.
* NOTE(review): two-byte patterns such as 0x3c3c or 0x0120 look like
* "from-to" byte ranges, longer ones like extra single bytes placed in front
* of a range, and 0x3000 / 0x4100 like character classes known to the lexer.
* Patterns combined with LEXF_MULTI presumably match a byte sequence.  None of
* this is defined in this file - confirm against the lexer before editing.
*/
83 const uint tbl_gt[97] = { 20,
84 // GT_MAIN ( 0x3c is '<' )
85 1, LEX_SKIP, 0x3c3c, GT_TRYBEGIN | GT_ISBEGIN | LEXF_POS | LEXF_TRY | LEXF_PUSH,
86 // GT_ISBEGIN ( 0x2d '-', 0x2a '*', 0x7c '|' )
87 4, LEXF_RET, 0x2d2d, GT_COMMENT | LEXF_ITSTATE,
88 0x2a2a, GT_STAR,
89 0x7c7c, GT_VLINE,
90 0x3000, GT_BEGIN | LEXF_ITSTATE | LEXF_PUSHLI,
91 // GT_TRYBEGIN ( no patterns, default command only )
92 0, LEXF_POP | LEX_SKIP,
93 // GT_COMMENT ( bytes 0x3e '>' and 0x2d '-'; presumably the sequence "->" )
94 1, LEX_OK, 0x3e2d, LEX_OK | LEXF_POP | LEXF_MULTI,
95 // GT_VLINE
96 2, LEXF_RET, 0x2a2a, GT_STAR, 0x3000, GT_BEGIN | LEXF_ITSTATE | LEXF_PUSHLI,
97 // GT_STAR
98 1, LEXF_RET, 0x3000, GT_BEGIN | LEXF_ITSTATE | LEXF_PUSHLI,
99 // GT_BEGIN  Accumulate '0'-'9' '_' 'A'-'Z' 'a'-'z' and bytes >= 128
100 1, GT_ATTRIB | LEXF_STAY, 0x3000, LEX_OK,
101 // GT_ATTRIB ( 0x2f '/', 0x3d '=', 0x3e '>' )
102 4, LEX_SKIP, 0x4100, GT_NAME | LEXF_ITSTATE | LEXF_POS,
103 0x2f2f, GT_TRYISEND | GT_ISEND | LEXF_POS | LEXF_TRY,
104 0x3d3d, GT_EQUAL | LEXF_ITSTATE | LEXF_POS,
105 0x3e3e, GT_ENDATTR,
106 // GT_NAME
107 1, GT_ATTRIB | LEXF_STAY, 0x3000, LEX_OK,
108 // GT_EQUAL ( 0x01-0x20 control characters and space, 0x22 '"', 0x27 '\'' )
109 4, GT_STRATTR | LEXF_ITSTATE | LEXF_POS,
110 0x0120, LEX_SKIP,
111 0x3e2f2f, GT_ATTRIB | LEXF_STAY,
112 0x2222, GT_STRDQ | LEXF_ITSTATE | LEXF_POS,
113 0x2727, GT_STRQ | LEXF_ITSTATE | LEXF_POS,
114 // GT_STRATTR
115 1, LEX_OK, 0x2f3e0120, GT_ATTRIB | LEXF_STAY,
116 // GT_STRDQ
117 1, LEX_OK, 0x2222, LEX_OK | GT_ATTRIB,
118 // GT_STRQ
119 1, LEX_OK, 0x2727, LEX_OK | GT_ATTRIB,
120 // GT_ISEND
121 2, LEXF_RET,
122 0x3e3e, GT_END | LEXF_ITCMD | LEXF_POP | LEXF_POPLI,
123 0x3000, LEX_GTNAME,
124 // GT_TRYISEND ( no patterns, default command only )
125 0, LEX_SKIP | GT_ATTRIB,
126 // GT_ENDATTR ( end of the attributes )
127 3, GT_DATA | LEXF_ITSTATE | LEXF_POS,
128 0x0120, LEX_SKIP,
129 0x3c3c, GT_ISBEGIN | LEXF_POS | LEXF_PUSH | LEXF_TRY | GT_TRYDATABEG,
130 0x2f3c, LEXF_MULTI | GT_ISEND | LEXF_TRY | GT_TRYWHAT,
131 // GT_DATA ( bytes 0x2f '/' and 0x3c '<'; presumably the sequence "</" )
132 1, LEX_OK,
133 0x2f3c, LEXF_MULTI | GT_ISEND | LEXF_TRY | GT_TRYENDDATA,
134 // GT_TRYENDDATA ( no patterns, default command only )
135 0, LEX_OK | GT_DATA,
136 // GT_TRYWHAT ( no patterns, default command only )
137 0, GT_DATA | LEXF_ITSTATE,
138 // GT_TRYDATABEG ( no patterns, default command only )
139 0, GT_DATA | LEXF_ITSTATE | LEXF_POP,
140
141 /* Disabled records of an earlier table layout, kept for reference:
142 // GT_ISATTREND
143 0x03, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
144 0x3e3e, GT_ISEND | LEXF_POP | LEXF_STAY, 0x3000, LEX_GTNAME,
145 // GT_ISEND
146 0x01, GT_END | LEXF_ITEM | LEXF_POP | LEXF_NAME | LEXF_RET,
147 0x0000, GT_END | LEXF_ITEM | LEXF_POP | LEXF_NAME | LEXF_RET,
148 // GT_ENDATTRIB
149 0x02, GT_DATA | LEXF_ITEM | LEXF_POS, 0x0120, LEX_SKIP,
150 0x3c3c, GT_SUBDATA | LEXF_TRY | LEXF_POS,
151 // GT_DATA
152 0x01, LEX_OK, 0x3c3c, GT_SUBDATA1 | LEXF_TRY | LEXF_POS,
153 // GT_SUBDATA
154 0x04, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
155 0x2d2d, GT_COMMENT | LEXF_ITEM | LEXF_PUSH,
156 0x2f2f, GT_ISATTREND, 0x3000, GT_ISBEGIN | LEXF_STAY,
157 // GT_SUBDATATRY
158 0x01, GT_DATA | LEXF_ITEM | LEXF_POS, 0x0000, GT_DATA,
159 // GT_SUBDATA1
160 0x02, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
161 0x2f2f, GT_ISATTREND,
162 // GT_SUBDATA1TRY
163 0x01, LEX_OK, 0x0000, GT_DATA,
164 0x0000,*/
165 };
166
167 /*
168 const ushort tbl_gt[115] = {
169 0x01, LEX_SKIP, 0x3c3c, GT_ISBEGIN | LEXF_POS,
170 // GT_ISBEGIN Смотрим начало на _ 'A'-'Z' 'a'-'z' и больше или равно 128
171 0x02, LEXF_RET | LEXF_STAY, 0x2d2d, GT_COMMENT | LEXF_ITEM | LEXF_PUSH,
172 0x3000, GT_BEGIN | LEXF_ITEM | LEXF_PUSH | LEXF_NAME,
173 // GT_BEGIN Копим '0' - '9' '_' 'A'-'Z' 'a'-'z' и больше или равно 128
174 0x01, GT_ATTRIB | LEXF_PUSH | LEXF_STAY, 0x3000, LEX_OK,
175 // GT_ATTRIB
176 0x04, LEX_SKIP, 0x4100, GT_NAME | LEXF_ITEM | LEXF_POS, 0x2f2f, GT_ISATTREND | LEXF_POS,
177 0x3d3d, GT_EQUAL | LEXF_ITEM | LEXF_POS,
178 0x3e3e, GT_ENDATTR | LEXF_POP | LEXF_PUSH,
179 // GT_NAME
180 0x01, LEXF_RET | LEXF_STAY, 0x3000, LEX_OK,
181 // GT_ISATTREND
182 0x03, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
183 0x3e3e, GT_ISEND | LEXF_POP | LEXF_STAY, 0x3000, LEX_GTNAME,
184 // GT_ISEND
185 0x01, GT_END | LEXF_ITEM | LEXF_POP | LEXF_NAME | LEXF_RET,
186 0x0000, GT_END | LEXF_ITEM | LEXF_POP | LEXF_NAME | LEXF_RET,
187 // GT_EQUAL
188 0x05, GT_STRATTR | LEXF_ITEM | LEXF_POS, 0x0120, LEX_SKIP, 0x2f2f, LEXF_RET | LEXF_STAY, 0x3e3e, LEXF_RET | LEXF_STAY,
189 0x2222, GT_STRDQ | LEXF_PUSH | LEXF_POS | LEXF_ITEM,
190 0x2727, GT_STRQ | LEXF_PUSH | LEXF_POS | LEXF_ITEM,
191 // GT_STRATTR
192 0x03, LEX_OK, 0x0120, LEXF_RET | LEXF_STAY, 0x2f2f, LEXF_RET | LEXF_STAY,
193 0x3e3e, LEXF_RET | LEXF_STAY,
194 // GT_STRDQ
195 0x01, LEX_OK, 0x2222, LEX_OK | LEXF_POP | LEXF_RET,
196 // GT_STRQ
197 0x01, LEX_OK, 0x2727, LEX_OK | LEXF_POP | LEXF_RET,
198 // GT_ENDATTRIB
199 0x02, GT_DATA | LEXF_ITEM | LEXF_POS, 0x0120, LEX_SKIP,
200 0x3c3c, GT_SUBDATA | LEXF_TRY | LEXF_POS,
201 // GT_DATA
202 0x01, LEX_OK, 0x3c3c, GT_SUBDATA1 | LEXF_TRY | LEXF_POS,
203 // GT_SUBDATA
204 0x04, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
205 0x2d2d, GT_COMMENT | LEXF_ITEM | LEXF_PUSH,
206 0x2f2f, GT_ISATTREND, 0x3000, GT_ISBEGIN | LEXF_STAY,
207 // GT_SUBDATATRY
208 0x01, GT_DATA | LEXF_ITEM | LEXF_POS, 0x0000, GT_DATA,
209 // GT_SUBDATA1
210 0x02, LEXF_RET | LEXF_STAY, 0x0000, LEXF_RET | LEXF_STAY,
211 0x2f2f, GT_ISATTREND,
212 // GT_SUBDATA1TRY
213 0x01, LEX_OK, 0x0000, GT_DATA,
214 // GT_COMMENT
215 0x01, LEX_OK, 0x2d2d, GT_ISENDCMT | LEXF_TRY,
216 // GT_ISENDCMT
217 0x01, LEXF_RET | LEXF_STAY, 0x3e3e, LEX_OKDBL | LEXF_POP | LEXF_RET,
218 // GT_ISENDCMTTRY
219 0x01, LEX_OK, 0x0000, GT_COMMENT,
220 0x0000,
221 };
222
223 const ushort tbl_gtdo[ 113 ] = {
224 0x02, GTDO_TEXT | LEXF_ITEM | LEXF_POS, 0x2626, GTDO_AMP | LEXF_TRY | LEXF_POS,
225 0x2323, GTDO_SIGN | LEXF_POS | LEXF_ITEM,
226 //0x2323 можно менять вручную в таблице на нужный служебный символ
227 // GTDO_TEXT накапливаем обычный текст
228 // 0x01, LEX_OK, 0x2626, GTDO_AMP | LEXF_TRY | LEXF_POS,
229 0x02, LEX_OK, 0x2626, GTDO_MAIN | LEXF_STAY, 0x2323, GTDO_MAIN | LEXF_STAY,
230 // GTDO_AMP
231 0x04, LEXF_RET, 0x0000, LEXF_RET, 0x2323, GTDO_PAR,
232 0x5858, GTDO_HEX, 0x7878, GTDO_HEX,
233 // GTDO_AMPTRY
234 0x01, GTDO_TEXT | LEXF_ITEM | LEXF_POS, 0x0000, GTDO_TEXT,
235 // GTDO_HEX
236 0x02, LEXF_RET, 0x0000, LEXF_RET, 0x5800, GTDO_HEXOK,
237 // GTDO_HEXOK
238 0x02, LEXF_RET, 0x3B3B, GTDO_HEXOK2 | LEXF_ITEM,
239 0x5800, LEX_SKIP,
240 // GTDO_HEXOK2
241 0x01, GTDO_MAIN | LEXF_STAY, 0x0, LEX_STOP,
242 // GTDO_PAR
243 0x02, LEXF_RET, 0x0000, LEXF_RET, 0x3039, GTDO_PAROK,
244 // GTDO_PAROK
245 0x02, LEXF_RET, 0x3B3B, GTDO_PAROK2 | LEXF_ITEM, 0x3039, LEX_SKIP,
246 // GTDO_PAROK2
247 0x01, GTDO_MAIN | LEXF_STAY, 0x0, LEX_STOP,
248 // GTDO_SIGN
249 0x03, GTDO_MAIN | LEXF_STAY, 0x3000, GTDO_NAME | LEXF_POS | LEXF_ITEM,
250 0x2e2e, GTDO_DOT | LEXF_POS | LEXF_ITEM, 0x2f2f, GTDO_NAME | LEXF_POS | LEXF_ITEM,
251 // GTDO_NAME
252 0x04, GTDO_MAIN | LEXF_STAY, 0x3000, LEX_OK, 0x2828, GTDO_LP | LEXF_ITEM | LEXF_POS,
253 0x2f2f, LEX_OK, 0x2e2e, GTDO_SIGN | LEXF_STAY,
254 // GTDO_LP
255 0x05, GTDO_PARTEXT | LEXF_ITEM | LEXF_POS, 0x0120, GTDO_SPACE | LEXF_ITEM | LEXF_POS,
256 0x2c2c, GTDO_COMMA | LEXF_ITEM | LEXF_POS, 0x2929, GTDO_RP | LEXF_ITEM | LEXF_POS,
257 0x2222, GTDO_DQ | LEXF_ITEM | LEXF_POS | LEXF_PUSH,
258 0x2727, GTDO_Q | LEXF_ITEM | LEXF_POS | LEXF_PUSH,
259 // GTDO_PARTEXT
260 0x03, LEX_OK, 0x0120, GTDO_LP | LEXF_STAY, 0x2c2c, GTDO_LP | LEXF_STAY,
261 0x2929, GTDO_LP | LEXF_STAY,
262 // GTDO_SPACE
263 0x01, GTDO_LP | LEXF_STAY, 0x0120, LEX_OK,
264 // GTDO_DQ
265 0x01, LEX_OK, 0x2222, LEX_OK | LEXF_POP | LEXF_RET,
266 // GTDO_Q
267 0x01, LEX_OK, 0x2727, LEX_OK | LEXF_POP | LEXF_RET,
268 // GTDO_RP
269 0x01, GTDO_MAIN | LEXF_STAY, 0x0001, LEX_STOP,
270
271 0x0000,
272 };
273 */
274
/*-----------------------------------------------------------------------------
* tbl_gtdo - lexer table for processing GT data ( '#' macros with optional
* parenthesised arguments, "&x..;" and "&#..;" sequences ).  main() in this
* file writes it to "gtdotbl.dat" and passes it to lex_init().
*
* Layout, as far as it can be read from the initializer itself:
*   - the first element, 15, equals the number of records that follow;
*   - each record is: N, a default command, then N ( pattern, command ) pairs;
*   - the records are listed in the order of the ids 0x01..0x0F
*     ( GTDO_MAIN ... GTDO_Q, then GTDO_AMPTRY ), and together with the
*     leading 15 they fill all 81 elements.
* NOTE(review): two-byte patterns such as 0x2626 or 0x3039 look like
* "from-to" byte ranges, longer ones like extra single bytes placed in front
* of a range, and 0x3000 / 0x5800 like character classes known to the lexer -
* confirm against the lexer before editing.
*/
275 const uint tbl_gtdo[81] = { 15,
276 // GTDO_MAIN ( 0x26 is '&', 0x23 is '#' )
277 2, GTDO_TEXT | LEXF_ITSTATE | LEXF_POS,
278 0x2626, GTDO_AMP | LEXF_TRY | LEXF_POS | GTDO_AMPTRY,
279 0x2323, GTDO_SIGN | LEXF_POS | LEXF_ITSTATE,
280 // GTDO_TEXT  plain text, left on '&' or '#'
281 1, LEX_OK, 0x262323, GTDO_MAIN | LEXF_STAY,
282 // GTDO_SIGN ( 0x2f '/', 0x2e '.' )
283 2, GTDO_MAIN | LEXF_STAY,
284 0x2f3000, GTDO_NAME | LEXF_POS | LEXF_ITSTATE,
285 0x2e2e, GTDO_DOT | LEXF_POS | LEXF_ITCMD,
286 // GTDO_NAME ( 0x28 is '(' )
287 3, GTDO_MAIN | LEXF_STAY, 0x2f3000, LEX_OK,
288 0x2828, GTDO_LP | LEXF_ITSTATE | LEXF_POS,
289 0x2e2e, GTDO_SIGN | LEXF_STAY,
290 // GTDO_AMP ( 0x78 'x', 0x58 'X', 0x23 '#' )
291 2, LEXF_RET, 0x785858, GTDO_ISHEX,
292 0x2323, GTDO_ISPAR,
293 // GTDO_ISHEX ( 0x5800 is presumably the hexadecimal-digit class )
294 1, LEXF_RET,
295 0x5800, GTDO_HEXOK,
296 // GTDO_HEXOK ( 0x3B is ';' )
297 2, LEXF_RET,
298 0x3B3B, GTDO_HEX | LEXF_ITCMD | GTDO_MAIN,
299 0x5800, LEX_SKIP,
300 // GTDO_ISPAR ( 0x30-0x39 are the digits '0'-'9' )
301 1, LEXF_RET,
302 0x3039, GTDO_PAROK,
303 // GTDO_PAROK
304 2, LEXF_RET,
305 0x3B3B, GTDO_PAR | LEXF_ITCMD | GTDO_MAIN,
306 0x3039, LEX_SKIP,
307 // GTDO_LP ( 0x2c ',', 0x29 ')', 0x22 '"', 0x27 '\'' )
308 5, GTDO_PARTEXT | LEXF_ITSTATE | LEXF_POS,
309 0x0120, GTDO_SPACE | LEXF_ITSTATE | LEXF_POS,
310 0x2c2c, GTDO_COMMA | LEXF_ITCMD | LEXF_POS,
311 0x2929, GTDO_RP | LEXF_ITCMD | LEXF_POS | GTDO_MAIN,
312 0x2222, GTDO_DQ | LEXF_ITSTATE | LEXF_POS,
313 0x2727, GTDO_Q | LEXF_ITSTATE | LEXF_POS,
314 // GTDO_PARTEXT
315 1, LEX_OK, 0x2c290120, GTDO_LP | LEXF_STAY,
316 // GTDO_SPACE
317 1, GTDO_LP | LEXF_STAY, 0x0120, LEX_OK,
318 // GTDO_DQ
319 1, LEX_OK, 0x2222, LEX_OK | GTDO_LP,
320 // GTDO_Q
321 1, LEX_OK, 0x2727, LEX_OK | GTDO_LP,
322 // GTDO_AMPTRY ( no patterns, default command only; LEXF_POS is disabled )
323 0, GTDO_TEXT | LEXF_ITSTATE,// | LEXF_POS,
324 };
325
326 int __cdecl main( int argc, char *argv[] )
327 {
328 lex ilex;
329 plexitem pil;
330 buf in;
331 str fn;
332 arr out;
333 uint i;
334 uint fout;
335
336 gentee_init();
337 printf("Start\n");
338 str_init( &fn );
339
340 str_copyzero( &fn, "gttbl.dat");
341 fout = os_fileopen( &fn, FOP_CREATE );
342 printf("Fout=%i %s\n", fout, str_ptr( &fn ));
343 os_filewrite( fout, ( pubyte )&tbl_gt, 97 * sizeof( uint ));
344 os_fileclose( ( pvoid )fout );
345
346 str_copyzero( &fn, "gtdotbl.dat");
347 fout = os_fileopen( &fn, FOP_CREATE );
348 printf("Fout=%i %s\n", fout, str_ptr( &fn ));
349 str_delete( &fn );
350 os_filewrite( fout, ( pubyte )&tbl_gtdo, 81 * sizeof( uint ));
351 os_fileclose( ( pvoid )fout );
352
353 arr_init( &out, sizeof( lexitem ));
354 buf_init( &in );
355 buf_copyzero( &in,
356 "</r/&xfa;
&xfa; #ap/dfield( 'qwer ()ty' , \"my , name\" , qqq)#asdf.fgwsw/# se# &xaa;"
357 "<2 qqq> </2><1 a2345=&xf0;> 223&</1><-qwe-rty->"
358 "<mygt /asd = \"qwerty sese'\" qq21 = 'dedxd' 'esese;' aqaq=325623/>"
359 "<a asdff /a> <mygtdd><a /><-ooops-><ad />< qq</>"
360 "xxx </r/nm <_aa aqaqa /_aaaa /_a/_aa><a22222/ >"
361 "<*abc ></abc><|*aaa = qqqq></aaa>ooops aaa</eee>\"\r\n</>");
362 // buf_copyzero( &in, "<mygt > <aaa asdff>qqqq</> </mygtdd>qq </> xxx </r/nm <_aa aqaqa /_aaaa /_aa> <a22222/ /> </ > ");
363 printf("lex_init\n");
364 lex_init( &ilex, (puint)&tbl_gtdo );
365 printf("gentee_lex\n");
366 gentee_lex( &in, &ilex, &out );
367 if (arr_count(&ilex.state))
368 printf("================= State=%x/%i \n", arr_getuint( &ilex.state,
369 arr_count(&ilex.state) - 1 ), arr_count(&ilex.state));
370 for ( i = 0; i < arr_count( &out ); i++ )
371 {
372 pil = ( plexitem )arr_ptr( &out, i );
373 printf("ID=%x pos=%i len=%i \n", pil->type, pil->pos, pil->len,
374 buf_ptr( &in ) + pil->pos );
375 }
376 // gentee_compile();
377 lex_delete( &ilex );
378 buf_delete( &in );
379 arr_delete( &out );
380 gentee_deinit();
381 printf("OK\n");
382 getch();
383 return 0;
384 }
385