1 /******************************************************************************
2 *
3 * Copyright (C) 2004-2008, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * Author: Alexander Krivonogov ( algen )
11 *
12 ******************************************************************************/
13
14 /*-----------------------------------------------------------------------------
15 * Id: xml L "XML"
16 *
17 * Summary: XML file processing. This library is used for XML file processing
18 and XML tree building. Neither a multibyte-character set nor a
19 document type description #b(#lgt[!DOCTYPE .....]) are handled in
20 the current version. For using this library, it is required to
21 specify the file xml.g (from lib\xml subfolder) with include
22 command. #srcg[
23 |include : $"...\gentee\lib\xml\xml.g"]
24 *
25 * List: *,xml_desc,
26 *#lng/opers#,xml_opfor,
27 *#lng/methods#,xml_addentity,xml_getroot,xml_procfile,xml_procstr,
28 *Methods of XML tree items,xmlitem_chtag,xmlitem_findtag,
29 xmlitem_getattrib,xmlitem_getchild,xmlitem_getchildtag,
30 xmlitem_getchildtext,xmlitem_getname,xmlitem_getnext,
31 xmlitem_getnexttag,xmlitem_getnexttext,xmlitem_getparent,
32 xmlitem_gettext,xmlitem_isemptytag,
33 xmlitem_ispitag,xmlitem_istag,xmlitem_istext
34 *
35 -----------------------------------------------------------------------------*/
36
// Number of columns in one row of the packed transition table: xml.init
// declares the packed table as ar[0,$NUMSYM] and walks every row with a
// column index from 0 to $NUMSYM - 1.
define {
   NUMSYM = 256
}
41
// Packed form of one transition-table cell, as stored in X_tblsrc.
// xml.init expands every posb cell into a pos cell of the global table tp.
type posb
{
   byte state      // Number of the next state; 0 is kept as 0 in tp
   byte afunc      // Action code; xml.init maps 1..18 and 255 to f_* functions
   byte retstate   // Number of the state to return to; 0 is kept as 0 in tp
}
48
// Expanded transition-table cell used by xml.process.
// The parser addresses cells with a 16-byte stride and reads the fields at
// byte offsets 0 (state), 4 (afunc) and 8 (retstate).
type pos
{
   int  state      // Address of the next state's row in tp, or 0
   uint afunc      // Address of the action function, or 0 for no action
   int  retstate   // Address of the row pushed on the return stack, or 0
   uint r          // Not referenced in this file; presumably padding - confirm
}
56
// Assignment pos = posb.
// Copies the three byte fields of the packed cell r into the wider fields of
// l and returns l. The field l.r is left untouched.
operator pos = ( pos l, posb r )
{
   l.retstate = r.retstate
   l.afunc    = r.afunc
   l.state    = r.state
   return l
}
64
define {
   // Kinds of tree items (xmlitem.tgtype)
   TG_TEXT    = 0x01   // Text: tgstart is the number of the first text, tgend of the last text
   TG_TAG     = 0x10   // Tag: tgid is the name identifier, tgstart/tgend are the first/last attribute numbers
   TG_QUEST   = 0x12   // Processing instruction <? ?>
   TG_NOCHILD = 0x14   // Tag without children < />

   // Kinds of text pieces (xtext.txtype)
   TX_TEXT    = 0x01   // Plain text
   TX_SYMBOL  = 0x02   // Character: txaddr_code holds the code of the character to insert
   TX_ENTITY  = 0x03   // Entity: txaddr_code holds the code of the entity name in the entity hash table
}
// Item of the parse tree: either a text or a tag.
type xmlitem {
   uint tgtype    // Item type, one of TG_*
   uint tgid      // Identifier of the tag name in the hash table of tag names
   uint tgstart   // Number of the first attribute/text in the attribute/text table
   uint tgend     // Number of the last attribute/text plus one in the attribute/text table
   uint nparent   // Number of the owner tag
   uint nnext     // Number of the next tag
   uint nchild    // Number of the first child
   uint xml       // Address of the owning xml object (set by xml.process)
}
86
// Type declared with xmlitem as its index type; it stores a single value
// named parent.
type xmltags <index = xmlitem>
{
   uint parent
   //uint cur
}
92
// Item of the attribute table.
type xattrib {
   uint attid      // Identifier of the attribute name
   uint attstart   // Number of the first text (attribute value) in the text table
   uint attend     // Number of the last text (attribute value) in the text table
}
99
// Item of the text table.
type xtext {
   uint txtype        // Kind of the text, one of TX_*
   uint txaddr_code   // Address of the source text start / character code / entity name code
   uint txlen         // Length of the source text
}
106
// Parser object for an XML text.
type xml
{
   buf  src                   // Source text
   arr  tags of xmlitem       // Table/tree of tags
   arr  attribs of xattrib    // Table of attributes
   arr  texts of xtext        // Table of texts
   hash hnames                // Hash table of tag names
   arr  names of str          // String table for the hash table hnames
   hash hentities             // Hash table of entity names
   uint err                   // Non-zero after a failed parse (xml.process returns !err)
   uint encoding              // 1 - utf8
}
120
// Parser context shared by xml.init, xml.process and the included sources.
global
{
   // Transition table; xml.init appends the rows built from X_tblsrc and
   // xml.process starts in row 1.
   arr tp[1,256] of pos

   uint X_ia            // Address of the source byte being processed
   uint X_curtag        // Address of the current item of xml.tags
   uint X_curtext       // Address of the current item of xml.texts
   uint X_curattrib     // Address of the current item of xml.attribs
   uint X_ncurtag       // Index of the current item of xml.tags
   uint X_nparenttag    // Set to 0 when a parse starts; presumably the index of the parent tag
   uint X_ncurtext      // Index of the current item of xml.texts
   uint X_ncurattrib    // Index of the current item of xml.attribs
   uint X_maxtag        // Number of items preallocated in xml.tags
   uint X_maxattrib     // Number of items preallocated in xml.attribs
   uint X_maxtext       // Number of items preallocated in xml.texts
   uint X_maxstack      // Number of items preallocated in X_stacktags
   arr X_stacktags of uint   // Stack prepared by xml.process; not used further in this file
   uint X_nstack        // Reset to 0 when a parse starts; presumably the depth of X_stacktags
   uint X_curnameoff    // Not referenced in this file
   uint X_x             // Address of the xml object being parsed, 0 when no parse is running
   buf X_tblsrc = '\<sp.tbl>'   // Packed transition table expanded by xml.init
   str X_sname          // Not referenced in this file
   uint X_n             // Not referenced in this file
   uint X_tparenttag    // Not referenced in this file
}
137
138 include {
139 "xmlfuncs.g"
140 "xmluser.g"
141 }
142
// Builds the global transition table tp from the packed table X_tblsrc.
//
// Each packed cell (posb) of row i is expanded into tp[i+1,j]:
//   - non-zero state numbers are turned into row addresses
//     (number << 12, plus the base address of tp);
//   - action codes 1..18 and 255 are replaced with the addresses of the
//     matching f_* functions.
//
// The table is global and does not depend on the object, so it is built only
// once. Without this guard every further call (presumably one per xml
// variable - confirm) appended another full set of rows to tp and
// reallocated a table that xml.process addresses by absolute pointers.
method xml.init()
{
   uint i, j
   uint rows   // Number of rows in the packed table
   uint base   // Base address of tp after it has been expanded
   arr ar[0,$NUMSYM] of posb

   // tp is declared with a single row of 256 cells; more cells mean that the
   // table has already been built.
   if *tp > 256 : return

   ar->buf = X_tblsrc//.read( "sp.X_tblsrc" )
   rows = *ar / $NUMSYM
   tp.expand( rows * 256 )
   base = tp.ptr()   // tp is not resized below, so the address is stable

   fornum i = 0, rows
   {
      fornum j = 0, $NUMSYM
      {
         tp[i+1,j] = ar[i,j]
         // NOTE(review): state and retstate are bytes, so the comparison
         // with -1 may always hold - confirm.
         if ar[i,j].state && ar[i,j].state != -1
         {
            tp[i+1,j].state <<= 12
            tp[i+1,j].state += base
         }
         if ar[i,j].retstate && ar[i,j].retstate != -1
         {
            tp[i+1,j].retstate <<= 12
            tp[i+1,j].retstate += base
         }
         // Replace the action code with the address of its handler.
         switch ar[i,j].afunc
         {
            case 1 : tp[i+1,j].afunc = &f_begent
            case 2 : tp[i+1,j].afunc = &f_endent
            case 3 : tp[i+1,j].afunc = &f_endentnum
            case 4 : tp[i+1,j].afunc = &f_endenthex
            case 5 : tp[i+1,j].afunc = &f_begatrval
            case 6 : tp[i+1,j].afunc = &f_endatrval
            case 7 : tp[i+1,j].afunc = &f_begquest
            case 8 : tp[i+1,j].afunc = &f_endquest
            case 9 : tp[i+1,j].afunc = &f_endtagname
            case 10 : tp[i+1,j].afunc = &f_begatr
            case 11 : tp[i+1,j].afunc = &f_endatr
            case 12 : tp[i+1,j].afunc = &f_begtag
            case 13 : tp[i+1,j].afunc = &f_endtag
            case 14 : tp[i+1,j].afunc = &f_endtagend
            case 15 : tp[i+1,j].afunc = &f_begendtag
            case 16 : tp[i+1,j].afunc = &f_begendtagend
            case 17 : tp[i+1,j].afunc = &f_begcdata
            case 18 : tp[i+1,j].afunc = &f_endcdata
            case 255 : tp[i+1,j].afunc = &f_error
         }
      }
   }
}
189
// Parses this.src with the table-driven state machine stored in tp and
// rebuilds the tag, attribute and text tables of this object.
//
// The parser context lives in the X_* globals, and X_x holds the address of
// the object being parsed, so only one parse can run at a time.
//
// Returns 0 when another parse is already running (X_x is non-zero);
// otherwise returns !this.err.
method uint xml.process()
{
   uint cell                    // Address of the table cell for the current byte
   uint action                  // Address of the action function of the cell
   uint cur, ret                // Current state row / state to return to
   arr retstack[512] of uint    // Stack of return states

   // Refuse to start while another parse is in progress.
   if X_x : return 0
   X_x = &this

   // Parsing starts in row 1 of the transition table (rows are 1 << 12 bytes).
   cur = tp.ptr() + ( 1 << 12 )

   // Reset the object.
   this.err = 0
   this.encoding = 0
   this.hnames.clear()
   this.hentities.clear()
   this.tags.clear()
   this.texts.clear()
   this.names.clear()
   this.attribs.clear()

   this.hnames.ignorecase()
   this.hentities.ignorecase()
   //this.names.reserve(100)
   this.names.expand(1)

   // Preallocate the tables from the size of the source, at least 100 items.
   X_maxtag = max( *this.src/10, 100 )
   this.tags.expand( X_maxtag )
   X_maxtext = max( *this.src/20, 100 )
   this.texts.expand( X_maxtext )
   X_maxattrib = max( *this.src/40, 100 )
   this.attribs.expand( X_maxattrib )

   X_ncurattrib = 0
   X_curattrib = &this.attribs[ X_ncurattrib ]

   X_ncurtext = 0
   X_curtext = &this.texts[ X_ncurtext ]

   // Root item: slot 0, its first child is slot 1.
   X_ncurtag = 0
   X_curtag = &this.tags[ X_ncurtag ]
   X_curtag->xmlitem.tgtype = 0//$TG_TEXT
   X_ncurtag = 1
   X_curtag->xmlitem.nchild = X_ncurtag

   // Add a placeholder tag in slot 1.
   X_curtag = &this.tags[ X_ncurtag ]
   X_curtag->xmlitem.nchild = 1
   X_curtag->xmlitem.tgstart = X_ncurtext
   X_nparenttag = 0

   X_ia = this.src.ptr()
   X_stacktags.clear()
   X_maxstack = 100
   X_stacktags.expand( X_maxstack )
   X_nstack = 0

   // Start an empty text at the beginning of the source.
   X_curtext->xtext.txaddr_code = X_ia
   X_curtext->xtext.txtype = 0

   uint sp = retstack.ptr()     // Address of the next free slot of retstack

   // One step per source byte; X_ia is the address of the current byte.
   fornum X_ia, this.src.ptr() + *this.src
   {
      // Cells are 16 bytes: state at +0, action at +4, return state at +8.
      cell = cur + ( X_ia->ubyte << 4 )
      action = ( cell + 4 )->uint
      if action : action->func()

      cur = cell->uint
      if cur
      {
         // Remember where to come back to, if the cell asks for it.
         ret = ( cell + 8 )->uint
         if ret
         {
            sp->uint = ret
            sp += 4
         }
      }
      else
      {
         // No next state in the cell: resume the state saved on the stack.
         sp -= 4
         cur = sp->uint
      }
   }

   // Finish the plain text that is still open at the end of the source.
   if !X_curtext->xtext.txtype
   {
      if X_curtext->xtext.txaddr_code != X_ia
      {
         X_curtext->xtext.txlen = X_ia - X_curtext->xtext.txaddr_code
         X_curtext->xtext.txtype = $TX_TEXT
         X_curtag->xmlitem.tgtype = $TG_TEXT
         X_curtag->xmlitem.tgend = X_ncurtext + 1
      }
   }

   // Presumably drops the unused tail of the preallocated tables -
   // confirm against arr.del.
   this.tags.del( X_ncurtag + 1 )
   this.texts.del( X_ncurtext + 1 )
   this.attribs.del( X_ncurattrib + 1 )

   // Let every item know its owner.
   uint idx
   fornum idx = 0, *this.tags
   {
      this.tags[ idx ].xml = &this
   }

   // The parse is over; another one may start now.
   X_x = 0

   // Entities registered after a successful parse.
   if !this.err
   {
      this.addentity( "amp", "&" )
      this.addentity( "quot", "\"" )
      this.addentity( "apos", "'" )
      this.addentity( "gt", ">" )
      this.addentity( "lt", "<" )
   }

   // Take the encoding from the item found by the path "/xml", if there is one.
   uint decl as this.getroot()->xmlitem.chtag( "/xml" )
   if &decl
   {
      str enc
      decl.getattrib( "encoding", enc )
      if enc %== "utf-8" : this.encoding = 1
   }
   return !this.err
}
302
303 /*-----------------------------------------------------------------------------
304 * Id: xml_procfile F2
305 *
* Summary: Process an XML file. Reads the XML file whose name is
           specified as a parameter and processes it.
308 *
309 * Params: filename - Name of the file processed.
310 *
311 * Return: #lng/retf#
312 *
313 -----------------------------------------------------------------------------*/
314
// Loads the file filename into this.src and parses it.
// Returns 0 when src.read returns 0, otherwise the result of process().
method uint xml.procfile( str filename )
{
   if !this.src.read( filename ) : return 0
   return this.process()
}
323
324 /*-----------------------------------------------------------------------------
325 * Id: xml_procstr F2
326 *
* Summary: Processes a string containing an XML document.
328 *
329 * Params: src - XML data string.
330 *
331 * Return: #lng/retf#
332 *
333 -----------------------------------------------------------------------------*/
334
// Assigns the string src, viewed as a buf, to this.src and parses it.
// Returns the result of process().
method uint xml.procstr( str src )
{
   uint ok

   this.src = src->buf
   ok = this.process()
   return ok
}
340
341 /*-----------------------------------------------------------------------------
342 ** Id: xml_desc F1
343 *
* Summary: A brief description of the XML library. Variables of the #b(xml)
           type and the #b(xmlitem) type (an XML tree item) are used for processing
346 XML documents. An XML tree item can be of two types: a #b(text item)
347 and a #b(tag item). There are several types of tag items:
348 #ul[
349 |tag item that contains other items #b(#lgt[tag ...].....#lgt[/tag]);
350 |tag item that contains no other items #b(#lgt[tag .../]);
351 |tag item of processing instruction #b(#lgt[?tag ...?]).
352 ]
353 #p[A tag item may contain attributes.]
354
355 #p[The sequence of operations for processing an XML document:]
356 #ul[
357 process a document (build an XML tree) with the help of the #a(xml_procfile)
358 | method or the #a(xml_procstr) method;
359 |add entity definitions, using the #a(xml_addentity) method if necessary;
360 search for the required items in the XML tree using the following methods:
361 #a(xml_getroot), #a(xmlitem_chtag), #a(xmlitem_findtag),
362 | #a(xmlitem_getnext), etc.;
363 use the #b(foreach) statement in order to process similar elements if
364 | necessary;
365 gain access to tag attributes with the help of the #a(xmlitem_getattrib)
366 | method and get a text using the #a(xmlitem_gettext) method.
367 ]
368 *
369 * Title: XML description
370 *
371 * Define:
372 *
373 -----------------------------------------------------------------------------*/
374
375 //----------------------------------------------------------------------------
376