1 /******************************************************************************
2 *
3 * Copyright (C) 2004-2007, The Gentee Group. All rights reserved.
4 * This file is part of the Gentee open source project - http://www.gentee.com.
5 *
6 * THIS FILE IS PROVIDED UNDER THE TERMS OF THE GENTEE LICENSE ("AGREEMENT").
7 * ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE CONSTITUTES RECIPIENTS
8 * ACCEPTANCE OF THE AGREEMENT.
9 *
10 * Author: Alexander Krivonogov ( algen )
11 *
12 ******************************************************************************/
13
14 /*-----------------------------------------------------------------------------
15 * Id: stringuni L "String - Unicode"
16 *
17 * Summary: Unicode strings. It is possible to use variables of the #b(ustr)
18 type for working with Unicode strings. The #b(ustr) type is
19 inherited from the #b(buf) type. So, you can also use
20 #a(buffer, methods of the buf type).
21 *
22 * List: *Operators,ustr_oplen,ustr_opind,ustr_opsum,ustr_opeq,ustr_opeqa,
23 ustr_opadd,ustr_opeqeq,ustr_opless,ustr_opgr,ustr_str2ustr,
24 ustr_ustr2str,
25 *Methods,ustr_clear,ustr_copy,ustr_del,ustr_findch,ustr_fromutf8,
26 ustr_insert,ustr_lines,ustr_read,ustr_replace,ustr_reserve,
27 ustr_setlen,ustr_split,ustr_substr,ustr_toutf8,ustr_trim,ustr_write
28 *
29 -----------------------------------------------------------------------------*/
30
define {
   CP_ACP         = 0      // Code page passed for str <-> ustr conversions.
   CP_UTF8        = 65001  // Code page passed for UTF-8 conversions.
   MB_PRECOMPOSED = 1      // Flag passed to MultiByteToWideChar with CP_ACP.
}
36
37 /*-----------------------------------------------------------------------------
38 * Id: tustr T ustr
39 *
40 * Summary: The Unicode string type.
41 *
42 -----------------------------------------------------------------------------*/
43
// A buf descendant whose items are addressed as ushort values.
type ustr <index=ushort inherit=buf>
{
}
48
49 /*-----------------------------------------------------------------------------
50 * Id: ustr_opless F4
51 *
52 * Summary: Comparison operation.
53 *
54 * Title: ustr < ustr
55 *
56 * Return: Returns #b(1) if the first string is less than the second one.
57 Otherwise, it returns #b(0).
58 *
59 -----------------------------------------------------------------------------*/
60
operator uint <( ustr left, ustr right )
{
   // CompareStringW answers 1 when the first string precedes the second one.
   uint order = CompareStringW( 0, 0, left.ptr(), *left, right.ptr(), *right )

   if order != 1 : return 0
   return 1
}
67
68 /*-----------------------------------------------------------------------------
69 * Id: ustr_opless_1 FC
70 *
71 * Summary: Comparison operation.
72 *
73 * Title: ustr <= ustr
74 *
75 * Return: Returns #b(1) if the first string is less than or equal to the second one.
76 Otherwise, it returns #b(0).
77 *
78 * Define: operator uint <=( ustr left, ustr right )
79 *
80 -----------------------------------------------------------------------------*/
81
82 /*-----------------------------------------------------------------------------
83 * Id: ustr_opgr F4
84 *
85 * Summary: Comparison operation.
86 *
87 * Title: ustr > ustr
88 *
89 * Return: Returns #b(1) if the first string is greater than the second one.
90 Otherwise, it returns #b(0).
91 *
92 -----------------------------------------------------------------------------*/
93
operator uint >( ustr left, ustr right )
{
   // CompareStringW answers 3 when the first string follows the second one.
   uint order = CompareStringW( 0, 0, left.ptr(), *left, right.ptr(), *right )

   if order != 3 : return 0
   return 1
}
100
101 /*-----------------------------------------------------------------------------
102 * Id: ustr_opgr_1 FC
103 *
104 * Summary: Comparison operation.
105 *
106 * Title: ustr >= ustr
107 *
108 * Return: Returns #b(1) if the first string is greater than or equal to the second one.
109 Otherwise, it returns #b(0).
110 *
111 * Define: operator uint >=( ustr left, ustr right )
112 *
113 -----------------------------------------------------------------------------*/
114
115 /*
116 operator uint %==( ustr left right )
117 {
118 if *left != *right : return 0
119 return !ustrcmpign( left.ptr(), right.ptr())
120 }
121
122 operator uint %<( ustr left right )
123 {
124 if ustrcmpign( left.ptr(), right.ptr()) < 0 : return 1
125 return 0
126 }
127
128 operator uint %>( ustr left right )
129 {
130 if ustrcmpign( left.ptr(), right.ptr() ) > 0 : return 1
131 return 0
132 }*/
133
134 /*-----------------------------------------------------------------------------
135 * Id: ustr_opind F4
136 *
137 * Summary: Getting ushort character #b([i]) of the Unicode string.
138 *
139 * Title: ustr[ i ]
140 *
141 * Return: The #b([i]) ushort character of the Unicode string.
142 *
143 -----------------------------------------------------------------------------*/
144
method uint ustr.index( uint id )
{
   // Every item occupies two bytes, so item id starts at byte id * 2.
   uint offset = id << 1

   return this.ptr() + offset
}
149
150 /*-----------------------------------------------------------------------------
151 * Id: ustr_oplen F4
152 *
153 * Summary: Get the length of a unicode string.
154 *
155 * Return: The length of the unicode string.
156 *
157 * Define: operator uint *( ustr left )
158 *
159 -----------------------------------------------------------------------------*/
160
operator uint *( ustr src )
{
   // use is a byte count; halve it to get items, then drop the terminator.
   uint items = src.use >> 1

   return items - 1
}
165
/*-----------------------------------------------------------------------------
* Id: ustr_reserve F2
*
* Summary: Memory reservation. The method increases the size of the memory
           allocated for the unicode string.
*
* Params: len - The total requested length of the unicode string. If it is /
                less than the current size, nothing happens. If the size is /
                increased, the current string data is saved.
*
* Return: #lng/retobj#
*
-----------------------------------------------------------------------------*/

method ustr ustr.reserve( uint len )
{
   // The underlying buf is sized in bytes; each unicode item takes two.
   this->buf.reserve( len << 1 )
   // Return the object, as the header above has always promised.
   return this
}
184
185 /*-----------------------------------------------------------------------------
186 * Id: ustr_opeq F4
187 *
188 * Summary: Assign types to unicode string. Copy a string to the unicode string
189 #b(ustr = str).
190 *
191 * Title: ustr = type
192 *
193 * Return: The result unicode string.
194 *
195 -----------------------------------------------------------------------------*/
196
operator ustr =( ustr left, str right )
{
   uint items   // result length in 16-bit items, terminator included
   uint bytes   // the same length in bytes

   // First call: a zero output size asks only for the required item count.
   items = MultiByteToWideChar( $CP_ACP, $MB_PRECOMPOSED, right.ptr(), *right,
                                left.ptr(), 0 ) + 1
   left.reserve( items )
   // Second call: convert into the memory reserved above.
   MultiByteToWideChar( $CP_ACP, $MB_PRECOMPOSED, right.ptr(), *right,
                        left.ptr(), items )
   bytes = items << 1
   // Put the zero terminator into the last item and publish the new size.
   ( left.ptr() + bytes - 2 )->ushort = 0
   left.use = bytes
   return left
}
210
211 /*-----------------------------------------------------------------------------
212 * Id: ustr_opeqa F4
213 *
214 * Summary: Copy a unicode string to a string.
215 *
216 * Title: str = ustr
217 *
218 * Return: The result string.
219 *
220 -----------------------------------------------------------------------------*/
221
operator str =( str left, ustr right )
{
   uint bytes   // size of the converted text, terminator excluded

   // First call: a zero output size asks only for the required byte count.
   bytes = WideCharToMultiByte( $CP_ACP, 0, right.ptr(), *right,
                                left.ptr(), 0, 0, 0 )
   left.reserve( bytes + 1 )
   // Second call: convert into the memory reserved above.
   WideCharToMultiByte( $CP_ACP, 0, right.ptr(), *right,
                        left.ptr(), bytes + 1, 0, 0 )
   left.setlen( bytes )
   return left
}
233
234 /*-----------------------------------------------------------------------------
235 * Id: ustr_setlen F2
236 *
237 * Summary: Setting a new size of the unicode string. The method does not
238 reserve space.
239 You cannot specify the size of a string greater than the reserved
240 space you have. Mostly, this function is used for specifying the
241 size of a string after external functions write data to it.
242 *
243 * Params: len - New string size.
244 *
245 * Return: #lng/retobj#
246 *
247 -----------------------------------------------------------------------------*/
248
method ustr ustr.setlen( uint len )
{
   // Size in bytes of len items plus the zero terminator.
   uint bytes = ( len + 1 ) << 1

   // The terminator occupies the last two bytes of the used area.
   ( &( ( this->buf )[ bytes - 2 ] ) )->ushort = 0
   this.use = bytes
   return this
}
256
257 /*-----------------------------------------------------------------------------
258 * Id: ustr_setlen_1 FB
259 *
260 * Summary: Recalculate the size of a unicode string to the zero character. The
261 function can be used to determine the size of a string after
262 other functions write data into it.
263 *
264 -----------------------------------------------------------------------------*/
265
method ustr ustr.setlenptr
{
   // Step 1: make the length cover the whole allocation (never below zero).
   // NOTE(review): presumably findsh scans only the used part - confirm.
   .setlen( max( int( ( this.size >> 1 ) - 1 ), 0 ) )
   // Step 2: shrink the length to the position findsh reports for a zero item.
   return .setlen( .findsh( 0, 0 ) )
}
271
272 /*-----------------------------------------------------------------------------
273 * Id: ustr_opeq_1 FC
274 *
275 * Summary: Copy a unicode string to another unicode string.
276 *
277 * Title: ustr = ustr
278 *
279 -----------------------------------------------------------------------------*/
280
operator ustr =( ustr left, ustr right )
{
   // Both operands are bufs underneath, so the buf assignment is reused.
   left->buf = right->buf

   return left
}
286
287 /*-----------------------------------------------------------------------------
288 * Id: ustr_str2ustr F4
289 *
290 * Summary: Converting a string to a unicode string #b('ustr( str )').
291 *
292 * Title: ustr( str )
293 *
294 * Return: The result unicode string.
295 *
296 -----------------------------------------------------------------------------*/
297
method ustr str.ustr<result>()
{
   // The conversion is done by the assignment of a str to a ustr.
   result = this
}
302
303 /*-----------------------------------------------------------------------------
304 * Id: ustr_ustr2str F4
305 *
306 * Summary: Converting a unicode string to a string #b('str( ustr )').
307 *
308 * Title: str( ustr )
309 *
310 * Return: The result string.
311 *
312 -----------------------------------------------------------------------------*/
313
method str ustr.str<result>()
{
   // The conversion is done by the assignment of a ustr to a str.
   result = this
}
318
319 /*-----------------------------------------------------------------------------
320 * Id: ustr_opadd F4
321 *
322 * Summary: Appending types to the unicode string. Append #b(ustr) to #b(ustr)
323 => #b( ustr += ustr ).
324 *
325 * Title: ustr += type
326 *
327 * Return: The result unicode string.
328 *
329 -----------------------------------------------------------------------------*/
330
operator ustr +=( ustr left, ustr right )
{
   // Drop the two terminator bytes of left before the buf append.
   // NOTE(review): if left and right are the same object this also shortens
   // right - confirm whether self-append has to be supported.
   left.use = left.use - 2
   left->buf += right->buf

   return left
}
337
338 /*-----------------------------------------------------------------------------
339 * Id: ustr_opadd_1 FC
340 *
341 * Summary: Append #b(str) to #b(ustr) => #b( ustr += str ).
342 *
343 * Title: ustr += str
344 *
345 -----------------------------------------------------------------------------*/
346
operator ustr +=( ustr left, str right )
{
   // Convert the str operand first, then append it as a ustr.
   return left += right.ustr()
}
351
352 /*-----------------------------------------------------------------------------
353 * Id: ustr_opsum F4
354 *
355 * Summary: Add two strings. Putting two unicode strings together and creating
356 a resulting unicode string.
357 *
358 * Return: The new result unicode string.
359 *
360 -----------------------------------------------------------------------------*/
361
operator ustr +<result> ( ustr left, ustr right )
{
   // Start from a copy of left and append right to it.
   result = left
   result += right
}
366
367 /*-----------------------------------------------------------------------------
368 * Id: ustr_opsum_1 FC
369 *
370 * Summary: Add a unicode string and a string.
371 *
372 * Return: The new result unicode string.
373 *
374 -----------------------------------------------------------------------------*/
375
operator ustr +<result>( ustr left, str right )
{
   // Start from a copy of left and append the converted right operand.
   result = left
   result += right.ustr()
}
380
381 /*-----------------------------------------------------------------------------
382 * Id: ustr_write F2
383 *
384 * Summary: Writing a unicode string to a file.
385 *
386 * Params: filename - The name of the file for writing. If the file already /
387 exists, it will be overwritten.
388 *
389 * Return: The size of the written data.
390 *
391 -----------------------------------------------------------------------------*/
392
method uint ustr.write( str filename )
{
   uint written

   // Exclude the two terminator bytes while the buffer is being written.
   this.use -= 2
   written = this->buf.write( filename )
   // Bring the terminator back into the used area.
   this.use += 2
   return written
}
402
403 /*-----------------------------------------------------------------------------
404 * Id: ustr_read F2
405 *
406 * Summary: Read a unicode string from a file.
407 *
408 * Params: filename - Filename.
409 *
410 * Return: The size of the read data.
411 *
412 -----------------------------------------------------------------------------*/
413
method uint ustr.read( str filename )
{
   uint rd, tail

   rd = this->buf.read( filename )
   // Make room for the terminator, put it right after the data that was
   // read and include it into the used size.
   this->buf.expand( 2 )
   tail = this.use
   ( &( ( this->buf )[ tail ] ) )->ushort = 0
   this.use = tail + 2
   return rd
}
424
425 /*-----------------------------------------------------------------------------
426 * Id: ustr_toutf8 F2
427 *
428 * Summary: Convert a unicode string to UTF-8 string.
429 *
430 * Params: dest - Destination string.
431 *
432 * Return: The dest parameter.
433 *
434 -----------------------------------------------------------------------------*/
435
method str ustr.toutf8( str dest )
{
   // The source length is given as -1, so the reported size includes the
   // terminating zero.
   uint len = WideCharToMultiByte( $CP_UTF8, 0, this.ptr(), -1, dest.ptr(),
                                   0, 0, 0 )

   // A zero result means the conversion failed. Without this guard len - 1
   // below would wrap around and setlen would get a huge length.
   if !len
   {
      dest.setlen( 0 )
      return dest
   }
   dest.reserve( len )
   WideCharToMultiByte( $CP_UTF8, 0, this.ptr(), -1, dest.ptr(), len, 0, 0 )
   // The string length does not count the terminating zero.
   dest.setlen( len - 1 )
   return dest
}
445
446 /*-----------------------------------------------------------------------------
447 * Id: ustr_fromutf8 F2
448 *
449 * Summary: Convert a UTF-8 string to a unicode string.
450 *
451 * Params: src - Source UTF-8 string.
452 *
453 * Return: #lng/retobj#.
454 *
455 -----------------------------------------------------------------------------*/
456
method ustr ustr.fromutf8( str src )
{
   // Required number of 16-bit items plus one for the terminator.
   uint len = ( MultiByteToWideChar( $CP_UTF8, 0, src.ptr(), *src,
                                     this.ptr(), 0 ) + 1 )

   this.reserve( len )
   // The output size is passed in items, so the conversion must happen
   // before len is turned into a byte count; otherwise the API is told that
   // the buffer is twice as large as the memory reserved above.
   MultiByteToWideChar( $CP_UTF8, 0, src.ptr(), *src, this.ptr(), len )
   len = len << 1
   // Terminate the string and publish the size in bytes.
   ( &( ( this->buf )[ len - 2 ] ) )->ushort = 0
   this.use = len
   return this
}
468
469 /*func ustr fromutf8<result>( str src )
470 {
471 result.fromutf8( src )
472 }*/
473
474 /*-----------------------------------------------------------------------------
475 * Id: ustr_substr F2
476 *
477 * Summary: Getting a unicode substring.
478 *
479 * Params: src - Initial unicode string.
480 start - Substring offset.
481 len - Substring size.
482 *
483 * Return: #lng/retobj#
484 *
485 -----------------------------------------------------------------------------*/
486
method ustr ustr.substr( ustr src, uint start, uint len )
{
   uint slen = *src

   // Keep the requested range inside the source string so that nothing is
   // read beyond its data.
   if start > slen : start = slen
   if len > slen - start : len = slen - start
   // One extra item is reserved for the terminator written by setlen.
   this.reserve( len + 1 )
   this->buf.copy( src.ptr() + ( start << 1 ), len << 1 )
   this.setlen( len )
   return this
}
495
method ustr ustr.init()
{
   // Two bytes are enough for the terminator of an empty string.
   this->buf.reserve( 2 )
   return this.setlen( 0 )
}
502
503 /*-----------------------------------------------------------------------------
504 * Id: ustr_findch F2
505 *
506 * Summary: Find the character in the unicode string.
507 *
508 * Params: off - The offset to start searching from.
509 symbol - Search character.
510 *
511 * Return: The offset of the character if it is found. If the character is not
512 found, the length of the string is returned.
513 *
514 -----------------------------------------------------------------------------*/
515
method uint ustr.findch( uint off, ushort symbol )
{
   // The search is delegated to findsh.
   uint pos = this.findsh( off, symbol )

   return pos
}
529
530 /*-----------------------------------------------------------------------------
531 * Id: ustr_findch_1 FA
532 *
533 * Summary: Find the character in the unicode string from the beginning of
534 the string.
535 *
536 * Params: symbol - Search character.
537 *
538 -----------------------------------------------------------------------------*/
539
method uint ustr.findch( ushort symbol )
{
   // Search from the very beginning of the string.
   uint pos = this.findsh( 0, symbol )

   return pos
}
545
546 /*-----------------------------------------------------------------------------
547 * Id: ustr_del F2
548 *
549 * Summary: Delete a substring.
550 *
551 * Params: off - The offset of the substring being deleted.
552 len - The size of the substring being deleted.
553 *
554 * Return: #lng/retobj#
555 *
556 -----------------------------------------------------------------------------*/
557
method ustr ustr.del( uint off, uint len )
{
   uint slen = *this

   // Nothing to delete beyond the end of the string.
   if off > slen : return this
   // Clamp the length; written this way the check cannot overflow the way
   // off + len could.
   if len > slen - off : len = slen - off
   // buf.del works with bytes, so both the offset and the length must be
   // converted from items to bytes.
   this->buf.del( off << 1, len << 1 )
   // Store the new length and rewrite the terminator.
   this.setlen( slen - len )

   return this
}
569
570 /*-----------------------------------------------------------------------------
571 * Id: ustr_trim F2
572 *
573 * Summary: Trimming a unicode string.
574 *
575 * Title: ustr.trim...
576 *
577 * Params: symbol - The character being deleted.
578 flag - Flags. $$[trimflags]
579 *
580 * Return: #lng/retobj#
581 *
582 -----------------------------------------------------------------------------*/
583
method ustr ustr.trim( uint symbol, uint flag )
{
   uint rsymbol = symbol             // character removed on the right side
   uint syschars = flag & $TRIM_SYS  // remove every item <= 0x0020 instead
   uint i, found

   // With TRIM_PAIR an opening bracket on the left is matched by its
   // closing counterpart on the right.
   if flag & $TRIM_PAIR
   {
      switch symbol
      {
         case '(' : rsymbol = ')'
         case '{' : rsymbol = '}'
         case '[' : rsymbol = ']'
         case '<' : rsymbol = '>'
      }
   }
   if flag & $TRIM_RIGHT
   {
      // i is the length that remains. The loop stops at zero, so an empty
      // or completely trimmed string never causes a read before the data.
      i = *this
      while i
      {
         if syschars
         {
            if this[ i - 1 ] > 0x0020 : break
         }
         else
         {
            if this[ i - 1 ] != rsymbol : break
         }
         i--
         if flag & $TRIM_ONE : break
      }
      if i < *this : this.setlen( i )
   }
   if flag & $TRIM_LEFT
   {
      // Count the leading items to remove and delete them in one go.
      fornum i = 0, *this
      {
         if syschars
         {
            if this[ i ] > 0x0020 : break
         }
         else
         {
            if this[ i ] != symbol : break
         }
         found++
         if flag & $TRIM_ONE : break
      }
      if found : this.del( 0, found )
   }

   return this
}
655
656 /*-----------------------------------------------------------------------------
657 * Id: ustr_trim_1 FB
658 *
659 * Summary: Deleting spaces on the right.
660 *
661 -----------------------------------------------------------------------------*/
662
method ustr ustr.trimrspace()
{
   // Remove the ' ' characters from the right side only.
   return .trim( ' ', $TRIM_RIGHT )
}
667
668 /*-----------------------------------------------------------------------------
669 * Id: ustr_trim_2 FB
670 *
671 * Summary: Deleting spaces on the both sides.
672 *
673 -----------------------------------------------------------------------------*/
674
method ustr ustr.trimspace()
{
   // Remove the ' ' characters from both sides.
   return .trim( ' ', $TRIM_LEFT | $TRIM_RIGHT )
}
679
680 /*-----------------------------------------------------------------------------
681 * Id: ustr_copy F2
682 *
683 * Summary: Copying. The method copies the specified size of the data into
684 a unicode string.
685 *
686 * Params: ptr - The pointer to the data being copied. If data does not end in /
687 a zero, it will be added automatically.
688 size - The size of the data being copied.
689 *
690 * Return: #lng/retobj#
691 *
692 -----------------------------------------------------------------------------*/
693
method ustr ustr.copy( uint ptr, uint size )
{
   // size is a number of 16-bit items; buf.copy takes a byte count.
   this->buf.copy( ptr, size << 1 )
   // setlen does not reserve memory itself, so make sure that the two
   // terminator bytes fit (the same approach as in ustr.read).
   this->buf.expand( 2 )
   .setlen( size )
   return this
}
700
701 /*-----------------------------------------------------------------------------
702 * Id: ustr_copy_1 FB
703 *
704 * Summary: The method copies data into a unicode string.
705 *
706 * Params: ptr - The pointer to the data being copied. All data to the zero /
707 ushort will be copied.
708 *
709 * Return: #lng/retobj#
710 *
711 -----------------------------------------------------------------------------*/
712
method ustr ustr.copy( uint ptr )
{
   // mlensh supplies the number of items to copy.
   uint items = mlensh( ptr )

   .copy( ptr, items )
   return this
}
718
719 /*-----------------------------------------------------------------------------
720 * Id: ustr_replace F2
721 *
722 * Summary: Replacing in a unicode string. The method replaces data in
723 a unicode string.
724 *
725 * Params: offset - The offset of the data being replaced.
726 size - The size of the data being replaced.
727 value - The unicode string being inserted.
728 *
729 * Return: #lng/retobj#
730 *
731 -----------------------------------------------------------------------------*/
732
method ustr ustr.replace( uint offset, uint size, ustr value )
{
   uint slen = *this

   // An offset at or beyond the end turns the operation into an append.
   if offset >= slen : this += value
   else
   {
      // Never let the replaced range run over the terminator of this string.
      if size > slen - offset : size = slen - offset
      // Hide the terminator of value so that it is not inserted in the
      // middle of the string, then restore it.
      value->buf.use -= 2
      this->buf.replace( offset << 1, size << 1, value->buf )
      value->buf.use += 2
   }
   return this
}
744
745 /*-----------------------------------------------------------------------------
746 * Id: ustr_insert F2
747 *
748 * Summary: Insertion. The method inserts one unicode string into another.
749 *
750 * Params: offset - The offset where string will be inserted.
751 value - The unicode string being inserted.
752 *
753 * Return: #lng/retobj#
754 *
755 -----------------------------------------------------------------------------*/
756
method ustr ustr.insert( uint offset, ustr value )
{
   // An insertion is a replacement of zero items.
   return .replace( offset, 0, value )
}
761
762 /*-----------------------------------------------------------------------------
763 * Id: ustr_opeqeq F4
764 *
765 * Summary: Comparison operation.
766 *
767 * Return: Returns #b(1) if the strings are equal. Otherwise, it returns #b(0).
768 *
769 -----------------------------------------------------------------------------*/
770
operator uint ==( str left, ustr right )
{
   // Convert the str operand and compare two unicode strings.
   return left.ustr() == right
}
775
776 /*-----------------------------------------------------------------------------
777 * Id: ustr_opeqeq_1 FC
778 *
779 * Summary: Comparison operation.
780 *
781 * Return: Returns #b(1) if the strings are equal. Otherwise, it returns #b(0).
782 *
783 -----------------------------------------------------------------------------*/
784
operator uint ==( ustr left, str right )
{
   // Convert the str operand and compare two unicode strings.
   return left == right.ustr()
}
789
790 /*-----------------------------------------------------------------------------
791 ** Id: ustr_clear F3
792 *
793 * Summary: Clearing a unicode string.
794 *
795 * Return: #lng/retobj#
796 *
797 -----------------------------------------------------------------------------*/
798
method ustr ustr.clear
{
   // A cleared string has zero length and keeps only its terminator.
   return .setlen( 0 )
}
803
804 /* gentee !!! */
method ustr ustr.appendch( uint ch )
{
   uint tail = this.use   // used size in bytes before the append

   // Grow by one item: the new character takes the place of the old
   // terminator and a new terminator follows it.
   this->buf.expand( 2 )
   this.use = tail + 2
   ( &( ( this->buf )[ tail - 2 ] ) )->ushort = ch
   ( &( ( this->buf )[ tail ] ) )->ushort = 0
   return this
}