ICU 55.1
55.1
source
common
unicode
utf_old.h
Go to the documentation of this file.
1
/*
2
*******************************************************************************
3
*
4
* Copyright (C) 2002-2012, International Business Machines
5
* Corporation and others. All Rights Reserved.
6
*
7
*******************************************************************************
8
* file name: utf_old.h
9
* encoding: US-ASCII
10
* tab size: 8 (not used)
11
* indentation:4
12
*
13
* created on: 2002sep21
14
* created by: Markus W. Scherer
15
*/
16
143
#ifndef __UTF_OLD_H__
144
#define __UTF_OLD_H__
145
146
#ifndef U_HIDE_DEPRECATED_API
147
148
#include "
unicode/utf.h
"
149
#include "
unicode/utf8.h
"
150
#include "
unicode/utf16.h
"
151
152
/* Formerly utf.h, part 1 --------------------------------------------------- */
153
154
#ifdef U_USE_UTF_DEPRECATES
/**
 * Signed 32-bit offset type. Only declared when the including code defines
 * U_USE_UTF_DEPRECATES.
 */
typedef int32_t UTextOffset;
#endif

/**
 * UTF size selector, fixed at 16. The unsuffixed UTF_... macros later in this
 * file forward to their UTF16_... / U16_... counterparts.
 */
#define UTF_SIZE 16

/**
 * Flavor selection: UTF_SAFE is defined, while UTF_UNSAFE and UTF_STRICT are
 * explicitly undefined.
 */
#define UTF_SAFE

#undef UTF_UNSAFE

#undef UTF_STRICT

/**
 * Error value stored into the output code point by UTF8_NEXT_CHAR_SAFE and
 * UTF8_PREV_CHAR_SAFE when the byte they start on cannot begin/end a sequence.
 */
#define UTF8_ERROR_VALUE_1 0x15

/**
 * Second UTF-8 error value; recognized by UTF_IS_ERROR and rejected by
 * UTF_IS_VALID.
 */
#define UTF8_ERROR_VALUE_2 0x9f

/**
 * Error value stored by the UTF-16 and UTF-32 "safe" macros in this file when
 * a code point is rejected.
 */
#define UTF_ERROR_VALUE 0xffff
210
217
/**
 * Is c one of the error values recognized by this file?
 * True when the low 16 bits of c are 0xfffe or 0xffff (this includes
 * UTF_ERROR_VALUE), or when c equals one of the two UTF-8 error values.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/**
 * Is c a Unicode character (see UTF_IS_UNICODE_CHAR) that is also neither of
 * the two UTF-8 error values?
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)

/** Is this code unit or code point a surrogate (0xd800..0xdfff)? */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/**
 * Is c a Unicode noncharacter: 0xfdd0..0xfdef, or any value up to 0x10ffff
 * whose low 16 bits are 0xfffe or 0xffff?
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * Is c a Unicode character: in 0..0x10ffff, not a surrogate and not a
 * noncharacter? The uint32_t casts make negative values compare as too large.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
265
266
/* Formerly utf8.h ---------------------------------------------------------- */
267
272
/**
 * Look up the number of trail bytes for a UTF-8 lead byte in the
 * utf8_countTrailBytes[] table (not declared in this header; presumably
 * provided via unicode/utf8.h).
 * The argument is parenthesized before the uint8_t cast so that a compound
 * expression such as x>>8 is converted as a whole.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
273
278
/**
 * Clear, in place, all but the low (6-countTrailBytes) bits of leadByte.
 * leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/** Is bit 7 of this byte clear (a single-byte code point, 0..0x7f)? */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/** Is this byte in the lead-byte range 0xc0..0xfd? */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/** Is this byte a trail byte (top two bits are 10, i.e. 0x80..0xbf)? */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/** Does code point c need more than one UTF-8 byte (c>0x7f as unsigned)? */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
289
303
/**
 * Number of UTF-8 bytes for code point c.
 * The active (#if 1) variant returns 1..4 for 0..0x10ffff and 3 for anything
 * outside that range. The disabled variant additionally returns 5 and 6 for
 * values up to 0x3ffffff and 0x7fffffff, and 3 above that.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif
324
326
/** Largest value UTF8_CHAR_LENGTH yields for a code point up to 0x10ffff. */
#define UTF8_MAX_CHAR_LENGTH 4

/**
 * Scale size by 5/2 using integer arithmetic.
 * NOTE(review): presumably a UTF-8 capacity estimate relative to a UTF-16
 * length, by analogy with the "compared to UTF-16" note in the UTF-32 section.
 */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
330
332
/**
 * Read the code point that contains byte offset i of s into c.
 * Works on a private copy of i, so the caller's i is not modified: the copy is
 * moved back to the sequence start and then read forward.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
}

/**
 * Bounds-aware counterpart of UTF8_GET_CHAR_UNSAFE: start and length limit the
 * backward and forward scans, and strict is passed on to UTF8_NEXT_CHAR_SAFE.
 * The caller's i is not modified.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
}
344
346
/**
 * Read the code point starting at s[i] into c and advance i past it.
 * No bounds or well-formedness checks: for a lead byte in 0xc0..0xf4 the
 * trail bytes announced by UTF8_COUNT_TRAIL_BYTES are consumed blindly.
 * Any other byte is returned unchanged as c.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}
364
366
/**
 * Write code point c to s at offset i as 1 to 4 UTF-8 bytes and advance i.
 * No capacity or range check is made. The nesting shares the code for the
 * last two trail bytes: each outer level appends the byte that every longer
 * form has in common.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
384
386
/**
 * Advance i past one code point: one byte plus the trail-byte count that
 * UTF8_COUNT_TRAIL_BYTES reports for s[i]. No bounds check.
 */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/** Advance i past n code points by repeating UTF8_FWD_1_UNSAFE; n<=0 is a no-op. */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Move i backward while s[i] is a trail byte. No lower-bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}
403
405
/**
 * Read the code point at s[i] into c and advance i.
 * Bytes below 0x80 are returned as they are. A lead byte is handed to
 * utf8_nextCharSafeBody() together with &i, length and strict; any other byte
 * (>=0x80 and not a lead byte) yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/**
 * Append code point c to s at offset i.
 * Values up to 0x7f are stored directly; everything else is delegated to
 * utf8_appendCharSafeBody(), whose return value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}
424
426
/*
 * Bounds-checked UTF-8 forward helpers. Each one forwards its arguments
 * unchanged to the U8_... macro of the same purpose.
 */

/** Alias for U8_FWD_1. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/** Alias for U8_FWD_N. */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/** Alias for U8_SET_CP_START. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
433
435
/**
 * Move i back to the start of the code point that ends before offset i and
 * store that code point in c. No bounds or well-formedness checks.
 * If the byte before i is a trail byte, earlier bytes are collected six bits
 * at a time until a byte >=0xc0 (the lead byte) is found.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}
456
458
/**
 * Move i back by one code point: decrement i until the byte at the new
 * position is not a trail byte. No lower-bound check.
 */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/** Move i back by n code points by repeating UTF8_BACK_1_UNSAFE; n<=0 is a no-op. */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/**
 * Adjust i to the end of the code point containing the byte before i, by
 * stepping back one code point and then forward one code point.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
476
478
/**
 * Step i back by one byte and read the code point ending there into c.
 * Bytes below 0x80 are returned as they are. A trail byte (0x80..0xbf) is
 * handed to utf8_prevCharSafeBody() together with start, &i and strict. A byte
 * above 0xbf has no trail bytes after it here and yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}
488
490
/*
 * Bounds-checked UTF-8 backward helpers. Each one forwards its arguments
 * unchanged to the U8_... macro of the same purpose.
 */

/** Alias for U8_BACK_1. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/** Alias for U8_BACK_N. */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/** Alias for U8_SET_CP_LIMIT. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
497
498
/* Formerly utf16.h --------------------------------------------------------- */
499
501
/** Is uchar a first (lead) surrogate, 0xd800..0xdbff? */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/** Is uchar a second (trail) surrogate, 0xdc00..0xdfff? */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/**
 * For a value already known to be a surrogate: is it a first surrogate?
 * Only tests bit 10, so the result is meaningless for non-surrogates.
 */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/** Constant subtracted by UTF16_GET_PAIR_VALUE to remove both surrogate bases. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/** Combine a first and a second surrogate into a supplementary code point. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/**
 * First surrogate for a supplementary code point.
 * The expansion is wrapped in parentheses so the cast cannot bind to
 * neighbouring operators at the call site.
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/** Second surrogate for a supplementary code point (parenthesized as above). */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias for UTF_FIRST_SURROGATE. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias for UTF_SECOND_SURROGATE. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/** Is uchar a complete code point by itself (not a surrogate)? */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias for UTF_IS_FIRST_SURROGATE. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias for UTF_IS_SECOND_SURROGATE. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/** Does code point c need two UTF-16 code units (c>0xffff as unsigned)? */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/** Number of UTF-16 code units for code point c: 1 up to 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** Largest value UTF16_CHAR_LENGTH can yield. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Identity mapping of size. */
#define UTF16_ARRAY_SIZE(size) (size)
548
560
/**
 * Read the code point that contains code unit s[i] into c; i is not modified.
 * If s[i] is a surrogate, it is combined with the following unit (for a first
 * surrogate) or the preceding unit (for a second surrogate) without checking
 * bounds or that the neighbour really is the matching surrogate.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}
570
572
/**
 * Read the code point that contains code unit s[i] into c; i is not modified.
 * A surrogate is combined with its partner only if the partner lies inside
 * [start, length) and is the matching kind of surrogate. An unpaired surrogate
 * is returned as it is unless strict is nonzero, in which case c becomes
 * UTF_ERROR_VALUE. With strict set, a non-surrogate that fails
 * UTF_IS_UNICODE_CHAR also becomes UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
597
599
/**
 * Read the code point at s[i] into c and advance i past it.
 * A first surrogate is combined with the next unit without any check.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

/**
 * Write code point c to s at offset i as one or two UTF-16 code units and
 * advance i. No capacity or range check.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

/** Advance i by one code point: two units if s[i] is a first surrogate, else one. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/** Advance i by n code points by repeating UTF16_FWD_1_UNSAFE; n<=0 is a no-op. */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If s[i] is a second surrogate, move i back by one unit. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
638
640
/**
 * Read the code point at s[i] into c and advance i past it.
 * A first surrogate is combined with the next unit only if i<length and that
 * unit is a second surrogate. Otherwise it is returned alone, or replaced by
 * UTF_ERROR_VALUE when strict is nonzero. With strict set, any other unit
 * that fails UTF_IS_UNICODE_CHAR also becomes UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
657
659
/**
 * Append code point c to s at offset i and advance i.
 * - c<=0xffff: one unit is written; length is not consulted on this path.
 * - c<=0x10ffff: a surrogate pair is written if i+1<length, otherwise a
 *   single UTF_ERROR_VALUE unit.
 * - larger c: a single UTF_ERROR_VALUE unit is written.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
673
675
/*
 * Bounds-checked UTF-16 forward helpers. Each one forwards its arguments
 * unchanged to the U16_... macro of the same purpose.
 */

/** Alias for U16_FWD_1. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N. */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
682
684
/**
 * Move i back over the code point that ends before offset i and store it in c.
 * A second surrogate is combined with the unit before it without any check.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/** Move i back by one code point: two units if the unit before i is a second surrogate. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/** Move i back by n code points by repeating UTF16_BACK_1_UNSAFE; n<=0 is a no-op. */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If the unit before i is a first surrogate, move i forward by one unit. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
713
715
/**
 * Move i back over the code point that ends before offset i and store it in c.
 * A second surrogate is combined with the unit before it only if i>start and
 * that unit is a first surrogate. Otherwise it is returned alone, or replaced
 * by UTF_ERROR_VALUE when strict is nonzero. With strict set, any other unit
 * that fails UTF_IS_UNICODE_CHAR also becomes UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
732
734
/*
 * Bounds-checked UTF-16 backward helpers. Each one forwards its arguments
 * unchanged to the U16_... macro of the same purpose.
 */

/** Alias for U16_BACK_1. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N. */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
741
742
/* Formerly utf32.h --------------------------------------------------------- */
743
744
/*
 * Old documentation:
 *
 * This file defines macros to deal with UTF-32 code units and code points.
 * Signatures and semantics are the same as for the similarly named macros
 * in utf16.h.
 * <p>utf32.h is included by utf.h after unicode/umachine.h
 * and some common definitions.</p>
 * <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.</p>
 */
756
757
/* internal definitions ----------------------------------------------------- */
758
760
/**
 * Acceptance test used by the UTF-32 "safe" macros.
 * With strict zero, any value in 0..0x10ffff is accepted; with strict nonzero,
 * c must additionally satisfy UTF_IS_UNICODE_CHAR.
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
764
765
/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */
770
771
/* single-code point definitions -------------------------------------------- */
772
773
/* classes of code unit values */
774
776
/** Always 1: the argument is not evaluated. */
#define UTF32_IS_SINGLE(uchar) 1

/** Always 0: the argument is not evaluated. */
#define UTF32_IS_LEAD(uchar) 0

/** Always 0: the argument is not evaluated. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0: the argument is not evaluated. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/** Always 1: the argument is not evaluated. */
#define UTF32_CHAR_LENGTH(c) 1

/** Always 1. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Identity mapping of size. */
#define UTF32_ARRAY_SIZE(size) (size)
795
797
/** Copy s[i] into c without any check; i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Copy s[i] into c, replacing it with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start and length are accepted but not used.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
808
809
/* definitions with forward iteration --------------------------------------- */
810
812
/** Copy s[i] into c and advance i by one. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** Store c at s[i] and advance i by one. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** Advance i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** Advance i by n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** No-op: every UTF-32 index is already a code point start. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/**
 * Copy s[i] into c and advance i by one; c becomes UTF_ERROR_VALUE if it
 * fails UTF32_IS_SAFE(c, strict). length is accepted but not used.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Store c at s[i] and advance i by one; values above 0x10ffff are stored as
 * 0xfffd instead. length is accepted but not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/** Advance i by one. length is accepted but not checked. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** Advance i by n, then clamp it to length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/** No-op: every UTF-32 index is already a code point start. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}
867
868
/* definitions with backward iteration -------------------------------------- */
869
871
/** Decrement i and copy s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** Decrement i by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** Decrement i by n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** No-op: every UTF-32 index is already a code point limit. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/**
 * Decrement i and copy s[i] into c; c becomes UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start is accepted but not used.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Decrement i by one. start is accepted but not checked. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** Decrement i by n, then clamp it to start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/**
 * No-op: every UTF-32 index is already a code point limit.
 * NOTE(review): takes (s, i, length) while UTF8_SET_CHAR_LIMIT_SAFE and
 * UTF16_SET_CHAR_LIMIT_SAFE take (s, start, i, length); the parameter list is
 * kept as it is so existing callers keep compiling.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}
913
914
/* Formerly utf.h, part 2 --------------------------------------------------- */
915
921
/*
 * Unsuffixed-encoding aliases: each UTF_..._SAFE / UTF_..._UNSAFE macro below
 * forwards its arguments unchanged to the UTF16_ macro of the same name.
 */

/** Alias for UTF16_ARRAY_SIZE. */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/** Alias for UTF16_GET_CHAR_UNSAFE. */
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_GET_CHAR_SAFE. */
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)


/** Alias for UTF16_NEXT_CHAR_UNSAFE. */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_NEXT_CHAR_SAFE. */
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)


/** Alias for UTF16_APPEND_CHAR_UNSAFE. */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_APPEND_CHAR_SAFE. */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)


/** Alias for UTF16_FWD_1_UNSAFE. */
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)

/** Alias for UTF16_FWD_1_SAFE. */
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)


/** Alias for UTF16_FWD_N_UNSAFE. */
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)

/** Alias for UTF16_FWD_N_SAFE. */
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)


/** Alias for UTF16_SET_CHAR_START_UNSAFE. */
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_START_SAFE. */
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)


/** Alias for UTF16_PREV_CHAR_UNSAFE. */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_PREV_CHAR_SAFE. */
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)


/** Alias for UTF16_BACK_1_UNSAFE. */
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)

/** Alias for UTF16_BACK_1_SAFE. */
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)


/** Alias for UTF16_BACK_N_UNSAFE. */
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)

/** Alias for UTF16_BACK_N_SAFE. */
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)


/** Alias for UTF16_SET_CHAR_LIMIT_UNSAFE. */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_LIMIT_SAFE. */
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
991
992
/* Define default macros (UTF-16 "safe") ------------------------------------ */
993
999
/*
 * Default macros without a _SAFE/_UNSAFE suffix. Most forward to the U16_...
 * macros; UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR forward to the UTF16_
 * macros defined earlier in this file.
 */

/** Alias for U16_IS_SINGLE. */
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)

/** Alias for U16_IS_LEAD. */
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)

/** Alias for U16_IS_TRAIL. */
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

/** Alias for UTF16_NEED_MULTIPLE_UCHAR. */
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)

/** Alias for U16_LENGTH. */
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)

/** Alias for U16_MAX_LENGTH. */
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

/** Alias for U16_GET. */
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)

/** Alias for U16_NEXT. */
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)

/** Alias for UTF16_APPEND_CHAR_SAFE. */
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Alias for U16_FWD_1. */
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N. */
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START. */
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

/** Alias for U16_PREV. */
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)

/** Alias for U16_BACK_1. */
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N. */
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT. */
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
1165
1166
#endif
/* U_HIDE_DEPRECATED_API */
1167
1168
#endif
1169
utf.h
C API: Code point macros.
utf16.h
C API: 16-bit Unicode handling macros.
utf8.h
C API: 8-bit Unicode handling macros.
Generated by
1.8.20