ICU 55.1
normlzr.h
Go to the documentation of this file.
1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2011, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  */
8 
9 #ifndef NORMLZR_H
10 #define NORMLZR_H
11 
12 #include "unicode/utypes.h"
13 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/chariter.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "unicode/uobject.h"
26 
131 public:
// Anonymous enum defining the constant DONE with the value 0xffff.
// NOTE(review): presumably the value the iteration functions report when
// there is no further character to return — confirm against the implementation.
137  enum {
138  DONE=0xffff
139  };
140 
141  // Constructors
142 
154 
// Creates a new Normalizer object for iterating over the normalized form of
// a given string. The string is passed as a UChar pointer `str` together with
// its `length`; `mode` is the normalization mode to apply.
166  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
167 
179 
// Copy constructor.
185  Normalizer(const Normalizer& copy);
186 
// Destructor (virtual).
191  virtual ~Normalizer();
192 
193 
194  //-------------------------------------------------------------------------
195  // Static utility methods
196  //-------------------------------------------------------------------------
197 
// Normalizes a UnicodeString according to the specified normalization mode.
//   source  - the string to normalize
//   mode    - the normalization mode
//   options - normalization options
//   result  - NOTE(review): presumably receives the normalized text — confirm
//   status  - ICU error code (used in place of exceptions)
212  static void U_EXPORT2 normalize(const UnicodeString& source,
213  UNormalizationMode mode, int32_t options,
214  UnicodeString& result,
215  UErrorCode &status);
216 
// Compose a UnicodeString.
//   source  - the string to compose
//   compat  - NOTE(review): presumably selects compatibility rather than
//             canonical processing — confirm against the API documentation
//   options - normalization options
//   result  - NOTE(review): presumably receives the composed text — confirm
//   status  - ICU error code (used in place of exceptions)
234  static void U_EXPORT2 compose(const UnicodeString& source,
235  UBool compat, int32_t options,
236  UnicodeString& result,
237  UErrorCode &status);
238 
// Static method to decompose a UnicodeString.
//   source  - the string to decompose
//   compat  - NOTE(review): presumably selects compatibility rather than
//             canonical processing — confirm against the API documentation
//   options - normalization options
//   result  - NOTE(review): presumably receives the decomposed text — confirm
//   status  - ICU error code (used in place of exceptions)
256  static void U_EXPORT2 decompose(const UnicodeString& source,
257  UBool compat, int32_t options,
258  UnicodeString& result,
259  UErrorCode &status);
260 
// Performs a quick check on a string for the given normalization mode.
// The inline definition in this header forwards to the overload that takes
// an options argument, passing 0 for the options.
281  static inline UNormalizationCheckResult
282  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
283 
// Performs a quick check on a string; same as the other version of
// quickCheck but takes an additional `options` argument.
//   source  - the string to check
//   mode    - the normalization mode to check against
//   options - normalization options
//   status  - ICU error code (used in place of exceptions)
// Returns a UNormalizationCheckResult (result value of the quick check).
297  static UNormalizationCheckResult
298  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
299 
// Tests whether a string is in a given normalization form.
// The inline definition in this header forwards to the overload that takes
// an options argument, passing 0 for the options.
320  static inline UBool
321  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
322 
// Test if a string is in a given normalization form; same as the other
// version of isNormalized but takes an additional `options` argument.
338  static UBool
339  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
340 
// Concatenate normalized strings, making sure that the result is normalized
// as well.
//   left, right - the two strings to concatenate
//   result      - NOTE(review): presumably receives the concatenation and is
//                 the reference that is returned — confirm
//   mode        - the normalization mode
//   options     - normalization options
//   errorCode   - ICU error code (used in place of exceptions)
370  static UnicodeString &
371  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
372  UnicodeString &result,
373  UNormalizationMode mode, int32_t options,
374  UErrorCode &errorCode);
375 
// Compares two strings for canonical equivalence.
// Defined inline in this header as a call to unorm_compare() on the two
// strings' buffers and lengths; `options` and `errorCode` are passed through.
440  static inline int32_t
441  compare(const UnicodeString &s1, const UnicodeString &s2,
442  uint32_t options,
443  UErrorCode &errorCode);
444 
445  //-------------------------------------------------------------------------
446  // Iteration API
447  //-------------------------------------------------------------------------
448 
458 
// Return the first character in the normalized text.
467  UChar32 first(void);
468 
// Return the last character in the normalized text.
477  UChar32 last(void);
478 
// Return the next character in the normalized text.
493  UChar32 next(void);
494 
510 
// Set the iteration position in the input text that is being normalized,
// without performing normalization immediately.
520  void setIndexOnly(int32_t index);
521 
// Reset the index to the beginning of the text.
527  void reset(void);
528 
// Retrieve the current iteration position in the input text that is being
// normalized.
543  int32_t getIndex(void) const;
544 
// Retrieve the index of the start of the input text.
553  int32_t startIndex(void) const;
554 
// Retrieve the index of the end of the input text.
565  int32_t endIndex(void) const;
566 
// Returns TRUE when both iterators refer to the same character in the same
// input text.
575  UBool operator==(const Normalizer& that) const;
576 
// Inequality comparison; the inline definition in this header returns the
// negation of operator==.
585  inline UBool operator!=(const Normalizer& that) const;
586 
// Returns a pointer to a new Normalizer that is a clone of this one.
// NOTE(review): ownership of the returned object presumably passes to the
// caller — confirm against the API documentation.
593  Normalizer* clone(void) const;
594 
// Generates a hash code for this iterator.
601  int32_t hashCode(void) const;
602 
603  //-------------------------------------------------------------------------
604  // Property access methods
605  //-------------------------------------------------------------------------
606 
623 
635 
// Set options that affect this Normalizer's operation.
//   option - the option to change
//   value  - NOTE(review): presumably TRUE turns the option on and FALSE
//            turns it off — confirm
652  void setOption(int32_t option,
653  UBool value);
654 
// Determine whether an option is turned on or off.
665  UBool getOption(int32_t option) const;
666 
// Set the input text over which this Normalizer will iterate
// (UnicodeString overload).
675  void setText(const UnicodeString& newText,
676  UErrorCode &status);
677 
// Set the input text over which this Normalizer will iterate
// (CharacterIterator overload).
686  void setText(const CharacterIterator& newText,
687  UErrorCode &status);
688 
// Set the input text over which this Normalizer will iterate
// (UChar pointer plus length overload).
698  void setText(const UChar* newText,
699  int32_t length,
700  UErrorCode &status);
// Copies the input text into the UnicodeString argument.
707  void getText(UnicodeString& result);
708 
// ICU "poor man's RTTI": returns a UClassID for this class.
714  static UClassID U_EXPORT2 getStaticClassID();
715 
// ICU "poor man's RTTI": returns a UClassID for the actual class.
721  virtual UClassID getDynamicClassID() const;
722 
723 private:
724  //-------------------------------------------------------------------------
725  // Private functions
726  //-------------------------------------------------------------------------
727 
// Declared in the private section and not implemented, so a Normalizer
// cannot be default-constructed or assigned by code outside the class.
728  Normalizer(); // default constructor not implemented
729  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
730 
731  // Private utility methods for iteration
732  // For documentation, see the source code
// NOTE(review): the meaning of the UBool results is not stated in this
// header — check the implementation file before relying on them.
733  UBool nextNormalize();
734  UBool previousNormalize();
735 
// Private helpers with no documentation in this header.
// NOTE(review): clearBuffer presumably resets the `buffer` / `bufferPos`
// members and init presumably performs shared construction-time setup —
// confirm in the implementation file.
736  void init();
737  void clearBuffer(void);
738 
739  //-------------------------------------------------------------------------
740  // Private data
741  //-------------------------------------------------------------------------
742 
// Normalizer2 instances used by this object, followed by the normalization
// mode and options.
// NOTE(review): fUMode / fOptions presumably hold the values set through the
// constructors and setOption — confirm in the implementation file.
743  FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
744  const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
745  UNormalizationMode fUMode;
746  int32_t fOptions;
747 
748  // The input text and our position in it
749  CharacterIterator *text;
750 
751  // The normalization buffer is the result of normalization
752  // of the source in [currentIndex..nextIndex[ .
753  int32_t currentIndex, nextIndex;
754 
755  // A buffer for holding intermediate results
756  UnicodeString buffer;
// NOTE(review): presumably the current read position within `buffer` — confirm.
757  int32_t bufferPos;
758 };
759 
760 //-------------------------------------------------------------------------
761 // Inline implementations
762 //-------------------------------------------------------------------------
763 
// Inequality operator: returns the negation of operator== applied to `other`.
764 inline UBool
765 Normalizer::operator!=(const Normalizer& other) const
766 { return ! operator==(other); }
767 
// Quick check without an options argument: forwards to the overload that
// takes options, passing 0, and returns its result.
768 inline UNormalizationCheckResult
769 Normalizer::quickCheck(const UnicodeString& source,
770  UNormalizationMode mode,
771  UErrorCode &status) {
772  return quickCheck(source, mode, 0, status);
773 }
774 
// Normalization-form test without an options argument: forwards to the
// overload that takes options, passing 0, and returns its result.
775 inline UBool
776 Normalizer::isNormalized(const UnicodeString& source,
777  UNormalizationMode mode,
778  UErrorCode &status) {
779  return isNormalized(source, mode, 0, status);
780 }
781 
// Compares two strings for canonical equivalence by delegating to the C API
// function unorm_compare().
//   s1, s2    - the strings to compare; their buffers and lengths are passed
//               to unorm_compare
//   options   - passed through unchanged
//   errorCode - passed by address as unorm_compare's pErrorCode
// Returns the int32_t result of unorm_compare.
782 inline int32_t
783 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
784  uint32_t options,
785  UErrorCode &errorCode) {
786  // all argument checking is done in unorm_compare
787  return unorm_compare(s1.getBuffer(), s1.length(),
788  s2.getBuffer(), s2.length(),
789  options,
790  &errorCode);
791 }
792 
794 
795 #endif /* #if !UCONFIG_NO_NORMALIZATION */
796 
797 #endif // NORMLZR_H
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::Normalizer::previous
UChar32 previous(void)
Return the previous character in the normalized text and decrement.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
icu::Normalizer::reset
void reset(void)
Reset the index to the beginning of the text.
icu::Normalizer::getUMode
UNormalizationMode getUMode(void) const
Return the normalization mode for this object.
icu::Normalizer::quickCheck
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status)
Performing quick check on a string; same as the other version of quickCheck but takes an extra option...
icu::Normalizer::decompose
static void decompose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Static method to decompose a UnicodeString.
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
icu::Normalizer::setText
void setText(const UnicodeString &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
icu::Normalizer::last
UChar32 last(void)
Return the last character in the normalized text.
icu::UnicodeString::length
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3653
icu::Normalizer::concatenate
static UnicodeString & concatenate(const UnicodeString &left, const UnicodeString &right, UnicodeString &result, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Concatenate normalized strings, making sure that the result is normalized as well.
icu::Normalizer::Normalizer
Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
icu::Normalizer::setIndexOnly
void setIndexOnly(int32_t index)
Set the iteration position in the input text that is being normalized, without any immediate normalization.
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
icu::Normalizer::setText
void setText(const UChar *newText, int32_t length, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
icu::Normalizer::startIndex
int32_t startIndex(void) const
Retrieve the index of the start of the input text.
icu::Normalizer::getText
void getText(UnicodeString &result)
Copies the input text into the UnicodeString argument.
icu::Normalizer::setMode
void setMode(UNormalizationMode newMode)
Set the normalization mode for this object.
icu::Normalizer::Normalizer
Normalizer(const Normalizer &copy)
Copy constructor.
icu::Normalizer2
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
icu::Normalizer::current
UChar32 current(void)
Return the current character in the normalized text.
UNormalizationMode
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:133
icu::operator!=
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
icu::Normalizer::getStaticClassID
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UNormalizationCheckResult
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:91
icu::Normalizer::Normalizer
Normalizer(const UnicodeString &str, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
icu::Normalizer::getOption
UBool getOption(int32_t option) const
Determine whether an option is turned on or off.
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
icu::Normalizer::hashCode
int32_t hashCode(void) const
Generates a hash code for this iterator.
icu::Normalizer::endIndex
int32_t endIndex(void) const
Retrieve the index of the end of the input text.
unorm.h
C API: Unicode Normalization.
icu::Normalizer::getDynamicClassID
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
icu::Normalizer::compose
static void compose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Compose a UnicodeString.
icu::Normalizer::Normalizer
Normalizer(const CharacterIterator &iter, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of the given text.
icu::Normalizer::getIndex
int32_t getIndex(void) const
Retrieve the current iteration position in the input text that is being normalized.
icu::Normalizer::operator==
UBool operator==(const Normalizer &that) const
Returns TRUE when both iterators refer to the same character in the same input text.
chariter.h
C++ API: Character Iterator.
icu::Normalizer::setOption
void setOption(int32_t option, UBool value)
Set options that affect this Normalizer's operation.
normalizer2.h
C++ API: New API for Unicode Normalization.
unorm_compare
int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings for canonical equivalence.
UChar
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
icu::Normalizer::first
UChar32 first(void)
Return the first character in the normalized text.
icu::Normalizer::clone
Normalizer * clone(void) const
Returns a pointer to a new Normalizer that is a clone of this one.
uobject.h
C++ API: Common ICU base class UObject.
icu::Normalizer::~Normalizer
virtual ~Normalizer()
Destructor.
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
icu::Normalizer::isNormalized
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Test if a string is in a given normalization form; same as the other version of isNormalized but take...
icu::Normalizer
The Normalizer class supports the standard normalization forms described in Unicode Standard Annex #15: Unicode Normalization Forms.
Definition: normlzr.h:130
icu::Normalizer::normalize
static void normalize(const UnicodeString &source, UNormalizationMode mode, int32_t options, UnicodeString &result, UErrorCode &status)
Normalizes a UnicodeString according to the specified normalization mode.
icu::Normalizer::setText
void setText(const CharacterIterator &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
unistr.h
C++ API: Unicode String.
icu::FilteredNormalizer2
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:443
icu::Normalizer::next
UChar32 next(void)
Return the next character in the normalized text.
icu::UnicodeString::getBuffer
UChar * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.