ICU 55.1  55.1
tblcoll.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
60 #ifndef TBLCOLL_H
61 #define TBLCOLL_H
62 
63 #include "unicode/utypes.h"
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/locid.h"
69 #include "unicode/uiter.h"
70 #include "unicode/ucol.h"
71 
73 
74 struct CollationCacheEntry;
75 struct CollationData;
76 struct CollationSettings;
77 struct CollationTailoring;
81 class StringSearch;
85 class CollationElementIterator;
86 class CollationKey;
87 class SortKeyByteSink;
88 class UnicodeSet;
89 class UnicodeString;
90 class UVector64;
91 
112 public:
122 
133  ECollationStrength collationStrength,
134  UErrorCode& status);
135 
146  UColAttributeValue decompositionMode,
147  UErrorCode& status);
148 
160  ECollationStrength collationStrength,
161  UColAttributeValue decompositionMode,
162  UErrorCode& status);
163 
164 #ifndef U_HIDE_INTERNAL_API
165 
170  UParseError &parseError, UnicodeString &reason,
171  UErrorCode &errorCode);
172 #endif /* U_HIDE_INTERNAL_API */
173 
180 
181 
199  RuleBasedCollator(const uint8_t *bin, int32_t length,
200  const RuleBasedCollator *base,
201  UErrorCode &status);
202 
208 
215 
222  virtual UBool operator==(const Collator& other) const;
223 
229  virtual Collator* clone(void) const;
230 
242  const UnicodeString& source) const;
243 
254  const CharacterIterator& source) const;
255 
256  // Make deprecated versions of Collator::compare() visible.
257  using Collator::compare;
258 
271  virtual UCollationResult compare(const UnicodeString& source,
272  const UnicodeString& target,
273  UErrorCode &status) const;
274 
288  virtual UCollationResult compare(const UnicodeString& source,
289  const UnicodeString& target,
290  int32_t length,
291  UErrorCode &status) const;
292 
309  virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
310  const UChar* target, int32_t targetLength,
311  UErrorCode &status) const;
312 
325  UCharIterator &tIter,
326  UErrorCode &status) const;
327 
341  virtual UCollationResult compareUTF8(const StringPiece &source,
342  const StringPiece &target,
343  UErrorCode &status) const;
344 
360  CollationKey& key,
361  UErrorCode& status) const;
362 
378  virtual CollationKey& getCollationKey(const UChar *source,
379  int32_t sourceLength,
380  CollationKey& key,
381  UErrorCode& status) const;
382 
388  virtual int32_t hashCode() const;
389 
400  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
401 
407  const UnicodeString& getRules() const;
408 
414  virtual void getVersion(UVersionInfo info) const;
415 
416 #ifndef U_HIDE_DEPRECATED_API
417 
433  int32_t getMaxExpansion(int32_t order) const;
434 #endif /* U_HIDE_DEPRECATED_API */
435 
446  virtual UClassID getDynamicClassID(void) const;
447 
459  static UClassID U_EXPORT2 getStaticClassID(void);
460 
461 #ifndef U_HIDE_DEPRECATED_API
462 
472  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
473 #endif /* U_HIDE_DEPRECATED_API */
474 
485  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
486 
498  void getRules(UColRuleOption delta, UnicodeString &buffer) const;
499 
508  UErrorCode &status);
509 
518  UErrorCode &status) const;
519 
536  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
537 
545 
562  virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
563 
579  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
580 
592  virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
593 
601  virtual uint32_t getVariableTop(UErrorCode &status) const;
602 
612  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
613 
628  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
629  int32_t resultLength) const;
630 
647  virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
648  uint8_t *result, int32_t resultLength) const;
649 
663  virtual int32_t getReorderCodes(int32_t *dest,
664  int32_t destCapacity,
665  UErrorCode& status) const;
666 
678  virtual void setReorderCodes(const int32_t* reorderCodes,
679  int32_t reorderCodesLength,
680  UErrorCode& status) ;
681 
687  const char *left, int32_t leftLength,
688  const char *right, int32_t rightLength,
689  UErrorCode &errorCode) const;
690 
714  virtual int32_t internalGetShortDefinitionString(const char *locale,
715  char *buffer,
716  int32_t capacity,
717  UErrorCode &status) const;
718 
723  virtual int32_t internalNextSortKeyPart(
724  UCharIterator *iter, uint32_t state[2],
725  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
726 
732 
733 #ifndef U_HIDE_INTERNAL_API
734 
740  const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
741 
755  UnicodeSet *contractions, UnicodeSet *expansions,
756  UBool addPrefixes, UErrorCode &errorCode) const;
757 
763  void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
764 
770  const UnicodeString &rules,
771  int32_t strength,
772  UColAttributeValue decompositionMode,
773  UParseError *outParseError, UnicodeString *outReason,
774  UErrorCode &errorCode);
775 
778  return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
779  }
781  static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {  // const overload: view a C-API UCollator as a RuleBasedCollator.
782  return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));  // dynamic_cast yields a null pointer when the object is not a RuleBasedCollator.
783  }
784 
789  void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
790 #endif // U_HIDE_INTERNAL_API
791 
792 protected:
800  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
801 
802 private:
803  friend class CollationElementIterator;
804  friend class Collator;
805 
806  RuleBasedCollator(const CollationCacheEntry *entry);
807 
813  enum Attributes {  // Extra attribute indexes that continue numbering after the UColAttribute constants.
814  ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,  // First index past the UColAttribute range.
815  ATTR_LIMIT  // One past the last attribute index (exclusive upper bound).
816  };
817 
818  void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
819 
820  // Both lengths must be <0 or else both must be >=0.
821  UCollationResult doCompare(const UChar *left, int32_t leftLength,
822  const UChar *right, int32_t rightLength,
823  UErrorCode &errorCode) const;
824  UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
825  const uint8_t *right, int32_t rightLength,
826  UErrorCode &errorCode) const;
827 
828  void writeSortKey(const UChar *s, int32_t length,
829  SortKeyByteSink &sink, UErrorCode &errorCode) const;
830 
831  void writeIdenticalLevel(const UChar *s, const UChar *limit,
832  SortKeyByteSink &sink, UErrorCode &errorCode) const;
833 
834  const CollationSettings &getDefaultSettings() const;
835 
836  void setAttributeDefault(int32_t attribute) {  // Marks `attribute` as no longer explicitly set.
837  explicitlySetAttributes &= ~((uint32_t)1 << attribute);  // Clear this attribute's bit; no range check here -- presumably 0 <= attribute && attribute < ATTR_LIMIT, TODO confirm.
838  }
839  void setAttributeExplicitly(int32_t attribute) {  // Marks `attribute` as explicitly set.
840  explicitlySetAttributes |= (uint32_t)1 << attribute;  // Set this attribute's bit; no range check here -- presumably 0 <= attribute && attribute < ATTR_LIMIT, TODO confirm.
841  }
842  UBool attributeHasBeenSetExplicitly(int32_t attribute) const {  // Reports whether this attribute's bit is set in explicitlySetAttributes.
843  // Precondition (not enforced): 0 <= attribute && attribute < ATTR_LIMIT.
844  return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);  // Nonzero exactly when the bit is set.
845  }
846 
854  UBool isUnsafe(UChar32 c) const;
855 
856  static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
857  UBool initMaxExpansions(UErrorCode &errorCode) const;
858 
859  void setFastLatinOptions(CollationSettings &ownedSettings) const;
860 
861  const CollationData *data;
862  const CollationSettings *settings; // reference-counted
863  const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
864  const CollationCacheEntry *cacheEntry; // reference-counted
865  Locale validLocale;
866  uint32_t explicitlySetAttributes;
867 
868  UBool actualLocaleIsSameAsValid;
869 };
870 
872 
873 #endif // !UCONFIG_NO_COLLATION
874 #endif // TBLCOLL_H
icu::RuleBasedCollator::internalGetCEs
void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const
Appends the CEs for the string to the vector.
coll.h
C++ API: Collation Service.
icu::RuleBasedCollator::setVariableTop
virtual void setVariableTop(uint32_t varTop, UErrorCode &status)
Sets the variable top to the specified primary weight.
icu::RuleBasedCollator::rbcFromUCollator
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition: tblcoll.h:777
icu::RuleBasedCollator::createCollationElementIterator
virtual CollationElementIterator * createCollationElementIterator(const UnicodeString &source) const
Creates a collation element iterator for the source string.
icu::RuleBasedCollator::operator==
virtual UBool operator==(const Collator &other) const
Returns true if argument is the same as this object.
icu::CollationKey
Collation keys are generated by the Collator class.
Definition: sortkey.h:97
UColAttribute
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:234
icu::RuleBasedCollator::cloneBinary
int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const
Creates a binary image of a collator.
icu::RuleBasedCollator::getSortKey
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const
Get the sort key as an array of bytes from a UnicodeString.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::UnicodeSet
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
icu::Collator::ECollationStrength
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:194
icu::RuleBasedCollator::internalNextSortKeyPart
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
icu::RuleBasedCollator::compareUTF8
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
U_I18N_API
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:358
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
icu::RuleBasedCollator::getMaxVariable
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const RuleBasedCollator &other)
Copy constructor.
ULocDataLocaleType
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition: uloc.h:336
icu::RuleBasedCollator::getReorderCodes
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
UCOL_ATTRIBUTE_COUNT
@ UCOL_ATTRIBUTE_COUNT
The number of UColAttribute constants.
Definition: ucol.h:340
UColRuleOption
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:346
icu::RuleBasedCollator::getTailoredSet
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
icu::RuleBasedCollator::hashCode
virtual int32_t hashCode() const
Generates the hash code for the rule-based collation object.
icu::Collator
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:163
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const UnicodeString &rules, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
icu::RuleBasedCollator::setReorderCodes
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
UColAttributeValue
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:87
UCollationResult
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:71
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UErrorCode &status)
RuleBasedCollator constructor.
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
icu::RuleBasedCollator::getVariableTop
virtual uint32_t getVariableTop(UErrorCode &status) const
Gets the variable top value of a Collator.
icu::RuleBasedCollator::internalGetShortDefinitionString
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
icu::RuleBasedCollator::compare
virtual UCollationResult compare(UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const
Compares two strings using the Collator.
icu::RuleBasedCollator::getCollationKey
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const
Transforms the string into a series of characters that can be compared with CollationKey....
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
icu::RuleBasedCollator::getCollationKey
virtual CollationKey & getCollationKey(const UChar *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const
Transforms a specified region of the string into a series of characters that can be compared with Col...
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
icu::RuleBasedCollator::setMaxVariable
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
icu::RuleBasedCollator::setVariableTop
virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)
Sets the variable top to the primary weight of the specified string.
icu::RuleBasedCollator::compare
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const
Does the same thing as compare but limits the comparison to a specified length.
icu::RuleBasedCollator
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables.
Definition: tblcoll.h:111
UCollator
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:56
icu::RuleBasedCollator::clone
virtual Collator * clone(void) const
Makes a copy of this object.
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const UnicodeString &rules, UParseError &parseError, UnicodeString &reason, UErrorCode &errorCode)
TODO: document & propose as public API.
icu::RuleBasedCollator::setLocales
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
icu::RuleBasedCollator::getSortKey
virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const
Get the sort key as an array of bytes from a UChar buffer.
icu::RuleBasedCollator::internalGetLocaleID
const char * internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const
Implements ucol_getLocaleByType().
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
icu::RuleBasedCollator::getRules
void getRules(UColRuleOption delta, UnicodeString &buffer) const
Returns current rules.
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const uint8_t *bin, int32_t length, const RuleBasedCollator *base, UErrorCode &status)
Opens a collator from a collator binary image created using cloneBinary.
icu::RuleBasedCollator::operator=
RuleBasedCollator & operator=(const RuleBasedCollator &other)
Assignment operator.
icu::RuleBasedCollator::~RuleBasedCollator
virtual ~RuleBasedCollator()
Destructor.
UCharIterator
C API for code unit iteration.
Definition: uiter.h:339
icu::RuleBasedCollator::compare
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const
The comparison function compares the character data stored in two different strings.
icu::RuleBasedCollator::getVersion
virtual void getVersion(UVersionInfo info) const
Gets the version information for a Collator.
icu::RuleBasedCollator::internalAddContractions
void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const
Adds the contractions that start with character c to the set.
icu::RuleBasedCollator::getMaxExpansion
int32_t getMaxExpansion(int32_t order) const
Returns the maximum length of any expansion sequences that end with the specified comparison order.
uiter.h
C API: Unicode Character Iteration.
icu::RuleBasedCollator::internalCompareUTF8
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
UChar
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
icu::Collator::fromUCollator
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1161
UVersionInfo
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:57
icu::RuleBasedCollator::internalGetContractionsAndExpansions
void internalGetContractionsAndExpansions(UnicodeSet *contractions, UnicodeSet *expansions, UBool addPrefixes, UErrorCode &errorCode) const
Implements ucol_getContractionsAndExpansions().
locid.h
C++ API: Locale ID object.
icu::RuleBasedCollator::getLocale
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Gets the locale of the Collator.
icu::RuleBasedCollator::internalBuildTailoring
void internalBuildTailoring(const UnicodeString &rules, int32_t strength, UColAttributeValue decompositionMode, UParseError *outParseError, UnicodeString *outReason, UErrorCode &errorCode)
Implements from-rule constructors, and ucol_openRules().
icu::RuleBasedCollator::getAttribute
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const
Universal attribute getter.
icu::RuleBasedCollator::cloneRuleData
uint8_t * cloneRuleData(int32_t &length, UErrorCode &status) const
Do not use this method: The caller and the ICU library might use different heaps.
icu::RuleBasedCollator::getRules
const UnicodeString & getRules() const
Gets the tailoring rules for this collator.
icu::Collator::compare
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator()
Only for use in ucol_openRules().
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
icu::CollationElementIterator
The CollationElementIterator class is used as an iterator to walk through each character of an inte...
Definition: coleitr.h:116
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
icu::StringPiece
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:52
icu::RuleBasedCollator::RuleBasedCollator
RuleBasedCollator(const UnicodeString &rules, UErrorCode &status)
RuleBasedCollator constructor.
icu::Locale
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185
ucol.h
C API: Collator.
icu::RuleBasedCollator::createCollationElementIterator
virtual CollationElementIterator * createCollationElementIterator(const CharacterIterator &source) const
Creates a collation element iterator for the source.
icu::RuleBasedCollator::rbcFromUCollator
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition: tblcoll.h:781
icu::RuleBasedCollator::compare
virtual UCollationResult compare(const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength, UErrorCode &status) const
The comparison function compares the character data stored in two different string arrays.
UColReorderCode
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
Definition: ucol.h:139
icu::RuleBasedCollator::setVariableTop
virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status)
Sets the variable top to the primary weight of the specified string.
icu::RuleBasedCollator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
icu::RuleBasedCollator::setAttribute
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)
Universal attribute setter.
icu::RuleBasedCollator::getStaticClassID
static UClassID getStaticClassID(void)
Returns the class ID for this class.