ICU 55.1  55.1
brkiter.h
Go to the documentation of this file.
1 /*
2 ********************************************************************************
3 * Copyright (C) 1997-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ********************************************************************************
6 *
7 * File brkiter.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
13 * 05/07/97 aliu Fixed DLL declaration.
14 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
15 * 08/11/98 helena Sync-up JDK1.2.
16 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
17 ********************************************************************************
18 */
19 
20 #ifndef BRKITER_H
21 #define BRKITER_H
22 
23 #include "unicode/utypes.h"
24 
30 #if UCONFIG_NO_BREAK_ITERATION
31 
33 
34 /*
35  * Allow the declaration of APIs with pointers to BreakIterator
36  * even when break iteration is removed from the build.
37  */
38 class BreakIterator;
39 
41 
42 #else
43 
44 #include "unicode/uobject.h"
45 #include "unicode/unistr.h"
46 #include "unicode/chariter.h"
47 #include "unicode/locid.h"
48 #include "unicode/ubrk.h"
49 #include "unicode/strenum.h"
50 #include "unicode/utext.h"
51 #include "unicode/umisc.h"
52 
54 
101 public:
/** Destructor; virtual, so derived iterators can be destroyed through a BreakIterator pointer. */
106  virtual ~BreakIterator();
107 
/** Return true if another object is semantically equal to this one. Pure virtual. */
121  virtual UBool operator==(const BreakIterator&) const = 0;
122 
/**
 * Returns the complement of the result of operator==.
 * Non-virtual: it negates whatever the virtual operator== reports for rhs.
 */
129  UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
130 
/** Return a polymorphic copy of this object. Pure virtual. */
136  virtual BreakIterator* clone(void) const = 0;
137 
/** Return a polymorphic class ID for this object. Pure virtual. */
143  virtual UClassID getDynamicClassID(void) const = 0;
144 
/** Return a CharacterIterator over the text being analyzed. Pure virtual. */
149  virtual CharacterIterator& getText(void) const = 0;
150 
151 
/**
 * Get a UText for the text being analyzed. Pure virtual.
 * NOTE(review): fillIn is presumably an optional UText to reuse for the result - confirm
 * against the full header documentation.
 */
166  virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
167 
/** Change the text over which this operates (UnicodeString overload). Pure virtual. */
174  virtual void setText(const UnicodeString &text) = 0;
175 
/** Reset the break iterator to operate over the text represented by the UText. Pure virtual. */
194  virtual void setText(UText *text, UErrorCode &status) = 0;
195 
/**
 * Change the text over which this operates (CharacterIterator overload). Pure virtual.
 * NOTE(review): the name suggests ownership of `it` passes to the iterator - confirm.
 */
204  virtual void adoptText(CharacterIterator* it) = 0;
205 
/**
 * Anonymous enum defining the constant DONE with the value -1.
 * NOTE(review): presumably the sentinel the iteration methods return when there is no
 * further boundary - confirm against the full header documentation.
 */
206  enum {
212  DONE = (int32_t)-1
213  };
214 
/** Sets the current iteration position to the beginning of the text, position zero. Pure virtual. */
220  virtual int32_t first(void) = 0;
221 
/**
 * Set the iterator position to the index immediately BEYOND the last character in the
 * text being scanned. Pure virtual.
 */
227  virtual int32_t last(void) = 0;
228 
/** Set the iterator position to the boundary preceding the current boundary. Pure virtual. */
235  virtual int32_t previous(void) = 0;
236 
/** Advance the iterator to the boundary following the current boundary. Pure virtual. */
243  virtual int32_t next(void) = 0;
244 
/** Return character index of the current iterator position within the text. Pure virtual. */
250  virtual int32_t current(void) const = 0;
251 
/** Advance the iterator to the first boundary following the specified offset. Pure virtual. */
260  virtual int32_t following(int32_t offset) = 0;
261 
/** Set the iterator position to the first boundary preceding the specified offset. Pure virtual. */
270  virtual int32_t preceding(int32_t offset) = 0;
271 
/** Return true if the specified position is a boundary position. Pure virtual. */
280  virtual UBool isBoundary(int32_t offset) = 0;
281 
/** Set the iterator position to the nth boundary from the current boundary. Pure virtual. */
291  virtual int32_t next(int32_t n) = 0;
292 
/**
 * For RuleBasedBreakIterators, return the status tag from the break rule that determined
 * the most recent boundary (the summary text is truncated in this listing - confirm wording).
 * Virtual but not pure: subclasses are not required to override it.
 */
307  virtual int32_t getRuleStatus() const;
308 
/**
 * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that
 * determined the most recent boundary (summary text is truncated in this listing).
 * Virtual but not pure.
 * NOTE(review): fillInVec/capacity presumably describe a caller-supplied output array and
 * its size - confirm against the full header documentation.
 */
337  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
338 
/** Create BreakIterator for word-breaks using the given locale. Static factory. */
358  static BreakIterator* U_EXPORT2
359  createWordInstance(const Locale& where, UErrorCode& status);
360 
/** Create BreakIterator for line-breaks using specified locale. Static factory. */
382  static BreakIterator* U_EXPORT2
383  createLineInstance(const Locale& where, UErrorCode& status);
384 
/** Create BreakIterator for character-breaks using specified locale. Static factory. */
404  static BreakIterator* U_EXPORT2
405  createCharacterInstance(const Locale& where, UErrorCode& status);
406 
/** Create BreakIterator for sentence-breaks using specified locale. Static factory. */
425  static BreakIterator* U_EXPORT2
426  createSentenceInstance(const Locale& where, UErrorCode& status);
427 
/** Create BreakIterator for title-casing breaks using the specified locale. Static factory. */
450  static BreakIterator* U_EXPORT2
451  createTitleInstance(const Locale& where, UErrorCode& status);
452 
/**
 * Get the set of Locales for which TextBoundaries are installed.
 * NOTE(review): `count` is presumably set to the number of entries in the returned
 * array - confirm.
 */
462  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
463 
/** Get name of the object for the desired Locale, in the desired language. */
473  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
474  const Locale& displayLocale,
475  UnicodeString& name);
476 
/** Get name of the object for the desired Locale, in the language of the default locale. */
485  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
486  UnicodeString& name);
487 
/** Deprecated functionality. Pure virtual, so concrete subclasses must still provide it. */
507  virtual BreakIterator * createBufferClone(void *stackBuffer,
508  int32_t &BufferSize,
509  UErrorCode &status) = 0;
510 
511 #ifndef U_HIDE_DEPRECATED_API
512 
/**
 * Declared only when U_HIDE_DEPRECATED_API is not defined.
 * The inline definition near the end of this header always returns FALSE.
 */
519  inline UBool isBufferClone(void);
520 
521 #endif /* U_HIDE_DEPRECATED_API */
522 
523 #if !UCONFIG_NO_SERVICE
524 
/**
 * Register a new break iterator of the indicated kind, to use in the given locale.
 * Compiled only when UCONFIG_NO_SERVICE is false.
 * Returns a URegistryKey, the opaque service-registration key type.
 * NOTE(review): the name `toAdopt` suggests ownership of the iterator is transferred - confirm.
 */
539  static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
540  const Locale& locale,
541  UBreakIteratorType kind,
542  UErrorCode& status);
543 
/**
 * Unregister a previously-registered BreakIterator using the key returned from the
 * register call. Compiled only when UCONFIG_NO_SERVICE is false.
 */
556  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
557 
/**
 * Return a StringEnumeration over the locales available at the time of the call,
 * including registered locales (summary text is truncated in this listing).
 * Compiled only when UCONFIG_NO_SERVICE is false.
 */
564  static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
565 #endif
566 
573 
574 #ifndef U_HIDE_INTERNAL_API
575 
/**
 * Get the locale for this break iterator object.
 * Declared only when U_HIDE_INTERNAL_API is not defined.
 */
581  const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
582 #endif /* U_HIDE_INTERNAL_API */
583 
/**
 * Set the subject text string upon which the break iterator is operating without changing
 * any other aspect of its state (summary text is truncated in this listing - confirm
 * wording). Pure virtual.
 */
609  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
610 
611  private:
/*
 * Private static helpers that return a BreakIterator for a locale and an integer `kind`.
 * NOTE(review): presumably the shared implementation behind the public create*Instance
 * factories and the friend service classes - confirm in the implementation file.
 */
612  static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
613  static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
614  static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
615 
616  friend class ICUBreakIteratorFactory;
617  friend class ICUBreakIteratorService;
618 
619 protected:
620  // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
621  // or else the compiler will create public ones.
/*
 * Copy constructor: forwards `other` to the UObject base and has an empty body.
 * NOTE(review): actualLocale/validLocale are not copied from `other` here - confirm
 * that this is intended.
 */
625  BreakIterator (const BreakIterator &other) : UObject(other) {}
/*
 * Constructor taking a valid and an actual Locale; defined outside this header.
 * NOTE(review): presumably initializes validLocale/actualLocale from them - confirm.
 */
627  BreakIterator (const Locale& valid, const Locale& actual);
628 private:
629 
/*
 * Fixed-size character buffers of ULOC_FULLNAME_CAPACITY bytes each.
 * NOTE(review): presumably hold the actual and valid locale IDs reported by
 * getLocaleID() - confirm in the implementation file.
 */
631  char actualLocale[ULOC_FULLNAME_CAPACITY];
632  char validLocale[ULOC_FULLNAME_CAPACITY];
633 
/*
 * Copy assignment is declared in the private section, so code outside the class
 * (including subclasses) cannot assign one BreakIterator to another.
 */
638  BreakIterator& operator=(const BreakIterator&);
639 };
640 
641 #ifndef U_HIDE_DEPRECATED_API
642 
/*
 * Inline definition of the deprecated isBufferClone(); it unconditionally returns FALSE.
 * Compiled only when U_HIDE_DEPRECATED_API is not defined.
 */
643 inline UBool BreakIterator::isBufferClone()
644 {
645  return FALSE;
646 }
647 
648 #endif /* U_HIDE_DEPRECATED_API */
649 
651 
652 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
653 
654 #endif // BRKITER_H
655 //eof
FALSE
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:242
icu::BreakIterator
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:100
icu::BreakIterator::getLocale
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::BreakIterator::current
virtual int32_t current(void) const =0
Return character index of the current iterator position within the text.
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
ubrk.h
C API: BreakIterator.
icu::BreakIterator::getRuleStatus
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the most recen...
ULocDataLocaleType
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition: uloc.h:336
icu::BreakIterator::~BreakIterator
virtual ~BreakIterator()
destructor
icu::BreakIterator::unregister
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
utext.h
C API: Abstract Unicode Text API.
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
icu::BreakIterator::createBufferClone
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
icu::BreakIterator::setText
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
icu::BreakIterator::isBoundary
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
umisc.h
C API: misc definitions.
icu::BreakIterator::getDynamicClassID
virtual UClassID getDynamicClassID(void) const =0
Return a polymorphic class ID for this object.
icu::BreakIterator::preceding
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
URegistryKey
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition: umisc.h:55
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UClassID
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
icu::BreakIterator::refreshInputText
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
icu::StringEnumeration
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
icu::BreakIterator::getDisplayName
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
icu::BreakIterator::createLineInstance
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
icu::BreakIterator::getDisplayName
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
icu::BreakIterator::following
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
UBreakIteratorType
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:87
icu::BreakIterator::registerInstance
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
UText
UText struct.
Definition: utext.h:1343
icu::BreakIterator::adoptText
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
icu::BreakIterator::operator!=
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:129
icu::BreakIterator::BreakIterator
BreakIterator(const BreakIterator &other)
Definition: brkiter.h:625
icu::BreakIterator::last
virtual int32_t last(void)=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
icu::BreakIterator::getRuleStatusVec
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the m...
icu::CharacterIterator
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
ULOC_FULLNAME_CAPACITY
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition: uloc.h:262
icu::BreakIterator::createCharacterInstance
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale Returns an instance of a BreakIterat...
icu::BreakIterator::clone
virtual BreakIterator * clone(void) const =0
Return a polymorphic copy of this object.
icu::BreakIterator::operator==
virtual UBool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
icu::BreakIterator::first
virtual int32_t first(void)=0
Sets the current iteration position to the beginning of the text, position zero.
icu::BreakIterator::createWordInstance
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
icu::BreakIterator::getUText
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
strenum.h
C++ API: String Enumeration.
chariter.h
C++ API: Character Iterator.
icu::BreakIterator::next
virtual int32_t next(void)=0
Advance the iterator to the boundary following the current boundary.
icu::BreakIterator::previous
virtual int32_t previous(void)=0
Set the iterator position to the boundary preceding the current boundary.
icu::BreakIterator::getAvailableLocales
static StringEnumeration * getAvailableLocales(void)
Return a StringEnumeration over the locales available at the time of the call, including registered l...
icu::BreakIterator::createSentenceInstance
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale Returns an instance of a BreakIterato...
locid.h
C++ API: Locale ID object.
icu::BreakIterator::BreakIterator
BreakIterator(const Locale &valid, const Locale &actual)
icu::BreakIterator::getText
virtual CharacterIterator & getText(void) const =0
Return a CharacterIterator over the text being analyzed.
uobject.h
C++ API: Common ICU base class UObject.
icu::BreakIterator::createTitleInstance
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale Returns an instance of a Brea...
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
icu::BreakIterator::next
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
icu::Locale
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185
icu::BreakIterator::getAvailableLocales
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
unistr.h
C++ API: Unicode String.
icu::BreakIterator::BreakIterator
BreakIterator()
icu::BreakIterator::getLocaleID
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
icu::BreakIterator::setText
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.