ICU 55.1  55.1
messagepattern.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 * Copyright (C) 2011-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: messagepattern.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2011mar14
12 * created by: Markus W. Scherer
13 */
14 
15 #ifndef __MESSAGEPATTERN_H__
16 #define __MESSAGEPATTERN_H__
17 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_FORMATTING
26 
27 #include "unicode/parseerr.h"
28 #include "unicode/unistr.h"
29 
88 };
93 
203 };
208 
256 };
261 
// Evaluates to true if argType is UMSGPAT_ARG_TYPE_PLURAL or
// UMSGPAT_ARG_TYPE_SELECTORDINAL, false for every other argument type.
// The macro argument appears twice in the expansion, so it is evaluated twice;
// do not pass an expression with side effects.
268 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
269  ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
270 
271 enum {
278 
287 };
288 
// Named constant for the double value -123456789.
// NOTE(review): presumably the marker that numeric-value getters return when a
// part has no numeric value -- confirm against the implementation.
295 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
296 
298 
// Forward declarations of helper types that are only used through pointers
// in this header (the partsList and numericValuesList members).
299 class MessagePatternDoubleList;
300 class MessagePatternPartsList;
301 
359 public:
369 
380 
// Constructs a MessagePattern with the default UMessagePatternApostropheMode
// and parses the given MessageFormat pattern.
// pattern:    the pattern string to parse.
// parseError: receives parse error details; passed as a pointer
//             (NOTE(review): presumably may be NULL -- confirm).
// errorCode:  in/out ICU error code.
399  MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
400 
407 
415 
// Destructor. Declared virtual; defined out of line.
420  virtual ~MessagePattern();
421 
440  UParseError *parseError, UErrorCode &errorCode);
441 
460  UParseError *parseError, UErrorCode &errorCode);
461 
480  UParseError *parseError, UErrorCode &errorCode);
481 
500  UParseError *parseError, UErrorCode &errorCode);
501 
// Clears this MessagePattern. Defined out of line.
507  void clear();
508 
516  clear();
517  aposMode=mode;
518  }
519 
// Equality comparison with another MessagePattern. Defined out of line.
525  UBool operator==(const MessagePattern &other) const;
526 
// Inequality: the exact negation of operator==.
532  inline UBool operator!=(const MessagePattern &other) const {
533  return !operator==(other);
534  }
535 
// Returns a hash code for this object. Defined out of line.
540  int32_t hashCode() const;
541 
547  return aposMode;
548  }
549 
550  // Java has package-private jdkAposMode() here.
551  // In C++, this is declared in the MessageImpl class.
552 
558  return msg;
559  }
560 
567  return hasArgNames;
568  }
569 
576  return hasArgNumbers;
577  }
578 
// Validates and parses an argument name or argument number string.
// Returns UMSGPAT_ARG_NAME_NOT_VALID when the string is invalid, and
// UMSGPAT_ARG_NAME_NOT_NUMBER when it is a valid identifier that is not a number.
// NOTE(review): presumably returns the parsed number otherwise -- confirm
// against the implementation.
590  static int32_t validateArgumentName(const UnicodeString &name);
591 
603 
// Forward declaration: the accessors below use Part before its definition.
604  class Part;
605 
// Returns the number of "parts" created by parsing the pattern string
// (the current value of partsLength).
612  int32_t countParts() const {
613  return partsLength;
614  }
615 
// Gets the i-th pattern "part" by reference.
// The index is used directly on the parts array with no range check here,
// so the caller must pass a valid index (see countParts()).
622  const Part &getPart(int32_t i) const {
623  return parts[i];
624  }
625 
634  return getPart(i).type;
635  }
636 
// Returns the pattern index of the specified pattern "part",
// i.e. the index field of getPart(partIndex).
644  int32_t getPatternIndex(int32_t partIndex) const {
645  return getPart(partIndex).index;
646  }
647 
// Returns the substring of the pattern string indicated by the Part:
// part.length code units of msg starting at part.index.
// The result comes from UnicodeString::tempSubString(), which creates a
// temporary substring for the given range.
655  UnicodeString getSubstring(const Part &part) const {
656  return msg.tempSubString(part.index, part.length);
657  }
658 
// Compares the part's substring with the input string s.
// Returns true when msg.compare() over the part's range reports 0 (equal).
666  UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
667  return 0==msg.compare(part.index, part.length, s);
668  }
669 
// Returns the numeric value associated with an ARG_INT or ARG_DOUBLE part.
// Defined out of line.
676  double getNumericValue(const Part &part) const;
677 
// Returns the "offset:" value of a PluralFormat argument, or 0 if none is
// specified. pluralStart is a part index. Defined out of line.
684  double getPluralOffset(int32_t pluralStart) const;
685 
// Returns the index of the ARG|MSG_LIMIT part corresponding to the
// ARG|MSG_START part at index start.
// If the stored limitPartIndex is smaller than start, start itself is
// returned instead, so the result is never less than start.
694  int32_t getLimitPartIndex(int32_t start) const {
695  int32_t limit=getPart(start).limitPartIndex;
696  if(limit<start) {
697  return start;
698  }
699  return limit;
700  }
701 
// A message pattern "part", representing a pattern parsing event.
// Holds a type, a pattern-string index and length, a small value, and a
// limit part index. MessagePattern is a friend and accesses the fields directly.
709  class Part : public UMemory {
710  public:
// Default constructor, do not use. It initializes none of the fields.
715  Part() {}
716 
723  return type;
724  }
725 
// Returns the pattern string index associated with this Part.
731  int32_t getIndex() const {
732  return index;
733  }
734 
// Returns the length of the pattern substring associated with this Part.
741  int32_t getLength() const {
742  return length;
743  }
744 
// Returns the pattern string limit (exclusive-end) index associated with
// this Part, computed as index+length.
751  int32_t getLimit() const {
752  return index+length;
753  }
754 
// Returns a value associated with this part (widened from the int16_t field).
761  int32_t getValue() const {
762  return value;
763  }
764 
772  UMessagePatternPartType type=getType();
774  return (UMessagePatternArgType)value;
775  } else {
776  return UMSGPAT_ARG_TYPE_NONE;
777  }
778  }
779 
789  }
790 
// Equality comparison with another Part. Defined out of line.
796  UBool operator==(const Part &other) const;
797 
// Inequality: the exact negation of operator==.
803  inline UBool operator!=(const Part &other) const {
804  return !operator==(other);
805  }
806 
// Returns a hash code that combines type, index, length and value with a
// multiplier of 37.
// NOTE(review): the arithmetic is done in signed int; for large index values
// the multiplications could overflow -- consider unsigned arithmetic.
811  int32_t hashCode() const {
812  return ((type*37+index)*37+length)*37+value;
813  }
814 
815  private:
816  friend class MessagePattern;
817 
// Upper bounds that match the largest values the uint16_t length field and
// the int16_t value field below can hold.
818  static const int32_t MAX_LENGTH=0xffff;
819  static const int32_t MAX_VALUE=0x7fff;
820 
821  // Some fields are not final because they are modified during pattern parsing.
822  // After pattern parsing, the parts are effectively immutable.
// index/length: range in the pattern string; value: small per-part payload;
// limitPartIndex: read by MessagePattern::getLimitPartIndex().
824  int32_t index;
825  uint16_t length;
826  int16_t value;
827  int32_t limitPartIndex;
828  };
829 
830 private:
// Private parsing helpers, all defined out of line.
// The parse* functions take and return an int32_t index and report problems
// through the optional UParseError pointer and the UErrorCode reference.
// NOTE(review): the index values are presumably offsets into the pattern
// string, and the return value the offset after the parsed element -- confirm.
831  void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
832 
833  void postParse();
834 
835  int32_t parseMessage(int32_t index, int32_t msgStartLength,
836  int32_t nestingLevel, UMessagePatternArgType parentType,
837  UParseError *parseError, UErrorCode &errorCode);
838 
839  int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
840  UParseError *parseError, UErrorCode &errorCode);
841 
842  int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
843 
844  int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
845  UParseError *parseError, UErrorCode &errorCode);
846 
847  int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
848  UParseError *parseError, UErrorCode &errorCode);
849 
// Static overload: works on the range [start, limit) of an arbitrary string s.
// Defined out of line.
// NOTE(review): presumably parses an argument number from that range --
// confirm the return convention against the implementation.
858  static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
859 
// Convenience overload: forwards to the static overload using this
// object's own pattern string (msg).
860  int32_t parseArgNumber(int32_t start, int32_t limit) {
861  return parseArgNumber(msg, start, limit);
862  }
863 
// Parses a double from the range start..limit; allowInfinity is a flag
// controlling whether an infinity value is accepted (name-based reading,
// NOTE(review): confirm exact syntax rules in the implementation).
// Errors are reported through parseError/errorCode. Defined out of line.
872  void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
873  UParseError *parseError, UErrorCode &errorCode);
874 
875  // Java has package-private appendReducedApostrophes() here.
876  // In C++, this is declared in the MessageImpl class.
877 
// Scanning helpers: each takes an index and returns an index.
// NOTE(review): presumably the returned index is the position after the
// skipped text -- confirm.
878  int32_t skipWhiteSpace(int32_t index);
879 
880  int32_t skipIdentifier(int32_t index);
881 
886  int32_t skipDouble(int32_t index);
887 
// Character/keyword classification predicates; all return UBool.
888  static UBool isArgTypeChar(UChar32 c);
889 
890  UBool isChoice(int32_t index);
891 
892  UBool isPlural(int32_t index);
893 
894  UBool isSelect(int32_t index);
895 
896  UBool isOrdinal(int32_t index);
897 
// Context predicates based on the current nesting level (and parent
// argument type for the second one).
902  UBool inMessageFormatPattern(int32_t nestingLevel);
903 
908  UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
909 
// Part-list construction helpers, all defined out of line.
// addPart/addLimitPart take the same (type, index, length, value) fields
// that a Part stores; addLimitPart additionally takes the index of the
// corresponding start part.
910  void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
911  int32_t value, UErrorCode &errorCode);
912 
913  void addLimitPart(int32_t start,
914  UMessagePatternPartType type, int32_t index, int32_t length,
915  int32_t value, UErrorCode &errorCode);
916 
// Adds a part carrying a double value.
// NOTE(review): presumably an ARG_DOUBLE part whose value is stored in the
// numeric values list -- confirm.
917  void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
918 
// Fills in parseError for a problem at the given index.
919  void setParseError(UParseError *parseError, int32_t index);
920 
// Storage setup helpers; both return UBool and report failures via errorCode.
921  UBool init(UErrorCode &errorCode);
922  UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
923 
// The pattern string; getSubstring() and partSubstringMatches() read from it.
925  UnicodeString msg;
926  // ArrayList<Part> parts=new ArrayList<Part>();
// Parts storage: parts/partsLength are what getPart() and countParts() read.
// NOTE(review): partsList presumably owns the array that parts points into --
// confirm in the implementation.
927  MessagePatternPartsList *partsList;
928  Part *parts;
929  int32_t partsLength;
930  // ArrayList<Double> numericValues;
// Numeric values storage, laid out the same way as the parts storage above.
931  MessagePatternDoubleList *numericValuesList;
932  double *numericValues;
933  int32_t numericValuesLength;
// Flags recorded for the pattern: whether it has named arguments, whether it
// has numbered arguments, and whether auto-quoting is needed.
934  UBool hasArgNames;
935  UBool hasArgNumbers;
936  UBool needsAutoQuoting;
937 };
938 
940 
941 #endif // !UCONFIG_NO_FORMATTING
942 
943 #endif // __MESSAGEPATTERN_H__
icu::MessagePattern::MessagePattern
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Constructs a MessagePattern with default UMessagePatternApostropheMode and parses the MessageFormat p...
icu::MessagePattern::partSubstringMatches
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
Definition: messagepattern.h:666
icu::MessagePattern::countParts
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
Definition: messagepattern.h:612
parseerr.h
C API: Parse Error Information.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::MessagePattern::Part::operator==
UBool operator==(const Part &other) const
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
icu::MessagePattern::Part::hasNumericValue
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
Definition: messagepattern.h:787
icu::MessagePattern::getPartType
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
Definition: messagepattern.h:633
UMSGPAT_PART_TYPE_ARG_DOUBLE
@ UMSGPAT_PART_TYPE_ARG_DOUBLE
A numeric value, for example the offset or an explicit selector value in a PluralFormat style.
Definition: messagepattern.h:202
icu::operator==
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
U_COMMON_API
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:357
UMSGPAT_ARG_NAME_NOT_VALID
@ UMSGPAT_ARG_NAME_NOT_VALID
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
Definition: messagepattern.h:286
icu::MessagePattern::getApostropheMode
UMessagePatternApostropheMode getApostropheMode() const
Definition: messagepattern.h:546
icu::MessagePattern::MessagePattern
MessagePattern(const MessagePattern &other)
Copy constructor.
icu::MessagePattern::clearPatternAndSetApostropheMode
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
Definition: messagepattern.h:515
UMessagePatternArgType
UMessagePatternArgType
Argument type constants.
Definition: messagepattern.h:217
icu::MessagePattern::getPart
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
Definition: messagepattern.h:622
icu::MessagePattern::validateArgumentName
static int32_t validateArgumentName(const UnicodeString &name)
Validates and parses an argument name or argument number string.
icu::MessagePattern::operator==
UBool operator==(const MessagePattern &other) const
icu::MessagePattern::Part::getValue
int32_t getValue() const
Returns a value associated with this part.
Definition: messagepattern.h:761
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
icu::MessagePattern
Parses and represents ICU MessageFormat patterns.
Definition: messagepattern.h:358
UMSGPAT_APOS_DOUBLE_REQUIRED
@ UMSGPAT_APOS_DOUBLE_REQUIRED
A literal apostrophe must be represented by a double apostrophe pattern character.
Definition: messagepattern.h:87
icu::MessagePattern::operator=
MessagePattern & operator=(const MessagePattern &other)
Assignment operator.
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
icu::MessagePattern::getSubstring
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
Definition: messagepattern.h:655
icu::MessagePattern::MessagePattern
MessagePattern(UErrorCode &errorCode)
Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
UMSGPAT_PART_TYPE_ARG_SELECTOR
@ UMSGPAT_PART_TYPE_ARG_SELECTOR
A selector substring in a "complex" argument style.
Definition: messagepattern.h:187
UChar32
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
UMessagePatternApostropheMode
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
Definition: messagepattern.h:66
icu::UObject
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:476
icu::MessagePattern::getPatternString
const UnicodeString & getPatternString() const
Definition: messagepattern.h:557
icu::MessagePattern::MessagePattern
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
Constructs an empty MessagePattern.
UMSGPAT_PART_TYPE_SKIP_SYNTAX
@ UMSGPAT_PART_TYPE_SKIP_SYNTAX
Indicates a substring of the pattern string which is to be skipped when formatting.
Definition: messagepattern.h:125
icu::MessagePattern::Part::hashCode
int32_t hashCode() const
Definition: messagepattern.h:811
UMSGPAT_ARG_TYPE_SIMPLE
@ UMSGPAT_ARG_TYPE_SIMPLE
The argument has a "simple" type which is provided by the ARG_TYPE part.
Definition: messagepattern.h:228
icu::MessagePattern::hasNumberedArguments
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
Definition: messagepattern.h:575
icu::MessagePattern::Part
A message pattern "part", representing a pattern parsing event.
Definition: messagepattern.h:709
icu::UMemory
UMemory is the common ICU base class.
Definition: uobject.h:110
icu::MessagePattern::parseSelectStyle
MessagePattern & parseSelectStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a SelectFormat pattern string.
UMSGPAT_APOS_DOUBLE_OPTIONAL
@ UMSGPAT_APOS_DOUBLE_OPTIONAL
A literal apostrophe is represented by either a single or a double apostrophe pattern character.
Definition: messagepattern.h:78
UMSGPAT_PART_TYPE_ARG_TYPE
@ UMSGPAT_PART_TYPE_ARG_TYPE
The argument type.
Definition: messagepattern.h:175
UMSGPAT_PART_TYPE_MSG_START
@ UMSGPAT_PART_TYPE_MSG_START
Start of a message pattern (main or nested).
Definition: messagepattern.h:108
icu::MessagePattern::parsePluralStyle
MessagePattern & parsePluralStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a PluralFormat pattern string.
UMSGPAT_PART_TYPE_ARG_LIMIT
@ UMSGPAT_PART_TYPE_ARG_LIMIT
End of an argument.
Definition: messagepattern.h:158
icu::MessagePattern::Part::getLimit
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part.
Definition: messagepattern.h:751
icu::MessagePattern::Part::getType
UMessagePatternPartType getType() const
Returns the type of this part.
Definition: messagepattern.h:722
UMSGPAT_ARG_TYPE_SELECT
@ UMSGPAT_ARG_TYPE_SELECT
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
Definition: messagepattern.h:249
icu::MessagePattern::Part::getArgType
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_...
Definition: messagepattern.h:771
icu::MessagePattern::~MessagePattern
virtual ~MessagePattern()
Destructor.
UMSGPAT_PART_TYPE_MSG_LIMIT
@ UMSGPAT_PART_TYPE_MSG_LIMIT
End of a message pattern (main or nested).
Definition: messagepattern.h:117
UMSGPAT_ARG_TYPE_PLURAL
@ UMSGPAT_ARG_TYPE_PLURAL
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e....
Definition: messagepattern.h:244
icu::MessagePattern::hasNamedArguments
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
Definition: messagepattern.h:566
icu::MessagePattern::Part::getIndex
int32_t getIndex() const
Returns the pattern string index associated with this Part.
Definition: messagepattern.h:731
icu::MessagePattern::getNumericValue
double getNumericValue(const Part &part) const
Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
icu::MessagePattern::parse
MessagePattern & parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a MessageFormat pattern string.
icu::MessagePattern::Part::operator!=
UBool operator!=(const Part &other) const
Definition: messagepattern.h:803
UMSGPAT_PART_TYPE_REPLACE_NUMBER
@ UMSGPAT_PART_TYPE_REPLACE_NUMBER
Indicates a syntactic (non-escaped) # symbol in a plural variant.
Definition: messagepattern.h:140
UMSGPAT_PART_TYPE_ARG_INT
@ UMSGPAT_PART_TYPE_ARG_INT
An integer value, for example the offset or an explicit selector value in a PluralFormat style.
Definition: messagepattern.h:194
icu::UnicodeString::tempSubString
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
UMSGPAT_PART_TYPE_ARG_NAME
@ UMSGPAT_PART_TYPE_ARG_NAME
The argument name.
Definition: messagepattern.h:169
UMSGPAT_PART_TYPE_INSERT_CHAR
@ UMSGPAT_PART_TYPE_INSERT_CHAR
Indicates that a syntax character needs to be inserted for auto-quoting.
Definition: messagepattern.h:132
UMSGPAT_ARG_TYPE_NONE
@ UMSGPAT_ARG_TYPE_NONE
The argument has no specified type.
Definition: messagepattern.h:222
UMessagePatternPartType
UMessagePatternPartType
MessagePattern::Part type constants.
Definition: messagepattern.h:98
UMSGPAT_PART_TYPE_ARG_STYLE
@ UMSGPAT_PART_TYPE_ARG_STYLE
The argument style text.
Definition: messagepattern.h:181
icu::MessagePattern::getPatternIndex
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
Definition: messagepattern.h:644
icu::MessagePattern::autoQuoteApostropheDeep
UnicodeString autoQuoteApostropheDeep() const
Returns a version of the parsed pattern string where each ASCII apostrophe is doubled (escaped) if it...
UMSGPAT_ARG_TYPE_SELECTORDINAL
@ UMSGPAT_ARG_TYPE_SELECTORDINAL
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UM...
Definition: messagepattern.h:255
UMSGPAT_ARG_NAME_NOT_NUMBER
@ UMSGPAT_ARG_NAME_NOT_NUMBER
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern ident...
Definition: messagepattern.h:277
icu::MessagePattern::hashCode
int32_t hashCode() const
UMSGPAT_PART_TYPE_ARG_NUMBER
@ UMSGPAT_PART_TYPE_ARG_NUMBER
The argument number, provided by the value.
Definition: messagepattern.h:163
icu::MessagePattern::operator!=
UBool operator!=(const MessagePattern &other) const
Definition: messagepattern.h:532
icu::MessagePattern::getPluralOffset
double getPluralOffset(int32_t pluralStart) const
Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
U_NAMESPACE_END
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
icu::MessagePattern::parseChoiceStyle
MessagePattern & parseChoiceStyle(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
Parses a ChoiceFormat pattern string.
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
icu::MessagePattern::Part::Part
Part()
Default constructor, do not use.
Definition: messagepattern.h:715
icu::MessagePattern::getLimitPartIndex
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
Definition: messagepattern.h:694
UMSGPAT_PART_TYPE_ARG_START
@ UMSGPAT_PART_TYPE_ARG_START
Start of an argument.
Definition: messagepattern.h:151
icu::MessagePattern::Part::getLength
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
Definition: messagepattern.h:741
unistr.h
C++ API: Unicode String.
UMSGPAT_ARG_TYPE_CHOICE
@ UMSGPAT_ARG_TYPE_CHOICE
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR,...
Definition: messagepattern.h:234
icu::MessagePattern::clear
void clear()
Clears this MessagePattern.