17 #ifndef __BYTESTRIE_H__ 
   18 #define __BYTESTRIE_H__ 
   27 #if U_SHOW_CPLUSPLUS_API 
   38 class BytesTrieBuilder;
 
   72             : ownedArray_(nullptr), bytes_(static_cast<const uint8_t *>(trieBytes)),
 
   73               pos_(bytes_), remainingMatchLength_(-1) {}
 
   88             : ownedArray_(nullptr), bytes_(other.bytes_),
 
   89               pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
 
   98         remainingMatchLength_=-1;
 
  111         return (
static_cast<uint64_t
>(remainingMatchLength_ + 2) << kState64RemainingShift) |
 
  112             (uint64_t)(pos_ - bytes_);
 
  130         remainingMatchLength_ = 
static_cast<int32_t
>(state >> kState64RemainingShift) - 2;
 
  131         pos_ = bytes_ + (state & kState64PosMask);
 
  150         const uint8_t *bytes;
 
  152         int32_t remainingMatchLength;
 
  165         state.remainingMatchLength=remainingMatchLength_;
 
  180         if(bytes_==state.bytes && bytes_!=
nullptr) {
 
  182             remainingMatchLength_=state.remainingMatchLength;
 
  204         remainingMatchLength_=-1;
 
  208         return nextImpl(bytes_, inByte);
 
  247         const uint8_t *pos=pos_;
 
  248         int32_t leadByte=*pos++;
 
  250         return readValue(pos, leadByte>>1);
 
  263         const uint8_t *pos=pos_;
 
  265         return pos!=
nullptr && findUniqueValue(pos+remainingMatchLength_+1, 
false, uniqueValue);
 
  357         UBool truncateAndStop();
 
  359         const uint8_t *branchNext(
const uint8_t *pos, int32_t length, 
UErrorCode &errorCode);
 
  361         const uint8_t *bytes_;
 
  363         const uint8_t *initialPos_;
 
  364         int32_t remainingMatchLength_;
 
  365         int32_t initialRemainingMatchLength_;
 
  383     friend class ::BytesTrieTest;
 
  391     BytesTrie(
void *adoptBytes, 
const void *trieBytes)
 
  392             : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
 
  393               bytes_(static_cast<const uint8_t *>(trieBytes)),
 
  394               pos_(bytes_), remainingMatchLength_(-1) {}
 
  397     BytesTrie &operator=(
const BytesTrie &other) = 
delete;
 
  405     static int32_t readValue(
const uint8_t *pos, int32_t leadByte);
 
  406     static inline const uint8_t *skipValue(
const uint8_t *pos, int32_t leadByte) {
 
  408         if(leadByte>=(kMinTwoByteValueLead<<1)) {
 
  409             if(leadByte<(kMinThreeByteValueLead<<1)) {
 
  411             } 
else if(leadByte<(kFourByteValueLead<<1)) {
 
  414                 pos+=3+((leadByte>>1)&1);
 
  419     static inline const uint8_t *skipValue(
const uint8_t *pos) {
 
  420         int32_t leadByte=*pos++;
 
  421         return skipValue(pos, leadByte);
 
  425     static const uint8_t *jumpByDelta(
const uint8_t *pos);
 
  427     static inline const uint8_t *skipDelta(
const uint8_t *pos) {
 
  428         int32_t delta=*pos++;
 
  429         if(delta>=kMinTwoByteDeltaLead) {
 
  430             if(delta<kMinThreeByteDeltaLead) {
 
  432             } 
else if(delta<kFourByteDeltaLead) {
 
  446     UStringTrieResult branchNext(
const uint8_t *pos, int32_t length, int32_t inByte);
 
  454     static const uint8_t *findUniqueValueFromBranch(
const uint8_t *pos, int32_t length,
 
  455                                                     UBool haveUniqueValue, int32_t &uniqueValue);
 
  458     static UBool findUniqueValue(
const uint8_t *pos, 
UBool haveUniqueValue, int32_t &uniqueValue);
 
  462     static void getNextBranchBytes(
const uint8_t *pos, int32_t length, ByteSink &out);
 
  463     static void append(ByteSink &out, 
int c);
 
  504     static const int32_t kMaxBranchLinearSubNodeLength=5;
 
  507     static const int32_t kMinLinearMatch=0x10;
 
  508     static const int32_t kMaxLinearMatchLength=0x10;
 
  515     static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  
 
  517     static const int32_t kValueIsFinal=1;
 
  520     static const int32_t kMinOneByteValueLead=kMinValueLead/2;  
 
  521     static const int32_t kMaxOneByteValue=0x40;  
 
  523     static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  
 
  524     static const int32_t kMaxTwoByteValue=0x1aff;
 
  526     static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  
 
  527     static const int32_t kFourByteValueLead=0x7e;
 
  530     static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
 
  532     static const int32_t kFiveByteValueLead=0x7f;
 
  535     static const int32_t kMaxOneByteDelta=0xbf;
 
  536     static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  
 
  537     static const int32_t kMinThreeByteDeltaLead=0xf0;
 
  538     static const int32_t kFourByteDeltaLead=0xfe;
 
  539     static const int32_t kFiveByteDeltaLead=0xff;
 
  541     static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  
 
  542     static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  
 
  548     static constexpr int32_t kState64RemainingShift = 59;
 
  549     static constexpr uint64_t kState64PosMask = (
UINT64_C(1) << kState64RemainingShift) - 1;
 
  551     uint8_t *ownedArray_;
 
  554     const uint8_t *bytes_;
 
  561     int32_t remainingMatchLength_;
 
A ByteSink can be filled with bytes.
Builder class for BytesTrie.
Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
StringPiece getString() const
Iterator & reset()
Resets this iterator to its initial state.
Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the root of a byte-serialized BytesTrie.
UBool next(UErrorCode &errorCode)
Finds the next (byte sequence, value) pair if there is one.
Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the current state of the specified BytesTrie.
BytesTrie state object, for saving a trie's current state and resetting the trie back to this state later.
State()
Constructs an empty State.
Light-weight, non-const reader class for a BytesTrie.
BytesTrie & resetToState64(uint64_t state)
Resets this trie to the saved state.
int32_t getValue() const
Returns a matching byte sequence's value if called immediately after current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
UStringTrieResult current() const
Determines whether the byte sequence so far matches, whether it has a value, and whether another input byte can continue a matching byte sequence.
UStringTrieResult next(const char *s, int32_t length)
Traverses the trie from the current state for this byte sequence.
UStringTrieResult first(int32_t inByte)
Traverses the trie from the initial state for this input byte.
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all byte sequences reachable from the current state map to the same value.
BytesTrie & resetToState(const State &state)
Resets this trie to the saved state.
BytesTrie & reset()
Resets this trie to its initial state.
BytesTrie(const BytesTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the byte array which will be shared.
int32_t getNextBytes(ByteSink &out) const
Finds each byte which continues the byte sequence from the current state.
const BytesTrie & saveState(State &state) const
Saves the state of this trie.
uint64_t getState64() const
Returns the state of this trie as a 64-bit integer.
BytesTrie(const void *trieBytes)
Constructs a BytesTrie reader instance.
UStringTrieResult next(int32_t inByte)
Traverses the trie from the current state for this input byte.
A string-like object that points to a sized piece of memory.
UMemory is the common ICU base class.
C++ API: StringPiece: Read-only byte string wrapper class.
#define UINT64_C(c)
Provides a platform independent way to specify an unsigned 64-bit integer constant.
int8_t UBool
The ICU boolean type, a signed-byte integer.
C++ API: Common ICU base class UObject.
C API: Helper definitions for dictionary trie APIs.
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
@ USTRINGTRIE_INTERMEDIATE_VALUE
The input unit(s) continued a matching string and there is a value for the string so far.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.