susumu.yata
null+****@clear*****
Sun Jan 20 12:40:11 JST 2013
susumu.yata 2013-01-20 12:40:11 +0900 (Sun, 20 Jan 2013) New Revision: 78ad0c35d5310f57647495324be05b8a7e69c40b https://github.com/groonga/grnxx/commit/78ad0c35d5310f57647495324be05b8a7e69c40b Log: Add reserve_node(). (DoubleArray) Modified files: lib/alpha/double_array.cpp lib/alpha/double_array.hpp Modified: lib/alpha/double_array.cpp (+144 -4) =================================================================== --- lib/alpha/double_array.cpp 2013-01-18 15:19:24 +0900 (7c42780) +++ lib/alpha/double_array.cpp 2013-01-20 12:40:11 +0900 (ca6d4f5) @@ -33,13 +33,20 @@ DoubleArrayHeader::DoubleArrayHeader() entries_block_id_(io::BLOCK_INVALID_ID), keys_block_id_(io::BLOCK_INVALID_ID), root_node_id_(0), - inter_process_mutex_() {} + num_chunks_(0), + num_phantoms_(0), + leaders_(), + inter_process_mutex_() { + for (uint32_t i = 0; i < DOUBLE_ARRAY_MAX_CHUNK_LEVEL; ++i) { + leaders_[i] = DOUBLE_ARRAY_INVALID_LEADER; + } +} DoubleArrayKey::DoubleArrayKey(uint64_t id, const char *address, uint64_t length) : id_low_(static_cast<uint32_t>(id)), id_high_(static_cast<uint8_t>(id >> 32)), - buf_() { + buf_{ '\0', '\0', '\0' } { std::memcpy(buf_, address, length); } @@ -100,7 +107,7 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length, if (node.key_length() == length) { if (get_key(node.key_offset()).equals_to(ptr, length, query_pos)) { - if (key_offset != NULL) { + if (key_offset) { *key_offset = node.key_offset(); } return true; @@ -140,7 +147,8 @@ void DoubleArrayImpl::create_double_array(io::Pool pool) { keys_.create(pool); header_->set_keys_block_id(keys_.block_id()); - // TODO + reserve_node(root_node_id()); + nodes_[DOUBLE_ARRAY_INVALID_OFFSET].set_is_origin(true); initialized_ = true; } @@ -193,5 +201,137 @@ bool DoubleArrayImpl::search_leaf(const uint8_t *ptr, uint64_t length, return nodes_[next].is_leaf(); } +void DoubleArrayImpl::reserve_node(uint64_t node_id) { + if (node_id >= header_->num_nodes()) { +// reserve_block(node_id / DOUBLE_ARRAY_BLOCK_SIZE); + } + + DoubleArrayNode &node = nodes_[node_id]; +// GRN_DAT_DEBUG_THROW_IF(!node.is_phantom()); + + const uint64_t chunk_id = node_id / DOUBLE_ARRAY_CHUNK_SIZE; + DoubleArrayChunk &chunk = chunks_[chunk_id]; +// GRN_DAT_DEBUG_THROW_IF(chunk.num_phantoms() == 0); + + const uint64_t next = (chunk_id * DOUBLE_ARRAY_CHUNK_SIZE) | node.next(); + const uint64_t prev = (chunk_id * DOUBLE_ARRAY_CHUNK_SIZE) | node.prev(); +// GRN_DAT_DEBUG_THROW_IF(next >= header_->num_nodes()); +// GRN_DAT_DEBUG_THROW_IF(prev >= header_->num_nodes()); + + if ((node_id & DOUBLE_ARRAY_CHUNK_MASK) == chunk.first_phantom()) { + // The first phantom node is removed from the chunk and the second phantom + // node comes first. + chunk.set_first_phantom(next & DOUBLE_ARRAY_CHUNK_MASK); + } + + nodes_[next].set_prev(prev & DOUBLE_ARRAY_CHUNK_MASK); + nodes_[prev].set_next(next & DOUBLE_ARRAY_CHUNK_MASK); + + if (chunk.level() != DOUBLE_ARRAY_MAX_CHUNK_LEVEL) { + const uint64_t threshold = + uint64_t(1) << ((DOUBLE_ARRAY_MAX_CHUNK_LEVEL - chunk.level() - 1) * 2); + if (chunk.num_phantoms() == threshold) { +// update_chunk_level(chunk_id, chunk.level() + 1); + } + } + chunk.set_num_phantoms(chunk.num_phantoms() - 1); + + node.set_is_phantom(false); + +// GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET); +// GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL); + + header_->set_num_phantoms(header_->num_phantoms() - 1); +} + +void DoubleArrayImpl::reserve_chunk(uint64_t chunk_id) { +// GRN_DAT_DEBUG_THROW_IF(chunk_id != num_chunks()); + // TODO +// GRN_DAT_THROW_IF(SIZE_ERROR, chunk_id >= max_num_chunks()); + + header_->set_num_chunks(chunk_id + 1); + chunks_[chunk_id].set_failure_count(0); + chunks_[chunk_id].set_first_phantom(0); + chunks_[chunk_id].set_num_phantoms(DOUBLE_ARRAY_CHUNK_SIZE); + + const uint64_t begin = chunk_id * DOUBLE_ARRAY_CHUNK_SIZE; + const uint64_t end = begin + DOUBLE_ARRAY_CHUNK_SIZE; +// GRN_DAT_DEBUG_THROW_IF(end != num_nodes()); + + DoubleArrayNode node; + node.set_offset(DOUBLE_ARRAY_INVALID_OFFSET); + node.set_is_phantom(true); + + for (uint64_t i = begin; i < end; ++i) { + node.set_prev((i - 1) & DOUBLE_ARRAY_CHUNK_MASK); + node.set_next((i + 1) & DOUBLE_ARRAY_CHUNK_MASK); + nodes_[i] = node; + } + + // The level of the new chunk is 0. + set_chunk_level(chunk_id, 0); + header_->set_num_phantoms(header_->num_phantoms() + DOUBLE_ARRAY_CHUNK_SIZE); +} + +void DoubleArrayImpl::update_chunk_level(uint64_t chunk_id, uint32_t level) { +// GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunks()); +// GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL); + + unset_chunk_level(chunk_id); + set_chunk_level(chunk_id, level); +} + +void DoubleArrayImpl::set_chunk_level(uint64_t chunk_id, uint32_t level) { +// GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunks()); +// GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL); + + const uint64_t leader = header_->ith_leader(level); + if (leader == DOUBLE_ARRAY_INVALID_LEADER) { + // The chunk becomes the only one member of the level group. + chunks_[chunk_id].set_next(chunk_id); + chunks_[chunk_id].set_prev(chunk_id); + header_->set_ith_leader(level, chunk_id); + } else { + // The chunk is appended to the level group, in practice. + const uint64_t next = leader; + const uint64_t prev = chunks_[leader].prev(); +// GRN_DAT_DEBUG_THROW_IF(next >= num_chunks()); +// GRN_DAT_DEBUG_THROW_IF(prev >= num_chunks()); + chunks_[chunk_id].set_next(next); + chunks_[chunk_id].set_prev(prev); + chunks_[next].set_prev(chunk_id); + chunks_[prev].set_next(chunk_id); + } + chunks_[chunk_id].set_level(level); + chunks_[chunk_id].set_failure_count(0); +} + +void DoubleArrayImpl::unset_chunk_level(uint64_t chunk_id) { +// GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunk()); + + const uint32_t level = chunks_[chunk_id].level(); +// GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL); + + const uint64_t leader = header_->ith_leader(level); +// GRN_DAT_DEBUG_THROW_IF(leader == DOUBLE_ARRAY_INVALID_LEADER); + + const uint64_t next = chunks_[chunk_id].next(); + const uint64_t prev = chunks_[chunk_id].prev(); +// GRN_DAT_DEBUG_THROW_IF(next >= num_chunks()); +// GRN_DAT_DEBUG_THROW_IF(prev >= num_chunks()); + + if (next == chunk_id) { + // The level group becomes empty. + header_->set_ith_leader(level, DOUBLE_ARRAY_INVALID_LEADER); + } else { + chunks_[next].set_prev(prev); + chunks_[prev].set_next(next); + if (chunk_id == leader) { + // The second chunk becomes the leader of the level group. + header_->set_ith_leader(level, next); + } + } +} + } // namespace alpha } // namespace grnxx Modified: lib/alpha/double_array.hpp (+83 -4) =================================================================== --- lib/alpha/double_array.hpp 2013-01-18 15:19:24 +0900 (306a364) +++ lib/alpha/double_array.hpp 2013-01-20 12:40:11 +0900 (a8b5041) @@ -28,6 +28,29 @@ using namespace grnxx::db; extern struct DoubleArrayCreate {} DOUBLE_ARRAY_CREATE; extern struct DoubleArrayOpen {} DOUBLE_ARRAY_OPEN; +constexpr uint64_t DOUBLE_ARRAY_INVALID_ID = 0xFFFFFFFFFFULL; +constexpr uint64_t DOUBLE_ARRAY_INVALID_OFFSET = 0; + +constexpr uint64_t DOUBLE_ARRAY_CHUNK_SIZE = 0x200; +constexpr uint64_t DOUBLE_ARRAY_CHUNK_MASK = 0x1FF; + +// Chunks are grouped by the level which indicates how easily update operations +// can find a good offset in that chunk. The chunk level rises when +// find_offset() fails in that chunk many times. DOUBLE_ARRAY_MAX_FAILURE_COUNT +// is the threshold. Also, in order to limit the time cost, find_offset() scans +// at most DOUBLE_ARRAY_MAX_CHUNk_COUNT chunks. +// Larger parameters bring more chances of finding good offsets but it leads to +// more node renumberings, which are costly operations, and thus results in +// a degradation of space/time efficiencies. +constexpr uint64_t DOUBLE_ARRAY_MAX_FAILURE_COUNT = 4; +constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_COUNT = 16; +constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_LEVEL = 5; + +// Chunks in the same level compose a doubly linked list. The entry chunk of +// a linked list is called a leader. DOUBLE_ARRAY_INVALID_LEADER means that +// the linked list is empty and there exists no leader. +constexpr uint64_t DOUBLE_ARRAY_INVALID_LEADER = 0x7FFFFFFF; + class DoubleArrayString { public: DoubleArrayString() : ptr_(nullptr), length_(0) {} @@ -137,6 +160,19 @@ class DoubleArrayHeader { uint64_t root_node_id() const { return root_node_id_; } + uint64_t num_chunks() const { + return num_chunks_; + } + uint64_t num_nodes() const { + return num_chunks_ * DOUBLE_ARRAY_CHUNK_SIZE; + } + uint64_t num_phantoms() const { + return num_phantoms_; + } + uint64_t ith_leader(uint64_t i) const { +// GRN_DAT_DEBUG_THROW_IF(i > DOUBLE_ARRAY_MAX_CHUNK_LEVEL); + return leaders_[i]; + } void set_nodes_block_id(uint32_t value) { nodes_block_id_ = value; @@ -153,6 +189,17 @@ class DoubleArrayHeader { void set_root_node_id(uint64_t value) { root_node_id_ = value; } + void set_num_chunks(uint64_t value) { + num_chunks_ = value; + } + void set_num_phantoms(uint64_t value) { + num_phantoms_ = value; + } + void set_ith_leader(uint64_t i, uint64_t x) { +// GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL); +// GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks())); + leaders_[i] = x; + } Mutex *mutable_inter_process_mutex() { return &inter_process_mutex_; @@ -164,6 +211,9 @@ class DoubleArrayHeader { uint32_t entries_block_id_; uint32_t keys_block_id_; uint64_t root_node_id_; + uint64_t num_chunks_; + uint64_t num_phantoms_; + uint64_t leaders_[DOUBLE_ARRAY_MAX_CHUNK_LEVEL + 1]; Mutex inter_process_mutex_; }; @@ -344,7 +394,7 @@ class DoubleArrayChunk { qwords_[1] = (qwords_[1] & ~UPPER_MASK) | (value << UPPER_SHIFT); } - // The chunk level indicates how easily nodes can be put in this block. + // The chunk level indicates how easily nodes can be put in this chunk. uint64_t level() const { // 10 bits. return (qwords_[0] & MIDDLE_MASK) >> MIDDLE_SHIFT; @@ -361,7 +411,7 @@ class DoubleArrayChunk { qwords_[1] = (qwords_[1] & ~MIDDLE_MASK) | (value << MIDDLE_SHIFT); } - // The first phantom node and the number of phantom nodes in this block. + // The first phantom node and the number of phantom nodes in this chunk. uint64_t first_phantom() const { // 10 bits. return (qwords_[0] & LOWER_MASK) >> LOWER_SHIFT; @@ -397,7 +447,7 @@ class DoubleArrayEntry { DoubleArrayEntry() : qword_(0) {} // This entry is associated with a key (true) or not (false). - bool is_valid() const { + explicit operator bool() const { return qword_ & IS_VALID_FLAG; } @@ -435,6 +485,11 @@ class DoubleArrayKey { public: DoubleArrayKey(uint64_t id, const char *address, uint64_t length); + explicit operator bool() const { + // FIXME: Magic number. + return id() != DOUBLE_ARRAY_INVALID_ID; + } + uint64_t id() const { return id_low_ | (static_cast<uint64_t>(id_high_) << 32); } @@ -451,6 +506,12 @@ class DoubleArrayKey { return true; } + static const DoubleArrayKey &invalid_key() { + static const DoubleArrayKey invalid_key( + DOUBLE_ARRAY_INVALID_ID, nullptr, 0); + return invalid_key; + } + static uint64_t estimate_size(uint64_t length) { return 2 + (length / sizeof(uint32_t)); } @@ -470,11 +531,22 @@ class DoubleArrayImpl { static std::unique_ptr<DoubleArrayImpl> open(io::Pool pool, uint32_t block_id); - bool search(const uint8_t *ptr, uint64_t length, uint64_t *key_offset); + bool search(const uint8_t *ptr, uint64_t length, + uint64_t *key_offset = nullptr); + + // TODO + bool insert(const uint8_t *ptr, uint64_t length, + uint64_t *key_offset = nullptr); const DoubleArrayKey &get_key(uint64_t key_offset) { return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_offset]); } + const DoubleArrayKey &ith_key(uint64_t key_id) { + if (entries_[key_id]) { + return get_key(entries_[key_id].key_offset()); + } + return DoubleArrayKey::invalid_key(); + } uint32_t block_id() const { return block_info_->id(); @@ -503,6 +575,13 @@ class DoubleArrayImpl { bool search_leaf(const uint8_t *ptr, uint64_t length, uint64_t &node_id, uint64_t &query_pos); + + void reserve_node(uint64_t node_id); + void reserve_chunk(uint64_t chunk_id); + + void update_chunk_level(uint64_t chunk_id, uint32_t level); + void set_chunk_level(uint64_t chunk_id, uint32_t level); + void unset_chunk_level(uint64_t chunk_id); }; // TODO -------------- next part -------------- HTML����������������������������...Télécharger