[Groonga-commit] groonga/grnxx [master] Add reserve_node(). (DoubleArray)

Back to archive index

susumu.yata null+****@clear*****
Sun Jan 20 12:40:11 JST 2013


susumu.yata	2013-01-20 12:40:11 +0900 (Sun, 20 Jan 2013)

  New Revision: 78ad0c35d5310f57647495324be05b8a7e69c40b
  https://github.com/groonga/grnxx/commit/78ad0c35d5310f57647495324be05b8a7e69c40b

  Log:
    Add reserve_node(). (DoubleArray)

  Modified files:
    lib/alpha/double_array.cpp
    lib/alpha/double_array.hpp

  Modified: lib/alpha/double_array.cpp (+144 -4)
===================================================================
--- lib/alpha/double_array.cpp    2013-01-18 15:19:24 +0900 (7c42780)
+++ lib/alpha/double_array.cpp    2013-01-20 12:40:11 +0900 (ca6d4f5)
@@ -33,13 +33,20 @@ DoubleArrayHeader::DoubleArrayHeader()
     entries_block_id_(io::BLOCK_INVALID_ID),
     keys_block_id_(io::BLOCK_INVALID_ID),
     root_node_id_(0),
-    inter_process_mutex_() {}
+    num_chunks_(0),
+    num_phantoms_(0),
+    leaders_(),
+    inter_process_mutex_() {
+  for (uint32_t i = 0; i < DOUBLE_ARRAY_MAX_CHUNK_LEVEL; ++i) {
+    leaders_[i] = DOUBLE_ARRAY_INVALID_LEADER;
+  }
+}
 
 DoubleArrayKey::DoubleArrayKey(uint64_t id, const char *address,
                                uint64_t length)
   : id_low_(static_cast<uint32_t>(id)),
     id_high_(static_cast<uint8_t>(id >> 32)),
-    buf_() {
+    buf_{ '\0', '\0', '\0' } {
   std::memcpy(buf_, address, length);
 }
 
@@ -100,7 +107,7 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length,
 
   if (node.key_length() == length) {
     if (get_key(node.key_offset()).equals_to(ptr, length, query_pos)) {
-      if (key_offset != NULL) {
+      if (key_offset) {
         *key_offset = node.key_offset();
       }
       return true;
@@ -140,7 +147,8 @@ void DoubleArrayImpl::create_double_array(io::Pool pool) {
   keys_.create(pool);
   header_->set_keys_block_id(keys_.block_id());
 
-  // TODO
+  reserve_node(root_node_id());
+  nodes_[DOUBLE_ARRAY_INVALID_OFFSET].set_is_origin(true);
 
   initialized_ = true;
 }
@@ -193,5 +201,137 @@ bool DoubleArrayImpl::search_leaf(const uint8_t *ptr, uint64_t length,
   return nodes_[next].is_leaf();
 }
 
+void DoubleArrayImpl::reserve_node(uint64_t node_id) {
+  if (node_id >= header_->num_nodes()) {
+//    reserve_block(node_id / DOUBLE_ARRAY_BLOCK_SIZE);
+  }
+
+  DoubleArrayNode &node = nodes_[node_id];
+//  GRN_DAT_DEBUG_THROW_IF(!node.is_phantom());
+
+  const uint64_t chunk_id = node_id / DOUBLE_ARRAY_CHUNK_SIZE;
+  DoubleArrayChunk &chunk = chunks_[chunk_id];
+//  GRN_DAT_DEBUG_THROW_IF(chunk.num_phantoms() == 0);
+
+  const uint64_t next = (chunk_id * DOUBLE_ARRAY_CHUNK_SIZE) | node.next();
+  const uint64_t prev = (chunk_id * DOUBLE_ARRAY_CHUNK_SIZE) | node.prev();
+//  GRN_DAT_DEBUG_THROW_IF(next >= header_->num_nodes());
+//  GRN_DAT_DEBUG_THROW_IF(prev >= header_->num_nodes());
+
+  if ((node_id & DOUBLE_ARRAY_CHUNK_MASK) == chunk.first_phantom()) {
+    // The first phantom node is removed from the chunk and the second phantom
+    // node comes first.
+    chunk.set_first_phantom(next & DOUBLE_ARRAY_CHUNK_MASK);
+  }
+
+  nodes_[next].set_prev(prev & DOUBLE_ARRAY_CHUNK_MASK);
+  nodes_[prev].set_next(next & DOUBLE_ARRAY_CHUNK_MASK);
+
+  if (chunk.level() != DOUBLE_ARRAY_MAX_CHUNK_LEVEL) {
+    const uint64_t threshold =
+        uint64_t(1) << ((DOUBLE_ARRAY_MAX_CHUNK_LEVEL - chunk.level() - 1) * 2);
+    if (chunk.num_phantoms() == threshold) {
+//      update_chunk_level(chunk_id, chunk.level() + 1);
+    }
+  }
+  chunk.set_num_phantoms(chunk.num_phantoms() - 1);
+
+  node.set_is_phantom(false);
+
+//  GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET);
+//  GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL);
+
+  header_->set_num_phantoms(header_->num_phantoms() - 1);
+}
+
+void DoubleArrayImpl::reserve_chunk(uint64_t chunk_id) {
+//  GRN_DAT_DEBUG_THROW_IF(chunk_id != num_chunks());
+  // TODO
+//  GRN_DAT_THROW_IF(SIZE_ERROR, chunk_id >= max_num_chunks());
+
+  header_->set_num_chunks(chunk_id + 1);
+  chunks_[chunk_id].set_failure_count(0);
+  chunks_[chunk_id].set_first_phantom(0);
+  chunks_[chunk_id].set_num_phantoms(DOUBLE_ARRAY_CHUNK_SIZE);
+
+  const uint64_t begin = chunk_id * DOUBLE_ARRAY_CHUNK_SIZE;
+  const uint64_t end = begin + DOUBLE_ARRAY_CHUNK_SIZE;
+//  GRN_DAT_DEBUG_THROW_IF(end != num_nodes());
+
+  DoubleArrayNode node;
+  node.set_offset(DOUBLE_ARRAY_INVALID_OFFSET);
+  node.set_is_phantom(true);
+
+  for (uint64_t i = begin; i < end; ++i) {
+    node.set_prev((i - 1) & DOUBLE_ARRAY_CHUNK_MASK);
+    node.set_next((i + 1) & DOUBLE_ARRAY_CHUNK_MASK);
+    nodes_[i] = node;
+  }
+
+  // The level of the new chunk is 0.
+  set_chunk_level(chunk_id, 0);
+  header_->set_num_phantoms(header_->num_phantoms() + DOUBLE_ARRAY_CHUNK_SIZE);
+}
+
+void DoubleArrayImpl::update_chunk_level(uint64_t chunk_id, uint32_t level) {
+//  GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunks());
+//  GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL);
+
+  unset_chunk_level(chunk_id);
+  set_chunk_level(chunk_id, level);
+}
+
+void DoubleArrayImpl::set_chunk_level(uint64_t chunk_id, uint32_t level) {
+//  GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunks());
+//  GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL);
+
+  const uint64_t leader = header_->ith_leader(level);
+  if (leader == DOUBLE_ARRAY_INVALID_LEADER) {
+    // The chunk becomes the only one member of the level group.
+    chunks_[chunk_id].set_next(chunk_id);
+    chunks_[chunk_id].set_prev(chunk_id);
+    header_->set_ith_leader(level, chunk_id);
+  } else {
+    // The chunk is appended to the level group, in practice.
+    const uint64_t next = leader;
+    const uint64_t prev = chunks_[leader].prev();
+//    GRN_DAT_DEBUG_THROW_IF(next >= num_chunks());
+//    GRN_DAT_DEBUG_THROW_IF(prev >= num_chunks());
+    chunks_[chunk_id].set_next(next);
+    chunks_[chunk_id].set_prev(prev);
+    chunks_[next].set_prev(chunk_id);
+    chunks_[prev].set_next(chunk_id);
+  }
+  chunks_[chunk_id].set_level(level);
+  chunks_[chunk_id].set_failure_count(0);
+}
+
+void DoubleArrayImpl::unset_chunk_level(uint64_t chunk_id) {
+//  GRN_DAT_DEBUG_THROW_IF(chunk_id >= num_chunk());
+
+  const uint32_t level = chunks_[chunk_id].level();
+//  GRN_DAT_DEBUG_THROW_IF(level > DOUBLE_ARRAY_MAX_CHUNK_LEVEL);
+
+  const uint64_t leader = header_->ith_leader(level);
+//  GRN_DAT_DEBUG_THROW_IF(leader == DOUBLE_ARRAY_INVALID_LEADER);
+
+  const uint64_t next = chunks_[chunk_id].next();
+  const uint64_t prev = chunks_[chunk_id].prev();
+//  GRN_DAT_DEBUG_THROW_IF(next >= num_chunks());
+//  GRN_DAT_DEBUG_THROW_IF(prev >= num_chunks());
+
+  if (next == chunk_id) {
+    // The level group becomes empty.
+    header_->set_ith_leader(level, DOUBLE_ARRAY_INVALID_LEADER);
+  } else {
+    chunks_[next].set_prev(prev);
+    chunks_[prev].set_next(next);
+    if (chunk_id == leader) {
+      // The second chunk becomes the leader of the level group.
+      header_->set_ith_leader(level, next);
+    }
+  }
+}
+
 }  // namespace alpha
 }  // namespace grnxx

  Modified: lib/alpha/double_array.hpp (+83 -4)
===================================================================
--- lib/alpha/double_array.hpp    2013-01-18 15:19:24 +0900 (306a364)
+++ lib/alpha/double_array.hpp    2013-01-20 12:40:11 +0900 (a8b5041)
@@ -28,6 +28,29 @@ using namespace grnxx::db;
 extern struct DoubleArrayCreate {} DOUBLE_ARRAY_CREATE;
 extern struct DoubleArrayOpen {} DOUBLE_ARRAY_OPEN;
 
+constexpr uint64_t DOUBLE_ARRAY_INVALID_ID     = 0xFFFFFFFFFFULL;
+constexpr uint64_t DOUBLE_ARRAY_INVALID_OFFSET = 0;
+
+constexpr uint64_t DOUBLE_ARRAY_CHUNK_SIZE      = 0x200;
+constexpr uint64_t DOUBLE_ARRAY_CHUNK_MASK      = 0x1FF;
+
+// Chunks are grouped by the level which indicates how easily update operations
+// can find a good offset in that chunk. The chunk level rises when
+// find_offset() fails in that chunk many times. DOUBLE_ARRAY_MAX_FAILURE_COUNT
+// is the threshold. Also, in order to limit the time cost, find_offset() scans
+// at most DOUBLE_ARRAY_MAX_CHUNk_COUNT chunks.
+// Larger parameters bring more chances of finding good offsets but it leads to
+// more node renumberings, which are costly operations, and thus results in
+// a degradation of space/time efficiencies.
+constexpr uint64_t DOUBLE_ARRAY_MAX_FAILURE_COUNT  = 4;
+constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_COUNT    = 16;
+constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_LEVEL    = 5;
+
+// Chunks in the same level compose a doubly linked list. The entry chunk of
+// a linked list is called a leader. DOUBLE_ARRAY_INVALID_LEADER means that
+// the linked list is empty and there exists no leader.
+constexpr uint64_t DOUBLE_ARRAY_INVALID_LEADER     = 0x7FFFFFFF;
+
 class DoubleArrayString {
  public:
   DoubleArrayString() : ptr_(nullptr), length_(0) {}
@@ -137,6 +160,19 @@ class DoubleArrayHeader {
   uint64_t root_node_id() const {
     return root_node_id_;
   }
+  uint64_t num_chunks() const {
+    return num_chunks_;
+  }
+  uint64_t num_nodes() const {
+    return num_chunks_ * DOUBLE_ARRAY_CHUNK_SIZE;
+  }
+  uint64_t num_phantoms() const {
+    return num_phantoms_;
+  }
+  uint64_t ith_leader(uint64_t i) const {
+//    GRN_DAT_DEBUG_THROW_IF(i > DOUBLE_ARRAY_MAX_CHUNK_LEVEL);
+    return leaders_[i];
+  }
 
   void set_nodes_block_id(uint32_t value) {
     nodes_block_id_ = value;
@@ -153,6 +189,17 @@ class DoubleArrayHeader {
   void set_root_node_id(uint64_t value) {
     root_node_id_ = value;
   }
+  void set_num_chunks(uint64_t value) {
+    num_chunks_ = value;
+  }
+  void set_num_phantoms(uint64_t value) {
+    num_phantoms_ = value;
+  }
+  void set_ith_leader(uint64_t i, uint64_t x) {
+//    GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
+//    GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks()));
+    leaders_[i] = x;
+  }
 
   Mutex *mutable_inter_process_mutex() {
     return &inter_process_mutex_;
@@ -164,6 +211,9 @@ class DoubleArrayHeader {
   uint32_t entries_block_id_;
   uint32_t keys_block_id_;
   uint64_t root_node_id_;
+  uint64_t num_chunks_;
+  uint64_t num_phantoms_;
+  uint64_t leaders_[DOUBLE_ARRAY_MAX_CHUNK_LEVEL + 1];
   Mutex inter_process_mutex_;
 };
 
@@ -344,7 +394,7 @@ class DoubleArrayChunk {
     qwords_[1] = (qwords_[1] & ~UPPER_MASK) | (value << UPPER_SHIFT);
   }
 
-  // The chunk level indicates how easily nodes can be put in this block.
+  // The chunk level indicates how easily nodes can be put in this chunk.
   uint64_t level() const {
     // 10 bits.
     return (qwords_[0] & MIDDLE_MASK) >> MIDDLE_SHIFT;
@@ -361,7 +411,7 @@ class DoubleArrayChunk {
     qwords_[1] = (qwords_[1] & ~MIDDLE_MASK) | (value << MIDDLE_SHIFT);
   }
 
-  // The first phantom node and the number of phantom nodes in this block.
+  // The first phantom node and the number of phantom nodes in this chunk.
   uint64_t first_phantom() const {
     // 10 bits.
     return (qwords_[0] & LOWER_MASK) >> LOWER_SHIFT;
@@ -397,7 +447,7 @@ class DoubleArrayEntry {
   DoubleArrayEntry() : qword_(0) {}
 
   // This entry is associated with a key (true) or not (false).
-  bool is_valid() const {
+  explicit operator bool() const {
     return qword_ & IS_VALID_FLAG;
   }
 
@@ -435,6 +485,11 @@ class DoubleArrayKey {
  public:
   DoubleArrayKey(uint64_t id, const char *address, uint64_t length);
 
+  explicit operator bool() const {
+    // FIXME: Magic number.
+    return id() != DOUBLE_ARRAY_INVALID_ID;
+  }
+
   uint64_t id() const {
     return id_low_ | (static_cast<uint64_t>(id_high_) << 32);
   }
@@ -451,6 +506,12 @@ class DoubleArrayKey {
     return true;
   }
 
+  static const DoubleArrayKey &invalid_key() {
+    static const DoubleArrayKey invalid_key(
+        DOUBLE_ARRAY_INVALID_ID, nullptr, 0);
+    return invalid_key;
+  }
+
   static uint64_t estimate_size(uint64_t length) {
     return 2 + (length / sizeof(uint32_t));
   }
@@ -470,11 +531,22 @@ class DoubleArrayImpl {
   static std::unique_ptr<DoubleArrayImpl> open(io::Pool pool,
                                                uint32_t block_id);
 
-  bool search(const uint8_t *ptr, uint64_t length, uint64_t *key_offset);
+  bool search(const uint8_t *ptr, uint64_t length,
+              uint64_t *key_offset = nullptr);
+
+  // TODO
+  bool insert(const uint8_t *ptr, uint64_t length,
+              uint64_t *key_offset = nullptr);
 
   const DoubleArrayKey &get_key(uint64_t key_offset) {
     return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_offset]);
   }
+  const DoubleArrayKey &ith_key(uint64_t key_id) {
+    if (entries_[key_id]) {
+      return get_key(entries_[key_id].key_offset());
+    }
+    return DoubleArrayKey::invalid_key();
+  }
 
   uint32_t block_id() const {
     return block_info_->id();
@@ -503,6 +575,13 @@ class DoubleArrayImpl {
 
   bool search_leaf(const uint8_t *ptr, uint64_t length,
                    uint64_t &node_id, uint64_t &query_pos);
+
+  void reserve_node(uint64_t node_id);
+  void reserve_chunk(uint64_t chunk_id);
+
+  void update_chunk_level(uint64_t chunk_id, uint32_t level);
+  void set_chunk_level(uint64_t chunk_id, uint32_t level);
+  void unset_chunk_level(uint64_t chunk_id);
 };
 
 // TODO
-------------- next part --------------
HTML����������������������������...
Télécharger 



More information about the Groonga-commit mailing list
Back to archive index