The Implementation of A B Plus Tree

IOVI
2019-08-08 (Updated: 2019-08-08)
import java.util.*;

public class BPlusTree<K extends Comparable<? super K>, V> {

    public static void main(String[] args) {
        ArrayList<Integer> words = new ArrayList<>();

        for (int i = 0; i < 10000; i++) {
            words.add(i);
        }

        BPlusTree<Integer, Integer> tree = new BPlusTree<>();

        for (Integer word : words) {
            if (!tree.contains(word)) {
                tree.insert(word, 0);
            }

            tree.set(word, tree.search(word).intValue() + 1);
        }

        System.out.println(tree);

        System.out.println(tree.searchRange(10, RangePolicy.INCLUSIVE, 20, RangePolicy.INCLUSIVE));

        for (int i = 0; i < 10000; i++) {
            tree.delete(i);
        }

        System.out.println(tree);
    }

    // The branching factor used when none specified in constructor.
    private static final int DEFAULT_BRANCHING_FACTOR = 128;

    // The branching factor for the B+ tree, that measures the capacity of nodes
    // (i.e., the number of children nodes) for internal nodes in the tree.
    private int branchingFactor;

    // The root node of the B+ tree.
    private Node root;

    public BPlusTree() {
        this(DEFAULT_BRANCHING_FACTOR);
    }

    public BPlusTree(int branchingFactor) {
        this.branchingFactor = branchingFactor;

        // initially, root is a leaf node which is able to keep values. It will split when the
        // value list is full. After that the root node only serves as a lookup table and becomes
        // a internal node.
        root = new LeafNode();
    }

    public V search(K key) {
        return root.getValue(key);
    }

    public boolean contains(K key) {
        return root.getValue(key) != null;
    }

    public List<V> searchRange(K key1, RangePolicy policy1, K key2, RangePolicy policy2) {
        return root.getRange(key1, policy1, key2, policy2);
    }

    public void insert(K key, V value) {
        root.insertValue(key, value);
    }

    public void set(K key, V value) {
        root.insertValue(key, value);
    }

    public void delete(K key) {
        root.deleteValue(key);
    }

    enum RangePolicy {
        EXCLUSIVE, INCLUSIVE
    }

    private abstract class Node {
        List<K> keys;

        int keyNumber() {
            return keys.size();
        }

        abstract V getValue(K key);

        abstract void deleteValue(K key);

        abstract void insertValue(K key, V value);

        abstract K getFirstLeafKey();

        abstract List<V> getRange(K key1, RangePolicy policy1, K key2, RangePolicy policy2);

        abstract void merge(Node sibling);

        abstract Node split();

        abstract boolean isOverflow();

        abstract boolean isUnderflow();

        public String toString() {
            return keys.toString();
        }
    }

    private class LeafNode extends Node {
        List<V> values;
        LeafNode next;

        LeafNode() {
            keys = new ArrayList<>();
            values = new ArrayList<>();
        }

        @Override
        V getValue(K key) {
            int loc = Collections.binarySearch(keys, key);
            return loc >= 0 ? values.get(loc) : null;
        }

        @Override
        void deleteValue(K key) {
            int loc = Collections.binarySearch(keys, key);

            // not found, return directly.
            if (loc < 0) {
                return;
            }

            keys.remove(loc);
            values.remove(loc);
        }

        @Override
        void insertValue(K key, V value) {

            // Collections.binarySearch() returns the index of the search key if present or the insertion point if
            // not found.

            int loc = Collections.binarySearch(keys, key);

            // loc >= 0 node binding with the given key found, otherwise not found and -loc-1 is the insertion point.
            int valueIndex = loc >= 0 ? loc : -loc - 1;

            // key exists, update it
            if (loc >= 0) {
                values.set(valueIndex, value);
            }
            // key not exists, add a new node
            else {

                // For index node, children node is always 1 more than keys
                // But leaf nodes are aways of same size about keys and values.

                keys.add(valueIndex, key);
                values.add(valueIndex, value);
            }

            // This happens only when root node is still a leaf node, or the tree just consists of one level.
            if (this == root && root.isOverflow()) {
                Node sibling = split();

                IndexNode newRoot = new IndexNode();

                newRoot.keys.add(sibling.getFirstLeafKey());
                newRoot.children.add(this);
                newRoot.children.add(sibling);

                root = newRoot;
            }
        }

        @Override
        K getFirstLeafKey() {
            return keys.get(0);
        }

        @Override
        List<V> getRange(K key1, RangePolicy policy1, K key2, RangePolicy policy2) {
            List<V> result = new LinkedList<>();
            LeafNode node = this;

            while (node != null) {
                Iterator<K> kIt = node.keys.iterator();
                Iterator<V> vIt = node.values.iterator();

                while (kIt.hasNext()) {
                    K key = kIt.next();
                    V value = vIt.next();

                    int cmp1 = key.compareTo(key1);
                    int cmp2 = key.compareTo(key2);

                    // (key2 >= key && EXCLUSIVE) || (key2 > key && INCLUSIVE)
                    boolean condition1 = (policy2 == RangePolicy.EXCLUSIVE && cmp2 >= 0) || (policy2 == RangePolicy.INCLUSIVE && cmp2 > 0);

                    // key overstep the right boundary of the given range
                    if (condition1) {
                        return result;
                    }

                    // (key1 > key && EXCLUSIVE) || (key1 >= key && INCLUSIVE)
                    boolean condition2 = ((policy1 == RangePolicy.EXCLUSIVE && cmp1 > 0) || (policy1 == RangePolicy.INCLUSIVE && cmp1 >= 0));

                    // (key2 < key && EXCLUSIVE) || (key2 <= key && INCLUSIVE)
                    boolean condition3 = ((policy2 == RangePolicy.EXCLUSIVE && cmp2 < 0) || (policy2 == RangePolicy.INCLUSIVE && cmp2 <= 0));

                    // key locates within the given range
                    if (condition2 && condition3) {
                        result.add(value);
                    }
                }

                node = node.next;
            }

            return result;
        }

        @Override
        void merge(Node sibling) {

            // leaf nodes always merge leaf nodes, index nodes will never be passed in.

            LeafNode node = (LeafNode) sibling;

            // Don't be afraid of overflow issue, it will automatically split if necessary.

            keys.addAll(node.keys);
            values.addAll(node.values);

            next = node.next;
        }

        @Override
        Node split() {
            // create a new sibling, it would be much easier than transferring to an existing sibling.
            LeafNode sibling = new LeafNode();

            // move half of the keys to next sibling
            int from = (keyNumber() + 1) / 2;
            int to = keyNumber();

            sibling.keys.addAll(keys.subList(from, to));
            sibling.values.addAll(values.subList(from, to));

            keys.subList(from, to).clear();
            values.subList(from, to).clear();

            sibling.next = next;
            next = sibling;

            return sibling;
        }

        @Override
        boolean isOverflow() {
            return values.size() > branchingFactor - 1;
        }

        @Override
        boolean isUnderflow() {
            return values.size() < branchingFactor / 2;
        }
    }

    private class IndexNode extends Node {
        List<Node> children;

        IndexNode() {
            this.keys = new ArrayList<>();
            this.children = new ArrayList<>();
        }

        @Override
        V getValue(K key) {
            return getChild(key).getValue(key);
        }

        @Override
        void deleteValue(K key) {
            Node child = getChild(key);
            child.deleteValue(key);

            if (child.isUnderflow()) {
                Node childLeftSibling = getChildLeftSibling(key);
                Node childRightSibling = getChildRightSibling(key);

                // left merge child or child merge right
                Node left = childLeftSibling != null ? childLeftSibling : child;

                Node right = childLeftSibling != null ? child : childRightSibling;

                left.merge(right);

                deleteChild(right.getFirstLeafKey());

                if (left.isOverflow()) {
                    Node sibling = left.split();
                    insertChild(sibling.getFirstLeafKey(), sibling);
                }

                if (root.keyNumber() == 0) {
                    root = left;
                }
            }
        }

        @Override
        void insertValue(K key, V value) {
            Node child = getChild(key);

            // recursively descend until reach the left node at the bottom of the tree.
            child.insertValue(key, value);

            // overflow check is always conducted by the parent node because it enables
            // parent update the index easily..
            if (child.isOverflow()) {
                Node sibling = child.split();
                insertChild(sibling.getFirstLeafKey(), sibling);
            }

            // every time after modification, check out if the root node is too full.
            // the reason is that root node has no parent, hence the overflow is conducted
            // by itself.
            if (this == root && root.isOverflow()) {
                Node sibling = split();

                IndexNode newRoot = new IndexNode();

                newRoot.keys.add(sibling.getFirstLeafKey());

                newRoot.children.add(this);
                newRoot.children.add(sibling);

                root = newRoot;
            }
        }

        @Override
        K getFirstLeafKey() {
            return children.get(0).getFirstLeafKey();
        }

        @Override
        List<V> getRange(K key1, RangePolicy policy1, K key2, RangePolicy policy2) {
            return getChild(key1).getRange(key1, policy1, key2, policy2);
        }

        @Override
        void merge(Node sibling) {

            // Index nodes always merge index nodes, leaf nodes will never be passed in.

            IndexNode node = (IndexNode) sibling;

            keys.add(node.getFirstLeafKey());
            keys.addAll(node.keys);

            children.addAll(node.children);
        }

        @Override
        Node split() {
            int from = keyNumber() / 2 + 1, to = keyNumber();

            IndexNode sibling = new IndexNode();

            sibling.keys.addAll(keys.subList(from, to));
            sibling.children.addAll(children.subList(from, to + 1));

            keys.subList(from - 1, to).clear();
            children.subList(from, to + 1).clear();

            return sibling;
        }

        @Override
        boolean isOverflow() {
            return children.size() > branchingFactor;
        }

        @Override
        boolean isUnderflow() {
            return children.size() < (branchingFactor + 1) / 2;
        }

        Node getChild(K key) {
            int loc = Collections.binarySearch(keys, key);

            int childIndex = loc >= 0 ? loc + 1 : -loc - 1;

            return children.get(childIndex);
        }

        void deleteChild(K key) {
            int loc = Collections.binarySearch(keys, key);

            // 404, return directly
            if (loc < 0) {
                return;
            }

            keys.remove(loc);
            children.remove(loc + 1);
        }

        void insertChild(K key, Node child) {
            // child is the newly created node, key is the first node key of the child node
            int loc = Collections.binarySearch(keys, key);

            int childIndex = loc >= 0 ? loc + 1 : -loc - 1;

            // index already exists, update it
            if (loc >= 0) {
                children.set(childIndex, child);
            }
            // index not exits, create one as a reference to the child node
            else {

                // For index node, children node is always 1 more than keys
                // But leaf nodes are aways of same size about keys and values.

                keys.add(childIndex, key);
                children.add(childIndex + 1, child);
            }
        }

        Node getChildLeftSibling(K key) {

            // two children associated with a single index key, < the key is the left child, >= the key is the right child.

            // loc >= 0 means found, the position next to it is the insertion point.
            // loc < 0 means not found, -loc-1 is the insertion point
            int loc = Collections.binarySearch(keys, key);

            // 4 situations

            // 1. key in keys, then loc >= 0
            // 2. key < keys.get(0), then loc = -1
            // 3. key == keys.get(0), then loc = 0
            // 4. key in range (keys.get(0), keys.get(size-1)], then 1<=loc<=size when key found, or -size-1<=loc<=-2 if not found

            // key exits
            if (loc >= 0) {
                // children.get(loc)>= key and children.get(loc-1) < key, so it is the left sibling
                return children.get(loc - 1);
            }

            // keyIndex == 0 means key < keys.get(0)
            // keyIndex == 1 means keys.get(1) > key >= keys.get(0)

            // key not exits, but the insertion point is not the head of the key list
            if (loc < -1) {
                // -loc-1 is the insertion point which is larger than given key, so return the left sibling of children.get(-loc-1)
                return children.get(-loc - 2);
            }

            return null;
        }

        Node getChildRightSibling(K key) {

            // see also above getChildLeftSibling()

            int loc = Collections.binarySearch(keys, key);

            int childIndex = loc >= 0 ? loc + 1 : -loc - 1;

            // children.size = keys.size + 1

            // childrenIndex == key.size() means values binding with given key is in children.get(childrenIndex+1)
            // which is the right most node without any right sibling nodes.
            if (childIndex < keyNumber()) {
                return children.get(childIndex + 1);
            }

            return null;
        }
    }
}
References

https://github.com/jiaguofang/b-plus-tree