1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
//![![ci-tests](https://github.com/arindas/bheap/actions/workflows/ci-tests.yml/badge.svg)](https://github.com/arindas/bheap/actions/workflows/ci-tests.yml)
//![![rustdoc](https://github.com/arindas/bheap/actions/workflows/rustdoc.yml/badge.svg)](https://github.com/arindas/bheap/actions/workflows/rustdoc.yml)
//!
//!A generic binary max heap implementation for implementing a dynamically prioritizable priority queue.
//!
//!This implementation uses a vector as the underlying data-structure. Hence, there is no oppurtunity
//!for fine grained locking. Users of this crate are request to wrap `bheap::BinaryMaxHeap` with the
//!required concurrency primitive for use in multithreaded contexts.
//!
//!## Why is this necessary?
//!The binary heap implementation provided by the standard library (`use std::collections::binary_heap::BinaryHeap;`),
//!assumes that the ordering of the elements in the domain is fixed. I needed a binary heap implementation which allowed
//!for change in ordering of elements at runtime.
//!
//!## How does it work?
//!`bheap::BinaryMaxHeap` enforces the `Ord + bheap::Uid` trait bounds on the element type. The `Uid` trait, simply
//!presents a method for returing a unique `u64` uid for the type.
//!
//!The struct maintains a `Vec<T>` as the underlying storage buffer and a `HashMap<u64, usize>` for maintaining a
//!mapping from `T::uid()` to position in vector. This map is updated on every heap operation to remain consistent.
//!
//!When the ordering of an element changes, its position in the heap can be looked up in the heap using the
//!hashmap. Then, we `heapify_up()` or `heapify_down()` as required to restore heap property.
//!
//!## Limitations
//!Since, we use `u64` for uniquely identitfying elements, this heap can only scale up `2^64 = 18446744073709551616` elements.
//!This was more than enough for my purposes.

use std::{cmp::Ordering, collections::HashMap};

/// Trait to uniquely identify elements in bheap.
pub trait Uid {
    /// Unique identifier for the implementing struct. The same value must
    /// be returned in all invocations of this method on a given struct.
    fn uid(&self) -> u64;
}

/// A re-prioritizable binary max heap containing a buffer for storing elements
/// and a hashmap index for keeping track of element positions.
pub struct BinaryMaxHeap<T>
where
    T: Ord + Uid,
{
    /// in-memory storage for elements
    buffer: Vec<T>,

    /// mapping from element uids to positions in the heap buffer
    index: HashMap<u64, usize>,
}

impl<T> BinaryMaxHeap<T>
where
    T: Ord + Uid,
{
    /// Creates a new BinaryMaxHeap from a given vector, which may or may not be
    /// empty. If the vector already contains elements, the elements are
    /// re-arranged with a `build_heap()` operation.
    pub fn from_vec(buffer: Vec<T>) -> Self {
        let mut bheap = BinaryMaxHeap {
            buffer,
            index: HashMap::new(),
        };

        if !bheap.is_empty() {
            bheap.build_heap();
        }

        bheap
    }

    /// Creates an empty binary max heap with no elements.
    pub fn new() -> Self {
        BinaryMaxHeap::from_vec(vec![])
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    #[inline]
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Swaps elements at the given indices byt first swapping the elements
    /// in the buffer vector and next updating the `HashMap` index with
    /// new indices.
    #[inline]
    fn swap_elems_at_indices(&mut self, i: usize, j: usize) {
        let index = &mut self.index;

        index.insert(self.buffer[i].uid(), j);
        index.insert(self.buffer[j].uid(), i);

        self.buffer.swap(i, j);
    }

    /// Convenience method for comparing elements at the given indices.
    #[inline]
    fn cmp(&self, i: usize, j: usize) -> Ordering {
        self.buffer[i].cmp(&self.buffer[j])
    }

    /// Restores heap property by moving the element in the given index
    /// upwards along it's parents to the root, until it has no parents
    /// or it is <= to its parents.
    /// It operates in the following manner:
    /// ```text
    /// heapify_up(heap, i) {
    ///     while i > 0 {
    ///         let parent = (i - 1) / 2;
    ///         if heap[i] > heap[parent] {
    ///             swap(heap, i, parent)
    ///         } else { break; }
    ///     }
    /// }
    /// ```
    fn heapify_up(&mut self, idx: usize) -> Option<usize> {
        let mut i = idx;

        while i > 0 {
            let parent = (i - 1) / 2;

            if let Ordering::Greater = self.cmp(i, parent) {
                self.swap_elems_at_indices(i, parent);
                i = parent;
            } else {
                break;
            };
        }

        if i != idx {
            return Some(i);
        } else {
            return None;
        }
    }

    /// Restores heap property by moving the element at the given index,
    /// downwards along it's children, towards the leaves, until it
    /// has no children or it is >= to its children.
    /// It operates in the following manner:
    /// ```text
    /// heapify_dn(heap, i) {
    ///     while i < len(heap) / 2 {
    ///         let max = i;
    ///         let lc, rc = 2 * i + 1, 2 * i + 2;
    ///
    ///         if lc < len(heap) && heap[max] < lc { max = lc; }
    ///         if rc < len(heap) && heap[max] < rc { max = rc; }
    ///
    ///         if i != max { swap(heap, i, max); i = max; }
    ///         else { break; }
    ///     }
    /// }
    /// ```
    fn heapify_dn(&mut self, idx: usize) -> Option<usize> {
        let mut i = idx;

        while i < (self.len() / 2) {
            let mut max = i;
            let (lc, rc) = (2 * i + 1, 2 * i + 2);

            if lc < self.len() {
                if let Ordering::Less = self.cmp(max, lc) {
                    max = lc;
                }
            }

            if rc < self.len() {
                if let Ordering::Less = self.cmp(max, rc) {
                    max = rc;
                }
            }

            if i != max {
                self.swap_elems_at_indices(i, max);
                i = max;
            } else {
                break;
            }
        }

        if i != idx {
            return Some(i);
        } else {
            return None;
        }
    }

    /// Corrects the `HashMap` index for the given heap position,
    /// by updating the entry with the uid of the element at that
    /// position. The values stored is the index.
    /// Concisely:
    /// ```text
    /// index[buffer[i].uid()] = i
    /// ```
    #[inline]
    fn update_index(&mut self, i: usize) -> Option<usize> {
        if i >= self.len() {
            return None;
        }

        self.index.insert(self.buffer[i].uid(), i)
    }

    /// Returns a mutable reference to the element at the givven
    /// heap position, if present. This implementation assumes
    /// that no mutation used with respect to the returned
    /// mutable reference, modifies the uid() property for the
    /// element.
    pub fn get(&mut self, i: usize) -> Option<&mut T> {
        if i >= self.len() {
            return None;
        }

        Some(&mut self.buffer[i])
    }

    /// Pushes a new element in this priority queue.
    pub fn push(&mut self, elem: T) {
        let idx = self.buffer.len();

        self.buffer.push(elem);
        self.update_index(idx);

        self.heapify_up(idx);
    }

    /// Peeks at the element with highest priority, if present.
    pub fn peek(&self) -> Option<&T> {
        if self.is_empty() {
            return None;
        }

        Some(&self.buffer[0])
    }

    /// Pops the element with the highest property, if present.
    pub fn pop(&mut self) -> Option<T> {
        if self.is_empty() {
            return None;
        }

        let elem = self.buffer.swap_remove(0);
        self.index.remove(&elem.uid());

        self.update_index(0);
        self.heapify_dn(0);

        Some(elem)
    }

    /// Builds the `HashMap` index from uids to buffer positions.
    pub fn build_index(&mut self) {
        for i in 0..self.len() {
            self.update_index(i);
        }
    }

    /// Builds a heap from un-organized elements in the in-memory buffer.
    /// It operates as follows:
    /// ```text
    /// build_heap(heap) {
    ///     build_index(heap);
    ///
    ///     for i = len(heap) / 2; i >= 0; i-- {
    ///         heapify_dn(heap, i);
    ///     }
    /// }
    /// ````
    pub fn build_heap(&mut self) {
        self.build_index();

        for i in (0..(self.len() / 2)).rev() {
            self.heapify_dn(i);
        }
    }

    /// Restores heap property at the given position.
    pub fn restore_heap_property(&mut self, idx: usize) -> Option<usize> {
        if idx >= self.len() {
            return None;
        }

        self.heapify_up(idx).or(self.heapify_dn(idx))
    }

    /// Returns the position for element with given uid in the heap buffer.
    pub fn index_in_heap_from_uid(&self, uid: u64) -> Option<usize> {
        self.index.get(&uid).map(|&elem_idx| elem_idx)
    }

    /// Returns the position for element in the heap buffer.
    pub fn index_in_heap(&self, elem: &T) -> Option<usize> {
        self.index.get(&elem.uid()).map(|&elem_idx| elem_idx)
    }
}

#[cfg(test)]
mod tests {
    use crate::{BinaryMaxHeap, Uid};

    impl Uid for u32 {
        fn uid(&self) -> u64 {
            (*self).into()
        }
    }

    impl<T> BinaryMaxHeap<T>
    where
        T: Ord + Uid,
    {
        fn index_consistent(&self) -> bool {
            let mut result = true;
            let mut i = 0;

            for elem in &self.buffer {
                let elem_consistent = self.index_in_heap(elem).map_or(false, |idx| idx == i);

                result = result && elem_consistent;
                i += 1
            }

            result
        }
    }

    #[test]
    fn empty_binary_max_heap() {
        let mut heap = BinaryMaxHeap::<u32>::new();

        assert_eq!(heap.is_empty(), true);
        assert_eq!(heap.len(), 0);
        assert_eq!(heap.peek(), None);
        assert_eq!(heap.pop(), None);

        let mut heap = BinaryMaxHeap::from_vec(Vec::<u32>::with_capacity(10));

        assert_eq!(heap.is_empty(), true);
        assert_eq!(heap.len(), 0);
        assert_eq!(heap.peek(), None);
        assert_eq!(heap.pop(), None);
    }

    #[test]
    fn binary_max_heap_from_vec_with_elems() {
        let mut heap = BinaryMaxHeap::from_vec(vec![1, 7, 2, 5, 10, 9]);
        assert_eq!(heap.peek(), Some(&10));
        assert_eq!(heap.pop(), Some(10));

        assert_eq!(heap.peek(), Some(&9));
        assert_eq!(heap.pop(), Some(9));

        assert_eq!(heap.peek(), Some(&7));
        assert_eq!(heap.pop(), Some(7));

        assert_eq!(heap.peek(), Some(&5));
        assert_eq!(heap.pop(), Some(5));

        assert_eq!(heap.peek(), Some(&2));
        assert_eq!(heap.pop(), Some(2));

        assert_eq!(heap.peek(), Some(&1));
        assert_eq!(heap.pop(), Some(1));

        assert_eq!(heap.peek(), None);
        assert_eq!(heap.pop(), None);
    }

    #[test]
    fn push_peek_pop_index_correctness() {
        let mut heap = BinaryMaxHeap::<u32>::new();

        heap.push(1);
        assert_eq!(heap.peek(), Some(&1));
        assert!(heap.index_consistent());

        heap.push(7);
        assert_eq!(heap.peek(), Some(&7));
        assert!(heap.index_consistent());

        heap.push(2);
        assert_eq!(heap.peek(), Some(&7));
        assert!(heap.index_consistent());

        heap.push(5);
        assert_eq!(heap.peek(), Some(&7));
        assert!(heap.index_consistent());

        heap.push(10);
        assert_eq!(heap.peek(), Some(&10));
        assert!(heap.index_consistent());

        heap.push(9);
        assert_eq!(heap.peek(), Some(&10));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&10));
        assert_eq!(heap.pop(), Some(10));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&9));
        assert_eq!(heap.pop(), Some(9));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&7));
        assert_eq!(heap.pop(), Some(7));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&5));
        assert_eq!(heap.pop(), Some(5));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&2));
        assert_eq!(heap.pop(), Some(2));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), Some(&1));
        assert_eq!(heap.pop(), Some(1));
        assert!(heap.index_consistent());

        assert_eq!(heap.peek(), None);
        assert_eq!(heap.pop(), None);
        assert!(heap.index_consistent());
    }
}