HN6Z5DU4WYMAIOOSNVHLIIMNF6Q53TNJ7YC27SLKWNXVYCTACQKQC
RV2L6CZWTMUQ2A52YDAFVHDFGURZL3H4SSCDC347UGN23D3J5KZQC
OP6SVMOD2GTQ7VNJ4E5KYFG4MIYA7HBMXJTADALMZH4PY7OQRMZQC
WS4ZQM4RMIHZ6XZKSDQJGHN5SSSWFL4H236USOPUA33S6RC53RFAC
G4JEQLLX6Q7VVFVAEJZAVQXX33MQ36CSCYSMJ5NQM5VZ76DXKU6QC
XEU2QVLCHPYOOD4TQIPEEVYOVSFMKFPLJYWEJYXYJAZ7S54KWDZAC
73Z2UB3JGRLFNFORE7D64O4IHIFSZASD4G4FLJ4FJLHANT75MGIAC
OFINGD26ZWCRDVVDI2ZIBLMHXKEMJA6MRNLANJYUHQPIJLPA7J2AC
H3FVSQIQGFCFKCPXVOSFHP4OSUOBBURJESCZNGQTNDAAD3WQSBEQC
LROAI3NBBSCU4T2YA6EHJYKKKL75AU5A7C7WIRCGIQ56S6HPLRXQC
6UVFCERMGSGNRWCVC3GWO5HWV6MSWE433DXBJVC7KRPP6LLJLCSQC
Q7DRIBBRE4MNG4NP3PVIXAJF5PQYLFWYIVK2O4VVLEO6XY3BOSFQC
AOX2XQISHGWNNAFBYRN44Q6AWG7H5DPBK5YMFHK42HQNZ2TMHEJQC
QEUTVAZ4F4EJXRDMWDMYXF6XEDMX7YPVG4IIXEKPIR3K54E5W5OAC
DV4A2LR7Q5LAEGAQHLO34PZCHGJUHPAMRZFGT7GUFNKVQKPJNOYQC
OTWDDJE7TTE73D6BGF4ZN6BH2NFUFLPME2VJ3CPALH463UGWLEIQC
APPY2E7M5NHNC6MFYXSVEKJVAILK7YAZVTVE3W75EK2JNFVS3XBQC
6DCQHIFPEH4GZKSRRS32GMKDRPZH4MTCGOUEI7YEUVKWENBF3JWAC
KX3WVNZW5KHVEH6EOQTZ4RBEFFJ3SGF5I467X3JWZ74PURRK4HVAC
UUUVNC4DWEEL7WV5IRPKPZ6HZMYCPA53XM7LJWICUD4E6GN37IRQC
NXMFNPZ7VWJRLC3M5QJJVTICXCMGE24F3HVIZA7A7RLVMLQMLDVQC
ONES3V466GLO5CXKRF5ENK7VFOQPWM3YXLVRGWB56V5SH3W7XNBQC
KMT3MF5NLEQIPZLHCRYDGQ5EA46HJCG3C2ANEPMZGKGHDK77ADPAC
T73WR2BX2QDQ6APOREBXUKNH52FDLJNBGWPQUYB2TAF2PT7XCL2AC
X3QVVQIS7B7L3XYZAWL3OOBUXOJ6RMOKQ45YMLLGAHYPEEKZ45ZAC
EAAYH6BQWDK52EC5RG3BEZQU3FJPN5RRRN4U5KDKDVPKXBVJMNDAC
W26CFMAQOXMUK4ZOJMAN4SMBXMWFQHO7HCTEVW73FQSRMJZFGJIQC
WAKPPBKONQUA3G7HWH52ZKYG5PLZEAG3HFAYGIYLA4NVEPRZUQEAC
YWFYZNLZ5JHLIFVBRKZK4TSWVPROUPRG77ZB5M7UHT2OKPL4ZSRQC
FMN7X4J24EYPOJNBUWM4NKGWSJTRV2DHCIBMPV2AXLZVVAMNOBKQC
PXF3R6SVXJXN2NMLMWNY5OFV5QYVE2VZTLGIZDZVK5ZVLFTVSSWQC
KM3JAFGPFV7MP7M2LJIYRVAUTU646B3IRXADTRZKOU2RF7LUB62QC
UAQX27N4PI4LHEW6LSHJETIE5MV7JTEMPLTJFYUBMYVPC43H7VOAC
EYNN7RLSFVBWDLRTLNNFUAF46Q6OX3BR5SUEJIOOHBSNP7FVBXGAC
//! This crate defines tools to implement datastructures that can live
//! in main memory or on-disk, meaning that their natural habitat is
//! memory-mapped files, but if that environment is threatened, might
//! seek refuge in lower-level environments.
//!
//! One core building block of this library is the notion of virtual
//! memory pages, which are allocated and freed by an
//! externally-provided allocator (see how the `sanakirja` crate does
//! this). The particular implementation used here is meant to allow a
//! transactional system with readers reading the structures
//! concurrently with one writer at a time.
//!
//! At the moment, only B trees are implemented, as well as three
//! important traits:
//!
//! - [`Representable`] is the trait of types that can be written to
//! disk and read from the disk.
//! - [`LoadPage`] is a trait used to get a pointer to a page. In the
//! most basic version, this may just return a pointer to the file,
//! offset by the requested offset. In more sophisticated versions,
//! this can be used to encrypt and compress pages.
//! - [`AllocPage`] allocates and frees pages, because as
//! datastructures need to be persisted on disk, we can't rely on
//! Rust's memory management to do it for us. Users of this crate
//! don't have to worry about this though.
// Copyright 2015 Pierre-Étienne Meunier and Florent Becker. See the
// COPYRIGHT file at the top-level directory of this distribution and
// at http://pijul.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
pub(super) fn new(p: crate::Page, cur: isize) -> Self {
let hdr = header(p);
pub(super) fn new(p: &crate::CowPage, cur: isize) -> Self {
let hdr = unsafe { &*(p.data as *const Header) };
let b0 = if Self::is_dirty(m.modified.page.as_page()) {
1
} else {
0
};
let b1 = if Self::is_dirty(m.other.as_page()) {
1
} else {
0
};
let b0 = if m.modified.page.is_dirty() { 1 } else { 0 };
let b1 = if m.other.is_dirty() { 1 } else { 0 };
Ordering::Less => n += 1,
Ordering::Greater => return Err(n),
Ordering::Equal => {
if let Some(v0) = v0 {
match sm.v.compare(txn, v0) {
Ordering::Less => n += 1,
Ordering::Greater => return Err(n),
Ordering::Equal => return Ok(n),
}
} else {
return Ok(n);
}
}
}
}
Err(n)
}
unsafe fn cmp<T: LoadPage, K: Representable, V: Representable>(
txn: &T,
k0: &K,
v0: Option<&V>,
p: &[u8; 4096],
off: u64,
) -> Ordering {
let off = u64::from_le(off) & 0xfff;
let (k, v) = read::<T, K, V>(txn, p.as_ptr().offset(off as isize & 0xfff));
let k = K::from_raw_ptr(txn, k);
match k.compare(txn, k0) {
Ordering::Equal => {
if let Some(v0) = v0 {
let v = V::from_raw_ptr(txn, v);
v.compare(txn, v0)
} else {
Ordering::Equal
}
}
o => o,
}
}
unsafe fn internal_search<T: LoadPage, K: Representable, V: Representable>(
txn: &T,
k0: &K,
v0: Option<&V>,
s: &[u64],
p: &[u8; 4096],
) -> Result<usize, usize> {
let mut a = 0;
let mut b = s.len();
for _ in 0..4 {
let mid = (a + b) / 2;
match cmp(txn, k0, v0, p, s[mid]) {
Ordering::Less => a = mid,
Ordering::Greater => b = mid,
Ordering::Equal => b = mid + 1,
}
}
let mut n = a;
for &off in &s[a..b] {
match cmp(txn, k0, v0, p, off) {
if let Some(v0) = v0 {
s.binary_search_by(|tup| match tup.k.compare(txn, &k0) {
Ordering::Equal => tup.v.compare(txn, &v0),
c => c,
})
} else {
// leaf_binary_search(txn, k0, s)
leaf_linear_search(txn, k0, s)
}
leaf_linear_search(txn, k0, v0, s)
if let Some(v0) = v0 {
s.binary_search_by(|&off| {
let off = u64::from_le(off) & 0xfff;
let (k, v) = read::<T, K, V>(txn, page.data.as_ptr().offset(off as isize & 0xfff));
let k = K::from_raw_ptr(txn, k);
match k.compare(txn, k0) {
Ordering::Equal => {
let v = V::from_raw_ptr(txn, v);
v.compare(txn, v0)
}
cmp => cmp,
}
})
} else {
s.binary_search_by(|&off| {
let off = u64::from_le(off) & 0xfff;
let (k, _) = read::<T, K, V>(txn, page.data.as_ptr().offset(off as isize & 0xfff));
let k = K::from_raw_ptr(txn, k);
k.compare(txn, k0)
})
}
internal_search(txn, k0, v0, s, page.data)
let mut stack: [MaybeUninit<cursor::PageCursor<K, V, P>>; cursor::N_CURSORS] =
[MaybeUninit::uninit(); cursor::N_CURSORS];
let mut stack: [MaybeUninit<cursor::PageCursor<K, V, P>>; cursor::N_CURSORS] = [
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
MaybeUninit::uninit(),
];
// Copyright 2015 Pierre-Étienne Meunier and Florent Becker. See the
// COPYRIGHT file at the top-level directory of this distribution and
// at http://pijul.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
find_min(txn, cursor)?;
let cur = cursor.current();
let mut left_page = P::right_child(cur.page.as_page(), &cur.cursor);
while left_page > 0 {
let page = txn.load_page(left_page)?;
let curs = P::cursor_first(&page);
left_page = P::left_child(page.as_page(), &curs);
cursor.push(PageCursor { page, cursor: curs });
}
// Update the reference counts, potentially freeing the page
// at the current level.
modify_rc(txn, &last_op, cursor.pointer(), cursor.first_rc_level)?;
// If the modified page is shared with another tree, we will
// clone it in the next round. Therefore, update the reference
// counts of all the references in the modified page.
if cursor.pointer() >= cursor.first_rc_level {
modify_rc(txn, &last_op)?;
}
update_root(
txn,
db,
last_op,
k0v0,
cursor.first_rc_level == 0,
&mut free,
)?;
let root_is_shared = cursor.first_rc_level == 0;
update_root(txn, db, last_op, k0v0, root_is_shared, &mut free)?;
// Finally, free all the freed pages, now that we don't need to
// read them anymore.
for (n, p) in free.iter().enumerate() {
if p[0] > 0 || p[1] > 0 {
debug!("freeing at level {:?}: {:?}", n, p);
}
// Finally, free all the pages marked as free during the deletion,
// now that we don't need to read them anymore.
for p in free.iter() {
/// Follow the leftmost pages of each page until the leftmost leaf of
/// the subtree starting at `cursor.pointer()`, and set `cursor` to
/// point to the leftmost element of that subtree.
fn find_min<
T: AllocPage + LoadPage,
K: Representable + ?Sized,
V: Representable + ?Sized,
P: BTreeMutPage<K, V> + core::fmt::Debug,
>(
txn: &mut T,
cursor: &mut Cursor<K, V, P>,
) -> Result<(), T::Error> {
let cur = cursor.current();
let mut left_page = P::right_child(cur.page.as_page(), &cur.cursor);
while left_page > 0 {
let page = txn.load_page(left_page)?;
let curs = P::cursor_first(page.as_page());
left_page = P::left_child(page.as_page(), &curs);
cursor.push(PageCursor { page, cursor: curs });
}
Ok(())
}
// In this case, we need to save the replacement. If the
// replacement has references, we will increment them later,
// when updating the page where the deletion happens.
// In this case, we need to save the replacement, and if this
// leaf is shared with another tree, we also need increment
// the replacement's references, since we are copying it.
// The middle element comes from the page above this
// concatenation, and hence has the same lifetime as that
// page. However, our choice of lifetimes can't express
// this, but we also know that we are not going to mutate
// the parent before rebalancing or merging the
// concatenation, and hence this pointer will be alive
// during these operations (i.e. during the merge or
// rebalance).
if p >= first_rc_level {
while let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c0) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
}
if left != 0 {
txn.incr_rc(left)?;
}
left = r;
while let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c0) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
// The insertions are taken care of in `handle_merge` above,
// so we can directly move to the `c1` part of the
// modification.
if m.skip_first {
P::move_next(&mut c1);
if left != 0 {
txn.incr_rc(left)?;
// If we are not updating the left child of c1's first
// element, increment that left child.
if m.l == 0 {
if left != 0 {
txn.incr_rc(left)?;
}
left = r;
}
// The insertions are taken care of in `handle_merge` above,
// so we can directly move to the `c1` part of the
// modification.
if m.skip_first {
P::move_next(&mut c1);
}
// If we are not updating the left child of c1's first
// element, increment that left child.
if m.l == 0 {
if left != 0 {
txn.incr_rc(left)?;
// If we are not updating the right child of the first
// element of `c1`, increment that right child's RC.
if let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c1) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
}
if m.r == 0 && r != 0 {
txn.incr_rc(r)?;
}
// If we are not updating the right child of the first
// element of `c1`, increment that right child's RC.
if let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c1) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
// Finally, increment the RCs of all other elements in `c1`.
while let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c1) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
}
if r != 0 {
txn.incr_rc(r)?;
}
if m.r == 0 && r != 0 {
txn.incr_rc(r)?;
}
}
// Finally, increment the RCs of all other elements in `c1`.
while let Some((k, v, r)) = P::next(txn, m.page.as_page(), &mut c1) {
for o in k.page_offsets().chain(v.page_offsets()) {
txn.incr_rc(o)?;
}
if r != 0 {
txn.incr_rc(r)?;
// We don't do this if the table is empty, in order to be
// consistent with put and drop.
debug!("single child {:?} {:?}", d, is_rc);
// We don't do this if the table is empty (i.e. if `d == 0`),
// in order to be consistent with put and drop.
if freed > 0 {
if P::is_dirty(last_op.page.as_page()) {
txn.decr_rc_owned(freed)?;
} else {
txn.decr_rc(freed)?;
}
}
free[0][0] = freed;
// If the root isn't shared with another tree, and the
// split key is different from the replacement, increment
// the RCs of the references contained in the split
// key/value.
//
// The replacement need not be incremented again, since it
// was already increment when we took it from the page in
// `leaf_delete`.
fn single_child<K: Representable+?Sized, V: Representable+?Sized, P: BTreeMutPage<K, V>>(m: &ModifiedPage<K, V, P>) -> Option<u64> {
fn single_child<K: Representable + ?Sized, V: Representable + ?Sized, P: BTreeMutPage<K, V>>(
m: &ModifiedPage<K, V, P>,
) -> Option<u64> {
}
impl<K: Representable + ?Sized, V: Representable + ?Sized, P: BTreePage<K, V>> Clone
for PageCursor<K, V, P>
{
fn clone(&self) -> Self {
PageCursor {
page: self.page,
cursor: self.cursor.clone(),
}
}
}
impl<K: Representable + ?Sized, V: Representable + ?Sized, P: BTreePage<K, V>> Copy
for PageCursor<K, V, P>
{
let mut stack = [core::mem::MaybeUninit::uninit(); N_CURSORS];
let mut stack = [
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
core::mem::MaybeUninit::uninit(),
];
let page = current.page;
if self.first_rc_level >= N_CURSORS && txn.rc(page.offset)? >= 2 {
// let page = current.page;
if self.first_rc_level >= N_CURSORS && txn.rc(current.page.offset)? >= 2 {