places/db/tx/
coop_transaction.rs

Help
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This implements "cooperative transactions" for places. It relies on our
//! decision to have exactly 1 general purpose "writer" connection and exactly
//! one "sync writer" - ie, exactly 2 write connections.
//!
//! We'll describe the implementation and strategy, but note that most callers
//! should use `PlacesDb::begin_transaction()`, which will do the right thing
//! for your db type.
//!
//! The idea is that anything that uses the sync connection should use
//! `chunked_coop_trransaction`. Code using this should regularly call
//! `maybe_commit()`, which commits the transaction and starts a new one
//! once the current one has been open for a second.
//!
//! This means that in theory the other writable connection can start
//! transactions and should block for a max of 1 second - well under the 5
//! seconds before that other writer will fail with a SQLITE_BUSY or similar error.
//!
//! However, in practice we see the writer thread being starved - even though
//! it's waiting for a lock, the sync thread still manages to re-get the lock.
//! In other words, the locks used by sqlite aren't "fair".
//!
//! So we mitigate this with a simple approach that works fine within our
//! "exactly 2 writers" constraints:
//! * Both write connections share a single mutex.
//! * Before starting a transaction, each connection locks the mutex.
//! * It then starts an "immediate" transaction - because sqlite now holds a
//!   lock on our behalf, we release the lock on the mutex.
//!
//! In other words, the lock is held only while obtaining the DB lock, then
//! immediately released.
//!
//! The end result here is that if either connection is waiting for the
//! database lock because the other already holds it, the waiting one is
//! guaranteed to get the database lock next.
//!
//! One additional wrinkle here is that even if there was exactly one writer,
//! there's still a possibility of SQLITE_BUSY if the database is being
//! checkpointed. So we handle that case and perform exactly 1 retry.

use crate::api::places_api::ConnectionType;
use crate::db::PlacesDb;
use crate::error::*;
use parking_lot::Mutex;
use rusqlite::{Connection, TransactionBehavior};
use sql_support::{ConnExt, UncheckedTransaction};
use std::ops::Deref;
use std::time::{Duration, Instant};

impl PlacesDb {
    /// Starts a `ChunkedCoopTransaction` on this connection, using a
    /// one-second chunk length.
    ///
    /// # Panics
    ///
    /// Panics if this connection is not the `ConnectionType::Sync`
    /// connection - see the module docs for the reasoning.
    pub(super) fn chunked_coop_trransaction(&self) -> Result<ChunkedCoopTransaction<'_>> {
        // Restricting this to the sync connection is not needed for
        // correctness; it simply matches everything we expose today and plan
        // to expose. It can be relaxed if a write connection ever has a real
        // need for a chunked transaction.
        assert_eq!(
            self.conn_type(),
            ConnectionType::Sync,
            "chunked_coop_trransaction must only be called by the Sync connection"
        );
        // The chunk length is deliberately not a parameter: it is closely
        // tied to the timeouts configured on the database itself.
        ChunkedCoopTransaction::new(self.conn(), Duration::from_secs(1), &self.coop_tx_lock)
    }

    /// Starts a "coop" transaction on this connection.
    ///
    /// # Panics
    ///
    /// Panics if this connection is not the `ConnectionType::ReadWrite`
    /// connection - see the module docs for the reasoning.
    pub(super) fn coop_transaction(&self) -> Result<UncheckedTransaction<'_>> {
        assert_eq!(
            self.conn_type(),
            ConnectionType::ReadWrite,
            "coop_transaction must only be called on the ReadWrite connection"
        );
        // The shared mutex is held only while the transaction is being
        // started; it is released again before we return to the caller.
        let guard = self.coop_tx_lock.lock();
        let tx = get_tx_with_retry_on_locked(self.conn());
        drop(guard);
        tx
    }
}

/// This transaction is suitable for when a transaction is used purely for
/// performance reasons rather than for data-integrity reasons, or when it's
/// used for integrity but held longer than strictly necessary for performance
/// reasons (ie, when it could be multiple transactions and still guarantee
/// integrity.) Examples of this might be for performance when updating a larger
/// number of rows, but data integrity concerns could be addressed by using
/// multiple, smaller transactions.
///
/// You should regularly call .maybe_commit() as part of your
/// processing, and if the current transaction has been open for greater than
/// some duration the transaction will be committed and another one
/// started. You should always call .commit() at the end. Note that
/// .rollback() only rolls back the transaction that is currently open, so
/// anything already committed by an earlier .maybe_commit() stays committed
/// and it will be very difficult to work out what was previously committed
/// and therefore what was rolled back - if you need to reliably roll-back,
/// this probably isn't what you should be using. Note that SQLite might
/// rollback for its own reasons though.
///
/// Note that this can still be used for transactions which ensure data
/// integrity. For example, if you are processing a large group of items, and
/// each individual item requires multiple updates, you will probably want to
/// ensure you call .maybe_commit() after every item rather than after
/// each individual database update.
pub struct ChunkedCoopTransaction<'conn> {
    // The transaction for the current chunk; replaced with a fresh one each
    // time an intermediate commit happens.
    tx: UncheckedTransaction<'conn>,
    // How long a chunk may stay open before `should_commit()` returns true.
    commit_after: Duration,
    // Mutex that is locked while a new transaction is being started, both in
    // `new()` and after each intermediate commit.
    coop: &'conn Mutex<()>,
}

impl<'conn> ChunkedCoopTransaction<'conn> {
    /// Opens the first chunk of a transaction that may later be split into
    /// several transactions for performance reasons.
    ///
    /// Nesting is not supported. This type does not check for it, but SQLite
    /// itself will reject it; use a rusqlite `savepoint` if you need nested
    /// transactions.
    pub fn new(
        conn: &'conn Connection,
        commit_after: Duration,
        coop: &'conn Mutex<()>,
    ) -> Result<Self> {
        // Held until this function returns, ie, until the attempt to start
        // the transaction has either succeeded or failed.
        let _guard = coop.lock();
        get_tx_with_retry_on_locked(conn).map(|tx| Self {
            tx,
            commit_after,
            coop,
        })
    }

    /// Reports whether the current transaction has been open for at least
    /// the requested time and so is due to be committed.
    ///
    /// `maybe_commit()` performs this check itself, so most callers never
    /// need it. It exists for consumers that must run extra logic just before
    /// a commit, such as cleaning up temp tables.
    ///
    /// When this returns `true`, `maybe_commit()` is guaranteed to commit
    /// the transaction.
    #[inline]
    pub fn should_commit(&self) -> bool {
        let open_for = self.tx.started_at.elapsed();
        open_for >= self.commit_after
    }

    /// Commits the current transaction and opens another one, but only if
    /// the current one has been held for longer than the requested time.
    /// Otherwise does nothing.
    #[inline]
    pub fn maybe_commit(&mut self) -> Result<()> {
        if !self.should_commit() {
            return Ok(());
        }
        log::debug!("ChunkedCoopTransaction committing after taking allocated time");
        self.commit_and_start_new_tx()
    }

    /// Commits the current chunk and replaces `self.tx` with a newly started
    /// transaction on the same connection.
    fn commit_and_start_new_tx(&mut self) -> Result<()> {
        // `self.tx.commit()` is not usable here because it consumes
        // `self.tx`, and the replacement can't be created up front because
        // that would start a transaction while the current one is still in
        // progress. So mark the current one as finished by hand and issue
        // the COMMIT ourselves.
        self.tx.finished = true;
        self.tx.execute_batch("COMMIT")?;
        // Take the lock shared with our cooperator - if the only other
        // writer thread holds a write lock we block until it is released.
        // SQLite may still report a locked error while the database is being
        // checkpointed, so exactly 1 retry is still performed, and it is done
        // with the lock held because we don't want the other write
        // connection to win that race either.
        let _guard = self.coop.lock();
        let conn = self.tx.conn;
        self.tx = get_tx_with_retry_on_locked(conn)?;
        Ok(())
    }

    /// Consumes the ChunkedCoopTransaction and commits its current
    /// transaction.
    pub fn commit(self) -> Result<()> {
        Ok(self.tx.commit()?)
    }

    /// Consumes the ChunkedCoopTransaction and rolls back its current
    /// transaction - which potentially only goes back as far as the last
    /// `maybe_commit`.
    pub fn rollback(self) -> Result<()> {
        Ok(self.tx.rollback()?)
    }
}

// Lets a ChunkedCoopTransaction be used wherever a `&Connection` is expected.
impl Deref for ChunkedCoopTransaction<'_> {
    type Target = Connection;

    /// Returns the connection the current transaction was started on.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.tx.conn
    }
}

impl ConnExt for ChunkedCoopTransaction<'_> {
    /// Returns the connection the current transaction was started on (the
    /// same one exposed through `Deref`).
    #[inline]
    fn conn(&self) -> &Connection {
        self.tx.conn
    }
}

/// Starts an `Immediate` transaction on `conn`, retrying exactly once if the
/// first attempt fails with a "busy" or "locked" error.
///
/// Any other error from the first attempt, and any error at all from the
/// retry, is returned to the caller.
fn get_tx_with_retry_on_locked(conn: &Connection) -> Result<UncheckedTransaction<'_>> {
    let behavior = TransactionBehavior::Immediate;
    match UncheckedTransaction::new(conn, behavior) {
        Ok(tx) => Ok(tx),
        Err(rusqlite::Error::SqliteFailure(err, _))
            if matches!(
                err.code,
                rusqlite::ErrorCode::DatabaseBusy | rusqlite::ErrorCode::DatabaseLocked
            ) =>
        {
            // Retry the lock - we assume that this lock request still
            // blocks for the default period, so we don't need to sleep
            // etc.
            let started_at = Instant::now();
            // An immediate transaction asks for the write lock, so the
            // message names that lock rather than a read lock.
            log::warn!("Attempting to get a write lock failed - doing one retry");
            let tx = UncheckedTransaction::new(conn, behavior).inspect_err(|_err| {
                log::warn!("Retrying the lock failed after {:?}", started_at.elapsed());
            })?;
            log::info!("Retrying the lock worked after {:?}", started_at.elapsed());
            Ok(tx)
        }
        // Not a contention error: hand it straight back, converted to our
        // error type.
        Err(e) => Err(e.into()),
    }
}
places/db/tx/coop_transaction.rs

places/db/tx/
coop_transaction.rs