From 89ba4195bc8bb8c3c0ab2b72fafd5526a1e504f3 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 23 Dec 2022 17:08:24 +0100 Subject: [PATCH 1/4] rewrite consistency check & make it execute fixes --- Cargo.lock | 1 + Cargo.toml | 3 +- src/bin/cratesfyi.rs | 2 +- src/build_queue.rs | 2 +- src/db/mod.rs | 2 +- src/index/mod.rs | 7 +- src/utils/consistency/data.rs | 53 ++---- src/utils/consistency/db.rs | 150 ++++++++++------ src/utils/consistency/diff.rs | 275 +++++++++++++++++++--------- src/utils/consistency/index.rs | 25 ++- src/utils/consistency/mod.rs | 317 +++++++++++++++++++++++++++++---- 11 files changed, 602 insertions(+), 235 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3835f583f..db0f52958 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1444,6 +1444,7 @@ dependencies = [ "httpdate", "hyper", "indoc", + "itertools", "kuchiki", "log", "lol_html", diff --git a/Cargo.toml b/Cargo.toml index 488524dd7..7842feb35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ exclude = [ ] [features] -consistency_check = ["crates-index", "rayon"] +consistency_check = ["crates-index", "rayon", "itertools"] [dependencies] sentry = "0.29.0" @@ -67,6 +67,7 @@ zip = {version = "0.6.2", default-features = false, features = ["bzip2"]} bzip2 = "0.4.4" serde_cbor = "0.11.1" getrandom = "0.2.1" +itertools = { version = "0.10.5", optional = true} # Async tokio = { version = "1.0", features = ["rt-multi-thread", "signal", "macros"] } diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index ac5482ccf..76632affd 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -475,7 +475,7 @@ impl DatabaseSubcommand { #[cfg(feature = "consistency_check")] Self::Synchronize { dry_run } => { - docs_rs::utils::consistency::run_check(&mut *ctx.conn()?, &*ctx.index()?, dry_run)?; + docs_rs::utils::consistency::run_check(&ctx, dry_run)?; } } Ok(()) diff --git a/src/build_queue.rs b/src/build_queue.rs index a295f7854..8035772d6 100644 --- a/src/build_queue.rs +++ b/src/build_queue.rs @@ -372,7 +372,7 @@ impl BuildQueue { Ok(crates_added) } - fn set_yanked(&self, conn: &mut postgres::Client, name: &str, version: &str, yanked: bool) { + pub fn set_yanked(&self, conn: &mut postgres::Client, name: &str, version: &str, yanked: bool) { let activity = if yanked { "yanked" } else { "unyanked" }; match conn diff --git a/src/db/mod.rs b/src/db/mod.rs index 27645d1d0..6265ead5e 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -11,7 +11,7 @@ pub use self::pool::{Pool, PoolClient, PoolError}; mod add_package; pub mod blacklist; -mod delete; +pub mod delete; pub(crate) mod file; mod migrate; mod pool; diff --git a/src/index/mod.rs b/src/index/mod.rs index c98b4917a..640b999c5 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -89,12 +89,7 @@ impl Index { #[cfg(feature = "consistency_check")] pub(crate) fn crates(&self) -> Result { - use tracing::debug; - // First ensure the index is up to date, peeking will pull the latest changes without - // affecting anything else. - debug!("Updating index"); - self.diff()?.peek_changes()?; - debug!("Opening with `crates_index`"); + tracing::debug!("Opening with `crates_index`"); // crates_index requires the repo url to match the existing origin or it tries to reinitialize the repo let repo_url = self .repository_url diff --git a/src/utils/consistency/data.rs b/src/utils/consistency/data.rs index 30dcb48c8..2c1e08d06 100644 --- a/src/utils/consistency/data.rs +++ b/src/utils/consistency/data.rs @@ -1,48 +1,15 @@ -use std::{ - cmp::PartialEq, - collections::BTreeMap, - fmt::{self, Debug, Display, Formatter}, -}; - -#[derive(Default, Debug)] -pub(crate) struct Data { - pub(crate) crates: BTreeMap, -} - -#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] -pub(crate) struct CrateName(pub(crate) String); - -#[derive(Default, Debug)] -pub(crate) struct Crate { - pub(crate) releases: BTreeMap, -} - -#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Default, Debug)] -pub(crate) struct Version(pub(crate) String); - -#[derive(Default, Debug)] -pub(crate) struct Release {} - -impl PartialEq for CrateName { - fn eq(&self, other: &String) -> bool { - self.0 == *other - } +#[derive(Clone, PartialEq, Debug)] +pub(super) struct Crate { + pub(super) name: String, + pub(super) releases: Releases, } -impl PartialEq for Version { - fn eq(&self, other: &String) -> bool { - self.0 == *other - } -} +pub(super) type Crates = Vec; -impl Display for CrateName { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - Display::fmt(&self.0, f) - } -} +pub(super) type Releases = Vec; -impl Display for Version { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - Display::fmt(&self.0, f) - } +#[derive(Clone, Debug, PartialEq)] +pub(super) struct Release { + pub(super) version: String, + pub(super) yanked: Option, } diff --git a/src/utils/consistency/db.rs b/src/utils/consistency/db.rs index 1df11ced4..d72e10844 100644 --- a/src/utils/consistency/db.rs +++ b/src/utils/consistency/db.rs @@ -1,60 +1,108 @@ -use super::data::{Crate, CrateName, Data, Release, Version}; -use std::collections::BTreeMap; - -pub(crate) fn load(conn: &mut postgres::Client) -> Result { - let rows = conn.query( - " - SELECT - crates.name, - releases.version - FROM crates - INNER JOIN releases ON releases.crate_id = crates.id - ORDER BY crates.id, releases.id - ", - &[], +use super::data::{Crate, Crates, Release, Releases}; +use crate::Config; +use anyhow::Result; +use itertools::Itertools; +use postgres::fallible_iterator::FallibleIterator; +use std::iter; + +pub(super) fn load(conn: &mut postgres::Client, config: &Config) -> Result { + let rows = conn.query_raw( + "SELECT name, version, yanked + FROM ( + SELECT + crates.name, + releases.version, + releases.yanked + FROM crates + INNER JOIN releases ON releases.crate_id = crates.id + UNION ALL + -- crates & releases that are already queued + -- don't have to be requeued. + SELECT queue.name, queue.version, NULL as yanked + FROM queue + LEFT OUTER JOIN crates ON crates.name = queue.name + LEFT OUTER JOIN releases ON ( + releases.crate_id = crates.id AND + releases.version = queue.version + ) + WHERE queue.attempt < $1 AND ( + crates.id IS NULL OR + releases.id IS NULL + ) + ) AS inp + ORDER BY name, version", + iter::once(config.build_attempts as i32), )?; - let mut data = Data { - crates: BTreeMap::new(), - }; + let mut crates = Crates::new(); - let mut rows = rows.iter(); + for (crate_name, release_rows) in &rows + // `rows` is a `FallibleIterator` which needs to be converted before + // we can use Itertools.group_by on it. + .iterator() + .map(|row| row.expect("error fetching rows")) + .into_iter() + .group_by(|row| row.get("name")) + { + let releases: Releases = release_rows + .map(|row| Release { + version: row.get("version"), + yanked: row.get("yanked"), + }) + .collect(); - struct Current { - name: CrateName, - krate: Crate, + crates.push(Crate { + name: crate_name, + releases, + }); } - let mut current = if let Some(row) = rows.next() { - Current { - name: CrateName(row.get("name")), - krate: Crate { - releases: { - let mut releases = BTreeMap::new(); - releases.insert(Version(row.get("version")), Release {}); - releases - }, - }, - } - } else { - return Ok(data); - }; - - for row in rows { - let name = row.get("name"); - if current.name != name { - data.crates.insert( - std::mem::replace(&mut current.name, CrateName(name)), - std::mem::take(&mut current.krate), - ); - } - current - .krate - .releases - .insert(Version(row.get("version")), Release::default()); - } + Ok(crates) +} - data.crates.insert(current.name, current.krate); +#[cfg(test)] +mod tests { + use super::*; + use super::{Crate, Release}; + use crate::test::wrapper; - Ok(data) + #[test] + fn test_load() { + wrapper(|env| { + env.build_queue().add_crate("queued", "0.0.1", 0, None)?; + env.fake_release().name("krate").version("0.0.2").create()?; + env.fake_release() + .name("krate") + .version("0.0.3") + .yanked(true) + .create()?; + + assert_eq!( + load(&mut env.db().conn(), &env.config())?, + vec![ + Crate { + name: "krate".into(), + releases: vec![ + Release { + version: "0.0.2".into(), + yanked: Some(false), + }, + Release { + version: "0.0.3".into(), + yanked: Some(true), + } + ] + }, + Crate { + name: "queued".into(), + releases: vec![Release { + version: "0.0.1".into(), + yanked: None, + }] + }, + ] + ); + Ok(()) + }) + } } diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs index 7ba1b9fae..ffa4d4d98 100644 --- a/src/utils/consistency/diff.rs +++ b/src/utils/consistency/diff.rs @@ -1,108 +1,211 @@ -use super::data::{Crate, CrateName, Data, Release, Version}; -use std::{ - cmp::Ordering, - collections::{btree_map::IntoIter, BTreeMap}, - fmt::Debug, - iter::Peekable, -}; - -#[derive(Debug)] -pub(crate) struct DataDiff { - pub(crate) crates: DiffMap, -} - -#[derive(Debug)] -pub(crate) struct CrateDiff { - pub(crate) releases: DiffMap, -} - -#[derive(Debug)] -pub(crate) struct ReleaseDiff {} - -pub(crate) enum Diff { - Both(Key, Value::Diff), - Left(Key, Value), - Right(Key, Value), -} - -pub(crate) trait Diffable { - type Diff; +use std::fmt::Display; - fn diff(self, other: Self) -> Self::Diff; -} +use super::data::Crate; +use itertools::{ + EitherOrBoth::{Both, Left, Right}, + Itertools, +}; -#[derive(Debug)] -pub(crate) struct DiffMap { - left: Peekable>, - right: Peekable>, +#[derive(Debug, PartialEq)] +pub(super) enum Difference { + CrateNotInIndex(String), + CrateNotInDb(String, Vec), + ReleaseNotInIndex(String, String), + ReleaseNotInDb(String, String), + ReleaseYank(String, String, bool), } -impl DiffMap { - fn new(left: BTreeMap, right: BTreeMap) -> Self { - Self { - left: left.into_iter().peekable(), - right: right.into_iter().peekable(), +impl Display for Difference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Difference::CrateNotInIndex(name) => { + write!(f, "Crate in db not in index: {name}")?; + } + Difference::CrateNotInDb(name, _versions) => { + write!(f, "Crate in index not in db: {name}")?; + } + Difference::ReleaseNotInIndex(name, version) => { + write!(f, "Release in db not in index: {name} {version}")?; + } + Difference::ReleaseNotInDb(name, version) => { + write!(f, "Release in index not in db: {name} {version}")?; + } + Difference::ReleaseYank(name, version, yanked) => { + write!( + f, + "release yanked difference, index yanked:{yanked}, release: {name} {version}", + )?; + } } + Ok(()) } } -impl Iterator for DiffMap { - type Item = Diff; - - fn next(&mut self) -> Option { - match (self.left.peek(), self.right.peek()) { - (Some((left, _)), Some((right, _))) => match left.cmp(right) { - Ordering::Less => { - let (key, value) = self.left.next().unwrap(); - Some(Diff::Left(key, value)) - } - Ordering::Equal => { - let (key, left) = self.left.next().unwrap(); - let (_, right) = self.right.next().unwrap(); - Some(Diff::Both(key, left.diff(right))) +pub(super) fn calculcate_diff<'a, I>(db_data: I, index_data: I) -> Vec +where + I: Iterator, +{ + let mut result = Vec::new(); + + for crates_diff in db_data.merge_join_by(index_data, |db, index| db.name.cmp(&index.name)) { + match crates_diff { + Both(db_crate, index_crate) => { + for release_diff in db_crate + .releases + .iter() + .merge_join_by(index_crate.releases.iter(), |db_release, index_release| { + db_release.version.cmp(&index_release.version) + }) + { + match release_diff { + Both(db_release, index_release) => { + let index_yanked = + index_release.yanked.expect("index always has yanked-state"); + // if `db_release.yanked` is `None`, the record + // is coming from the build queue, not the `releases` + // table. + // In this case, we skip this check. + if let Some(db_yanked) = db_release.yanked { + if db_yanked != index_yanked { + result.push(Difference::ReleaseYank( + db_crate.name.clone(), + db_release.version.clone(), + index_yanked, + )); + } + } + } + Left(db_release) => result.push(Difference::ReleaseNotInIndex( + db_crate.name.clone(), + db_release.version.clone(), + )), + Right(index_release) => result.push(Difference::ReleaseNotInDb( + index_crate.name.clone(), + index_release.version.clone(), + )), + } } - Ordering::Greater => { - let (key, value) = self.right.next().unwrap(); - Some(Diff::Right(key, value)) - } - }, - (Some((_, _)), None) => { - let (key, value) = self.left.next().unwrap(); - Some(Diff::Left(key, value)) - } - (None, Some((_, _))) => { - let (key, value) = self.right.next().unwrap(); - Some(Diff::Right(key, value)) } - (None, None) => None, - } + Left(db_crate) => result.push(Difference::CrateNotInIndex(db_crate.name.clone())), + Right(index_crate) => result.push(Difference::CrateNotInDb( + index_crate.name.clone(), + index_crate + .releases + .iter() + .map(|r| r.version.clone()) + .collect(), + )), + }; } + + result } -impl Diffable for Data { - type Diff = DataDiff; +#[cfg(test)] +mod tests { + use super::super::data::Release; + use super::*; + use std::iter; - fn diff(self, other: Self) -> Self::Diff { - DataDiff { - crates: DiffMap::new(self.crates, other.crates), - } + #[test] + fn test_empty() { + assert!(calculcate_diff(iter::empty(), iter::empty()).is_empty()); } -} -impl Diffable for Crate { - type Diff = CrateDiff; + #[test] + fn test_crate_not_in_index() { + let db_releases = vec![Crate { + name: "krate".into(), + releases: vec![], + }]; + + assert_eq!( + calculcate_diff(db_releases.iter(), vec![].iter()), + vec![Difference::CrateNotInIndex("krate".into())] + ); + } - fn diff(self, other: Self) -> Self::Diff { - CrateDiff { - releases: DiffMap::new(self.releases, other.releases), - } + #[test] + fn test_crate_not_in_db() { + let index_releases = vec![Crate { + name: "krate".into(), + releases: vec![ + Release { + version: "0.0.2".into(), + yanked: Some(false), + }, + Release { + version: "0.0.3".into(), + yanked: Some(true), + }, + ], + }]; + + assert_eq!( + calculcate_diff(vec![].iter(), index_releases.iter()), + vec![Difference::CrateNotInDb( + "krate".into(), + vec!["0.0.2".into(), "0.0.3".into()] + )] + ); } -} -impl Diffable for Release { - type Diff = ReleaseDiff; + #[test] + fn test_yank_diff() { + let db_releases = vec![Crate { + name: "krate".into(), + releases: vec![ + Release { + version: "0.0.2".into(), + yanked: Some(true), + }, + Release { + version: "0.0.3".into(), + yanked: Some(true), + }, + ], + }]; + let index_releases = vec![Crate { + name: "krate".into(), + releases: vec![ + Release { + version: "0.0.2".into(), + yanked: Some(false), + }, + Release { + version: "0.0.3".into(), + yanked: Some(true), + }, + ], + }]; + + assert_eq!( + calculcate_diff(db_releases.iter(), index_releases.iter()), + vec![Difference::ReleaseYank( + "krate".into(), + "0.0.2".into(), + false, + )] + ); + } - fn diff(self, _other: Self) -> Self::Diff { - ReleaseDiff {} + #[test] + fn test_yank_diff_without_db_data() { + let db_releases = vec![Crate { + name: "krate".into(), + releases: vec![Release { + version: "0.0.2".into(), + yanked: None, + }], + }]; + let index_releases = vec![Crate { + name: "krate".into(), + releases: vec![Release { + version: "0.0.2".into(), + yanked: Some(false), + }], + }]; + + assert!(calculcate_diff(db_releases.iter(), index_releases.iter()).is_empty()); } } diff --git a/src/utils/consistency/index.rs b/src/utils/consistency/index.rs index 0d0f1340c..a8a5ac9ff 100644 --- a/src/utils/consistency/index.rs +++ b/src/utils/consistency/index.rs @@ -1,22 +1,33 @@ -use super::data::{Crate, CrateName, Data, Release, Version}; +use super::data::{Crate, Crates, Release, Releases}; use crate::Index; +use anyhow::Result; use rayon::iter::ParallelIterator; -pub(crate) fn load(index: &Index) -> Result { - let crates = index +pub(super) fn load(index: &Index) -> Result { + let mut result: Crates = index .crates()? .crates_parallel() .map(|krate| { krate.map(|krate| { - let releases = krate + let mut releases: Releases = krate .versions() .iter() - .map(|version| (Version(version.version().into()), Release::default())) + .map(|version| Release { + version: version.version().into(), + yanked: Some(version.is_yanked()), + }) .collect(); - (CrateName(krate.name().into()), Crate { releases }) + releases.sort_by(|lhs, rhs| lhs.version.cmp(&rhs.version)); + + Crate { + name: krate.name().into(), + releases, + } }) }) .collect::>()?; - Ok(Data { crates }) + result.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name)); + + Ok(result) } diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs index 8415d227c..4acac62f3 100644 --- a/src/utils/consistency/mod.rs +++ b/src/utils/consistency/mod.rs @@ -1,6 +1,6 @@ -use self::diff::{Diff, Diffable}; -use crate::Index; -use anyhow::Context; +use crate::{build_queue, db::delete, Context}; +use anyhow::{Context as _, Result}; +use itertools::Itertools; use tracing::info; mod data; @@ -8,52 +8,293 @@ mod db; mod diff; mod index; -pub fn run_check( - conn: &mut postgres::Client, - index: &Index, - dry_run: bool, -) -> Result<(), anyhow::Error> { - if !dry_run { - anyhow::bail!("TODO: only a --dry-run synchronization is supported currently"); - } +const BUILD_PRIORITY: i32 = 15; + +/// consistency check +/// +/// will compare our database with the local crates.io index and +/// apply any changes that we find in the index but not our database. +/// +/// Differences that we check for, and the activivies: +/// * release in index, but not our DB => queue a build for this release. +/// * crate in index, but not in our DB => queue builds for all versions of that crate. +/// * release in DB, but not in the index => delete the release from our DB & storage. +/// * crate in our DB, but not in the index => delete the whole crate from our DB & storage. +/// * different yank-state between DB & Index => update the yank-state in our DB +/// +/// Even when activities fail, the command can just be re-run. While the diff calculation will +/// be repeated, we won't we-execute fixing activities. +pub fn run_check(ctx: &dyn Context, dry_run: bool) -> Result<()> { + let mut conn = ctx.pool()?.get()?; + let index = ctx.index()?; info!("Loading data from database..."); - let timer = std::time::Instant::now(); - let db_data = - self::db::load(conn).context("Loading crate data from database for consistency check")?; - info!("...loaded in {:?}", timer.elapsed()); + let db_data = db::load(&mut conn, &*ctx.config()?) + .context("Loading crate data from database for consistency check")?; tracing::info!("Loading data from index..."); - let timer = std::time::Instant::now(); let index_data = - self::index::load(index).context("Loading crate data from index for consistency check")?; - info!("...loaded in {:?}", timer.elapsed()); - - let diff = db_data.diff(index_data); - - for krate in diff.crates { - match krate { - Diff::Both(name, diff) => { - for release in diff.releases { - match release { - Diff::Both(_, _) => {} - Diff::Left(version, _) => { - println!("Release in db not in index: {} {}", name, version); - } - Diff::Right(version, _) => { - println!("Release in index not in db: {} {}", name, version); - } + index::load(&index).context("Loading crate data from index for consistency check")?; + + let diff = diff::calculcate_diff(db_data.iter(), index_data.iter()); + let result = handle_diff(ctx, diff.iter(), dry_run)?; + + println!("============"); + println!("SUMMARY"); + println!("============"); + println!("difference found:"); + for (key, count) in diff.iter().counts_by(|el| match el { + diff::Difference::CrateNotInIndex(_) => "CrateNotInIndex", + diff::Difference::CrateNotInDb(_, _) => "CrateNotInDb", + diff::Difference::ReleaseNotInIndex(_, _) => "ReleaseNotInIndex", + diff::Difference::ReleaseNotInDb(_, _) => "ReleaseNotInDb", + diff::Difference::ReleaseYank(_, _, _) => "ReleaseYank", + }) { + println!("{} => {}", key, count); + } + + println!("============"); + if dry_run { + println!("activities that would have been triggered:"); + } else { + println!("activities triggered:"); + } + println!("builds queued: {}", result.builds_queued); + println!("crates deleted: {}", result.crates_deleted); + println!("releases deleted: {}", result.releases_deleted); + println!("yanks corrected: {}", result.yanks_corrected); + + Ok(()) +} + +#[derive(Default)] +struct HandleResult { + builds_queued: u32, + crates_deleted: u32, + releases_deleted: u32, + yanks_corrected: u32, +} + +fn handle_diff<'a, I>(ctx: &dyn Context, iter: I, dry_run: bool) -> Result +where + I: Iterator, +{ + let mut result = HandleResult::default(); + + let mut conn = ctx.pool()?.get()?; + let storage = ctx.storage()?; + let config = ctx.config()?; + let build_queue = ctx.build_queue()?; + + for difference in iter { + println!("{difference}"); + + match difference { + diff::Difference::CrateNotInIndex(name) => { + if !dry_run { + delete::delete_crate(&mut conn, &storage, &config, name)?; + } + result.crates_deleted += 1; + } + diff::Difference::CrateNotInDb(name, versions) => { + for version in versions { + if !dry_run { + build_queue.add_crate(name, version, BUILD_PRIORITY, None)?; } + result.builds_queued += 1; } } - Diff::Left(name, _) => { - println!("Crate in db not in index: {}", name); + diff::Difference::ReleaseNotInIndex(name, version) => { + if !dry_run { + delete::delete_version(ctx, name, version)?; + } + result.releases_deleted += 1; } - Diff::Right(name, _) => { - println!("Crate in index not in db: {}", name); + diff::Difference::ReleaseNotInDb(name, version) => { + if !dry_run { + build_queue.add_crate(name, version, BUILD_PRIORITY, None)?; + } + result.builds_queued += 1; + } + diff::Difference::ReleaseYank(name, version, yanked) => { + if !dry_run { + build_queue::set_yanked(&mut conn, name, version, *yanked); + } + result.yanks_corrected += 1; } } } - Ok(()) + Ok(result) +} + +#[cfg(test)] +mod tests { + use postgres_types::FromSql; + + use super::diff::Difference; + use super::*; + use crate::test::{wrapper, TestEnvironment}; + + fn count(env: &TestEnvironment, sql: &str) -> Result { + Ok(env.db().conn().query_one(sql, &[])?.get::<_, i64>(0)) + } + + fn single_row(env: &TestEnvironment, sql: &str) -> Result> + where + T: for<'a> FromSql<'a>, + { + Ok(env + .db() + .conn() + .query(sql, &[])? + .iter() + .map(|row| row.get::<_, T>(0)) + .collect()) + } + + #[test] + fn test_delete_crate() { + wrapper(|env| { + env.fake_release() + .name("krate") + .version("0.1.1") + .version("0.1.2") + .create()?; + + let diff = vec![Difference::CrateNotInIndex("krate".into())]; + + // calling with dry-run leads to no change + handle_diff(env, diff.iter(), true)?; + + assert_eq!( + count(env, "SELECT count(*) FROM crates WHERE name = 'krate'")?, + 1 + ); + + // without dry-run the crate will be deleted + handle_diff(env, diff.iter(), false)?; + + assert_eq!( + count(env, "SELECT count(*) FROM crates WHERE name = 'krate'")?, + 0 + ); + + Ok(()) + }) + } + + #[test] + fn test_delete_release() { + wrapper(|env| { + env.fake_release().name("krate").version("0.1.1").create()?; + env.fake_release().name("krate").version("0.1.2").create()?; + + let diff = vec![Difference::ReleaseNotInIndex( + "krate".into(), + "0.1.1".into(), + )]; + + assert_eq!(count(env, "SELECT count(*) FROM releases")?, 2); + + handle_diff(env, diff.iter(), true)?; + + assert_eq!(count(env, "SELECT count(*) FROM releases")?, 2); + + handle_diff(env, diff.iter(), false)?; + + assert_eq!( + single_row::(env, "SELECT version FROM releases")?, + vec!["0.1.2"] + ); + + Ok(()) + }) + } + + #[test] + fn test_wrong_yank() { + wrapper(|env| { + env.fake_release() + .name("krate") + .version("0.1.1") + .yanked(true) + .create()?; + + let diff = vec![Difference::ReleaseYank( + "krate".into(), + "0.1.1".into(), + false, + )]; + + handle_diff(env, diff.iter(), true)?; + + assert_eq!( + single_row::(env, "SELECT yanked FROM releases")?, + vec![true] + ); + + handle_diff(env, diff.iter(), false)?; + + assert_eq!( + single_row::(env, "SELECT yanked FROM releases")?, + vec![false] + ); + + Ok(()) + }) + } + + #[test] + fn test_missing_release_in_db() { + wrapper(|env| { + let diff = vec![Difference::ReleaseNotInDb("krate".into(), "0.1.1".into())]; + + handle_diff(env, diff.iter(), true)?; + + let build_queue = env.build_queue(); + + assert!(build_queue.queued_crates()?.is_empty()); + + handle_diff(env, diff.iter(), false)?; + + assert_eq!( + build_queue + .queued_crates()? + .iter() + .map(|c| (c.name.as_str(), c.version.as_str(), c.priority)) + .collect::>(), + vec![("krate", "0.1.1", 15)] + ); + Ok(()) + }) + } + + #[test] + fn test_missing_crate_in_db() { + wrapper(|env| { + let diff = vec![Difference::CrateNotInDb( + "krate".into(), + vec!["0.1.1".into(), "0.1.2".into()], + )]; + + handle_diff(env, diff.iter(), true)?; + + let build_queue = env.build_queue(); + + assert!(build_queue.queued_crates()?.is_empty()); + + handle_diff(env, diff.iter(), false)?; + + assert_eq!( + build_queue + .queued_crates()? + .iter() + .map(|c| (c.name.as_str(), c.version.as_str(), c.priority)) + .collect::>(), + vec![("krate", "0.1.1", 15), ("krate", "0.1.2", 15)] + ); + Ok(()) + }) + } } From 4b2d36677dc053d492fbf93da57f9872ef2e08e3 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 9 Jan 2023 20:43:06 +0100 Subject: [PATCH 2/4] fix various typos --- src/utils/consistency/diff.rs | 12 ++++++------ src/utils/consistency/mod.rs | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/utils/consistency/diff.rs b/src/utils/consistency/diff.rs index ffa4d4d98..0bb9c54cb 100644 --- a/src/utils/consistency/diff.rs +++ b/src/utils/consistency/diff.rs @@ -41,7 +41,7 @@ impl Display for Difference { } } -pub(super) fn calculcate_diff<'a, I>(db_data: I, index_data: I) -> Vec +pub(super) fn calculate_diff<'a, I>(db_data: I, index_data: I) -> Vec where I: Iterator, { @@ -109,7 +109,7 @@ mod tests { #[test] fn test_empty() { - assert!(calculcate_diff(iter::empty(), iter::empty()).is_empty()); + assert!(calculate_diff(iter::empty(), iter::empty()).is_empty()); } #[test] @@ -120,7 +120,7 @@ mod tests { }]; assert_eq!( - calculcate_diff(db_releases.iter(), vec![].iter()), + calculate_diff(db_releases.iter(), vec![].iter()), vec![Difference::CrateNotInIndex("krate".into())] ); } @@ -142,7 +142,7 @@ mod tests { }]; assert_eq!( - calculcate_diff(vec![].iter(), index_releases.iter()), + calculate_diff(vec![].iter(), index_releases.iter()), vec![Difference::CrateNotInDb( "krate".into(), vec!["0.0.2".into(), "0.0.3".into()] @@ -180,7 +180,7 @@ mod tests { }]; assert_eq!( - calculcate_diff(db_releases.iter(), index_releases.iter()), + calculate_diff(db_releases.iter(), index_releases.iter()), vec![Difference::ReleaseYank( "krate".into(), "0.0.2".into(), @@ -206,6 +206,6 @@ mod tests { }], }]; - assert!(calculcate_diff(db_releases.iter(), index_releases.iter()).is_empty()); + assert!(calculate_diff(db_releases.iter(), index_releases.iter()).is_empty()); } } diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs index 4acac62f3..4bb8b964a 100644 --- a/src/utils/consistency/mod.rs +++ b/src/utils/consistency/mod.rs @@ -15,7 +15,7 @@ const BUILD_PRIORITY: i32 = 15; /// will compare our database with the local crates.io index and /// apply any changes that we find in the index but not our database. /// -/// Differences that we check for, and the activivies: +/// Differences that we check for, and the activities: /// * release in index, but not our DB => queue a build for this release. /// * crate in index, but not in our DB => queue builds for all versions of that crate. /// * release in DB, but not in the index => delete the release from our DB & storage. @@ -23,7 +23,7 @@ const BUILD_PRIORITY: i32 = 15; /// * different yank-state between DB & Index => update the yank-state in our DB /// /// Even when activities fail, the command can just be re-run. While the diff calculation will -/// be repeated, we won't we-execute fixing activities. +/// be repeated, we won't re-execute fixing activities. pub fn run_check(ctx: &dyn Context, dry_run: bool) -> Result<()> { let mut conn = ctx.pool()?.get()?; let index = ctx.index()?; @@ -36,7 +36,7 @@ pub fn run_check(ctx: &dyn Context, dry_run: bool) -> Result<()> { let index_data = index::load(&index).context("Loading crate data from index for consistency check")?; - let diff = diff::calculcate_diff(db_data.iter(), index_data.iter()); + let diff = diff::calculate_diff(db_data.iter(), index_data.iter()); let result = handle_diff(ctx, diff.iter(), dry_run)?; println!("============"); From a56e1ee4094ed236bbdc3ae3aad73036d53f49a9 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 9 Jan 2023 20:45:10 +0100 Subject: [PATCH 3/4] consistency-check: align command output --- src/utils/consistency/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs index 4bb8b964a..961747955 100644 --- a/src/utils/consistency/mod.rs +++ b/src/utils/consistency/mod.rs @@ -50,7 +50,7 @@ pub fn run_check(ctx: &dyn Context, dry_run: bool) -> Result<()> { diff::Difference::ReleaseNotInDb(_, _) => "ReleaseNotInDb", diff::Difference::ReleaseYank(_, _, _) => "ReleaseYank", }) { - println!("{} => {}", key, count); + println!("{:17} => {:4}", key, count); } println!("============"); @@ -59,10 +59,10 @@ pub fn run_check(ctx: &dyn Context, dry_run: bool) -> Result<()> { } else { println!("activities triggered:"); } - println!("builds queued: {}", result.builds_queued); - println!("crates deleted: {}", result.crates_deleted); - println!("releases deleted: {}", result.releases_deleted); - println!("yanks corrected: {}", result.yanks_corrected); + println!("builds queued: {:4}", result.builds_queued); + println!("crates deleted: {:4}", result.crates_deleted); + println!("releases deleted: {:4}", result.releases_deleted); + println!("yanks corrected: {:4}", result.yanks_corrected); Ok(()) } From 2eda2180231a411c3299bd805d7fdb08cbe6faa7 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 9 Jan 2023 21:06:42 +0100 Subject: [PATCH 4/4] consistency: warn & continue when errors happen while fixing inconsistencies --- Cargo.toml | 2 +- src/build_queue.rs | 87 +++++++++++++++++++----------------- src/db/delete.rs | 3 ++ src/utils/consistency/mod.rs | 23 +++++++--- 4 files changed, 66 insertions(+), 49 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7842feb35..8141d88a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,6 +97,7 @@ percent-encoding = "2.2.0" # NOTE: if you change this, also double-check that the comment in `queue_builder::remove_tempdirs` is still accurate. tempfile = "3.1.0" +fn-error-context = "0.2.0" # Templating tera = { version = "1.5.0", features = ["builtins"] } @@ -122,7 +123,6 @@ kuchiki = "0.8" rand = "0.8" mockito = "0.31.0" test-case = "2.0.0" -fn-error-context = "0.2.0" aws-smithy-client = { version = "0.53.0", features = ["test-util"]} aws-smithy-http = "0.53.0" indoc = "1.0.7" diff --git a/src/build_queue.rs b/src/build_queue.rs index 8035772d6..8ab56f3bd 100644 --- a/src/build_queue.rs +++ b/src/build_queue.rs @@ -6,6 +6,7 @@ use crate::storage::Storage; use crate::utils::{get_config, get_crate_priority, report_error, set_config, ConfigName}; use crate::{Config, Index, Metrics, RustwideBuilder}; use anyhow::Context; +use fn_error_context::context; use tracing::{debug, error, info, warn}; @@ -69,6 +70,7 @@ impl BuildQueue { Ok(()) } + #[context("error trying to add {name}-{version} to build queue")] pub fn add_crate( &self, name: &str, @@ -342,15 +344,16 @@ impl BuildQueue { let yanked = change.yanked(); let unyanked = change.unyanked(); if let Some(release) = yanked.or(unyanked) { - // FIXME: https://github.com/rust-lang/docs.rs/issues/1934 - // we sometimes have inconsistencies between our yank-state and - // the crates.io yank state, and we don't know why yet. - self.set_yanked( + // FIXME: delay yanks of crates that have not yet finished building + // https://github.com/rust-lang/docs.rs/issues/1934 + if let Err(err) = self.set_yanked( &mut conn, release.name.as_str(), release.version.as_str(), yanked.is_some(), - ); + ) { + report_error(&err); + } if let Err(err) = cdn::queue_crate_invalidation(&mut *conn, &self.config, &release.name) @@ -372,48 +375,48 @@ impl BuildQueue { Ok(crates_added) } - pub fn set_yanked(&self, conn: &mut postgres::Client, name: &str, version: &str, yanked: bool) { + #[context("error trying to set {name}-{version} to yanked: {yanked}")] + pub fn set_yanked( + &self, + conn: &mut postgres::Client, + name: &str, + version: &str, + yanked: bool, + ) -> Result<()> { let activity = if yanked { "yanked" } else { "unyanked" }; - match conn - .execute( - "UPDATE releases - SET yanked = $3 - FROM crates - WHERE crates.id = releases.crate_id - AND name = $1 - AND version = $2 - ", - &[&name, &version, &yanked], - ) - .with_context(|| format!("error while setting {}-{} to {}", name, version, activity)) - { - Ok(rows) => { - if rows != 1 { - match self - .has_build_queued(name, version) - .context("error trying to fetch build queue") - { - Ok(false) => { - // the rustwide builder will fetch the current yank state from - // crates.io, so and missed update here will be fixed after the - // build is finished. - error!( - "tried to yank or unyank non-existing release: {} {}", - name, version - ); - } - Ok(true) => {} - Err(err) => { - report_error(&err); - } - } - } else { - debug!("{}-{} {}", name, version, activity); + let rows = conn.execute( + "UPDATE releases + SET yanked = $3 + FROM crates + WHERE crates.id = releases.crate_id + AND name = $1 + AND version = $2", + &[&name, &version, &yanked], + )?; + if rows != 1 { + match self + .has_build_queued(name, version) + .context("error trying to fetch build queue") + { + Ok(false) => { + // the rustwide builder will fetch the current yank state from + // crates.io, so and missed update here will be fixed after the + // build is finished. + error!( + "tried to yank or unyank non-existing release: {} {}", + name, version + ); + } + Ok(true) => {} + Err(err) => { + report_error(&err); } } - Err(err) => report_error(&err), + } else { + debug!("{}-{} {}", name, version, activity); } + Ok(()) } /// Builds the top package from the queue. Returns whether there was a package in the queue. diff --git a/src/db/delete.rs b/src/db/delete.rs index 88bd890ac..5a6ff257b 100644 --- a/src/db/delete.rs +++ b/src/db/delete.rs @@ -2,6 +2,7 @@ use crate::error::Result; use crate::storage::{rustdoc_archive_path, source_archive_path, Storage}; use crate::{Config, Context}; use anyhow::Context as _; +use fn_error_context::context; use postgres::Client; use std::fs; @@ -16,6 +17,7 @@ enum CrateDeletionError { MissingCrate(String), } +#[context("error trying to delete crate {name} from database")] pub fn delete_crate( conn: &mut Client, storage: &Storage, @@ -52,6 +54,7 @@ pub fn delete_crate( Ok(()) } +#[context("error trying to delete release {name}-{version} from database")] pub fn delete_version(ctx: &dyn Context, name: &str, version: &str) -> Result<()> { let conn = &mut ctx.pool()?.get()?; let storage = ctx.storage()?; diff --git a/src/utils/consistency/mod.rs b/src/utils/consistency/mod.rs index 961747955..492da50bb 100644 --- a/src/utils/consistency/mod.rs +++ b/src/utils/consistency/mod.rs @@ -1,7 +1,7 @@ use crate::{build_queue, db::delete, Context}; use anyhow::{Context as _, Result}; use itertools::Itertools; -use tracing::info; +use tracing::{info, warn}; mod data; mod db; @@ -92,33 +92,44 @@ where match difference { diff::Difference::CrateNotInIndex(name) => { if !dry_run { - delete::delete_crate(&mut conn, &storage, &config, name)?; + if let Err(err) = delete::delete_crate(&mut conn, &storage, &config, name) { + warn!("{:?}", err); + } } result.crates_deleted += 1; } diff::Difference::CrateNotInDb(name, versions) => { for version in versions { if !dry_run { - build_queue.add_crate(name, version, BUILD_PRIORITY, None)?; + if let Err(err) = build_queue.add_crate(name, version, BUILD_PRIORITY, None) + { + warn!("{:?}", err); + } } result.builds_queued += 1; } } diff::Difference::ReleaseNotInIndex(name, version) => { if !dry_run { - delete::delete_version(ctx, name, version)?; + if let Err(err) = delete::delete_version(ctx, name, version) { + warn!("{:?}", err); + } } result.releases_deleted += 1; } diff::Difference::ReleaseNotInDb(name, version) => { if !dry_run { - build_queue.add_crate(name, version, BUILD_PRIORITY, None)?; + if let Err(err) = build_queue.add_crate(name, version, BUILD_PRIORITY, None) { + warn!("{:?}", err); + } } result.builds_queued += 1; } diff::Difference::ReleaseYank(name, version, yanked) => { if !dry_run { - build_queue::set_yanked(&mut conn, name, version, *yanked); + if let Err(err) = build_queue::set_yanked(&mut conn, name, version, *yanked) { + warn!("{:?}", err); + } } result.yanks_corrected += 1; }