diff --git a/.env.sample b/.env.sample
index 7cf054776..93ed7c946 100644
--- a/.env.sample
+++ b/.env.sample
@@ -1,5 +1,3 @@
-CRATESFYI_GITHUB_USERNAME=
-CRATESFYI_GITHUB_ACCESSTOKEN=
 CRATESFYI_PREFIX=ignored/cratesfyi-prefix
 CRATESFYI_DATABASE_URL=postgresql://cratesfyi:password@localhost
 RUST_LOG=cratesfyi,rustwide=info
diff --git a/Cargo.lock b/Cargo.lock
index bcb141a53..3f4a0b390 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -364,6 +364,7 @@ dependencies = [
  "arc-swap 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "backtrace 0.3.48 (registry+https://github.com/rust-lang/crates.io-index)",
  "badge 0.2.0",
+ "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "chrono 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
  "comrak 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "crates-index-diff 7.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -404,6 +405,7 @@ dependencies = [
  "slug 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "staticfile 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strum 0.18.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "systemstat 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "tera 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -3132,6 +3134,25 @@ dependencies = [
  "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "strum"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "strum_macros 0.18.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "strum_macros"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "syn"
 version = "0.11.11"
@@ -4373,6 +4394,8 @@ dependencies = [
 "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
 "checksum structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
 "checksum structopt-derive 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a"
+"checksum strum 0.18.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57bd81eb48f4c437cadc685403cad539345bf703d78e63707418431cecd4522b"
+"checksum strum_macros 0.18.0 (registry+https://github.com/rust-lang/crates.io-index)" = "87c85aa3f8ea653bfd3ddf25f7ee357ee4d204731f6aa9ad04002306f6e2774c"
 "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad"
 "checksum syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "1425de3c33b0941002740a420b1a906a350b88d08b82b2c8a01035a3f9447bac"
 "checksum syn-mid 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
diff --git a/Cargo.toml b/Cargo.toml
index 27d31fe65..1526c1b73 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,6 +44,8 @@ zstd = "0.5"
 git2 = { version = "0.13.6", default-features = false }
 path-slash = "0.1.3"
 once_cell = { version = "1.4.0", features = ["parking_lot"] }
+base64 = "0.12.1"
+strum = { version = "0.18.0", features = ["derive"] }
 
 # Data serialization and deserialization
 serde = { version = "1.0", features = ["derive"] }
diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile
index 6e3c54f7b..d5f79bba9 100644
--- a/dockerfiles/Dockerfile
+++ b/dockerfiles/Dockerfile
@@ -79,4 +79,4 @@ COPY dockerfiles/entrypoint.sh /opt/docsrs/
 
 WORKDIR /opt/docsrs
 ENTRYPOINT ["/opt/docsrs/entrypoint.sh"]
-CMD ["start-web-server"]
+CMD ["daemon", "--registry-watcher=disabled"]
diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index e5e311936..abf2b2da9 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -8,6 +8,7 @@ use cratesfyi::{BuildQueue, Config, DocBuilder, DocBuilderOptions, RustwideBuild
 use failure::Error;
 use once_cell::sync::OnceCell;
 use structopt::StructOpt;
+use strum::VariantNames;
 
 pub fn main() -> Result<(), Error> {
     let _ = dotenv::dotenv();
@@ -40,6 +41,13 @@ fn logger_init() {
     rustwide::logging::init_with(builder.build());
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::EnumString, strum::EnumVariantNames)]
+#[strum(serialize_all = "snake_case")]
+enum Toggle {
+    Enabled,
+    Disabled,
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, StructOpt)]
 #[structopt(
     name = "cratesfyi",
@@ -65,6 +73,14 @@ enum CommandLine {
         /// Deprecated. Run the server in the foreground instead of detaching a child
         #[structopt(name = "FOREGROUND", short = "f", long = "foreground")]
         foreground: bool,
+
+        /// Enable or disable the registry watcher to automatically enqueue newly published crates
+        #[structopt(
+            long = "registry-watcher",
+            default_value = "enabled",
+            possible_values(Toggle::VARIANTS)
+        )]
+        registry_watcher: Toggle,
     },
 
     /// Database operations
@@ -98,12 +114,20 @@ impl CommandLine {
                     ctx.build_queue()?,
                 )?;
             }
-            Self::Daemon { foreground } => {
+            Self::Daemon {
+                foreground,
+                registry_watcher,
+            } => {
                 if foreground {
                     log::warn!("--foreground was passed, but there is no need for it anymore");
                 }
 
-                cratesfyi::utils::start_daemon(ctx.config()?, ctx.pool()?, ctx.build_queue()?)?;
+                cratesfyi::utils::start_daemon(
+                    ctx.config()?,
+                    ctx.pool()?,
+                    ctx.build_queue()?,
+                    registry_watcher == Toggle::Enabled,
+                )?;
             }
             Self::Database { subcommand } => subcommand.handle_args(ctx)?,
             Self::Queue { subcommand } => subcommand.handle_args(ctx)?,
@@ -430,8 +454,8 @@ impl DatabaseSubcommand {
             }
 
             Self::UpdateGithubFields => {
-                cratesfyi::utils::github_updater(&*ctx.conn()?)
-                    .expect("Failed to update github fields");
+                cratesfyi::utils::GithubUpdater::new(&*ctx.config()?, ctx.pool()?)?
+                    .update_all_crates()?;
             }
 
             Self::AddDirectory { directory, prefix } => {
diff --git a/src/config.rs b/src/config.rs
index 8f5c60f7f..0054f353e 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -12,6 +12,10 @@ pub struct Config {
     pub(crate) max_pool_size: u32,
     pub(crate) min_pool_idle: u32,
 
+    // Github authentication
+    pub(crate) github_username: Option<String>,
+    pub(crate) github_accesstoken: Option<String>,
+
     // Max size of the files served by the docs.rs frontend
     pub(crate) max_file_size: usize,
     pub(crate) max_file_size_html: usize,
@@ -26,10 +30,20 @@ impl Config {
             max_pool_size: env("DOCSRS_MAX_POOL_SIZE", 90)?,
             min_pool_idle: env("DOCSRS_MIN_POOL_IDLE", 10)?,
 
+            github_username: maybe_env("CRATESFYI_GITHUB_USERNAME")?,
+            github_accesstoken: maybe_env("CRATESFYI_GITHUB_ACCESSTOKEN")?,
+
             max_file_size: env("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024)?,
             max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 5 * 1024 * 1024)?,
         })
     }
+
+    pub fn github_auth(&self) -> Option<(&str, &str)> {
+        Some((
+            self.github_username.as_deref()?,
+            self.github_accesstoken.as_deref()?,
+        ))
+    }
 }
 
 fn env<T>(var: &str, default: T) -> Result<T, Error>
@@ -58,7 +72,10 @@ where
             .parse::<T>()
             .map(Some)
             .with_context(|_| format!("failed to parse configuration variable {}", var))?),
-        Err(VarError::NotPresent) => Ok(None),
+        Err(VarError::NotPresent) => {
+            log::debug!("optional configuration variable {} is not set", var);
+            Ok(None)
+        }
         Err(VarError::NotUnicode(_)) => bail!("configuration variable {} is not UTF-8", var),
     }
 }
diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs
index d57e06028..50006aefc 100644
--- a/src/utils/daemon.rs
+++ b/src/utils/daemon.rs
@@ -4,45 +4,18 @@
 
 use crate::{
     db::Pool,
-    docbuilder::RustwideBuilder,
-    utils::{github_updater, pubsubhubbub, update_release_activity},
+    utils::{queue_builder, update_release_activity, GithubUpdater},
     BuildQueue, Config, DocBuilder, DocBuilderOptions,
 };
 use chrono::{Timelike, Utc};
 use failure::Error;
-use log::{debug, error, info, warn};
-use std::panic::{catch_unwind, AssertUnwindSafe};
+use log::{debug, error, info};
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
 use std::{env, thread};
 
-pub fn start_daemon(
-    config: Arc<Config>,
-    db: Pool,
-    build_queue: Arc<BuildQueue>,
-) -> Result<(), Error> {
-    const CRATE_VARIABLES: [&str; 3] = [
-        "CRATESFYI_PREFIX",
-        "CRATESFYI_GITHUB_USERNAME",
-        "CRATESFYI_GITHUB_ACCESSTOKEN",
-    ];
-
-    // first check required environment variables
-    for v in CRATE_VARIABLES.iter() {
-        if env::var(v).is_err() {
-            panic!("Environment variable {} not found", v)
-        }
-    }
-
-    let dbopts = opts();
-
-    // check paths once
-    dbopts.check_paths().unwrap();
-
-    // check new crates every minute
-    let cloned_db = db.clone();
-    let cloned_build_queue = build_queue.clone();
+fn start_registry_watcher(pool: Pool, build_queue: Arc<BuildQueue>) -> Result<(), Error> {
     thread::Builder::new()
         .name("registry index reader".to_string())
         .spawn(move || {
@@ -50,8 +23,7 @@ pub fn start_daemon(
             thread::sleep(Duration::from_secs(30));
             loop {
                 let opts = opts();
-                let mut doc_builder =
-                    DocBuilder::new(opts, cloned_db.clone(), cloned_build_queue.clone());
+                let mut doc_builder = DocBuilder::new(opts, pool.clone(), build_queue.clone());
 
                 if doc_builder.is_locked() {
                     debug!("Lock file exists, skipping checking new crates");
@@ -65,141 +37,47 @@ pub fn start_daemon(
 
                 thread::sleep(Duration::from_secs(60));
             }
-        })
-        .unwrap();
-
-    // build new crates every minute
-    // REFACTOR: Break this into smaller functions
-    let cloned_db = db.clone();
-    let cloned_build_queue = build_queue.clone();
-    thread::Builder::new().name("build queue reader".to_string()).spawn(move || {
-        let opts = opts();
-        let mut doc_builder = DocBuilder::new(opts, cloned_db.clone(), cloned_build_queue.clone());
-
-        /// Represents the current state of the builder thread.
-        enum BuilderState {
-            /// The builder thread has just started, and hasn't built any crates yet.
-            Fresh,
-            /// The builder has just seen an empty build queue.
-            EmptyQueue,
-            /// The builder has just seen the lock file.
-            Locked,
-            /// The builder has just finished building a crate. The enclosed count is the number of
-            /// crates built since the caches have been refreshed.
-            QueueInProgress(usize),
-        }
-
-        let mut builder = RustwideBuilder::init(cloned_db).unwrap();
-
-        let mut status = BuilderState::Fresh;
-
-        loop {
-            if !status.is_in_progress() {
-                thread::sleep(Duration::from_secs(60));
-            }
-
-            // check lock file
-            if doc_builder.is_locked() {
-                warn!("Lock file exits, skipping building new crates");
-                status = BuilderState::Locked;
-                continue;
-            }
-
-            if status.count() >= 10 {
-                // periodically, we need to flush our caches and ping the hubs
-                debug!("10 builds in a row; flushing caches");
-                status = BuilderState::QueueInProgress(0);
-
-                match pubsubhubbub::ping_hubs() {
-                    Err(e) => error!("Failed to ping hub: {}", e),
-                    Ok(n) => debug!("Succesfully pinged {} hubs", n)
-                }
-
-                if let Err(e) = doc_builder.load_cache() {
-                    error!("Failed to load cache: {}", e);
-                }
-
-                if let Err(e) = doc_builder.save_cache() {
-                    error!("Failed to save cache: {}", e);
-                }
-            }
-
-            // Only build crates if there are any to build
-            debug!("Checking build queue");
-            match cloned_build_queue.pending_count() {
-                Err(e) => {
-                    error!("Failed to read the number of crates in the queue: {}", e);
-                    continue;
-                }
-
-                Ok(0) => {
-                    if status.count() > 0 {
-                        // ping the hubs before continuing
-                        match pubsubhubbub::ping_hubs() {
-                            Err(e) => error!("Failed to ping hub: {}", e),
-                            Ok(n) => debug!("Succesfully pinged {} hubs", n)
-                        }
-
-                        if let Err(e) = doc_builder.save_cache() {
-                            error!("Failed to save cache: {}", e);
-                        }
-                    }
-                    debug!("Queue is empty, going back to sleep");
-                    status = BuilderState::EmptyQueue;
-                    continue;
-                }
-
-                Ok(queue_count) => {
-                    info!("Starting build with {} crates in queue (currently on a {} crate streak)",
-                          queue_count, status.count());
-                }
-            }
+        })?;
 
-            // if we're starting a new batch, reload our caches and sources
-            if !status.is_in_progress() {
-                if let Err(e) = doc_builder.load_cache() {
-                    error!("Failed to load cache: {}", e);
-                    continue;
-                }
-            }
+    Ok(())
+}
 
-            // Run build_packages_queue under `catch_unwind` to catch panics
-            // This only panicked twice in the last 6 months but its just a better
-            // idea to do this.
-            let res = catch_unwind(AssertUnwindSafe(|| {
-                match doc_builder.build_next_queue_package(&mut builder) {
-                    Err(e) => error!("Failed to build crate from queue: {}", e),
-                    Ok(crate_built) => if crate_built {
-                        status.increment();
-                    }
-                }
-            }));
+pub fn start_daemon(
+    config: Arc<Config>,
+    db: Pool,
+    build_queue: Arc<BuildQueue>,
+    enable_registry_watcher: bool,
+) -> Result<(), Error> {
+    const CRATE_VARIABLES: &[&str] = &["CRATESFYI_PREFIX"];
 
-            if let Err(e) = res {
-                error!("GRAVE ERROR Building new crates panicked: {:?}", e);
-            }
+    // first check required environment variables
+    for v in CRATE_VARIABLES.iter() {
+        if env::var(v).is_err() {
+            panic!("Environment variable {} not found", v)
         }
+    }
 
-        impl BuilderState {
-            fn count(&self) -> usize {
-                match *self {
-                    BuilderState::QueueInProgress(n) => n,
-                    _ => 0,
-                }
-            }
+    let dbopts = opts();
 
-            fn is_in_progress(&self) -> bool {
-                match *self {
-                    BuilderState::QueueInProgress(_) => true,
-                    _ => false,
-                }
-            }
+    // check paths once
+    dbopts.check_paths().unwrap();
 
-            fn increment(&mut self) {
-                *self = BuilderState::QueueInProgress(self.count() + 1);
-            }
-        }
-    }).unwrap();
+    if enable_registry_watcher {
+        // check new crates every minute
+        start_registry_watcher(db.clone(), build_queue.clone())?;
+    }
+
+    // build new crates every minute
+    let cloned_db = db.clone();
+    let cloned_build_queue = build_queue.clone();
+    thread::Builder::new()
+        .name("build queue reader".to_string())
+        .spawn(move || {
+            let doc_builder =
+                DocBuilder::new(opts(), cloned_db.clone(), cloned_build_queue.clone());
+            queue_builder(doc_builder, cloned_db, cloned_build_queue).unwrap();
+        })
+        .unwrap();
 
     // update release activity everyday at 23:55
     let cloned_db = db.clone();
@@ -216,13 +94,13 @@ pub fn start_daemon(
         },
     )?;
 
-    // update github stats every 6 hours
-    let cloned_db = db.clone();
+    // update github stats every hour
+    let github_updater = GithubUpdater::new(&config, db.clone())?;
     cron(
         "github stats updater",
-        Duration::from_secs(60 * 60 * 6),
+        Duration::from_secs(60 * 60),
         move || {
-            github_updater(&*cloned_db.get()?)?;
+            github_updater.update_all_crates()?;
             Ok(())
         },
     )?;
diff --git a/src/utils/github_updater.rs b/src/utils/github_updater.rs
index 941a315f8..00fea698d 100644
--- a/src/utils/github_updater.rs
+++ b/src/utils/github_updater.rs
@@ -1,10 +1,18 @@
 use crate::error::Result;
+use crate::{db::Pool, Config};
 use chrono::{DateTime, Utc};
 use failure::err_msg;
-use log::debug;
+use log::{debug, warn};
 use postgres::Connection;
 use regex::Regex;
-use std::str::FromStr;
+use reqwest::header::{HeaderValue, ACCEPT, AUTHORIZATION, USER_AGENT};
+use serde::Deserialize;
+
+const APP_USER_AGENT: &str = concat!(
+    env!("CARGO_PKG_NAME"),
+    " ",
+    include_str!(concat!(env!("OUT_DIR"), "/git_version"))
+);
 
 /// Fields we need use in cratesfyi
 #[derive(Debug)]
@@ -16,115 +24,153 @@ struct GitHubFields {
     last_commit: DateTime<Utc>,
 }
 
-/// Updates github fields in crates table
-pub fn github_updater(conn: &Connection) -> Result<()> {
-    // TODO: This query assumes repository field in Cargo.toml is
-    // always the same across all versions of a crate
-    for row in &conn.query(
-        "SELECT DISTINCT ON (crates.name)
-                crates.name,
-                crates.id,
-                releases.repository_url
-         FROM crates
-         INNER JOIN releases ON releases.crate_id = crates.id
-         WHERE releases.repository_url ~ '^https?://github.com' AND
-               (crates.github_last_update < NOW() - INTERVAL '1 day' OR
-                crates.github_last_update IS NULL)
-         ORDER BY crates.name, releases.release_time DESC",
-        &[],
-    )? {
-        let crate_name: String = row.get(0);
-        let crate_id: i32 = row.get(1);
-        let repository_url: String = row.get(2);
-
-        if let Err(err) = get_github_path(&repository_url[..])
-            .ok_or_else(|| err_msg("Failed to get github path"))
-            .and_then(|path| get_github_fields(&path[..]))
-            .and_then(|fields| {
-                conn.execute(
-                    "UPDATE crates
-                     SET github_description = $1,
-                         github_stars = $2, github_forks = $3,
-                         github_issues = $4, github_last_commit = $5,
-                         github_last_update = NOW()
-                     WHERE id = $6",
-                    &[
-                        &fields.description,
-                        &(fields.stars as i32),
-                        &(fields.forks as i32),
-                        &(fields.issues as i32),
-                        &fields.last_commit.naive_utc(),
-                        &crate_id,
-                    ],
-                )
-                .or_else(|e| Err(e.into()))
-            })
-        {
-            debug!("Failed to update github fields of: {} {}", crate_name, err);
+pub struct GithubUpdater {
+    client: reqwest::blocking::Client,
+    pool: Pool,
+}
+
+impl GithubUpdater {
+    pub fn new(config: &Config, pool: Pool) -> Result<Self> {
+        let mut headers = vec![
+            (USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)),
+            (ACCEPT, HeaderValue::from_static("application/json")),
+        ];
+
+        if let Some((username, accesstoken)) = config.github_auth() {
+            let basicauth = format!(
+                "Basic {}",
+                base64::encode(format!("{}:{}", username, accesstoken))
+            );
+            headers.push((AUTHORIZATION, HeaderValue::from_str(&basicauth).unwrap()));
+        } else {
+            warn!("No GitHub authorization specified, will be working with very low rate limits");
         }
 
-        // sleep for rate limits
-        use std::thread;
-        use std::time::Duration;
-        thread::sleep(Duration::from_secs(2));
+        let client = reqwest::blocking::Client::builder()
+            .default_headers(headers.into_iter().collect())
+            .build()?;
+
+        Ok(GithubUpdater { client, pool })
     }
 
-    Ok(())
-}
+    /// Updates github fields in crates table
+    pub fn update_all_crates(&self) -> Result<()> {
+        debug!("Starting update of all crates");
+
+        if self.is_rate_limited()? {
+            warn!("Skipping update because of rate limit");
+            return Ok(());
+        }
+
+        let conn = self.pool.get()?;
+        // TODO: This query assumes repository field in Cargo.toml is
+        // always the same across all versions of a crate
+        let rows = conn.query(
+            "SELECT DISTINCT ON (crates.name)
+                    crates.name,
+                    crates.id,
+                    releases.repository_url
+             FROM crates
+             INNER JOIN releases ON releases.crate_id = crates.id
+             WHERE releases.repository_url ~ '^https?://github.com' AND
+                   (crates.github_last_update < NOW() - INTERVAL '1 day' OR
+                    crates.github_last_update IS NULL)
+             ORDER BY crates.name, releases.release_time DESC",
+            &[],
+        )?;
+
+        for row in &rows {
+            let crate_name: String = row.get(0);
+            let crate_id: i32 = row.get(1);
+            let repository_url: String = row.get(2);
+
+            debug!("Updating {}", crate_name);
+            if let Err(err) = self.update_crate(&conn, crate_id, &repository_url) {
+                if self.is_rate_limited()? {
+                    warn!("Skipping remaining updates because of rate limit");
+                    return Ok(());
+                }
+                warn!("Failed to update {}: {}", crate_name, err);
+            }
+        }
+
+        debug!("Completed all updates");
+        Ok(())
+    }
+
+    fn is_rate_limited(&self) -> Result<bool> {
+        #[derive(Deserialize)]
+        struct Response {
+            resources: Resources,
+        }
+
+        #[derive(Deserialize)]
+        struct Resources {
+            core: Resource,
+        }
+
+        #[derive(Deserialize)]
+        struct Resource {
+            remaining: u64,
+        }
+
+        let url = "https://api.github.com/rate_limit";
+        let response: Response = self.client.get(url).send()?.error_for_status()?.json()?;
 
-fn get_github_fields(path: &str) -> Result<GitHubFields> {
-    use serde_json::Value;
-
-    let body = {
-        use reqwest::{blocking::Client, header::USER_AGENT, StatusCode};
-        use std::{env, io::Read};
-
-        let client = Client::new();
-        let mut body = String::new();
-
-        let mut resp = client
-            .get(&format!("https://api.github.com/repos/{}", path)[..])
-            .header(
-                USER_AGENT,
-                format!("cratesfyi/{}", env!("CARGO_PKG_VERSION")),
-            )
-            .basic_auth(
-                env::var("CRATESFYI_GITHUB_USERNAME")
-                    .ok()
-                    .unwrap_or_default(),
-                env::var("CRATESFYI_GITHUB_ACCESSTOKEN").ok(),
-            )
-            .send()?;
-
-        if resp.status() != StatusCode::OK {
-            return Err(err_msg("Failed to get github data"));
+        Ok(response.resources.core.remaining == 0)
+    }
+
+    fn update_crate(&self, conn: &Connection, crate_id: i32, repository_url: &str) -> Result<()> {
+        let path =
+            get_github_path(repository_url).ok_or_else(|| err_msg("Failed to get github path"))?;
+        let fields = self.get_github_fields(&path)?;
+
+        conn.execute(
+            "UPDATE crates
+             SET github_description = $1,
+                 github_stars = $2, github_forks = $3,
+                 github_issues = $4, github_last_commit = $5,
+                 github_last_update = NOW()
+             WHERE id = $6",
+            &[
+                &fields.description,
+                &(fields.stars as i32),
+                &(fields.forks as i32),
+                &(fields.issues as i32),
+                &fields.last_commit.naive_utc(),
+                &crate_id,
+            ],
+        )?;
+
+        Ok(())
+    }
+
+    fn get_github_fields(&self, path: &str) -> Result<GitHubFields> {
+        #[derive(Deserialize)]
+        struct Response {
+            #[serde(default)]
+            description: Option<String>,
+            #[serde(default)]
+            stargazers_count: i64,
+            #[serde(default)]
+            forks_count: i64,
+            #[serde(default)]
+            open_issues: i64,
+            #[serde(default = "Utc::now")]
+            pushed_at: DateTime<Utc>,
         }
 
-        resp.read_to_string(&mut body)?;
-        body
-    };
-
-    let json = Value::from_str(&body[..])?;
-    let obj = json.as_object().unwrap();
-
-    Ok(GitHubFields {
-        description: obj
-            .get("description")
-            .and_then(|d| d.as_str())
-            .unwrap_or("")
-            .to_string(),
-        stars: obj
-            .get("stargazers_count")
-            .and_then(|d| d.as_i64())
-            .unwrap_or(0),
-        forks: obj.get("forks_count").and_then(|d| d.as_i64()).unwrap_or(0),
-        issues: obj.get("open_issues").and_then(|d| d.as_i64()).unwrap_or(0),
-        last_commit: DateTime::parse_from_rfc3339(
-            obj.get("pushed_at").and_then(|d| d.as_str()).unwrap_or(""),
-        )
-        .map(|datetime| datetime.with_timezone(&Utc))
-        .unwrap_or_else(|_| Utc::now()),
-    })
+        let url = format!("https://api.github.com/repos/{}", path);
+        let response: Response = self.client.get(&url).send()?.error_for_status()?.json()?;
+
+        Ok(GitHubFields {
+            description: response.description.unwrap_or_default(),
+            stars: response.stargazers_count,
+            forks: response.forks_count,
+            issues: response.open_issues,
+            last_commit: response.pushed_at,
+        })
+    }
 }
 
 fn get_github_path(url: &str) -> Option<String> {
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 453445a68..101b3930d 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -3,9 +3,10 @@
 pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage};
 pub(crate) use self::copy::copy_doc_dir;
 pub use self::daemon::start_daemon;
-pub use self::github_updater::github_updater;
+pub use self::github_updater::GithubUpdater;
 pub use self::html::extract_head_and_body;
 pub use self::queue::{get_crate_priority, remove_crate_priority, set_crate_priority};
+pub use self::queue_builder::queue_builder;
 pub use self::release_activity_updater::update_release_activity;
 pub(crate) use self::rustc_version::parse_rustc_version;
 
@@ -19,6 +20,7 @@ mod github_updater;
 mod html;
 mod pubsubhubbub;
 mod queue;
+mod queue_builder;
 mod release_activity_updater;
 mod rustc_version;
 pub(crate) mod sized_buffer;
diff --git a/src/utils/queue_builder.rs b/src/utils/queue_builder.rs
new file mode 100644
index 000000000..6e9b66e9c
--- /dev/null
+++ b/src/utils/queue_builder.rs
@@ -0,0 +1,143 @@
+use crate::{db::Pool, docbuilder::RustwideBuilder, utils::pubsubhubbub, BuildQueue, DocBuilder};
+use failure::Error;
+use log::{debug, error, info, warn};
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::sync::Arc;
+use std::thread;
+use std::time::Duration;
+
+// TODO: change to `fn() -> Result<!, Error>` when never _finally_ stabilizes
+pub fn queue_builder(
+    mut doc_builder: DocBuilder,
+    db: Pool,
+    build_queue: Arc<BuildQueue>,
+) -> Result<(), Error> {
+    /// Represents the current state of the builder thread.
+    enum BuilderState {
+        /// The builder thread has just started, and hasn't built any crates yet.
+        Fresh,
+        /// The builder has just seen an empty build queue.
+        EmptyQueue,
+        /// The builder has just seen the lock file.
+        Locked,
+        /// The builder has just finished building a crate. The enclosed count is the number of
+        /// crates built since the caches have been refreshed.
+        QueueInProgress(usize),
+    }
+
+    let mut builder = RustwideBuilder::init(db)?;
+
+    let mut status = BuilderState::Fresh;
+
+    loop {
+        if !status.is_in_progress() {
+            thread::sleep(Duration::from_secs(60));
+        }
+
+        // check lock file
+        if doc_builder.is_locked() {
+            warn!("Lock file exists, skipping building new crates");
+            status = BuilderState::Locked;
+            continue;
+        }
+
+        if status.count() >= 10 {
+            // periodically, we need to flush our caches and ping the hubs
+            debug!("10 builds in a row; flushing caches");
+            status = BuilderState::QueueInProgress(0);
+
+            match pubsubhubbub::ping_hubs() {
+                Err(e) => error!("Failed to ping hub: {}", e),
+                Ok(n) => debug!("Successfully pinged {} hubs", n),
+            }
+
+            if let Err(e) = doc_builder.load_cache() {
+                error!("Failed to load cache: {}", e);
+            }
+
+            if let Err(e) = doc_builder.save_cache() {
+                error!("Failed to save cache: {}", e);
+            }
+        }
+
+        // Only build crates if there are any to build
+        debug!("Checking build queue");
+        match build_queue.pending_count() {
+            Err(e) => {
+                error!("Failed to read the number of crates in the queue: {}", e);
+                continue;
+            }
+
+            Ok(0) => {
+                if status.count() > 0 {
+                    // ping the hubs before continuing
+                    match pubsubhubbub::ping_hubs() {
+                        Err(e) => error!("Failed to ping hub: {}", e),
+                        Ok(n) => debug!("Successfully pinged {} hubs", n),
+                    }
+
+                    if let Err(e) = doc_builder.save_cache() {
+                        error!("Failed to save cache: {}", e);
+                    }
+                }
+                debug!("Queue is empty, going back to sleep");
+                status = BuilderState::EmptyQueue;
+                continue;
+            }
+
+            Ok(queue_count) => {
+                info!(
+                    "Starting build with {} crates in queue (currently on a {} crate streak)",
+                    queue_count,
+                    status.count()
+                );
+            }
+        }
+
+        // if we're starting a new batch, reload our caches and sources
+        if !status.is_in_progress() {
+            if let Err(e) = doc_builder.load_cache() {
+                error!("Failed to load cache: {}", e);
+                continue;
+            }
+        }
+
+        // Run build_packages_queue under `catch_unwind` to catch panics
+        // This only panicked twice in the last 6 months but it's just a better
+        // idea to do this.
+        let res = catch_unwind(AssertUnwindSafe(|| {
+            match doc_builder.build_next_queue_package(&mut builder) {
+                Err(e) => error!("Failed to build crate from queue: {}", e),
+                Ok(crate_built) => {
+                    if crate_built {
+                        status.increment();
+                    }
+                }
+            }
+        }));
+
+        if let Err(e) = res {
+            error!("GRAVE ERROR Building new crates panicked: {:?}", e);
+        }
+    }
+
+    impl BuilderState {
+        fn count(&self) -> usize {
+            match *self {
+                BuilderState::QueueInProgress(n) => n,
+                _ => 0,
+            }
+        }
+
+        fn is_in_progress(&self) -> bool {
+            match *self {
+                BuilderState::QueueInProgress(_) => true,
+                _ => false,
+            }
+        }
+
+        fn increment(&mut self) {
+            *self = BuilderState::QueueInProgress(self.count() + 1);
+        }
+    }
+}