-
Notifications
You must be signed in to change notification settings - Fork 0
Stoner stochastic graphing overhaul #37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e3229a5
1b75e1d
a330901
87ee15c
1e6279b
4bbc072
e71cf0d
51fe628
1ebd446
5f1face
ac7cbb7
61dde98
76ab53c
09896a5
95a6da8
b6f3f59
117e192
86f5997
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,3 +7,5 @@ codecov.yml | |
| appveyor.yml | ||
| .travis.yml | ||
| ^\.github$ | ||
| ^\.positai$ | ||
| ^\.claude$ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,3 +4,4 @@ inst/doc | |
| *.bak | ||
| .Rhistory | ||
|
|
||
| .positai | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -150,10 +150,19 @@ stone_stochastic_standardise <- function( | |
| } | ||
| } | ||
|
|
||
| # Note that for MenA, we want to keep the _cwyx outcomes. | ||
|
|
||
| if (missing_run_id_fix) { | ||
| if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j | ||
| } | ||
|
|
||
| # Remove columns "X" and "X.1" that have crept in with some of the | ||
| # inputs saved with row.names | ||
|
|
||
| d[["X"]] <- NULL | ||
| d[["X.1"]] <- NULL | ||
|
|
||
|
|
||
|
|
||
| # Round to integer, as per guidance. (Not using as.integer, as that | ||
| # has limits on how large numbers can be, so we are just truncating | ||
|
|
@@ -257,3 +266,110 @@ stone_stochastic_central <- function(base, touchstone, disease, group, | |
| outfile <- sprintf("%s_%s_central.pq", group, scenario) | ||
| arrow::write_parquet(central, file.path(path, outfile)) | ||
| } | ||
|
|
||
|
|
||
|
|
||
| ##' Create a `meta.csv` file in the root of the standardised | ||
| ##' stochastics. The columns contain scalars of `touchstone`, | ||
| ##' `disease`, `group`, `scenario` - and for each row, a | ||
| ##' semi-colon-separated lists for `countries` and `outcomes`. | ||
| ##' This is useful for making the stochastic explorer faster | ||
| ##' on startup (otherwise it has to sample all of the files | ||
| ##' each time you run it) - and also it is a good general | ||
| ##' record of all the stochastic data we have. | ||
| ##' | ||
| ##' This does mean that we should re-create the meta data | ||
| ##' each time we make changes to the standardised stochastic | ||
| ##' data though. | ||
| ##' | ||
| ##' @export | ||
| ##' @title Produce `meta.csv` summary of the structure and | ||
| ##' content of a standardised stochastic data folder. | ||
| ##' @importFrom data.table rbindlist | ||
| ##' @importFrom utils write.csv | ||
| ##' @param path The root folder of the stochastic data. | ||
| ##' @returns Nothing - called for side-effect of writing meta.csv. | ||
|
|
||
| stone_stochastic_make_meta <- function(path) { | ||
|
|
||
| # Only called from within stone_stochastic_make_meta. | ||
| # Here are looking at all the files within a group's | ||
| # folder, to calculate the per-scenario lists of | ||
| # outcomes and scenarios; each scenario produces | ||
| # one line of meta-data for the final csv. | ||
|
|
||
| explore_files <- function(touchstone, folder, disease, group) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we get docs on the expectations for these args? How are these injected into this scope? Is the expectation that they're present in the global scope? It might be worth adding defaults to the fn signature - perhaps drawing from package constants? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And perhaps some input checking so users know how and why their fn calls fail would be great.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'll do some more commenting to clarify |
||
| files <- list.files(file.path(path, touchstone, folder)) | ||
|
|
||
| # outcomes are assumed to be the same in all scenarios for | ||
| # a given touchstone/modelling group/disease. Pick the | ||
| # first file we find, read the header, and exclude the | ||
| # mandatory columns, to leave the outcomes. | ||
|
|
||
| first <- file.path(path, touchstone, folder, files[1]) | ||
| ds <- arrow::open_dataset(first) | ||
| outcomes <- ds$schema$names | ||
| outcomes <- outcomes[!outcomes %in% c("run_id", "disease", "year", "age", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see a candidate for a package constant... |
||
| "country", "cohort_size")] | ||
| outcomes <- sort(unique(tolower(outcomes))) | ||
|
|
||
| # Filenames are in the format Group_Scenario_Country.pq - | ||
| # split by the underscore to get scenarios | ||
|
|
||
| files <- strsplit(list.files(file.path(path, touchstone, folder)), "_") | ||
| scenarios <- unique(unlist(lapply(files, `[[`, 2))) | ||
|
|
||
| # Countries can (rarely) be different for different scenarios. | ||
| # For each scenario, find all the files in that scenario, then | ||
| # select the "Country.pq" and remove the ".pq". Then build | ||
| # a single-row data-frame, to be bound together at the end. | ||
|
|
||
| data.table::rbindlist(lapply(scenarios, function(scenario) { | ||
| matches <- files[unlist(lapply(files, `[[`, 2)) == scenario] | ||
| countries <- unique(unlist(lapply(matches, `[[`, 3))) | ||
| countries <- gsub(".pq", "", countries) | ||
|
Comment on lines
+320
to
+330
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we get a comment or a section in the fn docs with what's intended here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some comments throughout that section... |
||
| data.frame( | ||
| touchstone = touchstone, | ||
| disease = disease, | ||
| group = group, | ||
| scenario = scenario, | ||
| countries = paste0(countries, collapse = ";"), | ||
| outcomes = paste0(outcomes, collapse = ";") | ||
| ) | ||
| })) | ||
| } | ||
|
|
||
| # For a given touchstone (ie, a folder name inside the | ||
| # stochastic file share), look in that folder and find | ||
| # all the internal folder names. They will be in the form | ||
| # Disease_Group, so split by "_", explore the files in | ||
| # each folder, and bind all the results together. | ||
|
|
||
| touchstone_meta <- function(touchstone) { | ||
| entries <- list.files(file.path(path, touchstone)) | ||
| data.table::rbindlist(lapply(entries, function(x) { | ||
| xs <- strsplit(x, "_")[[1]] | ||
| explore_files(touchstone = touchstone, | ||
| folder = x, | ||
| disease = xs[1], | ||
| group = xs[2]) | ||
| })) | ||
| } | ||
|
|
||
| # Start here. Check the path exists and is writeable. If ok, it contains one | ||
| # folder per touchstone; the resulting meta.csv is the rbind of | ||
| # all the rows we get by calling touchstone_meta for each touchstone. | ||
|
|
||
| if (!dir.exists(path)) { | ||
| cli::cli_abort("Path {path} not found") | ||
| } | ||
|
|
||
| if (!checkmate::test_path_for_output(file.path(path, "not_exist.csv"))) { | ||
| cli::cli_abort("Path {path} seems non-writable") | ||
| } | ||
|
|
||
| touchstones <- basename(list.dirs(paste0(path, "/"), recursive = FALSE)) | ||
| res <- data.table::rbindlist(lapply(touchstones, touchstone_meta)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we apply this pattern -
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, why not. It's tidier... |
||
| write.csv(res, file.path(path, "meta.csv"), | ||
| row.names = FALSE, quote = FALSE) | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.