# install the orchaRd package
# install.packages("devtools")
# install.packages("remotes")
# devtools::install_github("itchyshin/orchard_plot", subdir = "orchaRd", force = TRUE, build_vignettes = TRUE)
# devtools::install_github("MathiasHarrer/dmetar")
# devtools::install_github("daniel1noble/metaAidR")
# these packages need to be obtained from GitHub, not from CRAN
# remotes::install_github("rvlenth/emmeans", dependencies = TRUE, build_opts = "")
# remotes::install_github("wviechtb/metafor")
# devtools::install_github("MathiasHarrer/dmetar")
# @TO-DO: check whether this installation is working yet
# remotes::install_github("rlesur/klippy")
The main objective of the literature survey was to capture the types of publication bias methods recently used in the field of ecology and evolution. The survey reported here was conducted alongside a larger survey on reporting standards from the PRISMA-EcoEvo project (Preferred Reporting Items for Systematic reviews and Meta-Analyses in Ecology and Evolutionary Biology); full details and materials are provided in O’Dea et al. (2021). The survey was based on 102 meta-analysis papers published between 1 January 2010 and 25 March 2019. We aimed for this sample of 102 meta-analyses to be representative of recently published meta-analyses in ecology and evolutionary biology journals.
To select the representative sample of meta-analysis publications, we first searched the Scopus database for papers with (“meta-analy*” OR “metaanaly*” OR “meta-regression”) in the title, abstract, or keywords (where the asterisk allows for any ending to the word, such as “meta-analyses” or “meta-analysis”), restricted the date to papers published after 2009, and restricted the ISSN (International Standard Serial Number) to journals classified as ‘Ecology’ and/or ‘Evolutionary Biology’ by the 2017 rankings of the ISI InCites Journal Citation Reports. On 25 March 2019 this search returned 1,668 papers from 134 journals.
Second, the returned papers were categorised as being from journals listed as ‘Ecology’, ‘Evolution’, or ‘Both’, and we reduced the number of journals and papers. To reduce the number of journals, we arranged journals in descending order of frequency (i.e. to indicate which journals published the most meta-analyses). After removing journals in more applied sub-fields (such as ecological economics), we retained the top 10 journals classified as ecology, the top 10 journals classified as evolutionary biology, and the top 11 journals classified as both ecology and evolutionary biology. The list of 31 retained journals is shown in Table S1. Next, we selected 297 studies to be screened for inclusion in the sample, using the ‘sample’ function in R (v. 3.5.1; R Core Team, 2018) to randomly select up to 17 studies from each journal classified as ‘Evolutionary Biology’ (57 studies total) or ‘Both’ (150 studies total), and up to 9 studies from each journal classified as ‘Ecology’ (90 studies total).
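The per-journal random selection could be reproduced along the following lines; this is a minimal sketch only, not the original selection script, and the objects hits_by_journal_evo_both and hits_by_journal_ecology are hypothetical stand-ins for lists of paper IDs per journal taken from the Scopus export.
# illustrative sketch of the stratified random selection of papers per journal;
# select up to 'cap' paper IDs from each journal's search hits
select_up_to <- function(ids, cap) ids[sample(seq_along(ids), min(cap, length(ids)))]
# screened_evo_both <- lapply(hits_by_journal_evo_both, select_up_to, cap = 17) # up to 17 per journal
# screened_ecology  <- lapply(hits_by_journal_ecology,  select_up_to, cap = 9)  # up to 9 per journal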
Journals screened in our search for a representative sample of meta-analyses published in ecology and evolutionary biology. ISI Classification is based on the 2017 ISI InCites Journal Citation Reports. The number of papers returned from the Scopus database search is described by ‘N hits’, while the number of studies selected for screening is described by ‘N screened’.
# getting the data and formatting some variables (turning character vectors to factors)
read_excel(here("data/TableS1.xlsx"), na = "NA") %>%
#mutate_if(is.character, as.factor) %>%
kableExtra::kbl() %>%
kableExtra::kable_paper() %>%
kableExtra::kable_styling("striped", position = "left")
ISI Classification | Full Journal Title | N hits | N screened |
---|---|---|---|
Both | Proceedings of the Royal Society of London B: Biological Sciences | 47 | 17 |
Both | Ecology and Evolution | 43 | 17 |
Both | Molecular Ecology | 35 | 17 |
Both | Journal of Evolutionary Biology | 32 | 17 |
Both | Evolution | 25 | 17 |
Both | American Naturalist | 20 | 17 |
Both | Biology Letters | 17 | 17 |
Both | Evolutionary Ecology | 14 | 14 |
Both | Nature Ecology & Evolution | 7 | 7 |
Both | Heredity | 5 | 5 |
Both | Molecular Ecology Resources | 5 | 5 |
Ecology | Global Change Biology | 135 | 9 |
Ecology | Ecology Letters | 98 | 9 |
Ecology | Oikos | 65 | 9 |
Ecology | Global Ecology and Biogeography | 51 | 9 |
Ecology | Biological Conservation | 49 | 9 |
Ecology | Oecologia | 47 | 9 |
Ecology | Conservation Biology | 43 | 9 |
Ecology | Journal of Applied Ecology | 42 | 9 |
Ecology | Journal of Ecology | 35 | 9 |
Ecology | Marine Ecology Progress Series | 35 | 9 |
Evolution | BMC Evolutionary Biology | 12 | 12 |
Evolution | Evolutionary Applications | 10 | 10 |
Evolution | Biological Journal of the Linnean Society | 9 | 9 |
Evolution | Molecular Biology and Evolution | 7 | 7 |
Evolution | Genome Biology and Evolution | 4 | 4 |
Evolution | Molecular Phylogenetics and Evolution | 4 | 4 |
Evolution | Evolutionary Bioinformatics | 3 | 3 |
Evolution | Evolutionary Biology | 3 | 3 |
Evolution | Journal of Heredity | 3 | 3 |
Evolution | Systematic Biology | 2 | 2 |
The sample of 102 meta-analysis papers met the following four inclusion criteria: (1) the study addressed a question in the fields of ecology and evolutionary biology; (2) claimed to present results from a meta-analysis; (3) performed a search for, and collected, data from the primary literature; (4) used a statistical model to analyse effect sizes that were collected from multiple studies. Paper screening was conducted in two stages. First, two authors (RO and ML) conducted parallel abstract screening. Conflicting decisions were discussed and resolved. Second, 64% of screened abstracts underwent parallel full-text screening by RO and ML, in consultation with SN.
Papers were independently assessed by seven authors (RO, DN, JK, MJ, ML, SN, and TP) as part of a larger survey. The one survey question pertaining to this publication bias project was: “Which publication bias tests are reported in the paper? (Select all that apply)”. There were 11 possible choices for respondents to select: (A) Funnel plots (including contour-enhanced funnel plots); (B) Normal quantile (QQ) plots (Wang & Bushman); (C) Correlation-based tests (e.g. Begg & Mazumdar rank correlation); (D) Regression-based tests (e.g. Egger regression and its variants); (E) File drawer numbers or fail-safe N (Rosenthal, Orwin or Rosenberg method); (F) Trim-and-fill tests; (G) P-curve, P-uniform or its variants; (H) Selection (method) models (e.g. Copas, Hedges or Iyengar & Greenhouse model); (I) Time-lag bias tests (e.g. regression or correlation on the relationship between effect sizes and time, or cumulative meta-analysis); (J) None reported; and (K) ‘Other’ methods. According to Sutton (2009) (see also Vevea et al., 2019), Methods A-D are tests for detecting publication bias, whereas Methods E-F assess the impact of publication bias.
Among the 102 assessed papers, most reported one or more tests of publication bias; 17.8% did not include any assessment of publication bias. These results suggest that tests of publication bias have become more common in recent years in ecology and evolutionary biology, as over half of the older meta-analyses assessed by Nakagawa and Santos (2012) and Koricheva and Gurevitch (2014) did not report any tests of publication bias (although our results are not directly comparable, due to the different survey methods). Still, inferential tests of publication bias remain uncommon. By far the most popular method was the funnel plot (32.4%; Table S2), with each of the remaining methods represented by fewer than 15% of papers. All methods except ‘selection models’ were present in at least one paper (‘other’ was selected for a weighted histogram used by Loydi et al. (2013); Table S2). The absence of selection model methods could be because these methods are comparatively technically challenging, or because ecologists and evolutionary biologists are not yet aware of their benefits.
Frequency with which publication bias tests were reported in the 102 meta-analysis publications, ranked in order of decreasing popularity. No tests were reported for 17.80% of papers.
# getting the data and formatting some variables (turning character vectors to factors)
read_excel(here("data/TableS2.xlsx"), na = "NA") %>%
#mutate_if(is.character, as.factor) %>%
kableExtra::kbl() %>%
kableExtra::kable_paper() %>%
kableExtra::kable_styling("striped", position = "left")
Publication Bias Test | Number of Papers Reporting Test | Proportion of Papers Reporting Test |
---|---|---|
Funnel plots | 69 | 0.324 |
 | 30 | 0.141 |
 | 25 | 0.117 |
 | 20 | 0.094 |
 | 16 | 0.075 |
 | 10 | 0.047 |
 | 3 | 0.014 |
 | 1 | 0.005 |
 | 1 | 0.005 |
Selection (method) models | 0 | 0.000 |
As in the main text, Equation 8 is as follows \[ z_{i} = \beta_{0} + \beta_{1}prec_i + e_i, \] where \(e_{i} \sim \mathcal{N}(0, \sigma_e^2)\). We can rewrite this as:
\[ \frac{y_{i}}{se_i} = \beta_{0} + \beta_{1}\left(\frac{1}{se_{i}}\right) + e_i. \] If we multiply both sides by \(se_i\), we have:
\[ y_{i} = \beta_{0}se_i + \beta_{1} + e_{i}se_{i}, \]
which is basically the same, if \(\beta_0\) and \(\beta_1\) are swapped, as:
\[ y_{i} = \beta_{0} + \beta_{1}se_i + \epsilon_i, \] where \(\epsilon_i \sim \mathcal{N}(0, v_i\phi)\) as in Equation 9. We can show this using simulated data as well (we use the data for Figures 3 and 4). Below, the first set of results is from Equation 8 and the second from Equation 9:
# creating data - the same data as Figures 3 + 4
set.seed(77777)
# setting parameters
n.effect <- 100
sigma2.s <- 0.05
beta1 <- 0.2
# using negative binomial we get a good spread of sample sizes
n.sample <- rnbinom(n.effect, mu = 30, size = 0.7) + 4
# variance for Zr
vi <- 1/(n.sample - 3)
# moderator x 1
xi <- rnorm(n.effect)
# there is an overall effect of r = 0.2 (Zr = 0.203, which we get from the `atanh` function)
Zr <- atanh(0.2) + beta1*xi + rnorm(n.effect, 0, sqrt(sigma2.s)) + rnorm(n.effect, 0, sqrt(vi))
#qplot(Zr, 1/sqrt(vi))
# data frame
dat <- data.frame(yi = Zr, vi = vi, sei = sqrt(vi), xi = xi, ni = n.sample, prec = 1 / sqrt(vi), wi = 1/vi, zi = Zr/sqrt(vi))
rows <- 1:nrow(dat)
expected <- which(1/dat$sei < 5 & dat$yi < 0.25)
unexpected <- which(1/dat$sei > 4.7 & dat$yi > 0.25)
col_point <- ifelse(rows %in% expected, "red", ifelse(rows %in% unexpected, "blue", "black"))
dat$col_point <- col_point
# data with "publication bias"
dat2 <- dat[dat$col_point != "red", ]
# demonstration that they are the same
eq_8 <- lm(zi ~ prec, data = dat2)
eq_9 <- lm(yi ~ sei, weights = wi, data = dat2)
summary(eq_8)
##
## Call:
## lm(formula = zi ~ prec, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.389 -1.011 -0.027 0.969 5.082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6657 0.4465 3.73 0.00038 ***
## prec -0.0382 0.0741 -0.52 0.60743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.73 on 73 degrees of freedom
## Multiple R-squared: 0.00363, Adjusted R-squared: -0.01
## F-statistic: 0.266 on 1 and 73 DF, p-value: 0.607
summary(eq_9)
##
## Call:
## lm(formula = yi ~ sei, data = dat2, weights = wi)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -5.389 -1.011 -0.027 0.969 5.082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0382 0.0741 -0.52 0.60743
## sei 1.6657 0.4465 3.73 0.00038 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.73 on 73 degrees of freedom
## Multiple R-squared: 0.16, Adjusted R-squared: 0.149
## F-statistic: 13.9 on 1 and 73 DF, p-value: 0.000375
As you can see above, Equation 8’s intercept is identical to the slope of Equation 9, while Equation 8’s slope is identical to Equation 9’s intercept. Note that corresponding standard errors (SE) are also the same.
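This equivalence can also be checked programmatically; the short snippet below is an addition for illustration and uses only the eq_8 and eq_9 objects fitted above.
# confirm that the intercept/slope of eq_8 equal the slope/intercept of eq_9,
# and that the corresponding standard errors agree
all.equal(unname(coef(eq_8)), unname(rev(coef(eq_9))))
all.equal(unname(coef(summary(eq_8))[, "Std. Error"]),
          unname(rev(coef(summary(eq_9))[, "Std. Error"])))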
We use the function fsn() in the R package metafor, as in this web page: https://wviechtb.github.io/metafor/reference/fsn.html.
# example data using risk ratio (not response ratio)
dat <- escalc(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat.bcg)
fsn(yi, vi, data=dat, type="Rosenthal")
##
## Fail-safe N Calculation Using the Rosenthal Approach
##
## Observed Significance Level: <.0001
## Target Significance Level: 0.05
##
## Fail-safe N: 598
# setting the target effect size as -0.1
fsn(yi, vi, data=dat, type="Orwin", target = 0.1)
##
## Fail-safe N Calculation Using the Orwin Approach
##
## Average Effect Size: -0.7407
## Target Effect Size: -0.1000
##
## Fail-safe N: 84
fsn(yi, vi, data=dat, type="Rosenberg")
##
## Fail-safe N Calculation Using the Rosenberg Approach
##
## Average Effect Size: -0.4303
## Observed Significance Level: <.0001
## Target Significance Level: 0.05
##
## Fail-safe N: 370
In this section, we provide worked examples to show how to test for publication bias in meta-analytic datasets with high heterogeneity and several layers of non-independence, including phylogenetic non-independence (a common feature of meta-analytic datasets in ecology and evolution; Senior et al. 2016, Noble et al. 2017). We used two openly available meta-analytic datasets to show how to implement our suggested multilevel meta-regression method (see main text) on correlation effect sizes (Zr: section S4.1) and effect sizes based on mean differences between two groups (SMD, lnRR: section S4.2).
# importing dataset with correlation coefficients
dataset.r.original <- read.xlsx(here("data","ft044.xlsx"), colNames = TRUE, sheet = 1)
# transforming r to Zr for the analyses, and estimating VZr for the weighting
dataset.r.original$Sample.size <- as.numeric(dataset.r.original$Sample.size)
# subset the dataset to rows with 4 or more individuals (this removes two rows: one without a sample size and one with a sample size of 3). Effect sizes based on 3 individuals cannot be analysed because VZr is then infinite (the denominator when calculating VZr is zero).
dataset.r.original <- dataset.r.original[dataset.r.original$Sample.size>3 & !(is.na(dataset.r.original$Sample.size)),]
dataset.r.original$yi <- atanh(dataset.r.original$Correlation)
dataset.r.original$vi <- 1/(as.numeric(dataset.r.original$Sample.size)-3)
# renaming some variables to make it easier
dataset.r.original$study <- dataset.r.original$Reference
dataset.r.original <- dataset.r.original[ , !names(dataset.r.original) %in% c("Reference")]
For our first worked example, we use the dataset provided by Garamszegi et al. (2012), who found support for correlations across different behavioural traits within animal populations. Here, we re-analyse the data from Garamszegi et al. (2012) by first conducting a phylogenetic multilevel intercept-only meta-analytic model and then testing for evidence of publication bias following the approaches outlined in the main text.
Since multiple animal species (n = 65) are included in this dataset, we need to consider phylogenetic non-independence in our statistical models (Chamberlain et al. 2012, Cinar et al. 2020). For that, we first search for the species in the Open Tree Taxonomy (Rees and Cranston 2017) to confirm and correct the species names, then build a phylogenetic tree for all the species included in the dataset (Figure S1) by retrieving their phylogenetic relationships from the Open Tree of Life (Hinchliff et al. 2015) using the R package rotl (Michonneau et al. 2016). Next, we estimate branch lengths following Grafen (1989) as implemented in the function ‘compute.brlen()’ of the R package ape (Paradis and Schliep 2019). Finally, we construct a phylogenetic relatedness correlation matrix that will be fitted as part of the random effect structure of our models (see below).
# fixing a typo in the original list of species names
dataset.r.original$Species <- ifelse(dataset.r.original$Species=="Acrochephalus schoenobaenus",
"Acrocephalus schoenobaenus",
dataset.r.original$Species)
# updating a name that was not being well recognized by the Open Tree Taxonomy
dataset.r.original$Species <- ifelse(dataset.r.original$Species=="Eurotestudo boettgeri",
"Testudo boettgeri",
dataset.r.original$Species)
# fixing another typo in the original list of species names
dataset.r.original$Species <- ifelse(dataset.r.original$Species=="Taenopygia guttata",
"Taeniopygia guttata",
dataset.r.original$Species)
# # obtaining dataframe listing the Open Tree identifiers potentially matching our list of species (or you can load the data below)
#taxa <- tnrs_match_names(names = unique(dataset.r.original$Species))
# # saving the taxonomic data created on the 18th of February 2021 to speed the process in the future and make the code fully reproducible if taxonomic changes are implemented in the future
# save(taxa,file = "taxa_Open_Tree_of_Life_20210218.RData")
# loading the taxonomic data (taxa) created on the 18th of February 2021
load(here("data", "taxa_Open_Tree_of_Life_20210218.RData"))
# # check approximate matches
# taxa[taxa$approximate_match==TRUE,]
#
# # check synonyms matches
# taxa[taxa$is_synonym==TRUE,]
#
# # check number of matches
# taxa[taxa$number_matches>1,]
# # some further checks
# ott_id_tocheck <- taxa[taxa$number_matches != 1,"ott_id"]
# for(i in 1:length(ott_id_tocheck)){
# print(inspect(taxa, ott_id = ott_id_tocheck[i]))
# }
# all phylogenetic data seems in order now
# however, the ott_id for Parma unifasciata cannot be found when retrieving the phylogenetic relationships, so to get around this we are going to use the ott_id of another Parma species, Parma oligolepis (ott_id = 323186). In this case it is fine because we only include two species belonging to the genus Parma, and therefore, the phylogenetic relationship will be the same for our purposes
# tnrs_match_names(names = 'Parma oligolepis')
# taxa[taxa$unique_name=="Parma unifasciata","ott_id"] <- 323186
# # retrieving phylogenetic relationships among taxa in the form of a trimmed sub-tree (you can simply load the tree at the bottom)
# tree <- tol_induced_subtree(ott_ids = taxa[["ott_id"]], label_format = "name")
# # we need to check for the existence of polytomies
# is.binary.tree(tree) # No polytomies, so we can proceed.
# to confirm that our tree covers all the species we wanted it to include, and make sure that the species names in our database match those in the tree, we use the following code
# tree$tip.label <- gsub("_"," ", tree$tip.label)
# intersect(as.character(tree$tip.label), as.character(dataset.r.original$Species))
# setdiff(as.character(dataset.r.original$Species), as.character(tree$tip.label)) #listed in our database but not in the tree
# setdiff(as.character(tree$tip.label),as.character(dataset.r.original$Species)) # listed in the tree but not in our database
# All but Pan and Parma oligolepis are the same species, the "problem" is that synonyms have been used in the tree. We are going to leave all the names as in Open Tree of Life except Pan and Parma oligolepis, which we are going to substitute by Pan troglodytes and Parma unifasciata
# # we start by fixing the following names in the tree
# tree$tip.label[tree$tip.label=="Pan"]<-"Pan troglodytes"
# tree$tip.label[tree$tip.label=="Parma oligolepis"]<-"Parma unifasciata"
# setdiff(as.character(dataset.r.original$Species), as.character(tree$tip.label)) #listed in our database but not in the tree
# setdiff(as.character(tree$tip.label),as.character(dataset.r.original$Species)) # listed in the tree but not in our database
# changing the names in our database to follow those in the tree. We are creating a new Species.updated variable so that it is clear that this list of Species is an updated version compared to the original one
dataset.r.original$Species.updated <- dataset.r.original$Species
dataset.r.original$Species.updated <- ifelse(dataset.r.original$Species.updated=="Carduelis chloris",
"Chloris chloris",
dataset.r.original$Species.updated)
dataset.r.original$Species.updated <- ifelse(dataset.r.original$Species.updated=="Dendroica pensylvaniaca",
"Setophaga pensylvanica",
dataset.r.original$Species.updated)
dataset.r.original$Species.updated <- ifelse(dataset.r.original$Species.updated=="Sylvia melanocephala",
"Curruca melanocephala",
dataset.r.original$Species.updated)
# we are also updating the original Species variable since we have to fit both Species and Species.updated in our models (see below)
dataset.r.original$Species <- dataset.r.original$Species.updated
# setdiff(as.character(dataset.r.original$Species.updated), as.character(tree$tip.label)) #listed in our database but not in the tree
# setdiff(as.character(tree$tip.label),as.character(dataset.r.original$Species.updated)) # listed in the tree but not in our database
# all in order
# we can now save the tree
#save(tree, file = "tree_20211802.Rdata")
dataset.r <- dataset.r.original
# load the saved tree (tree)
load(here("data","tree_20211802.Rdata"))
# compute branch lengths of tree
phylo_branch <- compute.brlen(tree, method = "Grafen", power = 1)
# # check tree is ultrametric
# is.ultrametric(phylo_branch) # TRUE
# matrix to be included in the models
phylo_cor <- vcv(phylo_branch, cor = T)
plot(tree, type = "fan", cex=0.65, label.offset =.05, no.margin = TRUE) #check: https://www.rdocumentation.org/packages/ape/versions/5.3/topics/plot.phylo
Before running any meta-analytic model, it is important to explore the meta-analytic dataset to search for potential outliers that could represent data extraction errors. To do so, we can draw funnel plots with the standard error and the inverse of the standard error (i.e. precision) on the y-axes (Figure S4.2). From these funnel plots, we conclude that no clear outliers seem to be present in this meta-analytic dataset.
par(mfrow = c(1, 2))
funnel(dataset.r$yi, dataset.r$vi, yaxis="sei",
#xlim = c(-3, 3),
xlab = "Effect size (Zr)", digits = 2, las = 1)
funnel(dataset.r$yi, dataset.r$vi, yaxis="seinv",
#xlim = c(-3, 3),
xlab = "Effect size (Zr)", digits = 2, las = 1)
Now that we have created a matrix with the phylogenetic relationships among species and have confirmed that no outliers seem to be present, we can then use the dataset from Garamszegi et al. (2012) to provide a worked example on how to test for publication bias using the multilevel meta-regression method suggested in the main text. First, let’s have a look at the detailed results of a multilevel phylogenetic meta-analytic (intercept-only) model (Table S4.1), including the corresponding funnel plot showing the overall effect or meta-analytic mean (Figure S4.3).
# creating a unit-level random effect to model residual variance in metafor
dataset.r$obsID <- 1:nrow(dataset.r)
# running multilevel intercept-only meta-analytic model
meta.analysis.model.r <- rma.mv(yi, vi,
mods=~1,
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z because t is a better, more conservative distribution than z
data=dataset.r)
# print(meta.analysis.model.r, digits=3)
# extracting the mean, 95% confidence intervals and 95% prediction intervals
metaanalytic.mean.model.r <- predict(meta.analysis.model.r, digits=3)
# estimating I2 as measure of heterogeneity
I2.model.r <- i2_ml(meta.analysis.model.r, method = "matrix")
# creating a data.frame with the meta-analytic mean and heterogeneity estimates
table.model.r <- data.frame(n=length(unique(dataset.r$study)),
k=nrow(dataset.r),
mean=round(metaanalytic.mean.model.r[[1]],2),
CI=paste0("[",round(metaanalytic.mean.model.r[[3]],2),",",round(metaanalytic.mean.model.r[[4]],2),"]"),
PI=paste0("[",round(metaanalytic.mean.model.r[[5]],2),",",round(metaanalytic.mean.model.r[[6]],2),"]"),
I2_obsID=round(I2.model.r[["I2_obsID"]],1),
I2_paperID=round(I2.model.r[["I2_study"]],1),
I2_nonphylo=round(I2.model.r[["I2_Species"]],1),
I2_phylo=round(I2.model.r[["I2_Species.updated"]],1), # note: i2_ml() already returns percentages, so no further scaling is needed
I2_total=round(I2.model.r[["I2_Total"]],1)
)
# creating a nicely-formatted table using the R package 'gt'
table.model.r.gt <- table.model.r %>%
gt::gt() %>%
cols_label(n=md("**n**"),
k=md("**k**"),
mean=md("**Meta-analytic mean**"),
CI=md("**95% CI**"),
PI=md("**95% PI**"),
I2_obsID=md("***I*<sup>2</sup><sub>residual</sub>\n(%)**"),
I2_paperID=md("***I*<sup>2</sup><sub>study</sub>\n(%)**"),
I2_nonphylo=md("***I*<sup>2</sup><sub>non-phylogeny</sub>\n(%)**"),
I2_phylo=md("***I*<sup>2</sup><sub>phylogeny</sub>\n(%)**"),
I2_total=md("***I*<sup>2</sup><sub>total</sub> \n(%)**"),
) %>%
cols_align(align = "center") %>%
tab_source_note(source_note = md("n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; *I*<sup>2</sup> = heterogeneity")) #%>%
# tab_options(table.width=770)
table.model.r.gt
n | k | Meta-analytic mean | 95% CI | 95% PI | I2residual (%) | I2study (%) | I2non-phylogeny (%) | I2phylogeny (%) | I2total (%) |
---|---|---|---|---|---|---|---|---|---|
88 | 104 | 0.29 | [0.17,0.41] | [-0.25,0.83] | 15.9 | 29.5 | 20.9 | 8.0 | 74.3 |
n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; I2 = heterogeneity |
Table S4.1. Results of the phylogenetic multilevel intercept-only meta-analysis testing the relationship between behaviours across species. Estimates are presented as standardized effect sizes using Fisher’s transformation of the correlation coefficient (i.e. Zr).
funnel(meta.analysis.model.r, yaxis="seinv",
xlab = "Effect size (Zr)")
To test for publication bias, we can first fit a phylogenetic multilevel meta-regression to explore whether there is some evidence of small-study effects in our meta-analytic dataset. To do so, we fit a uni-moderator phylogenetic multilevel meta-regression including the effect sizes’ standard errors (SE or sei) as the only moderator (see Equation 21 from the main text). This meta-regression will provide some information about the existence of small-study effects (i.e. asymmetry in the distribution of effect sizes), but the best evidence for small-study effects will come from the all-in publication bias test (multi-moderator meta-regression) described in section S4.1.4.3, since this multi-moderator meta-regression will test whether asymmetry exists after accounting for the heterogeneity explained by all the included moderators (more in the main text).
# creating a variable for the standard error of each effect size (i.e. the square root of the sampling variance, see Figure 1 from the main text)
dataset.r$sei <- sqrt(dataset.r$vi)
# Application of Equation 21 from the main text
publication.bias.model.r.se <- rma.mv(yi, vi,
mod =~1+sei,
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z
data=dataset.r)
# print(publication.bias.model.r.se,digits=3)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.r.se <- estimates.CI(publication.bias.model.r.se)
According to this uni-moderator meta-regression, there is some indication of small-study effects since the slope of the moderator ‘SE’ is close to statistical significance (slope = 0.75, 95% CI = [-0.01,1.51]; R2marginal = 13.4%), showing that effect sizes with larger SE (more uncertain effect sizes) tend to be larger. Nonetheless, as mentioned above, we will confirm the existence of small-study effects after accounting for some of the heterogeneity present in the data using the all-in publication bias test (multi-moderator meta-regression) described in section S4.1.4.3.
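The marginal R2 value quoted above is not computed in the chunks shown here; one way to obtain it (a sketch, assuming the loaded orchaRd version provides r2_ml(), as it does the i2_ml() helper used earlier) would be:
# marginal R2 for the SE-only meta-regression: the proportion of variance
# explained by the fixed effect(s); r2_ml() also returns the conditional R2
R2.publication.bias.model.r.se <- orchaRd::r2_ml(publication.bias.model.r.se)
round(R2.publication.bias.model.r.se, 3)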
To test for time-lag bias (also called decline effects) we can first fit a uni-moderator phylogenetic multilevel meta-regression including the year of publication (mean-centred) as the only moderator (see Equation 23 from the main text). The estimated slope for year of publication will provide some evidence on whether effect sizes have changed linearly over time since the first effect size was published; however, as above, the best evidence for such decline effects will come from the all-in publication bias test (multi-moderator meta-regression) described in section S4.1.4.3, since this multi-moderator meta-regression will test for the existence of decline effects after accounting for the heterogeneity explained by all the included moderators (more in the main text).
# creating a variable with the year of publication
dataset.r$year <- as.numeric(str_extract(dataset.r$study, "(\\d)+"))
# mean-centring year of publication to help with interpretation, particularly in S4.1.4.3.
dataset.r$year.c <- as.vector(scale(dataset.r$year, scale = F))
# Application of Equation 23 from the main manuscript
publication.bias.model.r.timelag <- rma.mv(yi, vi,
mods=~1+year.c,
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z
data=dataset.r)
# summary(publication.bias.model.r.timelag)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.r.timelag <- estimates.CI(publication.bias.model.r.timelag)
According to this uni-moderator meta-regression, there is no indication of decline effects since the slope of the moderator ‘year of publication’ is essentially zero (slope = 0, 95% CI = [-0.01,0.01]; R2marginal = 0.3%). Nonetheless, as mentioned above, we need to confirm this pattern after accounting for some of the heterogeneity present in the data using the all-in publication bias test (multi-moderator meta-regression) described in section S4.1.4.3.
When heterogeneity exists (which is normally the case in ecology and evolution; Senior et al. 2016), it is best to combine Equations 21 and 23 with other moderators, since those additional moderators will generally be expected to explain some of the heterogeneity. That is, this all-in publication bias test (multi-moderator meta-regression) would be the best test of small-study effects (publication bias) and decline effects (time-lag bias) in most meta-analytic datasets. For our data, we will run a multi-moderator phylogenetic multilevel meta-regression including the effect sizes’ standard errors, the year of publication (mean-centred) and a moderator originally included in Garamszegi et al. (2012), ‘captivity status’ (CaptivityC). Although Garamszegi et al. (2012) included other moderators in their model (i.e. species, taxonomic class, spatial overlap, age, sex and season), we decided to focus only on those moderators that were available for the entire dataset (i.e. no NAs or missing data; see Table 2 in Garamszegi et al. 2012), and we also excluded ‘species’ (and ‘taxonomic class’) since species effects are already captured by the random effect and correlation matrix in our phylogenetic approach.
# Application of Equation 24 from the main manuscript
publication.bias.model.r.all.se <- rma.mv(yi, vi,
mods=~1+
sei+
year.c+
CaptivityC,
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z
data=dataset.r)
#summary(publication.bias.model.r.all.se)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.r.all.se <- estimates.CI(publication.bias.model.r.all.se)
The all-in publication bias test agrees with what we observed in the uni-moderator meta-regressions above (sections S4.1.4.1 and S4.1.4.2). First, the multi-moderator meta-regression shows a statistically significant slope for the moderator ‘SE’ (slope = 0.82, 95% CI = [0.03,1.61]; Figure S4.4), providing evidence of small-study effects. In other words, the largest effect sizes in the dataset tend to be those with the lowest precision (i.e. larger uncertainty; Figure S4.4). This result agrees with the findings originally reported in Garamszegi et al. (2012), where the authors found evidence of publication bias after applying a simple correlation method proposed by Begg and Mazumdar (1994) and the trim-and-fill method (Duval and Tweedie 2000a,b), although bear in mind that we do not recommend using those simpler methods for meta-analytic datasets with multiple levels of non-independence and high heterogeneity (more in the main text; see the alternative in section S5). Second, the all-in publication bias test also confirms that there is no evidence of decline effects in the data, since the slope of the moderator ‘year of publication’ was again indistinguishable from zero (slope = 0.12, 95% CI = [-0.09,0.33]; Figure S4.5). Overall, the moderators included in this multi-moderator meta-regression explained a total of 16.5% (i.e. R2marginal) of the heterogeneity observed in the meta-analytic model (see Table S4.1).
# predicting effect sizes for a range of SE from the minimum observed in the data to the maximum observed in data, while year is kept at its mean value, in this case 0 since year was mean-centred
# condition on FC
predict.publication.bias.model.r.all.se.plot.1 <- predict(publication.bias.model.r.all.se,
newmods=cbind(seq(min(dataset.r$sei),
max(dataset.r$sei),
0.005),
c(0), 0, 0, 0, 0))
newdat <- data.frame(sei=seq(min(dataset.r$sei),
max(dataset.r$sei),
0.005),
fit=predict.publication.bias.model.r.all.se.plot.1$pred,
upper=predict.publication.bias.model.r.all.se.plot.1$ci.ub,
lower=predict.publication.bias.model.r.all.se.plot.1$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.r$sei
yaxis <- dataset.r$yi
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n",
ylim=c(-2.25,2.25),
xlim=c(0,1))
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(0,1,0.1),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-2.5,2.25,0.5),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "standard error (SE)",
ylab = "effect size (Zr)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=1)
lines(newdat$sei, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$sei,rev(newdat$sei)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
# predicting effect sizes for a range of years from the minimum observed in the data to the maximum observed in data, while SE is kept at its mean value. Keep in mind that alternative ways could be used, for example, one could use median-centring rather than mean-centring.
# conditioned on FC
predict.publication.bias.model.r.all.se.plot.2 <- predict(publication.bias.model.r.all.se,
newmods=cbind(mean(dataset.r$sei),
seq(min(dataset.r$year.c),
max(dataset.r$year.c),
0.25),0, 0,0,0))
newdat <- data.frame(year=seq(min(dataset.r$year),
max(dataset.r$year),
0.25),
fit=predict.publication.bias.model.r.all.se.plot.2$pred,
upper=predict.publication.bias.model.r.all.se.plot.2$ci.ub,
lower=predict.publication.bias.model.r.all.se.plot.2$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.r$year
yaxis <- dataset.r$yi
cex.study <- (1/dataset.r$sei)/4
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n",
ylim=c(-2.25,2.25),
xlim=c(min(dataset.r$year),max(dataset.r$year)))
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(min(dataset.r$year),max(dataset.r$year),5),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-2.5,2.25,0.5),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "year of publication",
ylab = "effect size (Zr)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=cex.study)
lines(newdat$year, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$year,rev(newdat$year)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
Once we have confirmed that there is evidence of small-study effects (see above), the next step in our suggested two-step approach is to calculate the adjusted overall effect size, that is, the overall effect once we account for publication bias (see the main text).
In a simpler meta-regression model (with ‘SE’ as the only moderator), the best estimate for the adjusted overall effect size would be the intercept of that model, when the intercept is not significant (i.e. Stanley and Doucouliagos 2012, 2014; more in the main text; see also our corrigendum). Note that Stanley (2017) suggests using a significance level of 10% to assess the intercept’s statistical significance. In a complex model with categorical variables, it is difficult to decide what the intercept represents. Nonetheless, the models above suggest that there are indications of some effects, at least in some categories. Therefore, in the all-in publication bias test, our approach to estimating the adjusted overall effect is to fit a multi-moderator meta-regression similar to the one above, but including the effect sizes’ sampling variances (SV or vi) instead of their standard errors as a moderator. This is because, when there is a (true) overall effect, the use of ‘SV’ leads to an adjusted overall effect that is less downward-biased than when using ‘SE’ (more in the main text; Stanley and Doucouliagos 2012, 2014). Note that when there is only weak evidence for an overall effect, fitting ‘SE’ gives the best estimate of the adjusted effect.
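For the simpler uni-moderator case described above, a minimal sketch of the second step would be to refit the SE-only model from section S4.1.4.1 with vi in place of sei and read off its intercept; the object name adj.vi.model below is new, but the random-effect structure mirrors the models fitted above.
# refit the uni-moderator model with the sampling variance (vi) as the moderator;
# its intercept is then the candidate bias-adjusted overall effect
adj.vi.model <- rma.mv(yi, vi, mods = ~ 1 + vi,
                       random = list(~ 1 | Species.updated, # phylo effect
                                     ~ 1 | Species,         # non-phylo effect
                                     ~ 1 | study,
                                     ~ 1 | obsID),
                       R = list(Species.updated = phylo_cor),
                       method = "REML", test = "t", data = dataset.r)
# adjusted overall effect (intercept) with its confidence interval
coef(summary(adj.vi.model))["intrcpt", ]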
Which moderators one should include in this version of the all-in publication bias test depends on what we want the adjusted overall effect size to reflect. Keep in mind that one of the purposes of including moderators in this all-in test is to explain as much heterogeneity as possible. We are going to add one more moderator, ‘captivity status’ (CaptivityC), which has 5 levels (we refer to this as the full model). We will obtain marginalized means using the R package emmeans. Importantly, publication bias tests should be treated as sensitivity analyses that probe the robustness of the results of the original analysis (Noble et al., 2017), and none of the estimated adjusted overall effects should be treated as the real overall value that would exist in the absence of publication bias; after all, we can never be sure of what has not been published.
# full model
publication.bias.model.r.v.1 <- rma.mv(yi, vi,
mods=~ 1 + vi +
year.c +
CaptivityC, # take Class - that is already a part of phylo
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z
data=dataset.r)
# preparation to get marginalized mean (when vi = 0 and year.c = 0)
res.publication.bias.model.r.v.1 <- qdrg(object = publication.bias.model.r.v.1, data = dataset.r, at = list(vi = 0, year.c = 0))
# marginalized overall mean at vi = 0 and year.c = 0; weights = "prop" (or "cells") averages over the CaptivityC levels in proportion to their frequencies; if not specified, all levels get equal weight
overall.res.publication.bias.model.r.v.1 <- emmeans(res.publication.bias.model.r.v.1, specs = ~1, df = publication.bias.model.r.v.1$ddf, weights = "prop") # using effect size - 7
# marginalised means for different levels for CaptivityC
mm.publication.bias.model.r.v.1 <- emmeans(res.publication.bias.model.r.v.1, specs = "CaptivityC", df = publication.bias.model.r.v.1$ddf)
# comparing results without correcting for publication bias
publication.bias.model.r.v.1b <- rma.mv(yi, vi,
mods=~ -1 +
CaptivityC, # take Class - that is already a part of phylo
random=list(~ 1 | Species.updated, # phylo effect
~ 1 | Species, # non-phylo effect
~ 1 | study,
~ 1 | obsID),
R = list(Species.updated = phylo_cor), #phylogenetic relatedness
method="REML",
test="t", # using t dist rather than z
data=dataset.r)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.r.v.1 <- estimates.CI2(mm.publication.bias.model.r.v.1)
estimates.publication.bias.model.r.v.1b <- estimates.CI(publication.bias.model.r.v.1b)
The full model provided an adjusted overall effect size that was considerably smaller (adjusted effect = 0.01, 95% CI = [0.03,97]) than that obtained by the multilevel meta-analysis (unadjusted effect = 0.29, 95% CI = [0.17,0.41]; more in section S4.1.3), highlighting the downward correction of the overall effect after accounting for publication bias. The full model also showed smaller effects for all levels of the categorical factor ‘captivity status’ than the unadjusted model (Table S4.2).
# creating a table to show the results of model 2
table.comparing.captivity.levels <- merge(estimates.publication.bias.model.r.v.1,
estimates.publication.bias.model.r.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.captivity.levels <- table.comparing.captivity.levels %>% mutate(across(where(is.numeric), round, 2))
table.comparing.captivity.levels <- data.frame(captivity.level = table.comparing.captivity.levels[,1],
adjusted.mean=table.comparing.captivity.levels[,2], adjusted.CI=paste0("[",table.comparing.captivity.levels[,3],",",table.comparing.captivity.levels[,4],"]"),
unadjusted.mean=table.comparing.captivity.levels[,5], unadjusted.CI=paste0("[",table.comparing.captivity.levels[,6],",",table.comparing.captivity.levels[,7],"]"))
table.comparing.captivity.levels[,1] <- c("FC","FW","WCT","WCT & FC", "WCT & FW")
# creating a formatted table using the R package 'gt'
table.model.r.captivity.gt <- table.comparing.captivity.levels %>%
gt() %>%
cols_label(captivity.level=md("**Captivity status**"),
adjusted.mean=md("**Adjusted mean**"),
adjusted.CI=md("**Adjusted 95% CI**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**")) %>%
cols_align(align = "center")
table.model.r.captivity.gt
Captivity status | Adjusted mean | Adjusted 95% CI | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|
FC | 0.00 | [0,0] | 0.28 | [0.15,0.41] |
FW | -0.01 | [-0.24,0.22] | 0.27 | [0.05,0.49] |
WCT | 0.03 | [-0.12,0.18] | 0.30 | [0.16,0.45] |
WCT & FC | 0.18 | [-0.17,0.53] | 0.43 | [0.09,0.78] |
WCT & FW | -0.12 | [-0.39,0.15] | 0.16 | [-0.12,0.43] |
Table S4.2. Results for all levels of the moderator ‘captivity status’, both adjusted and unadjusted for publication bias using the all-in publication bias test (multi-moderator meta-regression). Estimates are presented as standardized effect sizes using Fisher’s transformation of the correlation coefficient (i.e. Zr).
For our second worked example, we use the meta-analytic dataset provided by Murphy et al. (2013). This meta-analysis tested whether human-caused disturbances led to a change in species richness across both terrestrial and aquatic biomes, using the log response ratio (i.e. lnRR). Briefly, the authors found that human-mediated disturbances led to a statistically significant 18.3% decline in species richness across five anthropogenic disturbances (species invasions, nutrient addition, temperature increase, habitat loss or fragmentation, and land-use change). Here, we use this dataset to show how to use the multilevel meta-regression approach to both detect and adjust for publication bias.
# importing dataset with mean differences
dataset.original.mean.diff<-read_excel(here("data", "ft027.xlsx"), col_names = TRUE)
#names(dataset.original.mean.diff)
#str(dataset.original.mean.diff)
# lnRR
dataset.rr <- escalc(measure = "ROM",
m1i = T_mean,
m2i = C_mean,
sd1i = T_sd,
sd2i = C_sd,
n1i = T_N,
n2i = C_N,
data = dataset.original.mean.diff,
append = T)
dataset.smd <- escalc(measure = "SMD",
m1i = T_mean,
m2i = C_mean,
sd1i = T_sd,
sd2i = C_sd,
n1i = T_N,
n2i = C_N,
data = dataset.original.mean.diff,
append = T)
#Warning message:
#Some 'yi' and/or 'vi' values equal to +-Inf. Recoded to NAs.
# clean NA
dataset.rr <- dataset.rr[!is.na(dataset.rr$yi) & dataset.rr$vi != 0, ]
dataset.smd <- dataset.smd[!is.na(dataset.smd$yi) & dataset.smd$vi != 0, ]
Before analysis, it is important to explore the meta-analytic dataset. To identify outliers (which are sometimes caused by data extraction errors or typos in the data source), Figure S4.6 shows funnel plots with the standard error and the inverse of the standard error (i.e. precision) on the y-axes.
# we find some strange data in SMD and lnRR
par(mfrow = c(1, 2))
funnel(dataset.rr$yi, dataset.rr$vi, yaxis="seinv",
#xlim = c(-3, 3),
ylab = "Precision (1/SE)",
xlab = "Effect size (lnRR)")
funnel(dataset.smd$yi, dataset.smd$vi, yaxis="seinv",
#xlim = c(-3, 3),
ylab = "Precision (1/SE)",
xlab = "Effect size (SMD)")
Data exploration revealed unusual data points (outliers) in both the lnRR and SMD data. For this example, we simply removed these outliers. However, for a real meta-analysis, the source of such outliers should be investigated, and the decision about whether or not to remove unusual effect sizes should be justified and reported in the paper. Next, we checked the funnel plots again (Figure S4.7); this time, the funnel plots looked more typical.
Funnel plots with 1/SE as the measure of uncertainty for lnRR (left-hand side) and SMD (right-hand side) after removing outliers in the meta-analytic dataset.
# removing outliers
dataset.rr <- dataset.rr[dataset.rr$vi != min(dataset.rr$vi), ]
dataset.smd <- dataset.smd[dataset.smd$yi != min(dataset.smd$yi),]
par(mfrow = c(1, 2))
funnel(dataset.rr$yi, dataset.rr$vi, yaxis="seinv",
#xlim = c(-3, 3),
ylab = "Precision (1/SE)",
xlab = "Effect size (lnRR)")
funnel(dataset.smd$yi, dataset.smd$vi, yaxis="seinv",
#xlim = c(-3, 3),
ylab = "Precision (1/SE)",
xlab = "Effect size (SMD)")
To avoid ‘artefactual’ funnel asymmetry, we suggest using the “effective sample size” rather than SE for mean difference effect sizes, such as lnRR and SMD. Let’s check the funnel plots (Figure S4.8) which use effective sample size on the y-axis.
# calculating "effective sample size" to account for unbalanced sampling, for SMD and lnRR (e_n, hereafter, see Equation 25 from the main manuscript)
dataset.rr$e_n <- with(dataset.rr, (4*(C_N*T_N)) / (C_N + T_N))
dataset.smd$e_n <- with(dataset.smd, (4*(C_N*T_N)) / (C_N + T_N))
#using effective sampling size
par(mfrow = c(1, 2))
funnel(dataset.rr$yi, dataset.rr$vi, ni = dataset.rr$e_n, yaxis="ni",
#xlim = c(-3, 3),
ylab = "Effective sample size",
xlab = "Effect size (lnRR)")
funnel(dataset.smd$yi, dataset.smd$vi, ni = dataset.smd$e_n, yaxis="ni",
#xlim = c(-3, 3),
ylab = "Effective sample size",
xlab = "Effect size (SMD)")
Comparing the funnel plots using effective sample size (Figure S4.8) with those using 1/SE (Figure S4.7), we notice that the data points in the funnel plots with effective sample sizes look more ‘scattered’, whereas the data points in the funnel plots with 1/SE look more ‘crowded’. This is because SE is more strongly correlated with the effect size than the effective sample size is, owing to similarities between the effect size and sampling variance equations for both lnRR and SMD (i.e. compare Equations 1 and 2, and Equations 3 and 4, in the main text). For example, the sampling variance of SMD contains the square of the point estimate (i.e. SMD) in its formula, which induces a correlation between SMD and its SE. This can result in ‘artefactual’ funnel asymmetry.
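This difference can be checked directly with the objects created above; the short snippet below is added here for illustration only and compares the strength of association between the effect sizes and the two uncertainty measures.
# absolute correlations between effect sizes and (i) SE and (ii) sqrt(1/effective n);
# the SE-based correlation is expected to be stronger, especially for SMD,
# because the SE formula contains the effect size itself
abs(cor(dataset.smd$yi, sqrt(dataset.smd$vi)))
abs(cor(dataset.smd$yi, sqrt(1 / dataset.smd$e_n)))
abs(cor(dataset.rr$yi, sqrt(dataset.rr$vi)))
abs(cor(dataset.rr$yi, sqrt(1 / dataset.rr$e_n)))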
Next, we showcase how to test for the presence of publication bias using the multilevel meta-regression method suggested in the main text. First, we re-analyse the dataset using a multilevel meta-analytic (intercept-only) model and make corresponding funnel plot showing the overall effect or meta-analytic mean.
# creating a unit-level random effect to model residual variance in metafor
dataset.rr$obsID <- 1:nrow(dataset.rr)
dataset.smd$obsID <- 1:nrow(dataset.smd)
# running multilevel intercept-only meta-analytic model
meta.analysis.model.rr <- rma.mv(yi, vi,
mods = ~ 1,
random = list(~1 | Disturbance_category,
~ 1 | paperID,
~ 1 | obsID),
method = "REML",
test = "t",
data = dataset.rr)
meta.analysis.model.smd <- rma.mv(yi, vi,
mods = ~ 1,
random = list(~1 | Disturbance_category,
~ 1 | paperID,
~ 1 | obsID),
method = "REML",
test = "t",
data = dataset.smd)
# extracting the mean, 95% confidence intervals and 95% prediction intervals
metaanalytic.mean.model.rr <- predict(meta.analysis.model.rr, digits=3)
metaanalytic.mean.model.smd <- predict(meta.analysis.model.smd, digits=3)
# estimating relative heterogeneity I2
I2.model.rr <- i2_ml(meta.analysis.model.rr)
I2.model.smd <- i2_ml(meta.analysis.model.smd)
# creating a table to show the heterogeneity estimates
table.model.rr <- data.frame(n=length(unique(dataset.rr$paperID)),
k=nrow(dataset.rr),
mean=round(metaanalytic.mean.model.rr[[1]],2),
CI=paste0("[",round(metaanalytic.mean.model.rr[[3]],2),",",round(metaanalytic.mean.model.rr[[4]],2),"]"),
PI=paste0("[",round(metaanalytic.mean.model.rr[[5]],2),",",round(metaanalytic.mean.model.rr[[6]],2),"]"),
I2_obsID=round(I2.model.rr[[4]],1),
I2_paperID=round(I2.model.rr[[3]],1),
I2_Disturbance=round(I2.model.rr[[2]],1),
I2_total=round(I2.model.rr[[1]],1))
rownames(table.model.rr) <- NULL
#write.csv(table.model.RR, "./table/table.model.RR.csv", row.names = FALSE)
table.model.smd <- data.frame(n=length(unique(dataset.smd$paperID)),
k=nrow(dataset.smd),
mean=round(metaanalytic.mean.model.smd[[1]],2),
CI=paste0("[",round(metaanalytic.mean.model.smd[[3]],2),",",round(metaanalytic.mean.model.smd[[4]],2),"]"),
PI=paste0("[",round(metaanalytic.mean.model.smd[[5]],2),",",round(metaanalytic.mean.model.smd[[6]],2),"]"),
I2_obsID=round(I2.model.smd[[4]],1),
I2_paperID=round(I2.model.smd[[3]],1),
I2_Disturbance=round(I2.model.smd[[2]],1),
I2_total=round(I2.model.smd[[1]],1))
rownames(table.model.smd) <- NULL
#write.csv(table.model.SMD, "./table/table.model.SMD.csv", row.names = FALSE)
# creating a formatted table using the R package 'gt'
table.model.rr.gt <- table.model.rr %>%
gt::gt() %>%
cols_label(n=md("**n**"),
k=md("**k**"),
mean=md("**Meta-analytic mean**"),
CI=md("**95% CI**"),
PI=md("**95% PI**"),
I2_obsID=md("***I*<sup>2</sup><sub>residual</sub> (%)**"),
I2_paperID=md("***I*<sup>2</sup><sub>study</sub> (%)**"),
I2_Disturbance=md("***I*<sup>2</sup><sub>Disturbance</sub> (%)**"),
I2_total=md("***I*<sup>2</sup><sub>total</sub> (%)**")) %>%
cols_align(align = "center") %>%
tab_source_note(source_note = md("n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; *I*<sup>2</sup> = heterogeneity"))
table.model.rr.gt
n | k | Meta-analytic mean | 95% CI | 95% PI | I2residual (%) | I2study (%) | I2Disturbance (%) | I2total (%) |
---|---|---|---|---|---|---|---|---|
243 | 325 | -0.16 | [-0.24,-0.09] | [-0.89,0.56] | 57.9 | 32.7 | 4 | 94.6 |
n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; I2 = heterogeneity |
table.model.smd.gt <- table.model.smd %>%
gt() %>%
cols_label(n=md("**n**"),
k=md("**k**"),
mean=md("**Meta-analytic mean**"),
CI=md("**95% CI**"),
PI=md("**95% PI**"),
I2_obsID=md("***I*<sup>2</sup><sub>residual</sub> (%)**"),
I2_paperID=md("***I*<sup>2</sup><sub>study</sub> (%)**"),
I2_Disturbance=md("***I*<sup>2</sup><sub>Disturbance</sub> (%)**"),
I2_total=md("***I*<sup>2</sup><sub>total</sub> (%)**")) %>%
cols_align(align = "center") %>%
tab_source_note(source_note = md("n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; *I*<sup>2</sup> = heterogeneity"))
table.model.smd.gt
n | k | Meta-analytic mean | 95% CI | 95% PI | I2residual (%) | I2study (%) | I2Disturbance (%) | I2total (%) |
---|---|---|---|---|---|---|---|---|
240 | 321 | -0.49 | [-0.64,-0.35] | [-2.54,1.55] | 27.7 | 57.3 | 0 | 85 |
n = number of studies; k = number of effects; CI = confidence interval; PI = prediction interval; I2 = heterogeneity |
Table S4.3. Results of the multilevel intercept-only meta-analysis testing the effects of human-caused disturbances on changes in species richness across both terrestrial and aquatic biomes. Estimates are presented as standardized effect sizes using the log response ratio (i.e. lnRR, top) and standardized mean difference (i.e. SMD, bottom).
To test for publication bias (evidence of small-study effects), we first fitted a uni-moderator multilevel meta-regression including the square root of the inverse of the effective sample size \(\sqrt{\frac{1}{\tilde{n}}}\) (Equation 26) as the only moderator (see Equation 27 from the main text). This meta-regression will provide some information about the existence of small-study effects (i.e. asymmetry in the distribution of effect sizes) after accounting for non-independence. However, as we explained in the main text, the best evidence comes from the approach that models both heterogeneity and non-independence, by using a multi-moderator multilevel meta-regression model that includes all the important moderators (see the all-in publication bias test described in section S4.2.4.3).
# calculating the inverse of the "effective sample size" to account for unbalanced sampling, for SMD and lnRR (see Equation 25 from the main manuscript)
dataset.rr$inv_n_tilda <- with(dataset.rr, (C_N + T_N)/(C_N*T_N))
dataset.rr$sqrt_inv_n_tilda <- with(dataset.rr, sqrt(inv_n_tilda))
dataset.smd$inv_n_tilda <- with(dataset.smd, (C_N + T_N)/(C_N*T_N))
dataset.smd$sqrt_inv_n_tilda <- with(dataset.smd, sqrt(inv_n_tilda))
# Application of Equation 27 from the main manuscript
publication.bias.model.rr.srin <- rma.mv(yi, vi,
mods= ~ 1 + sqrt_inv_n_tilda,
random=list(~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.rr)
publication.bias.model.smd.srin <- rma.mv(yi, vi,
mods= ~ 1 + sqrt_inv_n_tilda,
random=list(~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.smd)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.rr.srin <- estimates.CI(publication.bias.model.rr.srin)
estimates.publication.bias.model.smd.srin <- estimates.CI(publication.bias.model.smd.srin)
Effect sizes with larger uncertainty [larger sqrt(inv_n_tilda)] tend to be larger in magnitude (i.e. small-study effects), as indicated by the statistically significant slope of the moderator ‘sqrt_inv_n_tilda’ for lnRR (slope = -0.26, 95% CI = [-0.49,-0.03]; R2marginal = 2.3%) and SMD (slope = -0.8, 95% CI = [-1.56,-0.03]; R2marginal = 2.3%). In section S4.2.4.3, we will use the all-in publication bias test (multi-moderator meta-regression) to see whether the small-study effects persist after accounting for some of the heterogeneity present in the data.
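The R2marginal values reported above are not computed in the chunk shown here; as a minimal sketch (assuming the orchaRd package installed in the setup section is loaded), they could be obtained from the fitted models with orchaRd::r2_ml():
# marginal (fixed-effects only) and conditional R2 for the uni-moderator models
# (illustrative; not necessarily the exact code used to produce the values reported above)
orchaRd::r2_ml(publication.bias.model.rr.srin)
orchaRd::r2_ml(publication.bias.model.smd.srin)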
To test for time-lag bias (or the decline effect), we include the year of publication (mean-centred) as a single moderating variable (see Equation 23 from the main text).
# mean-centring year of publication to help with interpretation, particularly in S4.1.4.3.
dataset.rr$year.c <- as.vector(scale(dataset.rr$year, scale = F))
dataset.smd$year.c <- as.vector(scale(dataset.smd$year, scale = F))
# Application of Equation 23 from the main manuscript
publication.bias.model.rr.timelag <- rma.mv(yi, vi,
mods= ~ 1 + year.c,
random=list(~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.rr)
publication.bias.model.smd.timelag <- rma.mv(yi, vi,
mods= ~ 1 + year.c,
random=list(~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.smd)
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.rr.timelag <- estimates.CI(publication.bias.model.rr.timelag)
estimates.publication.bias.model.smd.timelag <- estimates.CI(publication.bias.model.smd.timelag)
According to this uni-moderator meta-regression, there is an indication of decline effects since the slope of the moderator ‘year of publication’ is close to statistical significance for lnRR (slope = 0.01, 95% CI = [0,0.02]; R2marginal = 1.6%) and SMD (slope = 0.03, 95% CI = [0,0.06]; R2marginal = 1.6%).
To get the best evidence of small-study effects and time-lag bias, we run a multi-moderator meta-regression including the square root of the inverse of the effective sample size, the year of publication (mean-centred) and the study approach (either experimental or observational; a moderator originally included in Murphy et al. 2013).
# fitting the multi-moderator (all-in) meta-regression, which includes the square root of the inverse of the effective sample size, the year of publication (mean-centred) and the study approach
# Application of Equation 29 from the main manuscript
publication.bias.model.rr.all.srin <- rma.mv(yi, vi,
mods= ~ -1 + sqrt_inv_n_tilda +
year.c +
Experimental.or.Observational.,
random=list(
~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.rr,
control=list(optimizer="optim", optmethod="Nelder-Mead"))
publication.bias.model.smd.all.srin <- rma.mv(yi, vi,
mods= ~ -1 + sqrt_inv_n_tilda +
year.c +
Experimental.or.Observational.,
random=list(
~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method="REML",
test = "t",
data=dataset.smd,
control=list(optimizer="optim", optmethod="Nelder-Mead"))
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.rr.all.srin <- estimates.CI(publication.bias.model.rr.all.srin)
estimates.publication.bias.model.smd.all.srin <- estimates.CI(publication.bias.model.smd.all.srin)
The all-in publication bias test gave results similar to those of the uni-moderator meta-regressions above for small-study effects (section S4.2.4.1), but showed some differences for time-lag effects (section S4.2.4.2).
First, the multi-moderator meta-regression shows a statistically significant slope for the moderator ‘sqrt_inv_n_tilda’ for both lnRR (slope = -0.25, 95% CI = [-0.47,-0.03]; Figure S4.9) and SMD (slope = -0.94, 95% CI = [-1.72,-0.15]; Figure S4.10), confirming evidence of small-study effects. Note that Murphy et al. (2013) originally reported no funnel asymmetry from visual inspection; however, we do not recommend using this method for meta-analytic datasets with multiple levels of non-independence and high heterogeneity.
Second, the all-in publication bias test shows no evidence of decline effects for lnRR, since the slope of the moderator ‘year of publication’ is indistinguishable from zero (slope = 0.01, 95% CI = [0,0.02]; Figure S4.12). However, SMD tends to show some evidence of decline effects, as the slope of publication year is close to statistical significance (slope = 0.03, 95% CI = [0,0.06]; Figure S4.13).
# predicting effect sizes for a range of sqrt(inv_n_tilda) from the minimum observed in the data to the maximum observed in data, while year is kept at its mean value, in this case 0 since year was mean-centred
# conditioned on observational studies
predict.publication.bias.model.rr.all.srin.plot.1 <-
predict(publication.bias.model.rr.all.srin, newmods=cbind(seq(min(dataset.rr$sqrt_inv_n_tilda), max(dataset.rr$sqrt_inv_n_tilda), length.out=324), c(0), c(0), 1))
newdat <- data.frame(sei= seq(min(dataset.rr$sqrt_inv_n_tilda), max(dataset.rr$sqrt_inv_n_tilda), length.out=324),
fit=predict.publication.bias.model.rr.all.srin.plot.1$pred,
upper=predict.publication.bias.model.rr.all.srin.plot.1$ci.ub,
lower=predict.publication.bias.model.rr.all.srin.plot.1$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.rr$sqrt_inv_n_tilda
yaxis <- dataset.rr$yi
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n"
)
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(0,1.6,0.2),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-3,2,0.5),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "square root of inverse of effective sample size",
ylab = "effect size (lnRR)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=1)
lines(newdat$sei, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$sei,rev(newdat$sei)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
# predicting effect sizes for a range of sqrt(inv_n_tilda) from the minimum observed in the data to the maximum observed in the data, while year is kept at its mean value, in this case 0 since year was mean-centred
#conditioned on observational studies
predict.publication.bias.model.smd.all.srin.plot.1 <- predict(publication.bias.model.smd.all.srin, newmods=cbind(seq(min(dataset.smd$sqrt_inv_n_tilda), max(dataset.smd$sqrt_inv_n_tilda), length.out=320), c(0), c(0), 1))
newdat <- data.frame(sei= seq(min(dataset.smd$sqrt_inv_n_tilda), max(dataset.smd$sqrt_inv_n_tilda), length.out=320),
fit=predict.publication.bias.model.smd.all.srin.plot.1$pred,
upper=predict.publication.bias.model.smd.all.srin.plot.1$ci.ub,
lower=predict.publication.bias.model.smd.all.srin.plot.1$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.smd$sqrt_inv_n_tilda
yaxis <- dataset.smd$yi
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n"
)
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(0,1.2,0.2),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-8,4,2),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "square root of inverse of effective sample size",
ylab = "effect size (SMD)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=1)
lines(newdat$sei, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$sei,rev(newdat$sei)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
# predicting effect sizes for a range of year of publication from the minimum observed in the data to the maximum observed in the data, while sqrt(inv_n_tilda) is kept at its mean value
# conditioned on observational studies
predict.publication.bias.model.rr.all.srin.plot.2 <-
predict(publication.bias.model.rr.all.srin, newmods=cbind(mean(dataset.rr$sqrt_inv_n_tilda),seq(min(dataset.rr$year.c), max(dataset.rr$year.c), length.out=324), c(0), 1))
newdat <- data.frame(year = seq(min(dataset.rr$year), max(dataset.rr$year), length.out=324),
fit=predict.publication.bias.model.rr.all.srin.plot.2$pred,
upper=predict.publication.bias.model.rr.all.srin.plot.2$ci.ub,
lower=predict.publication.bias.model.rr.all.srin.plot.2$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.rr$year
yaxis <- dataset.rr$yi
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n"
)
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(1989,2013,5),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-3,2,0.5),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "year of publication",
ylab = "effect size (lnRR)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=1)
lines(newdat$year, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$year,rev(newdat$year)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
# predicting effect sizes for a range of year of publication from the minimum observed in the data to the maximum observed in the data, while sqrt(inv_n_tilda) is kept at its mean value
# conditioned on observational studies
predict.publication.bias.model.smd.all.srin.plot.2 <-
predict(publication.bias.model.smd.all.srin, newmods=cbind(mean(dataset.smd$sqrt_inv_n_tilda),seq(min(dataset.smd$year.c), max(dataset.smd$year.c), length.out=320), c(0), 1))
newdat <- data.frame(year= seq(min(dataset.smd$year), max(dataset.smd$year), length.out=320),
fit=predict.publication.bias.model.smd.all.srin.plot.2$pred,
upper=predict.publication.bias.model.smd.all.srin.plot.2$ci.ub,
lower=predict.publication.bias.model.smd.all.srin.plot.2$ci.lb,
stringsAsFactors=FALSE)
xaxis <- dataset.smd$year
yaxis <- dataset.smd$yi
plot(xaxis,yaxis,
type="n",
ylab="",
xlab="",
xaxt="n",
yaxt="n"
)
abline(a=0,b=0, lwd=1, lty=1)
axis(1,at=seq(1989,2013,5),
cex.axis=0.8,tck=-0.02)
axis(2,
at=round(seq(-8,4,2),1),
cex.axis=0.8,las=2,tck=-0.02)
title(xlab = "year of publication",
ylab = "effect size (SMD)",
line = 2.75, cex.lab=1.4)
points(xaxis,yaxis,
bg=rgb(0,0,0, 0.1),
col=rgb(0,0,0, 0.2),
pch=21,
cex=1)
lines(newdat$year, newdat$fit, lwd=2.75,col="darkorchid4")
polygon(c(newdat$year,rev(newdat$year)),
c(newdat$lower,rev(newdat$upper)),
border=NA,col=rgb(104/255,34/255,139/255, 0.5))
In the main text we provide a two-step approach to calculate the bias-corrected overall effect size estimate. The rationale is that, when no uncertainty exists (theoretically), we can get the potential true effect from a multiple-moderator multilevel model (i.e. the all-in publication bias test, where the inclusion of moderator variables reduces heterogeneity); in other words, as \(\tilde{n} \to \infty\), both \(\sqrt{1/\tilde{n}}\) and \(1/\tilde{n}\) go to 0, so the model's prediction at 0 represents the effect expected from a (hypothetical) study with no sampling error. We add two more moderators, Experimental.or.Observational. (2 levels) and Disturbance_category (10 levels); we refer to this model as the full model.

The models above do not provide strong evidence that there is a significant overall effect (i.e. a significant intercept or, at least, one group being significant at an alpha level of 10%, which translates to a t value of > ~1.65 in magnitude). However, the meta-regression model with only Experimental.or.Observational. provides good evidence that there are some overall effects. Therefore, here, we should try both scenarios: \(\sqrt{1/\tilde{n}}\) and \(1/\tilde{n}\). The former provides the best estimate when there is no evidence that an overall effect exists or that some groups have non-zero effects, while the latter is best when there is such evidence.
# lnRR
# with sqrt(1/~n)
# preparation to get marginalized mean (when sqrt_inv_n_tilda = 0 and year.c = 0)
res.publication.bias.model.rr.v.0 <- qdrg(object = publication.bias.model.rr.all.srin, data = dataset.rr, at = list(sqrt_inv_n_tilda = 0, year.c = 0))
# marginalized overall mean at sqrt_inv_n_tilda = 0 and year.c = 0; weights = "prop" or "cells" averages over groups proportionally; if not specified, all groups (levels) get the same weight
overall.res.publication.bias.model.rr.v.0 <- emmeans(res.publication.bias.model.rr.v.0, specs = ~1, df = publication.bias.model.rr.all.srin$ddf, weights = "prop") # using effect size - 7
# marginalized means for different levels for Experimental.or.Observational.
mm.publication.bias.model.rr.v.0 <- emmeans(res.publication.bias.model.rr.v.0, specs = "Experimental.or.Observational.", df = publication.bias.model.rr.all.srin$ddf, weights = "prop")
# with 1/~n
# providing an adjusted effect for each level of Experimental.or.Observational.
publication.bias.model.rr.v.1 <- rma.mv(yi, vi,
mods=~-1+inv_n_tilda +
year.c +
Experimental.or.Observational. +
Disturbance_category, # this can be a random effect but placing this here to show how emmeans works
random=list(
~ 1 | obsID,
~ 1 | paperID),
method ="REML",
test = "t",
data = dataset.rr,
control = list(optimizer="optim", optmethod="Nelder-Mead"))
# preparation to get marginalized mean (when inv_n_tilda = 0 and year.c = 0)
res.publication.bias.model.rr.v.1 <- qdrg(object = publication.bias.model.rr.v.1, data = dataset.rr, at = list(inv_n_tilda = 0, year.c = 0))
# marginalized overall mean at inv_n_tilda = 0 and year.c = 0; also weights = "prop" or "cells" average things over proportionally. if not specified, all groups (levels) get the same weights
overall.res.publication.bias.model.rr.v.1 <- emmeans(res.publication.bias.model.rr.v.1, specs = ~1, df = publication.bias.model.rr.v.1$ddf, weights = "prop") # using effect size - 7
# marginalized means for different levels for Experimental.or.Observational.
mm.publication.bias.model.rr.v.1 <- emmeans(res.publication.bias.model.rr.v.1, specs = "Experimental.or.Observational.", df = publication.bias.model.rr.v.1$ddf, weights = "prop")
# estimated marginalised mean
#mm.publication.bias.model.r.v.1 <- emmeans(res.publication.bias.model.rr.v.1, specs = "Experimental.or.Observational.", df = publication.bias.model.rr.v.1$dfs, weights = "equal")
# model 3b: comparing results without correcting for publication bias
publication.bias.model.rr.v.1b <- rma.mv(yi, vi,
mods=~-1+ Experimental.or.Observational.,
random=list(
~ 1 | obsID,
~ 1 | paperID),
method = "REML",
test = "t",
data = dataset.rr,
control = list(optimizer="optim", optmethod="Nelder-Mead"))
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.rr.v.0 <- estimates.CI2(mm.publication.bias.model.rr.v.0)
estimates.publication.bias.model.rr.v.1 <- estimates.CI2(mm.publication.bias.model.rr.v.1)
estimates.publication.bias.model.rr.v.1b <- estimates.CI(publication.bias.model.rr.v.1b)
# SMD
# with sqrt(1/~n)
# preparation to get marginalized mean (when sqrt_inv_n_tilda = 0 and year.c = 0)
res.publication.bias.model.smd.v.0 <- qdrg(object = publication.bias.model.smd.all.srin, data = dataset.smd, at = list(sqrt_inv_n_tilda = 0, year.c = 0))
# marginalized overall mean at sqrt_inv_n_tilda = 0 and year.c = 0; weights = "prop" or "cells" averages over groups proportionally; if not specified, all groups (levels) get the same weight
overall.res.publication.bias.model.smd.v.0 <- emmeans(res.publication.bias.model.smd.v.0, specs = ~1, df = publication.bias.model.smd.all.srin$ddf, weights = "prop") # using effect size - 7
# marginalized means for different levels for Experimental.or.Observational.
mm.publication.bias.model.smd.v.0 <- emmeans(res.publication.bias.model.smd.v.0, specs = "Experimental.or.Observational.", df = publication.bias.model.smd.all.srin$ddf, weights = "prop")
# model 3: providing an adjusted effect for each level of Experimental.or.Observational.
publication.bias.model.smd.v.1 <- rma.mv(yi, vi,
mods=~-1+ inv_n_tilda +
year.c +
Experimental.or.Observational.,
random=list(
~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method = "REML",
test = "t",
data = dataset.smd,
control = list(optimizer="optim", optmethod="Nelder-Mead"))
# preparation to get marginalized mean (when inv_n_tilda = 0 and year.c = 0)
res.publication.bias.model.smd.v.1 <- qdrg(object = publication.bias.model.smd.v.1, data = dataset.smd, at = list(inv_n_tilda = 0, year.c = 0))
# marginalized overall mean at inv_n_tilda = 0 and year.c = 0; also weights = "prop" or "cells" average things over proportionally. if not specified, all groups (levels) get the same weights
overall.res.publication.bias.model.smd.v.1 <- emmeans(res.publication.bias.model.smd.v.1, specs = ~1, df = publication.bias.model.smd.v.1$ddf, weights = "prop") # using effect size - 7
# marginalized means for different levels for Experimental.or.Observational.
mm.publication.bias.model.smd.v.1 <- emmeans(res.publication.bias.model.smd.v.1, specs = "Experimental.or.Observational.", df = publication.bias.model.smd.v.1$ddf, weights = "prop")
# model 3b: comparing results without correcting for publication bias
publication.bias.model.smd.v.1b <- rma.mv(yi, vi,
mods=~-1+ Experimental.or.Observational.,
random=list(
~ 1 | Disturbance_category,
~ 1 | obsID,
~ 1 | paperID),
method = "REML",
test = "t",
data = dataset.smd,
control = list(optimizer="optim", optmethod="Nelder-Mead"))
# extracting the mean and 95% confidence intervals
estimates.publication.bias.model.smd.v.0 <- estimates.CI2(mm.publication.bias.model.smd.v.0)
estimates.publication.bias.model.smd.v.1 <- estimates.CI2(mm.publication.bias.model.smd.v.1)
estimates.publication.bias.model.smd.v.1b <- estimates.CI(publication.bias.model.smd.v.1b)
The full model with \(\sqrt{1/\tilde{n}}\) provided an adjusted overall lnRR that was slightly larger in magnitude than the unadjusted estimate (adjusted effect = -0.19, SE = 0.02, df = 321; unadjusted effect = -0.16, 95% CI = [-0.24,-0.09]; the model in section S4.2.2). This is unexpected, as the adjusted estimate should be closer to zero than the unadjusted one. On the other hand, the model with \(1/\tilde{n}\) provided a smaller overall value, as expected (adjusted effect = -0.09, SE = 0.04, df = 312). Given this, we conclude that it is probably more appropriate to take the adjusted estimates from the model with \(1/\tilde{n}\) than from the model with \(\sqrt{1/\tilde{n}}\). Table S4.3 shows that the estimates from the unadjusted model and the adjusted model with \(\sqrt{1/\tilde{n}}\) were similar, while the adjusted model with \(1/\tilde{n}\) gave smaller estimates in magnitude, which is what should be expected for adjusted effects. We therefore suggest using the estimates from the method with \(1/\tilde{n}\) (Adjusted mean 2 and Adjusted 95% CI 2).
# creating a table to show the results of model 2
table.comparing.Exp.or.Obs.levels0 <- merge(estimates.publication.bias.model.rr.v.0,
estimates.publication.bias.model.rr.v.1,
by="estimate",
all.x=T)
table.comparing.Exp.or.Obs.levels <- merge(table.comparing.Exp.or.Obs.levels0,
estimates.publication.bias.model.rr.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.Exp.or.Obs.levels <- table.comparing.Exp.or.Obs.levels %>% mutate_at(vars(mean.x, lower.x,upper.x,mean.y,lower.y,upper.y, mean,lower,upper), list(~round(., 2)))
table.comparing.Exp.or.Obs.levels <- data.frame(Exp.or.Obs.level = table.comparing.Exp.or.Obs.levels[,1],
adjusted.mean1=table.comparing.Exp.or.Obs.levels[,2],
adjusted.CI1 = paste0("[",table.comparing.Exp.or.Obs.levels[,3],",",
table.comparing.Exp.or.Obs.levels[,4],"]"),
adjusted.mean2=table.comparing.Exp.or.Obs.levels[,5],
adjusted.CI2 = paste0("[",table.comparing.Exp.or.Obs.levels[,6],",",
table.comparing.Exp.or.Obs.levels[,7],"]"),
unadjusted.mean = table.comparing.Exp.or.Obs.levels[,8],
unadjusted.CI = paste0("[",table.comparing.Exp.or.Obs.levels[,9],",",
table.comparing.Exp.or.Obs.levels[,10],"]")
)
table.comparing.Exp.or.Obs.levels[,1] <- c("Experimental","Observational")
# creating a formatted table using the R package 'gt'
table.comparing.Exp.or.Obs.levels.gt <- table.comparing.Exp.or.Obs.levels %>%
gt() %>%
cols_label(Exp.or.Obs.level=md("**Study type**"),
adjusted.mean1=md("**Adjusted mean 1**"),
adjusted.CI1=md("**Adjusted 95% CI 1**"),
adjusted.mean2=md("**Adjusted mean 2**"),
adjusted.CI2=md("**Adjusted 95% CI 2**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**"),
) %>%
cols_align(align = "center")
table.comparing.Exp.or.Obs.levels.gt
Study type | Adjusted mean 1 | Adjusted 95% CI 1 | Adjusted mean 2 | Adjusted 95% CI 2 | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|---|---|
Experimental | -0.07 | [-0.14,0.01] | -0.06 | [-0.18,0.07] | -0.08 | [-0.15,0] |
Observational | -0.25 | [-0.31,-0.19] | -0.10 | [-0.2,-0.01] | -0.25 | [-0.31,-0.18] |
Table S4.3. The estimates of all levels of the moderator Experimental.or.Observational. and their adjusted estimates (1 and 2) using the all-in publication bias test (multi-moderator meta-regression). ‘Adjusted 1’ is with \(\sqrt{1/\tilde{n}}\) while ‘Adjusted 2’ is with \(1/\tilde{n}\). Estimates are presented as lnRR.
SMD showed a similar pattern for the adjusted overall effects (Adjusted 1 with \(\sqrt{1/\tilde{n}}\) and Adjusted 2 with \(1/\tilde{n}\)) after accounting for publication bias and time-lag bias (Adjusted 1 = -0.52, SE = 0.08, df = 317; Adjusted 2 = -0.27, SE = 0.13, df = 317; unadjusted effect = -0.49, 95% CI = [-0.64,-0.35]; more in section S4.2.2). As with lnRR, Table S4.4 shows that the estimates from the unadjusted model and the adjusted model with \(\sqrt{1/\tilde{n}}\) were similar, while the adjusted model with \(1/\tilde{n}\) gave smaller estimates in magnitude. Therefore, for SMD, we also suggest using the estimates from the method with \(1/\tilde{n}\) (Adjusted mean 2 and Adjusted 95% CI 2). Also, we believe that the use of \(1/\tilde{n}\) may always be warranted because it is likely that a non-zero effect almost always exists, whereas when there is no overall effect, it probably does not matter whether we use \(\sqrt{1/\tilde{n}}\) or \(1/\tilde{n}\) to adjust such an overall effect (whose true effect is zero).
# creating a table to show the results of model 2
table.comparing.Exp.or.Obs.levels0 <- merge(estimates.publication.bias.model.smd.v.0,
estimates.publication.bias.model.smd.v.1,
by="estimate",
all.x=T)
table.comparing.Exp.or.Obs.levels <- merge(table.comparing.Exp.or.Obs.levels0,
estimates.publication.bias.model.smd.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.Exp.or.Obs.levels <- table.comparing.Exp.or.Obs.levels %>% mutate_at(vars(mean.x, lower.x,upper.x,mean.y,lower.y,upper.y, mean,lower,upper), list(~round(., 2)))
table.comparing.Exp.or.Obs.levels <- data.frame(Exp.or.Obs.level = table.comparing.Exp.or.Obs.levels[,1],
adjusted.mean1=table.comparing.Exp.or.Obs.levels[,2],
adjusted.CI1 = paste0("[",table.comparing.Exp.or.Obs.levels[,3],",",
table.comparing.Exp.or.Obs.levels[,4],"]"),
adjusted.mean2=table.comparing.Exp.or.Obs.levels[,5],
adjusted.CI2 = paste0("[",table.comparing.Exp.or.Obs.levels[,6],",",
table.comparing.Exp.or.Obs.levels[,7],"]"),
unadjusted.mean = table.comparing.Exp.or.Obs.levels[,8],
unadjusted.CI = paste0("[",table.comparing.Exp.or.Obs.levels[,9],",",
table.comparing.Exp.or.Obs.levels[,10],"]")
)
table.comparing.Exp.or.Obs.levels[,1] <- c("Experimental","Observational")
# creating a formatted table using the R package 'gt'
table.comparing.Exp.or.Obs.levels.gt <- table.comparing.Exp.or.Obs.levels %>%
gt() %>%
cols_label(Exp.or.Obs.level=md("**Study type**"),
adjusted.mean1=md("**Adjusted mean 1**"),
adjusted.CI1=md("**Adjusted 95% CI 1**"),
adjusted.mean2=md("**Adjusted mean 2**"),
adjusted.CI2=md("**Adjusted 95% CI 2**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**"),
) %>%
cols_align(align = "center")
table.comparing.Exp.or.Obs.levels.gt
Study type | Adjusted mean 1 | Adjusted 95% CI 1 | Adjusted mean 2 | Adjusted 95% CI 2 | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|---|---|
Experimental | -0.33 | [-0.57,-0.1] | -0.10 | [-0.45,0.25] | -0.37 | [-0.6,-0.13] |
Observational | -0.62 | [-0.82,-0.43] | -0.37 | [-0.64,-0.09] | -0.57 | [-0.76,-0.39] |
Table S4.4. The estimates of all levels of the moderator Experimental.or.Observational. and their adjusted estimates (1 and 2) using the all-in publication bias test (multi-moderator meta-regression). ‘Adjusted 1’ is with \(\sqrt{1/\tilde{n}}\) while ‘Adjusted 2’ is with \(1/\tilde{n}\). Estimates are presented as SMD.
Here we redraw Figure 6a-d using the effective sample size or, more accurately, \(\tilde{n}\). We note that Figure 6b-d in the main text use standard errors (SE) of different types of residuals, so panels b, c and d may not be directly comparable between Figure 6 and this redrawn figure.
#include_graphics("./R/fig.png")
Figure 6a-d redrawn
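The figure itself was produced separately (see the include_graphics call above, which points to ./R/fig.png). As a purely illustrative sketch, and not the code used for the published figure, a funnel-style plot of lnRR against the effective sample size could be drawn as follows, assuming dataset.rr with columns yi and inv_n_tilda (computed above) is available:
# illustrative only: funnel-style plot using effective sample size as the precision axis
plot(dataset.rr$yi, 1/dataset.rr$inv_n_tilda,
     log = "y", # effective sample size shown on a log scale
     pch = 21, bg = rgb(0, 0, 0, 0.1), col = rgb(0, 0, 0, 0.2),
     xlab = "effect size (lnRR)",
     ylab = "effective sample size")
abline(v = 0, lty = 2) # reference line at lnRR = 0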
To aggregate (average) effect sizes per study we will use the function ‘aggregate()’ from the R package metafor (Viechtbauer 2010) with a compound symmetric structure (‘struct="CS"’) and a value of 0.5 for rho, following Noble et al. (2017). That is, when averaging effect sizes per study we are not assuming that the effect sizes of a study are independent of each other, but instead that they are correlated; since we do not know the real value of that correlation, we assume a value of 0.5 for all studies. The function ‘aggregate()’ will then combine multiple estimates within the same study using inverse-variance weighting, taking the 0.5 correlation into consideration. More information about the function ‘aggregate()’, including how it deals with other variables (e.g. moderators) in the dataset, can be found by typing ?metafor::aggregate.
# you will need the development version of "metafor" package to run this bit
# rho = 0.5 is as in Noble et al (Mol Ecol)
dataset.r <- escalc(yi=yi, vi=vi, data=dataset.r)
dataset.r.agg <- aggregate(dataset.r, cluster=study, struct="CS", rho = 0.5)
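As a quick illustrative check (not part of the original analysis), one can verify that the aggregation leaves one row per study, and explore how sensitive the aggregation is to the assumed within-study correlation:
# effect sizes before aggregation, rows after aggregation, and number of studies
nrow(dataset.r); nrow(dataset.r.agg); length(unique(dataset.r$study))
# re-aggregating under a different assumed within-study correlation (illustrative only)
dataset.r.agg.rho08 <- aggregate(dataset.r, cluster = study, struct = "CS", rho = 0.8)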
Using the aggregated data (see above), we will then fit the trim-and-fill method (Duval and Tweedie 2000a,b). The trim-and-fill method is a non-parametric method that can be used to both detect and correct for publication bias and, although it can handle some heterogeneity, it performs worse as heterogeneity increases (Peters et al., 2007; Moreno et al., 2009; more in the main text). Since we aggregated our data, we can now run a random-effects meta-analytic model rather than a multilevel model as used in section S4, and we will run the trim-and-fill method using this model. Note that we used the method by Knapp and Hartung (2003; test = "knha") rather than the default z (Wald-type) tests; this test has been shown to perform better.
# random/mixed-effects meta-analytic model
meta.analysis.model.r.agg <- rma(yi, vi, test="knha",data=dataset.r,slab=study)
# trim-and-fill method
trimfill.r <- trimfill(meta.analysis.model.r.agg)
The trim-and-fill method identified 15 (SE = 6.65) missing studies (Figure S5.1) and estimated an adjusted overall effect of 0.2 (95% CI = [0.13,0.27]), which is similar to the adjusted overall effect estimated by our suggested multilevel meta-regression method (0, 95% CI = [0,0]; more in section S4.1.4.3).
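The quantities reported above can be read directly off the trim-and-fill object; for example (illustrative):
# estimated number of missing studies and its standard error
trimfill.r$k0; trimfill.r$se.k0
# adjusted overall effect and its 95% confidence interval
coef(trimfill.r); trimfill.r$ci.lb; trimfill.r$ci.ub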
funnel(trimfill.r, yaxis="seinv")
Using the aggregated data (see above), we will also fit a selection model-based method (reviewed by Marks-Anglin & Chen, 2020). Although there are many selection model methods, all of them model how effect sizes are missing based on p values, effect sizes and/or sampling variances, and all can provide an adjusted overall effect (e.g. Rodgers & Pustejovsky, 2020; more in the main text). Contrary to other methods, selection models can deal with and model heterogeneity (e.g. Citkowicz and Vevea 2017), but they cannot deal with non-independent effect sizes. We will run a step function selection model based on one cut-point (\(\alpha = 0.05\)); this model is sometimes referred to as a 3 parameter selection model (3PM; more in the main text; Rodgers & Pustejovsky, 2020; more by typing ?metafor::selmodel). Note that here we need to use the maximum likelihood method (ML) rather than restricted maximum likelihood (REML).
# without moderators
meta.analysis.r.agg0 <- rma(yi, vi,
method="ML", #method needs to set to "ML" rather than "REML" as in all models above
test="knha", #knha is meant to be better than Z and t, but it is not implemented for multilevel models
data=dataset.r)
three.PSM.r0 <- selmodel(meta.analysis.r.agg0, type="stepfun", steps=c(0.05,1)) #note that steps=c(0.05) would give the same results
# look at confidence intervals - not just point estimate
# summary(three.PSM.r0)
# with moderators
meta.regression.r.agg <- rma(yi, vi,
mod = ~ year.c +
CaptivityC - 1,
method="ML",
test="knha",
data=dataset.r)
three.PSM.r <- selmodel(meta.regression.r.agg, type="stepfun", steps=c(0.05))
# summary(three.PSM.r)
# extracting the mean and 95% confidence intervals
estimates.three.PSM.r <- estimates.CI(three.PSM.r)
The step function selection model based on one cut-point (\(\alpha = 0.05\)) for the intercept-only model estimated an adjusted overall effect of 0.31 (95% CI = [0.21,0.41]; Figure S5.2), which is larger than the original overall effect estimated by the meta-analytic model (0.29, 95% CI = [0.17,0.41]; more in section S4.1.4.3). Overall, the selection model estimated adjusted overall effects that were either very similar to or even slightly larger than the original unadjusted effect sizes (Table S5.1), which might indicate that the specific selection model that we used (i.e. one cut-point of \(\alpha = 0.05\)) was not an adequate approximation for the underlying selection process.
par(mfrow = c(1, 2))
plot(three.PSM.r0, ylim = c(0,2),col="red", lty = 3)
plot(three.PSM.r, ylim = c(0,2),col="red", lty = 3)
# creating a table to show the results of the selection model
table.comparing.captivity.levels.3PSM <- merge(estimates.three.PSM.r[c(2:6),],
estimates.publication.bias.model.r.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.captivity.levels.3PSM <- table.comparing.captivity.levels.3PSM %>% mutate_at(vars(mean.x, lower.x,upper.x,mean.y,lower.y,upper.y), list(~round(., 2)))
table.comparing.captivity.levels.3PSM <- data.frame(captivity.level = table.comparing.captivity.levels.3PSM[,1],
adjusted.mean=table.comparing.captivity.levels.3PSM[,2], adjusted.CI=paste0("[",table.comparing.captivity.levels.3PSM[,3],",",table.comparing.captivity.levels.3PSM[,4],"]"),
unadjusted.mean=table.comparing.captivity.levels.3PSM[,5], unadjusted.CI=paste0("[",table.comparing.captivity.levels.3PSM[,6],",",table.comparing.captivity.levels.3PSM[,7],"]"))
table.comparing.captivity.levels.3PSM[,1] <- c("FC","FW","WCT","WCT & FC", "WCT & FW")
# creating a formatted table using the R package 'gt'
table.model.r.captivity.gt.3PSM <- table.comparing.captivity.levels.3PSM %>%
gt::gt() %>%
cols_label(captivity.level=md("**Captivity status**"),
adjusted.mean=md("**Adjusted mean**"),
adjusted.CI=md("**Adjusted 95% CI**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**")) %>%
cols_align(align = "center")
table.model.r.captivity.gt.3PSM
Captivity status | Adjusted mean | Adjusted 95% CI | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|
FC | 0.29 | [0.17,0.41] | 0.28 | [0.15,0.41] |
FW | 0.26 | [0.06,0.47] | 0.27 | [0.05,0.49] |
WCT | 0.36 | [0.24,0.48] | 0.30 | [0.16,0.45] |
WCT & FC | 0.41 | [0.1,0.72] | 0.43 | [0.09,0.78] |
WCT & FW | 0.12 | [-0.14,0.38] | 0.16 | [-0.12,0.43] |
Table S5.1. Results adjusted and unadjusted for publication bias for all levels of the moderator ‘captivity status’ using a step function selection model based on one cut-point (\(\alpha = 0.05\)). Estimates are presented as standardized effect sizes using Fisher’s transformation of the correlation coefficient (i.e. Zr).
Last, using the aggregated data (see above), we will also perform a cumulative meta-analysis using the function cumul() from the R package metafor (Viechtbauer 2010). By displaying the results of this cumulative meta-analysis as a forest plot, we can see if there is evidence of decline effects (more in the main text), which we do not seem to have in this dataset (Figure S5.3).
cum.meta.analysis.model.r.agg <- cumul(meta.analysis.model.r.agg,order=order(dataset.r$year))
forest(cum.meta.analysis.model.r.agg, xlab = "Overall estimate (Zr)",digits=c(2,1),cex=0.3, header="Author(s) and year")
To sample one effect size per study we will write a custom function that, for each simulation: (i) randomly selects one effect size per study to generate a meta-analytic dataset; (ii) fits the publication bias test of interest; and (iii) extracts estimates from the publication bias test output. This process will be repeated 1000 times (i.e. 1000 samplings).
For an introduction to the trim-and-fill method see section S5.1.1.1 and the main text. Below are the summary results of the trim-and-fill method after running 1000 randomizations.
# function for randomly selecting 1 effect size from each study
func_S5.1.2.1 <- function(sim = 1){
# splitting dataframe into each study
study_list <- split(dataset.r, dataset.r$study)
# randomly extracting one effect size per study
SingleStudyES_dataset.r <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),c("study","yi","vi")]))
# running the model on the dataframe now each study only has a single effect size
model.tmp.r <- rma(yi, vi, test="knha",data=SingleStudyES_dataset.r)
# applying trim-and-fill method
TAF <- trimfill(model.tmp.r)
# creating a dataframe with results from the trim-and-fill methods, with:
#n = number of missing studies
#beta = adjusted overall effect
df <- data.frame(sim,
n=TAF$k0,
beta=TAF$beta[[1]])
return(df)
}
# applying the function 1000 times
#trimfill.sampling.r <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.1.2.1(x)))
# saving data to save time
# save(trimfill.sampling.r,file = here("data","trimfill_sampling_r_1000s.RData"))
# loading the saved data
load(here("data","trimfill_sampling_r_1000s.RData"))
The median number of missing studies was 10 (range = 0 - 16; Figure S5.4A), which is seemingly lower than the number of missing studies estimated using the aggregated data (n = 15; more in section S5.1.1.1). Additionally, the median adjusted overall effect according to the trim-and-fill method was 0.21 (range = 0.17 - 0.28; Figure S5.4B), which is very similar to the adjusted overall effect estimated using the aggregated data (0.2; more in section S5.1.1.1), but still lower than the adjusted overall effect estimated by our suggested multilevel meta-regression method (0, 95% CI = [0,0]; more in section S4.1.4.3).
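The medians and ranges quoted above can be recovered directly from the 1000 sampling runs, for example (illustrative):
# number of missing studies across the 1000 runs
median(trimfill.sampling.r$n); range(trimfill.sampling.r$n)
# adjusted overall effect across the 1000 runs
median(trimfill.sampling.r$beta); range(trimfill.sampling.r$beta)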
# plotting the distribution of the number of missing studies according to the trim-and-fill method
n.r <- ggplot(data=trimfill.sampling.r,aes(x=n)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.r, aes(xintercept=median(n)),
linetype="dashed", size=1)
# plotting the distribution of the adjusted overall effect according to the trim-and-fill method
betas.r <- ggplot(data=trimfill.sampling.r,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.r, aes(xintercept=median(beta)),
linetype="dashed", size=1)
ggarrange(n.r, betas.r,
labels = c("A", "B"),
ncol = 2, nrow = 1)
For an introduction to selection models see section S5.1.1.2 and the main text. As before, we will run a step function selection model based on one cut-point (\(\alpha = 0.05\)); this model is sometimes referred to as a 3 parameter selection model. For simplicity, we will only run the selection model for the intercept-only model (but see section S5.1.1.2 for how to run a selection model for a meta-regression). Below are the summary results of the 3 parameter selection model after running 1000 randomizations.
func_S5.1.2.2 <- function(sim = 1){
# splitting dataframe into each study
study_list <- split(dataset.r, dataset.r$study)
# randomly extracting one effect size per study
SingleStudyES_dataset.r <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),c("study","yi","vi")]))
# running the model on the dataframe now each study only has a single effect size
model.tmp.r.agg0 <- rma(yi, vi, method="ML", test="knha",data=SingleStudyES_dataset.r)
#extracting the adjusted overall effect according to the selection model for each database
threePSM.vector.r <- selmodel(model.tmp.r.agg0, type="stepfun", steps=c(0.05))$beta[[1]]
#creating a dataframe with the n.randomization.r of estimated number of missing studies and adjusted overall effects
df <- data.frame(sim, beta=threePSM.vector.r)
return(df)
}
# applying the function 1000 times
#threePSM.sampling.r <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.1.2.2(x)))
# save(threePSM.sampling.r,file = here("data","3PSM_sampling_r_1000s.RData"))
# loading the data extracted from the 1000 iterations above
load(here("data","3PSM_sampling_r_1000s.RData"))
The median adjusted overall effect according to the 3 parameter selection model was 0.27 (range = 0.12 - 0.34; Figure S5.4B), which is similar to the adjusted overall effect estimated using the aggregated data (0.31; 95% CI = [0.21,0.41]; more in section S5.1.1.2) and to the adjusted overall effect estimated by our suggested multilevel meta-regression method (0, 95% CI = [0,0]; more in section S4.1.4.3).
# plotting the distribution of adjusted overall effect sizes according to the 3 parameter selection model
ggplot(data=threePSM.sampling.r,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=threePSM.sampling.r, aes(xintercept=median(beta)),
linetype="dashed", size=1)
For an introduction to cumulative meta-analysis see section S5.1.1.3 and the main text. Here, we will perform a sampling procedure where we run a cumulative meta-analysis test for each dataset and extract the mean effects estimated by the cumulative meta-analysis. We will then plot the median and 95% quantiles of those mean effects at each step. As before, the forest plot does not show any clear evidence of decline effects in this dataset (Figure S5.6).
func_S5.1.2.3 <- function(sim = 1){
# arranging by study year
dat_ordered <- dataset.r[order(dataset.r$year),c("study","yi","vi","year")]
# splitting dataframe into each study
study_list <- split(dat_ordered, dat_ordered$study)
# randomly extracting one effect size per study
SingleStudyES_dataset.r <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),]))
# running MA
model.tmp.r <- rma(yi, vi, test="knha",data=SingleStudyES_dataset.r)
# cumulative meta-analysis
CMA <- cumul(model.tmp.r,order=order(SingleStudyES_dataset.r$year))
# dataframe with the cumulative meta-analytic means, in publication order ('order' is needed for the summary below)
df <- data.frame(sim, order = seq_along(CMA$estimate), beta = CMA$estimate)
return(df)
}
# applying the function 1000 times
#cumul.sampling.r <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.1.2.3(x)))
# save(cumul.sampling.r,file = here("data","cumul_sampling_r_1000s.RData"))
# loading the saved data
load(here("data","cumul_sampling_r_1000s.RData"))
cumul.sampling.r.summarized <- as.data.frame(cumul.sampling.r
%>% group_by(order) %>%
summarise(median = median(beta),
lowCI = quantile(beta,probs = c(0.025)),
hiCI = quantile(beta,probs = c(0.975))))
# reverses the factor level ordering for labels after coord_flip()
cumul.sampling.r.summarized$order <- factor(cumul.sampling.r.summarized$order, levels=rev(cumul.sampling.r.summarized$order))
ggplot(data=cumul.sampling.r.summarized, aes(x=order, y=median, ymin=lowCI, ymax=hiCI)) +
geom_pointrange() +
geom_hline(yintercept=0, lty=2) + # add a dotted line at x=1 after flip
coord_flip() + # flip coordinates (puts labels on y axis)
xlab("Order of publication") + ylab("Mean (95% CI)") +
theme_bw()
# rho = 0.5 is as in Noble et al (Mol Ecol)
dataset.rr <- escalc(yi=yi, vi=vi, data=dataset.rr)
dataset.rr.agg <- aggregate(dataset.rr, cluster=paperID, struct="CS", rho = 0.5)
dataset.smd <- escalc(yi=yi, vi=vi, data=dataset.smd)
dataset.smd.agg <- aggregate(dataset.smd, cluster=paperID, struct="CS", rho = 0.5)
Using the aggregated data (see Equation 30 in main text), we will then fit the trim-and-fill method (Duval and Tweedie 2000a,b). The trim-and-fill method is a non-parametric method that can be used to both detect and correct for publication bias and, although it can handle some heterogeneity, it performs worse as heterogeneity increases (Peters et al., 2007; Moreno et al., 2009; more in the main text). We will apply the trim-and-fill method to meta-analytic models with no random effects (since we aggregated our data we no longer need to fit a multilevel model to account for non-independent effect sizes).
# lnRR
meta.analysis.model.rr.agg <- rma(yi, vi, test="knha", # using t dist rather than z
data=dataset.rr.agg)
trimfill.rr <- trimfill(meta.analysis.model.rr.agg)
#SMD
meta.analysis.model.smd.agg <- rma(yi, vi, test="knha", # using t dist rather than z
data=dataset.smd.agg)
trimfill.smd <- trimfill(meta.analysis.model.smd.agg)
The trim-and-fill method identified 0 (SE = 8.88) missing studies (Figure S5.7) and estimated an adjusted overall effect of -0.15 (95% CI = [-0.2,-0.1]).
par(mfrow = c(1,2))
funnel(trimfill.rr, yaxis="seinv",
ylab = "Precision (1/SE)",
xlab = "Effect size (lnRR)")
funnel(trimfill.smd, yaxis="seinv",
ylab = "Precision (1/SE)",
xlab = "Effect size (SMD)")
Using the aggregated data (see above), we will also fit a selection model-based method (reviewed by Marks-Anglin & Chen, 2020). Although there are many selection models, all of them model how effect sizes are missing based on p values, effect sizes and/or sampling variance, and can provide an adjusted overall effect (e.g. Rodgers & Pustejovsky, 2020; more in the main text). Contrary to other methods, selection models can deal with and model heterogeneity (e.g. Citkowicz and Vevea 2017), but cannot deal with non-independent effect sizes. We will run a step function selection model based on one cut-point (\(\alpha = 0.05\)); this model is sometimes referred to as a 3 parameter selection model (3PM; more in the main text; Rodgers & Pustejovsky, 2020).
# lnRR
# without moderators
meta.regression.rr.agg0 <- rma(yi, vi,
method="ML",
test="knha",
data=dataset.rr)
three.PSM.rr0 <- selmodel(meta.regression.rr.agg0, type="stepfun", steps=c(0.05, 1))
#summary(three.PSM.rr0)
# with moderator
meta.regression.rr.agg <- rma(yi, vi,
mod = ~ year.c
+ Experimental.or.Observational. - 1,
method="ML",
test="knha",
data=dataset.rr)
three.PSM.rr <- selmodel(meta.regression.rr.agg, type="stepfun", steps=c(0.05, 1))
## SMD
# without moderators
meta.regression.smd.agg0 <- rma(yi, vi,
method="ML",
test="knha",
data=dataset.smd)
three.PSM.smd0 <- selmodel(meta.regression.smd.agg0, type="stepfun", steps=c(0.05, 1))
# with moderators
meta.regression.smd.agg <- rma(yi, vi,
mod = ~ year.c
+ Experimental.or.Observational. - 1,
method="ML",
test="knha",
data=dataset.smd)
three.PSM.smd <- selmodel(meta.regression.smd.agg, type="stepfun", steps=c(0.05, 1))
# extracting the mean and 95% confidence intervals
estimates.three.PSM.rr <- estimates.CI(three.PSM.rr)
estimates.three.PSM.smd <- estimates.CI(three.PSM.smd)
The step function selection model based on one cut-point (\(\alpha = 0.05\)) for the intercept-only model estimated an adjusted overall lnRR of -0.1 (95% CI = [-0.18,-0.01]; Figure S5.8), which is smaller in magnitude than the original overall effect estimated by the meta-analytic model (-0.16, 95% CI = [-0.24,-0.09]; more in section S4.2.2). SMD showed the same pattern. Overall, the selection model estimated adjusted overall effects that were smaller in magnitude than the original unadjusted effect sizes (Table S5.2), consistent with the small-study effects detected above.
Results of a step function selection model based on one cut-point (\(\alpha = 0.05\)) for a model without any moderators (left-hand figure, top panel for lnRR, bottom for SMD) and a model including both ‘year of publication’ and ‘Experimental.or.Observational.’ as moderators (right-hand figure, top panel for lnRR, bottom for SMD).
par(mfrow = c(1, 2))
plot(three.PSM.rr0, col="red", lty = 3)
plot(three.PSM.rr, col="red", lty = 3)
par(mfrow = c(1, 2))
plot(three.PSM.smd0, col="red", lty = 3)
plot(three.PSM.smd, col="red", lty = 3)
# creating a table to show the results of the selection model
table.comparing.Exp.or.Obs.levels.3PSM <- merge(estimates.three.PSM.rr[c(2:3),],
estimates.publication.bias.model.rr.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.Exp.or.Obs.levels.3PSM <- table.comparing.Exp.or.Obs.levels.3PSM %>% mutate_at(vars(mean.x, lower.x,upper.x,mean.y,lower.y,upper.y), list(~round(., 2)))
table.comparing.Exp.or.Obs.levels.3PSM <- data.frame(Exp.or.Obs.level = table.comparing.Exp.or.Obs.levels.3PSM[,1],
adjusted.mean=table.comparing.Exp.or.Obs.levels.3PSM[,2], adjusted.CI=paste0("[",table.comparing.Exp.or.Obs.levels.3PSM[,3],",",table.comparing.Exp.or.Obs.levels.3PSM[,4],"]"),
unadjusted.mean=table.comparing.Exp.or.Obs.levels.3PSM[,5], unadjusted.CI=paste0("[",table.comparing.Exp.or.Obs.levels.3PSM[,6],",",table.comparing.Exp.or.Obs.levels.3PSM[,7],"]"))
table.comparing.Exp.or.Obs.levels.3PSM[,1] <- c("Experimental","Observational")
# creating a formatted table using the R package 'gt'
table.model.rr.Exp.or.Obs.gt.3PSM <- table.comparing.Exp.or.Obs.levels.3PSM %>%
gt() %>%
cols_label(Exp.or.Obs.level=md("**Experimental.or.Observational.**"),
adjusted.mean=md("**Adjusted mean**"),
adjusted.CI=md("**Adjusted 95% CI**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**")) %>%
cols_align(align = "center")
table.model.rr.Exp.or.Obs.gt.3PSM
Experimental.or.Observational. | Adjusted mean | Adjusted 95% CI | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|
Experimental | 0.06 | [-0.06,0.19] | -0.08 | [-0.15,0] |
Observational | -0.19 | [-0.27,-0.11] | -0.25 | [-0.31,-0.18] |
Table S5.2. Results adjusted and unadjusted for publication bias for all levels of the moderator ‘Experimental.or.Observational.’ using a step function selection model based on one cut-point (\(\alpha = 0.05\)). Estimates are presented as standardized effect sizes using lnRR.
# creating a table to show the results of the selection model
table.comparing.Exp.or.Obs.levels.3PSM <- merge(estimates.three.PSM.smd[c(2:3),],
estimates.publication.bias.model.smd.v.1b,
by="estimate",
all.x=T)
# rounding estimates
table.comparing.Exp.or.Obs.levels.3PSM <- table.comparing.Exp.or.Obs.levels.3PSM %>% mutate_at(vars(mean.x, lower.x,upper.x,mean.y,lower.y,upper.y), list(~round(., 2)))
table.comparing.Exp.or.Obs.levels.3PSM <- data.frame(Exp.or.Obs.level = table.comparing.Exp.or.Obs.levels.3PSM[,1],
adjusted.mean=table.comparing.Exp.or.Obs.levels.3PSM[,2], adjusted.CI=paste0("[",table.comparing.Exp.or.Obs.levels.3PSM[,3],",",table.comparing.Exp.or.Obs.levels.3PSM[,4],"]"),
unadjusted.mean=table.comparing.Exp.or.Obs.levels.3PSM[,5], unadjusted.CI=paste0("[",table.comparing.Exp.or.Obs.levels.3PSM[,6],",",table.comparing.Exp.or.Obs.levels.3PSM[,7],"]"))
table.comparing.Exp.or.Obs.levels.3PSM[,1] <- c("Experimental","Observational")
# creating a formatted table using the R package 'gt'
table.model.smd.Exp.or.Obs.gt.3PSM <- table.comparing.Exp.or.Obs.levels.3PSM %>%
gt() %>%
cols_label(Exp.or.Obs.level=md("**Experimental.or.Observational.**"),
adjusted.mean=md("**Adjusted mean**"),
adjusted.CI=md("**Adjusted 95% CI**"),
unadjusted.mean=md("**Unadjusted mean**"),
unadjusted.CI=md("**Unadjusted 95% CI**")) %>%
cols_align(align = "center")
table.model.smd.Exp.or.Obs.gt.3PSM
Experimental.or.Observational. | Adjusted mean | Adjusted 95% CI | Unadjusted mean | Unadjusted 95% CI |
---|---|---|---|---|
Experimental | -0.14 | [-0.46,0.17] | -0.37 | [-0.6,-0.13] |
Observational | -0.49 | [-0.71,-0.26] | -0.57 | [-0.76,-0.39] |
Table S5.3. Results adjusted and unadjusted for publication bias for all levels of the moderator ‘Experimental.or.Observational.’ using a step function selection model based on one cut-point (\(\alpha = 0.05\)). Estimates are presented as standardized effect sizes using SMD.
Last, using the aggregated data (see above), we will also perform a cumulative meta-analysis using the function cumul() from the R package metafor (Viechtbauer 2010). By displaying the results of this cumulative meta-analysis as a forest plot, we can see if there is evidence of decline effects (more in the main text), which we do not seem to have for lnRR (Figure S5.9, left-hand) and SMD (Figure S5.10, left-hand).
# lnRR
cum.meta.analysis.model.rr.agg <- cumul(meta.analysis.model.rr.agg)
# SMD
cum.meta.analysis.model.smd.agg <- cumul(meta.analysis.model.smd.agg)
par(mfrow = c(1, 2))
forest(cum.meta.analysis.model.rr.agg, xlab = "Overall estimate (lnRR)")
forest(cum.meta.analysis.model.smd.agg, xlab = "Overall estimate (SMD)")
As in S5.1.2, we will run the trim-and-fill test on 1000 datasets, each with only 1 effect size per study (sampled randomly).
For an introduction to the trim-and-fill method see section S5.2.1.1 and the main text. Below are the summary results of the trim-and-fill method after running 1000 randomizations.
# function for randomly selecting 1 effect size from each study
func_S5.2.2.1 <- function(sim = 1, df){
# splitting dataframe into each study
study_list <- split(df, df$paperID)
# randomly extracting one effect size per study
SingleStudyES_df <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),c("paperID","yi","vi")]))
# running the model on the dataframe now each study only has a single effect size
model.tmp <- rma(yi, vi, test="knha",data=SingleStudyES_df)
# applying trim-and-fill method
TAF <- trimfill(model.tmp)
# creating a dataframe with results from the trim-and-fill methods, with:
#n = number of missing studies
#beta = adjusted overall effect
df <- data.frame(sim,
n=TAF$k0,
beta=TAF$beta[[1]])
return(df)
}
# applying the function 1000 times for lnRR
#trimfill.sampling.rr <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.1(sim = x, df = dataset.rr)))
# saving data to save time
# save(trimfill.sampling.rr,file = here("data","trimfill_sampling_rr_1000s.RData"))
load(here("data","trimfill_sampling_rr_1000s.RData"))
# applying the function 1000 times for SMR
#trimfill.sampling.smd <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.1(sim = x, df = dataset.smd)))
# saving data to save time
# save(trimfill.sampling.smd,file = here("data","trimfill_sampling_smd_1000s.RData"))
load(here("data","trimfill_sampling_smd_1000s.RData"))
The median number of missing studies for lnRR was 0 (range = 0 - 17; Figure S5.9A). Additionally, the median adjusted overall lnRR according to the trim-and-fill method was -0.17 (range = -0.2 - -0.11; Figure S5.9B), which is larger in magnitude than the adjusted overall lnRR estimated by our suggested multilevel meta-regression method (-0.06, 95% CI = [-0.18,0.07]; more in section S4.2.4.3).
# plotting the distribution of the number of missing studies according to the trim-and-fill method
n.rr <- ggplot(data=trimfill.sampling.rr,aes(x=n)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.rr, aes(xintercept=median(n)),
linetype="dashed", size=1)
# plotting the distribution of the adjusted overall effect according to the trim-and-fill method
betas.rr <- ggplot(data=trimfill.sampling.rr,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.rr, aes(xintercept=median(beta)),
linetype="dashed", size=1)
ggarrange(n.rr, betas.rr,
labels = c("A", "B"),
ncol = 2, nrow = 1)
The median number of missing studies for SMD was 0 (range = 0 - 1; Figure S5.10A). Additionally, the median adjusted overall SMD according to the trim-and-fill method was -0.5 (range = -0.57 - -0.43; Figure S5.10B), which is larger in magnitude than the adjusted overall SMD estimated by our suggested multilevel meta-regression method (-0.1, 95% CI = [-0.45,0.25]; more in section S4.2.4.3).
# plotting the distribution of the number of missing studies according to the trim-and-fill method
n.smd <- ggplot(data=trimfill.sampling.smd,aes(x=n)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.smd, aes(xintercept=median(n)),
linetype="dashed", size=1)
# plotting the distribution of the adjusted overall effect according to the trim-and-fill method
betas.smd <- ggplot(data=trimfill.sampling.smd,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=trimfill.sampling.smd, aes(xintercept=median(beta)),
linetype="dashed", size=1)
ggarrange(n.smd, betas.smd,
labels = c("A", "B"),
ncol = 2, nrow = 1)
For an introduction to selection models see section S5.2.1.2 and the main text. As before, we ran a step function selection model based on one cut-point (\(\alpha = 0.05\)); this model is sometimes referred to as a 3 parameter selection model. For simplicity, we only ran the selection model for the intercept-only model (but see section S5.2.1.2 for how to run a selection model for a meta-regression). Below are the summary results of the 3 parameter selection model after running 1000 randomizations.
# function for randomly selecting 1 effect size from each study
func_S5.2.2.2 <- function(sim = 1, df){
# splitting dataframe into each study
study_list <- split(df, df$paperID)
# randomly extracting one effect size per study
SingleStudyES_df <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),c("paperID","yi","vi")]))
# running the model on the dataframe now each study only has a single effect size
model.tmp <- rma(yi, vi, method="ML", test="knha",data=SingleStudyES_df )
# extracting the adjusted overall effect according to the selection model
threePSM.vector <- selmodel(model.tmp, type="stepfun", steps=c(0.05))$beta[[1]]
# creating a dataframe with the n.randomization.r of estimated number
# of missing studies and adjusted overall effects
threePSM.sampling <- data.frame(sim, beta=threePSM.vector)
return(threePSM.sampling)
}
# applying the function 1000 times for lnRR
#threePSM.sampling.rr <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.2(sim = x, df = dataset.rr)))
# saving data to save time
# save(threePSM.sampling.rr,file = here("data","3PSM_sampling_rr_1000s.RData"))
load(here("data","3PSM_sampling_rr_1000s.RData"))
The median adjusted overall lnRR according to the 3 parameter selection model was -0.07 (range = -0.11 - -0.01; Figure S5.4B), which is very similar to the adjusted overall lnRR estimated by our suggested multilevel meta-regression method (-0.06, 95% CI = [-0.18,0.07]; more in section S4.2.4.3).
# plotting the distribution of adjusted overall effect sizes according to the 3 parameter selection model
ggplot(data=threePSM.sampling.rr,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=threePSM.sampling.rr, aes(xintercept=median(beta)),
linetype="dashed", size=1)
# applying the function 1000 times for SMD
#threePSM.sampling.smd <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.2(sim = x, df = dataset.smd)))
# saving data to save time
# save(threePSM.sampling.smd,file = here("data","3PSM_sampling_smd_1000s.RData"))
load(here("data","3PSM_sampling_smd_1000s.RData"))
The median adjusted overall SMD according to the 3 parameter selection model was -0.29 (range = -0.38 - -0.2; Figure S5.4B), which is larger in magnitude than the adjusted overall SMD estimated by our suggested multilevel meta-regression method (-0.1, 95% CI = [-0.45,0.25]; more in section S4.2.4.3).
# plotting the distribution of adjusted overall effect sizes according to the 3 parameter selection model
ggplot(data=threePSM.sampling.smd,aes(x=beta)) +
geom_density(alpha=.5,fill="darkorchid4") +
geom_vline(data=threePSM.sampling.smd, aes(xintercept=median(beta)),
linetype="dashed", size=1)
For an introduction to cumulative meta-analysis see section S5.2.1.3 and the main text. Here, we performed a sampling procedure where we ran a cumulative meta-analysis test for each dataset and extracted the mean effects estimated by the cumulative meta-analysis. We then plotted the median and 95% quantiles of those mean effects at each step. As before, the forest plot does not show any clear evidence of decline effects in this dataset (Figure S5.6).
func_S5.2.2.3 <- function(sim = 1, df = dataset.rr){
# splitting dataframe into each study
study_list <- split(df, df$paperID)
# randomly extracting one effect size per study
SingleStudyES_df <- dplyr::bind_rows(lapply(study_list, function(x)
x[sample(1:nrow(x), 1),c("paperID","year","yi","vi")]))
# running the model on the dataframe now each study only has a single effect size
model.tmp <- rma(yi, vi, method="ML", test="knha",data=SingleStudyES_df)
# order of years
year_order <- order(SingleStudyES_df$year)
#cumulative meta-analysis
cumul.ma <- metafor::cumul(x = model.tmp, order = year_order)
# dataframe for export
df <- data.frame(order = 1:length(year_order),
year = SingleStudyES_df$year[year_order],
sim,
beta = as.vector(cumul.ma$estimate),
lb = as.vector(cumul.ma$ci.lb),
ub = as.vector(cumul.ma$ci.ub))
return(df)
}
# running function 1000 times
# @TO-DO: is 1000 really necessary here? It takes a very long time and the dataframe is enormous (because it's 1000 x all the years)
# cumul.sampling.rr <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.3(sim = x, df = dataset.rr)))
# save(cumul.sampling.rr,file = here("data","cumul_sampling_rr_1000s.RData"))
# cumul.sampling.smd <- dplyr::bind_rows(lapply(1:1000, function(x) func_S5.2.2.3(sim = x, df = dataset.smd)))
# save(cumul.sampling.smd,file = here("data","cumul_sampling_smd_1000s.RData"))
load(here("data","cumul_sampling_rr_1000s.RData"))
load(here("data","cumul_sampling_smd_1000s.RData"))
cumul.sampling.rr.summarized <- as.data.frame(cumul.sampling.rr
%>% group_by(order) %>%
summarise(median = median(beta),
lowCI = quantile(beta,probs = c(0.025)),
hiCI = quantile(beta,probs = c(0.975))))
# reverses the factor level ordering for labels after coord_flip()
cumul.sampling.rr.summarized$order <- factor(cumul.sampling.rr.summarized$order, levels=rev(cumul.sampling.rr.summarized$order))
ggplot(data=cumul.sampling.rr.summarized, aes(x=order, y=median, ymin=lowCI, ymax=hiCI)) +
geom_pointrange() +
geom_hline(yintercept=0, lty=2) + # add a dashed reference line at 0 (vertical after coord_flip)
coord_flip() + # flip coordinates (puts labels on y axis)
xlab("Order of publication") + ylab("Overall lnRR (95% CI)") +
theme_bw()
cumul.sampling.smd.summarized <- as.data.frame(cumul.sampling.smd
%>% group_by(order) %>%
summarise(median = median(beta),
lowCI = quantile(beta,probs = c(0.025)),
hiCI = quantile(beta,probs = c(0.975))))
# reverses the factor level ordering for labels after coord_flip()
cumul.sampling.smd.summarized$order <- factor(cumul.sampling.smd.summarized$order, levels=rev(cumul.sampling.smd.summarized$order))
ggplot(data=cumul.sampling.smd.summarized, aes(x=order, y=median, ymin=lowCI, ymax=hiCI)) +
geom_pointrange() +
geom_hline(yintercept=0, lty=2) + # add a dashed reference line at 0 (vertical after coord_flip)
coord_flip() + # flip coordinates (puts labels on y axis)
xlab("Order of publication") + ylab("Overall SMD (95% CI)") +
theme_bw()
There is no code for Fig. 1 (created in PowerPoint)
# loading data
PB <- read.csv("./Survey/PubBias_tests.csv")
Item16 <- read.csv("./Survey/Item16PRISMAEcoEvo.csv")
# number of times each test was represented
PB %>% group_by(PB_reported_in_paper) %>% tally() %>% ungroup %>% arrange(n) %>%
mutate(percent = paste0(round(n/sum(n)*100,1), "%")) -> Ntests
# selection methods were not reported in any papers
Ntests <- rbind(Ntests,
data.frame(PB_reported_in_paper = "Selection (method) models (e.g. Copas, Hedges or Iyengar & Greenhouse model)",
n = 0,
percent = "0.0%"))
# making label for graphs
PB_labels <- Ntests$PB_reported_in_paper
PB_labels <- case_when(PB_labels == "Funnel plots (including contour-enhanced funnel plots)" ~ "(A) Funnel plots",
PB_labels == "Regression-based test (e.g. Egger regression and its variants)" ~ "(D) Regression-based methods",
PB_labels == "Time-lag bias tests (e.g., regression or correlation on the relationship between effect size and time or cumulative meta-analysis)" ~ "(I) Time-lag bias tests",
PB_labels == "File drawer numbers or fail safe N (Rosenthal, Orwin or Rosenberg method)" ~ "(E) Fail-safe N",
PB_labels == "Trim-and-fill tests" ~ "(F) Trim-and-fill tests",
PB_labels == "P-curve, P-uniform or its variants" ~ "(G) P-value-based methods",
PB_labels == "Weighted histogram" ~ "(K) Other (weighted histogram)",
PB_labels == "Correlation-based tests (e.g. Begg & Manzumdar rank correlation)" ~ "(C) Correlation-based methods",
PB_labels == "Normal quantile (QQ) plots (Wang & Bushman)" ~ "(B) Normal quantile (QQ) plots",
PB_labels == "None reported" ~ "(J) None reported",
PB_labels == "Selection (method) models (e.g. Copas, Hedges or Iyengar & Greenhouse model)" ~ "(H) Selection models")
# putting in same order as survey question
Ntests$PB_label <- factor(PB_labels, levels = rev(sort(PB_labels)))
# ggplot theme
background.colour = "white"
tm <- theme(panel.background = element_rect(fill = background.colour),
plot.background = element_rect(fill = background.colour),
panel.grid = element_blank(),
axis.line = element_line(size = 0.75, colour = "black"),
axis.ticks = element_line(size = 0.75, colour = "black"),
axis.ticks.length = unit(0.1,"cm"),
axis.text.x = element_text(size = 14, colour = "black"),
axis.title = element_text(size = 18, colour = "black"),
axis.text.y = element_text(hjust = 0, size = 16, colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_text(margin = margin(t=10,r=0,b=2,l=0)),
# axis.title.x = element_blank(),
plot.title = element_text(size = 18, colour = "black", face = "bold", hjust = 0.5, margin = margin(t=2,r=0,b=10,l=0)),
legend.background = element_rect(fill = background.colour),
legend.title = element_blank(),
legend.text = element_text(size = 16, margin = margin(2,0,2,0)),
legend.spacing.y = unit(1, "cm"),
legend.position = "bottom",
text = element_text(family = "Arial"),
legend.key = element_blank(),
plot.margin = unit(c(0.3,1,0.3,0.3), "cm"))
# colour to fill columns
colcol = "#cad7ed"
## Plot of the types of tests
fig2 <- ggplot(Ntests) + tm + theme(plot.title = element_text(hjust = -1)) +
geom_col(aes(PB_label, n), fill = colcol, colour = "black", width = 0.75, size = 1) +
geom_text(aes(x = PB_label, y = n, label = percent), hjust = -0.1, size = 5, family = "Arial") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80)) +
coord_flip() +
labs(x = "type of publication bias test",
y = "number of papers reporting test")
#fig2
# creating data
set.seed(77777)
# setting parameters
n.effect <- 100
sigma2.s <- 0.05
beta1 <- 0.2
# using a negative binomial distribution to get a good spread of sample sizes
n.sample <- rnbinom(n.effect, mu = 30, size = 0.7) + 4
# sampling variance of Fisher's z-transformed correlation (Zr): 1/(n - 3)
vi <- 1/(n.sample - 3)
# moderator x 1
xi <- rnorm(n.effect)
# the underlying overall effect is r = 0.2 (i.e., Zr = atanh(0.2) ≈ 0.203)
Zr <- atanh(0.2) + beta1*xi + rnorm(n.effect, 0, sqrt(sigma2.s)) + rnorm(n.effect, 0, sqrt(vi))
#qplot(Zr, 1/sqrt(vi))
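# prec = precision (1/SE), wi = inverse-variance weight, zi = z-score (Zr/SE);
# these columns are used later for the Egger-type regressions (Fig. 4)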
dat <- data.frame(yi = Zr, vi = vi, sei = sqrt(vi), xi = xi, ni = n.sample, prec = 1 / sqrt(vi), wi = 1/vi, zi = Zr/sqrt(vi))
rows <- 1:nrow(dat)
expected <- which(1/dat$sei < 5 & dat$yi < 0.25)
unexpected <- which(1/dat$sei > 4.7 & dat$yi > 0.25)
col_point <- ifelse(rows %in% expected, "red", ifelse(rows %in% unexpected, "blue", "black"))
dat$col_point <- col_point
#https://stackoverflow.com/questions/50012553/combining-plots-created-by-r-base-lattice-and-ggplot2
# saving base plot as an object https://www.andrewheiss.com/blog/2016/12/08/save-base-graphics-as-pseudo-objects-in-r/
mod_ra <- rma(yi, vi, data = dat)
mod_fe <- rma(yi, vi, data = dat, method = "FE")
# margin controlling
#par(mar=c(5,4,0,0) + 0.1)
# DL for the sunset plot and the contour-enhanced funnel - shading the region 0.01 < p < 0.05
# a
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(dat$yi, dat$vi, ni = dat$ni, yaxis="ni",
xlim = c(-3, 3),
refline=mod_ra$beta, xlab = "Effect size (Zr)")
a <- recordPlot()
invisible(dev.off())
# b
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(mod_ra, yaxis="seinv", col = col_point,
ylim = c(1, 12), xlim = c(-3, 3),
ylab = "Precison (1/SE)", xlab = "Effect size (Zr)")
legend(x = 1.4, y = 12, legend = c("expected", "","unexpected"), pch = c(19, 0, 19), col = c("red", "grey83", "blue"), bty = "n")
b <- recordPlot()
invisible(dev.off())
# c
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(mod_ra,
yaxis = "sei",
xlim = c(-3, 3),
level = c(95, 99),
shade = c("white", "gray55"),
refline = 0, legend = F,
ylab = "Standard error (SE)", xlab = "Effect size (Zr)")
legend(x = 1, y = 0, legend = c("0.01 < p < 0.05"), pch = c(15), col = c("gray55"), bty = "n")
c <- recordPlot()
invisible(dev.off())
# d
d <- viz_sunset(dat[,c("yi", "sei")],
power_contours = "continuous",
contours = T,
power_stats = F,
xlab = "Effect size (Zr)",
ylab = "Standard error (SE)")
# e
# meta-regression (random-effects model)
mod_re1 <- rma(yi, vi, mod = ~ xi, data = dat)
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(mod_re1,
yaxis = "sei",
xlim = c(-3, 3), ylim = c(0, 1),
#level = c(95, 99),
#shade = c("white", "gray75"),
refline = 0, legend = F, ylab = "Standard error (SE)", xlab = "Standardized residuals (Zr)")
#legend(x = 1, y = 0, legend = c("0.01 < p < 0.05", ""), pch = c(15, 0), col = c("gray75", "white"), bg = "white")
e <- recordPlot()
invisible(dev.off())
# f
#
mod <- metagen(TE = yi, seTE = sqrt(vi) , data = dat)
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
#radial.rma(mod_fe, xlim = c(0, 12.5), zlab = "Z score", xlab = "Precision (1/SE)")
#legend(1,1, "test", bty = "n")
radial(mod, level = 0.95, pch = 19, xlab = "Precision (1/SE)", ylab = "Z score")
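# reference line through the origin; the slope is hard-coded here, presumably to match
# the overall (common-effect) estimate, as in a standard radial (Galbraith) plot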
abline(a = 0, b = 0.1908, lwd = 1.5)
f <- recordPlot()
invisible(dev.off())
# fig 3
fig3 <- (ggdraw(a) + ggdraw(b)) /(ggdraw(c) + ggdraw(d))/ (plot_grid(e) + ggdraw(f)) + plot_annotation(tag_levels = 'a')
fig3
# creating data with publication bias
dat2 <- dat[dat$col_point != "red", ]
# meta-analysis
mod_ra2 <- rma(yi, vi, data = dat2)
# egger regression (the same as in S2)
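# egger1: Egger's original form - regress the z-scores (yi/sei) on precision (1/sei);
#         an intercept away from zero indicates funnel asymmetry
# egger2: equivalent weighted regression of effect sizes on their standard errors
#         (weights = 1/vi); here the slope captures the asymmetry and the intercept
#         estimates the effect at SE = 0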
egger1 <- lm(zi ~ prec, data = dat2)
egger2 <- lm(yi ~ sei, weight = wi, data = dat2)
# data for time-lag bias
set.seed(123)
position <- sample(1:nrow(dat2), size = 15)
dat3 <- dat2[sort(position), ]
sorting <- c(12, 13, 14, 3, 4, 9, 8, 5, 10, 11, 6, 7, 2, 15, 1)
dat3 <- dat3[sorting, ]
dat3$year <- c(2000, 2001, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013, 2014, 2016, 2018, 2019)
# cumulative meta-analysis
cum <- rma(yi, vi, data=dat3)
cum_ma <- cumul(cum)
#forest(cum_ma, xlab = "Overall estimate (Zr)")
# meta-regression
mod_re2 <- rma(yi, vi, mod = ~ year, data = dat3)
# funnel
taf <- trimfill(mod_ra2)
# Plotting from here
p1 <- ggplot(dat2, aes(prec, zi)) + geom_point() +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_hline(yintercept = egger1$coefficients[[1]]) +
#geom_smooth(method = "lm") +
labs(x = "Precision (1/SE)",
y = "Z score")
p2 <- ggplot(dat2, aes(sei, yi)) + geom_point() +
# geom_smooth(method = "lm") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_abline(intercept = egger2$coefficients[[1]], slope = egger2$coefficients[[2]]) +
labs(x = "Standard error (SE)",
y = "Effect size (Zr)")
# c and d
# probably 15 data points for cumulative meta-analyses
cum <- rma(yi, vi, data=dat3)
cum_ma <- cumul(cum)
# p3
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
forest(cum_ma, xlab = "Overall estimate (Zr)")
p3 <- recordPlot()
invisible(dev.off())
# p4
p4 <- ggplot(dat3, aes(x = year, y = yi, size = prec)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_abline(intercept = mod_re2$beta[[1]], slope = mod_re2$beta[[2]]) +
geom_point(shape = 21, fill = "grey90") +
labs(x = "Publication year", y = "Effect size (Zr)", size ="Precision (1/SE)") +
guides(fill = "none", colour = "none") +
theme(legend.position = c(0, 0.15), legend.justification = c(0, 1)) +
theme(legend.direction = "horizontal") +
# theme(legend.background = element_rect(fill = "white", colour = "black")) +
theme(legend.background = element_blank()) +
theme(axis.text.y = element_text(size = 10, colour = "black", hjust = 0.5, angle = 90))
# p5
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(taf, yaxis="seinv", col = col_point, estimator="R0",
ylim = c(1, 12),
#xlim = c(-3, 3),
ylab = "Precison (1/SE)", xlab = "Effect size (Zr)")
p5 <- recordPlot()
invisible(dev.off())
# p6
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(taf, yaxis="sei", col = col_point, estimator="R0",
#xlim = c(-3, 3),
ylab = "Standard error (SE)", xlab = "Effect size (Zr)")
p6 <- recordPlot()
invisible(dev.off())
# e and f
# maybe use the same datasets as Fig 3
# fig 4
fig4 <- (plot_grid(p1) + plot_grid(p2)) /(ggdraw(p3) + plot_grid(p4))/(plot_grid(p5) + plot_grid(p6)) + plot_annotation(tag_levels = 'a')
fig4
# dmetar
#devtools::install_github("MathiasHarrer/dmetar")
#library(dmetar)
####################
# preparation#######
####################
# changing the pcurve function from dmetar so that fonts are large enough to read
pcurve2 <- function (x, effect.estimation = FALSE, N, dmin = 0, dmax = 1)
{
metaobject = x
rm(x)
if (!(class(metaobject)[1] %in% c("metagen", "metabin", "metacont",
"metacor", "metainc", "meta", "metaprop"))) {
for (i in 1:length(colnames(metaobject))) {
te.exists = FALSE
if (colnames(metaobject)[i] == "TE") {
te.exists = TRUE
break
}
else {
}
}
for (i in 1:length(colnames(metaobject))) {
sete.exists = FALSE
if (colnames(metaobject)[i] == "seTE") {
sete.exists = TRUE
break
}
else {
}
}
for (i in 1:length(colnames(metaobject))) {
studlab.exists = FALSE
if (colnames(metaobject)[i] == "studlab") {
studlab.exists = TRUE
break
}
else {
}
}
if (te.exists == FALSE | sete.exists == FALSE | studlab.exists ==
FALSE) {
stop("x must be a meta-analysis object generated by meta functions or a data.frame with columns labeled studlab, TE, and seTE.")
}
}
options(scipen = 999)
zvalues.input = abs(metaobject$TE/metaobject$seTE)
getncp.f = function(df1, df2, power) {
error = function(ncp_est, power, x, df1, df2) pf(x, df1 = df1,
df2 = df2, ncp = ncp_est) - (1 - power)
xc = qf(p = 0.95, df1 = df1, df2 = df2)
return(uniroot(error, c(0, 1000), x = xc, df1 = df1,
df2 = df2, power = power)$root)
}
getncp.c = function(df, power) {
xc = qchisq(p = 0.95, df = df)
error = function(ncp_est, power, x, df) pchisq(x, df = df,
ncp = ncp_est) - (1 - power)
return(uniroot(error, c(0, 1000), x = xc, df = df, power = power)$root)
}
getncp = function(family, df1, df2, power) {
if (family == "f")
ncp = getncp.f(df1 = df1, df2 = df2, power = power)
if (family == "c")
ncp = getncp.c(df = df1, power = power)
return(ncp)
}
percent <- function(x, digits = 0, format = "f", ...) {
paste(formatC(100 * x, format = format, digits = digits,
...), "%", sep = "")
}
pbound = function(p) pmin(pmax(p, 0.00000000000000022), 1 -
0.00000000000000022)
prop33 = function(pc) {
prop = ifelse(family == "f" & p < 0.05, 1 - pf(qf(1 -
pc, df1 = df1, df2 = df2), df1 = df1, df2 = df2,
ncp = ncp33), NA)
prop = ifelse(family == "c" & p < 0.05, 1 - pchisq(qchisq(1 -
pc, df = df1), df = df1, ncp = ncp33), prop)
prop
}
stouffer = function(pp) sum(qnorm(pp), na.rm = TRUE)/sqrt(sum(!is.na(pp)))
zvalues.input = paste("z=", zvalues.input, sep = "")
filek = "input"
raw = zvalues.input
raw = tolower(raw)
ktot = length(raw)
k = seq(from = 1, to = length(raw))
stat = substring(raw, 1, 1)
test = ifelse(stat == "r", "t", stat)
family = test
family = ifelse(test == "t", "f", family)
family = ifelse(test == "z", "c", family)
par1 = str_locate(raw, "\\(")[, 1]
par2 = str_locate(raw, "\\)")[, 1]
comma = str_locate(raw, ",")[, 1]
eq = str_locate(raw, "=")[, 1]
df = as.numeric(ifelse(test == "t", substring(raw, par1 +
1, par2 - 1), NA))
df1 = as.numeric(ifelse(test == "f", substring(raw, par1 +
1, comma - 1), NA))
df1 = as.numeric(ifelse(test == "z", 1, df1))
df1 = as.numeric(ifelse(test == "t", 1, df1))
df1 = as.numeric(ifelse(test == "c", substring(raw, par1 +
1, par2 - 1), df1))
df2 = as.numeric(ifelse(test == "f", substring(raw, comma +
1, par2 - 1), NA))
df2 = as.numeric(ifelse(test == "t", df, df2))
equal = abs(as.numeric(substring(raw, eq + 1)))
value = ifelse((stat == "f" | stat == "c"), equal, NA)
value = ifelse(stat == "r", (equal/(sqrt((1 - equal^2)/df2)))^2,
value)
value = ifelse(stat == "t", equal^2, value)
value = ifelse(stat == "z", equal^2, value)
p = ifelse(family == "f", 1 - pf(value, df1 = df1, df2 = df2),
NA)
p = ifelse(family == "c", 1 - pchisq(value, df = df1), p)
p = pbound(p)
ksig = sum(p < 0.05, na.rm = TRUE)
khalf = sum(p < 0.025, na.rm = TRUE)
if (ksig <= 2) {
stop("Two or less significant (p<0.05) effect sizes were detected, so p-curve analysis cannot be conducted.")
}
ppr = as.numeric(ifelse(p < 0.05, 20 * p, NA))
ppr = pbound(ppr)
ppr.half = as.numeric(ifelse(p < 0.025, 40 * p, NA))
ppr.half = pbound(ppr.half)
ncp33 = mapply(getncp, df1 = df1, df2 = df2, power = 1/3,
family = family)
pp33 = ifelse(family == "f" & p < 0.05, 3 * (pf(value, df1 = df1,
df2 = df2, ncp = ncp33) - 2/3), NA)
pp33 = ifelse(family == "c" & p < 0.05, 3 * (pchisq(value,
df = df1, ncp = ncp33) - 2/3), pp33)
pp33 = pbound(pp33)
prop25 = 3 * prop33(0.025)
prop25.sig = prop25[p < 0.05]
pp33.half = ifelse(family == "f" & p < 0.025, (1/prop25) *
(pf(value, df1 = df1, df2 = df2, ncp = ncp33) - (1 -
prop25)), NA)
pp33.half = ifelse(family == "c" & p < 0.025, (1/prop25) *
(pchisq(value, df = df1, ncp = ncp33) - (1 - prop25)),
pp33.half)
pp33.half = pbound(pp33.half)
Zppr = stouffer(ppr)
Zpp33 = stouffer(pp33)
Zppr.half = stouffer(ppr.half)
Zpp33.half = stouffer(pp33.half)
p.Zppr = pnorm(Zppr)
p.Zpp33 = pnorm(Zpp33)
p.Zppr.half = pnorm(Zppr.half)
p.Zpp33.half = pnorm(Zpp33.half)
main.results = as.numeric(c(ktot, ksig, khalf, Zppr, p.Zppr,
Zpp33, p.Zpp33, Zppr.half, p.Zppr.half, Zpp33.half, p.Zpp33.half))
prop25.obs = sum(p < 0.025)/sum(p < 0.05)
binom.r = 1 - pbinom(q = prop25.obs * ksig - 1, prob = 0.5,
size = ksig)
binom.33 = ppoibin(kk = prop25.obs * ksig, pp = prop25[p <
0.05])
binomial = c(mean(prop25.sig), prop25.obs, binom.r, binom.33)
cleanp = function(p) {
p.clean = round(p, 4)
p.clean = substr(p.clean, 2, 6)
p.clean = paste0("= ", p.clean)
if (p < 0.0001)
p.clean = " < .0001"
if (p > 0.9999)
p.clean = " > .9999"
return(p.clean)
}
if (khalf == 0) {
Zppr.half = "N/A"
p.Zppr.half = "=N/A"
Zpp33.half = "N/A"
p.Zpp33.half = "=N/A"
}
if (khalf > 0) {
Zppr.half = round(Zppr.half, 2)
Zpp33.half = round(Zpp33.half, 2)
p.Zppr.half = cleanp(p.Zppr.half)
p.Zpp33.half = cleanp(p.Zpp33.half)
}
Zppr = round(Zppr, 2)
Zpp33 = round(Zpp33, 2)
p.Zppr = cleanp(p.Zppr)
p.Zpp33 = cleanp(p.Zpp33)
binom.r = cleanp(binom.r)
binom.33 = cleanp(binom.33)
powerfit = function(power_est) {
ncp_est = mapply(getncp, df1 = df1, df2 = df2, power = power_est,
family = family)
pp_est = ifelse(family == "f" & p < 0.05, (pf(value,
df1 = df1, df2 = df2, ncp = ncp_est) - (1 - power_est))/power_est,
NA)
pp_est = ifelse(family == "c" & p < 0.05, (pchisq(value,
df = df1, ncp = ncp_est) - (1 - power_est))/power_est,
pp_est)
pp_est = pbound(pp_est)
return(stouffer(pp_est))
}
fit = c()
fit = abs(powerfit(0.051))
for (i in 6:99) fit = c(fit, abs(powerfit(i/100)))
mini = match(min(fit, na.rm = TRUE), fit)
hat = (mini + 4)/100
x.power = seq(from = 5, to = 99)/100
get.power_pct = function(pct) {
z = qnorm(pct)
error = function(power_est, z) powerfit(power_est) -
z
return(uniroot(error, c(0.0501, 0.99), z)$root)
}
p.power.05 = pnorm(powerfit(0.051))
p.power.99 = pnorm(powerfit(0.99))
if (p.power.05 <= 0.95)
power.ci.lb = 0.05
if (p.power.99 >= 0.95)
power.ci.lb = 0.99
if (p.power.05 > 0.95 && p.power.99 < 0.95)
power.ci.lb = get.power_pct(0.95)
if (p.power.05 <= 0.05)
power.ci.ub = 0.05
if (p.power.99 >= 0.05)
power.ci.ub = 0.99
if (p.power.05 > 0.05 && p.power.99 < 0.05)
power.ci.ub = get.power_pct(0.05)
power_results = c(power.ci.lb, hat, power.ci.ub)
gcdf1 = prop33(0.01)
gcdf2 = prop33(0.02)
gcdf3 = prop33(0.03)
gcdf4 = prop33(0.04)
green1 = mean(gcdf1, na.rm = TRUE) * 3
green2 = mean(gcdf2 - gcdf1, na.rm = TRUE) * 3
green3 = mean(gcdf3 - gcdf2, na.rm = TRUE) * 3
green4 = mean(gcdf4 - gcdf3, na.rm = TRUE) * 3
green5 = mean(1/3 - gcdf4, na.rm = TRUE) * 3
green = 100 * c(green1, green2, green3, green4, green5)
ps = ceiling(p[p < 0.05] * 100)/100
blue = c()
for (i in c(0.01, 0.02, 0.03, 0.04, 0.05)) blue = c(blue,
sum(ps == i, na.rm = TRUE)/ksig * 100)
red = c(20, 20, 20, 20, 20)
x = c(0.01, 0.02, 0.03, 0.04, 0.05)
par(mar = c(6, 5.5, 1.5, 3))
moveup = max(max(blue[2:5]) - 66, 0)
ylim = c(0, 105 + moveup)
legend.top = 100 + moveup
plot(x, blue, type = "l", col = "dodgerblue2", main = "",
lwd = 2, xlab = "", ylab = "", xaxt = "n", yaxt = "n",
xlim = c(0.01, 0.051), ylim = ylim, bty = "L", las = 1,
axes = F)
x_ = c(".01", ".02", ".03", ".04", ".05")
axis(1, at = x, labels = x_)
y_ = c("0%", "25%", "50%", "75%", "100%")
y = c(0, 25, 50, 75, 100)
axis(2, at = y, labels = y_, las = 1, cex.axis = 1.2)
mtext("Percentage of test results", font = 2, side = 2, line = 3.85,
cex = 1.25)
mtext("p ", font = 4, side = 1, line = 2.3, cex = 1.25)
mtext(" -value", font = 2, side = 1, line = 2.3, cex = 1.5)
points(x, blue, type = "p", pch = 20, bg = "dodgerblue2",
col = "dodgerblue2")
text(x + 0.00075, blue + 3.5, percent(round(blue)/100), col = "black",
cex = 0.75)
lines(x, red, type = "l", col = "firebrick2", lwd = 1.25,
lty = 3)
lines(x, green, type = "l", col = "springgreen4", lwd = 1.5,
lty = 5)
tab1 = 0.017
tab2 = tab1 + 0.0015
gap1 = 9
gap2 = 4
font.col = "gray44"
text.blue = paste0("Power estimate: ", percent(hat), ", CI(",
percent(power.ci.lb), ",", percent(power.ci.ub), ")")
text(tab1, legend.top, adj = 0, cex = 0.85, bquote("Observed " *
italic(p) * "-curve"))
text(tab2, legend.top - gap2, adj = 0, cex = 0.68, text.blue,
col = font.col)
text.red = bquote("Tests for right-skewness: " * italic(p) *
""[Full] ~ .(p.Zppr) * ", " * italic(p) * ""[Half] ~
.(p.Zppr.half))
text(tab1, legend.top - gap1, adj = 0, cex = 0.85, "Null of no effect")
text(tab2, legend.top - gap1 - gap2, adj = 0, cex = 0.68,
text.red, col = font.col)
text.green = bquote("Tests for flatness: " * italic(p) *
""[Full] ~ .(p.Zpp33) * ", " * italic(p) * ""[half] ~
.(p.Zpp33.half) * ", " * italic(p) * ""[Binomial] ~
.(binom.33))
text(tab1, legend.top - 2 * gap1, adj = 0, cex = 0.85, "Null of 33% power")
text(tab2, legend.top - 2 * gap1 - gap2, adj = 0, cex = 0.68,
text.green, col = font.col)
segments(x0 = tab1 - 0.005, x1 = tab1 - 0.001, y0 = legend.top,
y1 = legend.top, col = "dodgerblue2", lty = 1, lwd = 1.5)
segments(x0 = tab1 - 0.005, x1 = tab1 - 0.001, y0 = legend.top -
gap1, y1 = legend.top - gap1, col = "firebrick2", lty = 3,
lwd = 1.5)
segments(x0 = tab1 - 0.005, x1 = tab1 - 0.001, y0 = legend.top -
2 * gap1, y1 = legend.top - 2 * gap1, col = "springgreen4",
lty = 2, lwd = 1.5)
rect(tab1 - 0.0065, legend.top - 2 * gap1 - gap2 - 3, tab1 +
0.032, legend.top + 3, border = "gray85")
msgx = bquote("Note: The observed " * italic(p) * "-curve includes " *
.(ksig) * " statistically significant (" * italic(p) *
" < .05) results, of which " * .(khalf) * " are " * italic(p) *
" < .025.")
mtext(msgx, side = 1, line = 4, cex = 0.9, adj = 0)
kns = ktot - ksig
if (kns == 0)
ns_msg = "There were no non-significant results entered."
if (kns == 1)
ns_msg = bquote("There was one additional result entered but excluded from " *
italic(p) * "-curve because it was " * italic(p) *
" > .05.")
if (kns > 1)
ns_msg = bquote("There were " * .(kns) * " additional results entered but excluded from " *
italic(p) * "-curve because they were " * italic(p) *
" > .05.")
mtext(ns_msg, side = 1, line = 4.75, cex = 0.9, adj = 0)
table_calc = data.frame(raw, p, ppr, ppr.half, pp33, pp33.half,
qnorm(ppr), qnorm(ppr.half), qnorm(pp33), qnorm(pp33.half))
headers1 = c("Entered statistic", "p-value", "ppr", "ppr half",
"pp33%", "pp33 half", "Z-R", "Z-R half", "Z-33", "z-33 half")
table_calc = setNames(table_calc, headers1)
headers2 = c("p-value", "Observed (blue)", "Power 33% (Green)",
"Flat (Red)")
table_figure = setNames(data.frame(x, blue, green, red),
headers2)
dropk = function(pp, k, droplow) {
pp = pp[!is.na(pp)]
n = length(pp)
pp = sort(pp)
if (k == 0)
ppk = pp
if (droplow == 1 & k > 0) {
ppk = (pp[(1 + k):n])
ppmin = min(pp[k], k/(n + 1))
ppk = (ppk - ppmin)/(1 - ppmin)
}
if (droplow == 0 & k > 0) {
ppk = pp[1:(n - k)]
ppmax = max(pp[n - k + 1], (n - k)/(n + 1))
ppk = ppk/ppmax
}
ppk = pmax(ppk, 0.00001)
ppk = pmin(ppk, 0.99999)
Z = sum(qnorm(ppk))/sqrt(n - k)
return(pnorm(Z))
}
droplow.r = droplow.33 = drophigh.r = drophigh.33 = c()
for (i in 0:(round(ksig/2) - 1)) {
droplow.r = c(droplow.r, dropk(pp = ppr, k = i, droplow = 1))
drophigh.r = c(drophigh.r, dropk(pp = ppr, k = i, droplow = 0))
droplow.33 = c(droplow.33, dropk(pp = pp33, k = i, droplow = 1))
drophigh.33 = c(drophigh.33, dropk(pp = pp33, k = i,
droplow = 0))
}
if (khalf > 0) {
droplow.halfr = drophigh.halfr = c()
for (i in 0:(round(khalf/2) - 1)) {
droplow.halfr = c(droplow.halfr, dropk(pp = ppr.half,
k = i, droplow = 1))
drophigh.halfr = c(drophigh.halfr, dropk(pp = ppr.half,
k = i, droplow = 0))
}
}
plotdrop = function(var, col) {
k = length(var)
plot(0:(k - 1), var, xlab = "", ylab = "", type = "b",
yaxt = "n", xaxt = "n", main = "", cex.main = 1.15,
ylim = c(0, 1), col = col)
points(0, var[1], pch = 19, cex = 1.6)
abline(h = 0.05, col = "red")
axis(2, c(0.05, 2:9/10), labels = c(".05", ".2", ".3",
".4", ".5", "6", "7", ".8", ".9"), las = 1, cex.axis = 1.5)
axis(1, c(0:(k - 1)), las = 1, cex.axis = 1.4)
}
if (effect.estimation == TRUE) {
ci.to.t = function(TE, lower, upper, n) {
z.to.d = function(z, n) {
d = (2 * z)/sqrt(n)
return(abs(d))
}
ci.to.p = function(est, lower, upper) {
SE = (upper - lower)/(2 * 1.96)
z = abs(est/SE)
p = exp(-0.717 * z - 0.416 * z^2)
return(p)
}
d.to.t = function(d, n) {
df = n - 2
t = (d * sqrt(df))/2
return(t)
}
p = ci.to.p(TE, lower, upper)
z = abs(qnorm(p/2))
d = z.to.d(z, n)
t = d.to.t(d, n)
return(t)
}
loss = function(t_obs, df_obs, d_est) {
t_obs = abs(t_obs)
p_obs = 2 * (1 - pt(t_obs, df = df_obs))
t.sig = subset(t_obs, p_obs < 0.05)
df.sig = subset(df_obs, p_obs < 0.05)
ncp_est = sqrt((df.sig + 2)/4) * d_est
tc = qt(0.975, df.sig)
power_est = 1 - pt(tc, df.sig, ncp_est)
p_larger = pt(t.sig, df = df.sig, ncp = ncp_est)
ppr = (p_larger - (1 - power_est))/power_est
KSD = ks.test(ppr, punif)$statistic
return(KSD)
}
if (missing(N)) {
stop("If 'effect.estimation=TRUE', argument 'N' must be provided.")
}
if (length(N) != length(metaobject$TE)) {
stop("N must be of same length as the number of studies contained in x.")
}
lower = metaobject$TE - (metaobject$seTE * 1.96)
upper = metaobject$TE + (metaobject$seTE * 1.96)
t_obs = ci.to.t(metaobject$TE, lower, upper, N)
df_obs = N - 2
loss.all = c()
di = c()
for (i in 0:((dmax - dmin) * 100)) {
d = dmin + i/100
di = c(di, d)
options(warn = -1)
loss.all = c(loss.all, loss(df_obs = df_obs, t_obs = t_obs,
d_est = d))
options(warn = 0)
}
imin = match(min(loss.all), loss.all)
dstart = dmin + imin/100
dhat = optimize(loss, c(dstart - 0.1, dstart + 0.1),
df_obs = df_obs, t_obs = t_obs)
options(warn = -0)
plot(di, loss.all, xlab = "Effect size\nCohen-d", ylab = "Loss (D stat in KS test)",
ylim = c(0, 1), main = "How well does each effect size fit? (lower is better)")
points(dhat$minimum, dhat$objective, pch = 19, col = "red",
cex = 2)
text(dhat$minimum, dhat$objective - 0.08, paste0("p-curve's estimate of effect size:\nd=",
round(dhat$minimum, 3)), col = "red")
}
main.results = round(main.results, 3)
ktotal = round(main.results[1])
k.sign = round(main.results[2])
k.025 = round(main.results[3])
skew.full.z = main.results[4]
skew.full.p = main.results[5]
flat.full.z = main.results[6]
flat.full.p = main.results[7]
skew.half.z = main.results[8]
skew.half.p = main.results[9]
flat.half.z = main.results[10]
flat.half.p = main.results[11]
skew.binomial.p = round(binomial[3], 3)
flat.binomial.p = round(binomial[4], 3)
skewness = c(skew.binomial.p, skew.full.z, skew.full.p, skew.half.z,
skew.half.p)
flatness = c(flat.binomial.p, flat.full.z, flat.full.p, flat.half.z,
flat.half.p)
colnames.df = c("pBinomial", "zFull", "pFull", "zHalf", "pHalf")
rownames.df = c("Right-skewness test", "Flatness test")
pcurveResults = rbind(skewness, flatness)
colnames(pcurveResults) = colnames.df
rownames(pcurveResults) = rownames.df
power_results = round(power_results, 3)
powerEstimate = power_results[2]
powerLower = power_results[1]
powerUpper = power_results[3]
Power = as.data.frame(cbind(powerEstimate, powerLower, powerUpper))
rownames(Power) = ""
if (skew.half.p < 0.05 | (skew.half.p < 0.1 & skew.full.p <
0.1)) {
presence.ev = "yes"
}
else {
presence.ev = "no"
}
if (flat.full.p < 0.05 | (flat.half.p < 0.1 & flat.binomial.p <
0.1)) {
absence.ev = "yes"
}
else {
absence.ev = "no"
}
PlotData = round(table_figure, 3)
table_calc[, 1] = NULL
colnames(table_calc) = c("p", "ppSkewFull", "ppSkewHalf",
"ppFlatFull", "ppFlatHalf", "zSkewFull", "zSkewHalf",
"zFlatFull", "zFlatHalf")
Input = cbind(metaobject$TE, round(table_calc, 3))
rownames(Input) = paste(1:length(metaobject$TE), metaobject$studlab)
colnames(Input)[1] = "TE"
if (effect.estimation == TRUE) {
dEstimate = round(dhat$minimum, 3)
return.list = list(pcurveResults = pcurveResults, Power = Power,
PlotData = PlotData, Input = Input, EvidencePresent = presence.ev,
EvidenceAbsent = absence.ev, kInput = ktot, kAnalyzed = k.sign,
kp0.25 = k.025, dEstimate = dEstimate, I2 = metaobject$I2,
class.meta.object = class(metaobject)[1])
class(return.list) = c("pcurve", "effect.estimation")
}
else {
return.list = list(pcurveResults = pcurveResults, Power = Power,
PlotData = PlotData, Input = Input, EvidencePresent = presence.ev,
EvidenceAbsent = absence.ev, kInput = ktot, kAnalyzed = k.sign,
kp0.25 = k.025, I2 = metaobject$I2, class.meta.object = class(metaobject)[1])
class(return.list) = c("pcurve", "no.effect.estimation")
}
cat(" ", "\n")
invisible(return.list)
return.list
}
# this is required for pcurve function
library(poibin)
#########################
# Drawing starts here ###
#########################
p_dat <- data.frame("studlab" = as.factor(1:nrow(dat2)), "TE" = dat2$yi, "seTE" = dat2$sei)
#pcurve(p_dat)
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0,0) + 0.1, cex = 1.2) # NOTE: here is where you can change the font size for at least some parts of the graph, e.g. cex = 2 - this did not seem to work well.
invisible(pcurve2(p_dat))
#mtext(msgx, side = 1, line = 4, cex = 2, adj = 0)
plot <- recordPlot()
invisible(dev.off())
# turning the base plot into a ggplot object
plot <- ggdraw(plot)
#dat2
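# dat2 is the dataset with simulated publication bias (the 'expected' effect sizes removed above);
# the selection models below are fitted to this meta-regression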
model <- rma(yi, vi = vi, data = dat2, method = "ML", mod = ~ xi)
# only sel1 and sel3 are used
#sel0 <- selmodel(model, type = "beta")
sel1 <- selmodel(model, type="halfnorm", prec="sei", scaleprec=FALSE)
#sel2 <- selmodel(model, type="negexp", prec="sei", scaleprec=FALSE)
sel3 <- selmodel(model, type="logistic", prec="sei", scaleprec=FALSE)
#sel4 <- selmodel(model, type="power", prec="sei", scaleprec=FALSE)
sel1b <- selmodel(model, type="halfnorm")
#sel2b <- selmodel(model, type="negexp")
sel3b <- selmodel(model, type="logistic")
#sel4b <- selmodel(model, type="power")
# step function
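# sel5: cutpoints at p = 0.05, 0.10 and 0.50 (three estimated selection weights; the '3 cutpoints' line in the legend below)
# sel7: a single cutpoint at p = 0.05, i.e. the three-parameter selection model (3PSM)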
sel5 <- selmodel(model, type="stepfun", steps=c(0.05, 0.10, 0.50, 1.00))
#sel6 <- selmodel(model, type="stepfun", steps=c(0.001, 0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 1))
sel7 <- selmodel(model, type="stepfun", steps=c(0.05, 1))
#plot(sel5, ylim = c(0,1))
#plot(sel6, add=TRUE, col="red", scale = T)
#plot(sel7, add=TRUE, col="red")
#legend(x = 0.5, y = 1, legend = c("3 cutpoints", "","1 cutpoint (3 PSM)"), lty =c(1, 0, 1), col = c("black", "", "red"), bty = "n")
# plotting from here
pA <- plot + theme(plot.margin=unit(c(-1,0,-1,0),"cm"))
# B
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
plot(sel1)
plot(sel3, add=TRUE, col="red")
plot(sel1b, add=TRUE, lty = "dotted")
plot(sel3b, add=TRUE, col="red", lty = "dotted")
legend(x = 0.6, y = 1, legend = c("half-normal", "","logistic"), lty =c(1, 0, 1), col = c("black", "", "red"), bty = "n")
pB <- recordPlot()
invisible(dev.off())
# C
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
plot(sel5, ylim = c(0,1))
plot(sel7, add=TRUE, col="red", lty = 3)
legend(x = 0.5, y = 1, legend = c("3 cutpoints", "","1 cutpoint (3 PSM)"), lty =c(1, 0, 1), col = c("black", "", "red"), bty = "n")
pC <- recordPlot()
invisible(dev.off())
# Fig 5
fig5 <- pA /(ggdraw(pB) + ggdraw(pC)) + plot_annotation(tag_levels = 'a')
fig5
########################
# Zr data creation #####
#######################
set.seed(777)
# setting parameters
n.study <- 70
sigma2.s <- 0.05
sigma2.u <- 0.05
beta1 <- 0.1
beta2 <- - 0.05
# using a negative binomial distribution to get a good spread of sample sizes
n.sample <- rnbinom(n.study, mu = 30, size = 0.2) + 4
# the number of effect sizes per study
set.seed(777777)
n.es.per.s <- rpois(n.study,3) + 1
#sum(n.es.per.s)
n.effect <- sum(n.es.per.s)
# sampling variance of Fisher's z (Zr): 1/(n - 3)
vi <- 1/(rep(n.sample, n.es.per.s) - 3)
# In the text, we have two moderators
# moderator x 1 - study level
x1j <- rnorm(n.study)
# moderator x 2 - effect size level
x2i <- rnorm(n.effect)
# study position
place <- rep(1:n.study, n.es.per.s)
# the underlying overall effect is set to Zr = 0 in this simulated dataset
Zr <- 0 + rnorm(n.study, 0, sqrt(sigma2.s))[place] +
rnorm(n.effect, 0, sqrt(sigma2.u)) + rnorm(n.effect, 0, sqrt(vi))
#qplot(Zr, 1/sqrt(vi))
datA <- data.frame(yi = Zr, vi = vi, sei = sqrt(vi), x1 = x1j[place], x2 = x2i, ni = n.sample[place], prec = 1 / sqrt(vi), wi = 1/vi, zi = Zr/sqrt(vi), studyID = place, effectID = 1:n.effect)
# removing effect sizes to mimic publication bias
expected <- c(which(1/datA$sei < 5.5 & datA$yi < 0.2), which(1/datA$sei < 3 & datA$yi < 0.7))
datB <- datA[-expected, ]
#qplot(x = yi, y = 1/sqrt(vi), data = datB)
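# mod_ma: multilevel Egger-type meta-regression with SE (sqrt(vi)) as the moderator
# mod_mb: the same model with the sampling variance (vi) as the moderator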
mod_ma <- rma.mv(yi, vi, mod = ~ sqrt(vi), random = list(~ 1 | studyID, ~1 | effectID), data = datB)
mod_mb <- rma.mv(yi, vi, mod = ~ vi, random = list(~ 1 | studyID, ~1 | effectID), data = datB)
#summary(mod_ma)
#summary(mod_mb)
# creating curve lines
x = seq(0, 1, by = 0.001)
#x <- x[-1000]
y = mod_mb$beta[[1]] + sqrt(x)* mod_mb$beta[[2]]
vi_curve <- tibble(x = x, y = y)
############################
# lnRR - data creation #####
############################
set.seed(777)
# setting parameters
n.study <- 30
sigma2.s <- 0.01
sigma2.u <- 0.005
beta1 <- 0.1
beta2 <- -0.01
# using a negative binomial distribution to get a good spread of sample sizes
n.sample <- rnbinom(n.study, mu = 40, size = 0.5) + 4
# the number of effect sizes per study
set.seed(7777)
n.es.per.s <- rpois(n.study,3) + 1
#sum(n.es.per.s)
# sample size
n.effect <- sum(n.es.per.s)
# cv for each study
cv <- sample(seq(0.3, 0.35,by = 0.001), n.effect, replace = T)
rep_sample <- rep(n.sample, times = n.es.per.s)
# variance for lnRR
# balanced design
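# with equal group sizes (n per group) and equal CVs, the large-sample sampling
# variance of lnRR is approximately cv^2/n + cv^2/n = 2*cv^2/n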
vi <- (2*cv^2)/rep_sample
# In the text, we have two moderators
# moderator x 1 - study level
x1j <- rnorm(n.study)
# moderator x 2 - effect size level
x2i <- rnorm(n.effect)
# study position
pos <- rep(1:n.study, n.es.per.s)
# the underlying overall effect is lnRR = 0.3 (see below)
dat3 <- data.frame(vi = vi, sei = sqrt(vi), x1 = x1j[pos], x2 = x2i, ni = n.sample[pos], prec = 1 / sqrt(vi), wi = 1/vi, studyID = pos, effectID = 1:n.effect)
# ni here is sample size per group - we have the same N for control and treatment groups
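# effective sample size; with equal group sizes the expression below simplifies to 2*ni
# (the total sample size across the control and treatment groups)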
dat3$n_tilde <- (dat3$ni^2)/dat3$ni*2
#
# sampling variance-covariance (VCV) matrix; with rho = 0 it is diagonal here
sigma <- make_VCV_matrix(dat3, "vi", "studyID", "effectID", rho = 0)
lnRR<- 0.3 + beta1*x1j[pos] + beta2*x2i + rnorm(n.study, 0, sqrt(sigma2.s))[pos] +
rnorm(n.effect, 0, sqrt(sigma2.u)) + MASS::mvrnorm(1, rep(0, n.effect), sigma)
dat3$yi <- lnRR
mod_ml <- rma.mv(yi, vi, mod = ~ x1 + x2, random = list(~ 1 | studyID, ~1 | effectID), data = dat3)
# residuals from the multilevel model
#summary(mod_ml)
#funnel(mod_ml, yaxis="seinv", col = inferno(25)[pos])
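# three sets of residuals from the multilevel model:
# - marginal: observed effect minus the fixed-effect (marginal) prediction
# - conditional 1: additionally removes the study-level random effects (BLUPs)
# - conditional 2: additionally removes both study- and effect-level random effects
# each residual variance adds the corresponding prediction-error components to vi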
# residual marginal
residm <- dat3$yi - predict(mod_ml)[[1]]
vi_residm <- vi + predict(mod_ml)[[2]]^2 # adding prediction errors
# residual conditional 1 & 2 + its error
blups <- ranef(mod_ml)
study_effect <- blups$studyID[[1]][pos]
vi_study <- (blups$studyID[[2]][pos])^2
residc1 <- dat3$yi - (predict(mod_ml)[[1]] + study_effect)
vi_residc1 <- vi_residm + vi_study
effect_effect <- blups$effectID[[1]]
vi_effect <- blups$effectID[[1]]^2
residc2 <- dat3$yi - (predict(mod_ml)[[1]] + study_effect + effect_effect)
vi_residc2 <- vi_residm + vi_study + vi_effect
# plotting
# Panel A
mod1 <- rma(yi, vi, data = dat3)
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(mod1, col =inferno(25)[pos],
ylim = c(0, 0.26), xlim = c(-0.6, 0.8),
ylab = "Standard error (SE)", xlab = "Effect size (lnRR)")
pa <- recordPlot()
invisible(dev.off())
#Panel A+
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(dat3$yi, dat3$vi, ni = dat3$n_tilde, yaxis="ni",
refline=mod1$beta,
col =inferno(25)[pos],
ylim = c(0, 250), xlim = c(-0.6, 0.8),
ylab = bquote("Effective sample size " (tilde(n))), xlab = "Effect size (lnRR)")
pa2 <- recordPlot()
invisible(dev.off())
# Panel B
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel (residm, vi = vi_residm, yaxis = "sei", col = inferno(25)[pos],
ylim = c(0, 0.26), xlim = c(-0.6, 0.8),
ylab = "Standard error (SE)", xlab = "Residuals marginal (lnRR)")
pb <- recordPlot()
invisible(dev.off())
# Panel B+
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(residm, vi = vi_residm, ni = dat3$n_tilde, yaxis="ni",
#refline=mod1$beta,
col =inferno(25)[pos],
ylim = c(0, 250), xlim = c(-0.6, 0.8),
ylab = bquote("Effective sample size " (tilde(n))), xlab = "Residual marginal (lnRR)")
pb2 <- recordPlot()
invisible(dev.off())
# Panel C
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(residc1, vi = vi_residc1, yaxis = "sei", col = inferno(25)[pos],
ylim = c(0, 0.26), xlim = c(-0.6, 0.8),
ylab = "Standard error (SE)",
xlab = "Residuals conditional 1 (lnRR)")
pc <- recordPlot()
invisible(dev.off())
# Panel C+
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(residc1, vi = vi_residc1, ni = dat3$n_tilde, yaxis="ni",
#refline=mod1$beta,
col =inferno(25)[pos],
ylim = c(0, 250), xlim = c(-0.6, 0.8),
ylab = bquote("Effective sample size " (tilde(n))), xlab = "Residuals conditional 1 (lnRR)")
pc2 <- recordPlot()
invisible(dev.off())
# Panel D
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(residc2, vi = vi_residc2, yaxis = "sei", col = inferno(25)[pos],
ylim = c(0, 0.26), xlim = c(-0.6, 0.8),
ylab = "Standard error (SE)",
xlab = "Residuals conditional 2 (lnRR)")
pd <- recordPlot()
invisible(dev.off())
# Panel D+
pdf(NULL)
dev.control(displaylist="enable")
par(mar=c(4,4,0.1,0) + 0.1)
funnel(residc2, vi = vi_residc2, ni = dat3$n_tilde, yaxis="ni",
#refline=mod1$beta,
col =inferno(25)[pos],
ylim = c(0, 250), xlim = c(-0.6, 0.8),
ylab = bquote("Effective sample size " (tilde(n))), xlab = "Residuals conditional 2 (lnRR)")
pd2 <- recordPlot()
invisible(dev.off())
# Panel E
pe <- ggplot(datB, aes(sei, yi)) + geom_point() +
geom_line(data = vi_curve, mapping = aes(x = x, y = y, col = "red")) +
#geom_point(aes(col = studyID)) +
# geom_smooth(method = "lm") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_abline(intercept = mod_ma$beta[[1]], slope = mod_ma$beta[[2]]) +
xlim(0, 1) + ylim(-1.5, 2.5) +
labs(x = "Standard error (SE)", y = "Effect size (Zr)")+
theme(legend.position = "none")
# F
# Variance
pf <- ggplot(datB, aes(vi, yi)) + geom_point() +
#geom_point(aes(col = studyID)) +
# geom_smooth(method = "lm") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_abline(intercept = mod_mb$beta[[1]], slope = mod_mb$beta[[2]], col = "red") +
xlim(0, 1) + ylim(-1.5, 2.5) +
labs(x = "Sampling variance", y = "Effect size (Zr)")
# fig6
fig6 <- (ggdraw(pa) + ggdraw(pb)) /(ggdraw(pc) + ggdraw(pd))/(plot_grid(pe) + plot_grid(pf)) + plot_annotation(tag_levels = 'a')
fig6
#fig6+
fig6_extra <- (ggdraw(pa2) + ggdraw(pb2)) /(ggdraw(pc2) + ggdraw(pd2)) + plot_annotation(tag_levels = 'a')
fig6_extra
There is no code for Fig. 7 (created in PowerPoint)
SN is extremely grateful to Russell Lenth for making his emmeans work for metafor objects.
sessionInfo() %>% pander::pander()
R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale: LC_COLLATE=English_Germany.utf8, LC_CTYPE=English_Germany.utf8, LC_MONETARY=English_Germany.utf8, LC_NUMERIC=C and LC_TIME=English_Germany.utf8
attached base packages: grid, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: emmeans(v.1.7.4-1), ggpubr(v.0.4.0), viridis(v.0.6.2), viridisLite(v.0.4.0), ggplotify(v.0.1.0), meta(v.5.5-0), metaviz(v.0.3.1), cowplot(v.1.1.1), orchaRd(v.2.0), readxl(v.1.4.0), lme4(v.1.1-29), here(v.1.0.1), patchwork(v.1.1.1), kableExtra(v.1.3.4), knitr(v.1.39), ape(v.5.6-2), rotl(v.3.0.12), openxlsx(v.4.2.5), pander(v.0.6.5), gt(v.0.7.0), metafor(v.3.4-0), metadat(v.1.2-0), Matrix(v.1.4-1), forcats(v.0.5.1), stringr(v.1.4.0), dplyr(v.1.0.9), purrr(v.0.3.4), readr(v.2.1.2), tidyr(v.1.2.0), tibble(v.3.1.7), ggplot2(v.3.3.6), tidyverse(v.1.3.1) and R.rsp(v.0.44.0)
loaded via a namespace (and not attached): minqa(v.1.2.4), colorspace(v.2.0-3), ggsignif(v.0.6.3), ellipsis(v.0.3.2), rprojroot(v.2.0.3), estimability(v.1.3), fs(v.1.5.2), rstudioapi(v.0.13), farver(v.2.1.0), mvtnorm(v.1.1-3), fansi(v.1.0.3), lubridate(v.1.8.0), mathjaxr(v.1.6-0), xml2(v.1.3.3), codetools(v.0.2-18), splines(v.4.2.0), R.methodsS3(v.1.8.2), jsonlite(v.1.8.0), nloptr(v.2.0.1), broom(v.0.8.0), dbplyr(v.2.1.1), R.oo(v.1.25.0), BiocManager(v.1.30.18), rentrez(v.1.2.3), compiler(v.4.2.0), httr(v.1.4.3), backports(v.1.4.1), assertthat(v.0.2.1), fastmap(v.1.1.0), cli(v.3.3.0), htmltools(v.0.5.2), prettyunits(v.1.1.1), tools(v.4.2.0), coda(v.0.19-4), gtable(v.0.3.0), glue(v.1.6.2), Rcpp(v.1.0.8.3), carData(v.3.0-5), cellranger(v.1.1.0), jquerylib(v.0.1.4), vctrs(v.0.4.1), svglite(v.2.1.0), nlme(v.3.1-157), xfun(v.0.31), rvest(v.1.0.2), CompQuadForm(v.1.4.3), lifecycle(v.1.0.1), pacman(v.0.5.1), rstatix(v.0.7.0), XML(v.3.99-0.10), MASS(v.7.3-57), scales(v.1.2.0), hms(v.1.1.1), parallel(v.4.2.0), yaml(v.2.3.5), gridExtra(v.2.3), yulab.utils(v.0.0.5), sass(v.0.4.1), stringi(v.1.7.6), highr(v.0.9), boot(v.1.3-28), zip(v.2.2.0), commonmark(v.1.8.0), rlang(v.1.0.2), pkgconfig(v.2.0.3), systemfonts(v.1.0.4), rncl(v.0.8.6), evaluate(v.0.15), lattice(v.0.20-45), labeling(v.0.4.2), tidyselect(v.1.1.2), magrittr(v.2.0.3), R6(v.2.5.1), generics(v.0.1.2), DBI(v.1.1.2), pillar(v.1.7.0), haven(v.2.5.0), withr(v.2.5.0), abind(v.1.4-5), car(v.3.0-13), modelr(v.0.1.8), crayon(v.1.5.1), utf8(v.1.2.2), tzdb(v.0.3.0), rmarkdown(v.2.14), progress(v.1.2.2), reprex(v.2.0.1), digest(v.0.6.29), webshot(v.0.5.3), xtable(v.1.8-4), R.cache(v.0.15.0), numDeriv(v.2016.8-1.1), gridGraphics(v.0.5-1), R.utils(v.2.11.0), munsell(v.0.5.0) and bslib(v.0.3.1)