Content Disclaimer Copyright @2020. All Rights Reserved. |
Links : Home Index (Subjects) Contact StatsToDo
Explanations and References
This page presents 4 programs related to sample size requirements when comparing 2 means, and tables of these sample sizes.
Sample Size Tables
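The programs and tables on this page assume that the measurements are continuous and normally distributed. The following 4 programs are available on this page: sample size estimation, power estimation, confidence interval of the difference between two means, and confidence intervals for planning a pilot study.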
References
Armitage P (1980) Statistics in Medical Research. Blackwell Scientific Publication, Oxford. ISBN 0 632 05430 1 p.120
Machin D, Campbell M, Fayers P, Pinol A (1997) Sample Size Tables for Clinical Studies. Second Ed. Blackwell Science. ISBN 0-86542-870-0 p.24-25
Johanson GA and Brooks GP (2010) Initial Scale Development: Sample Size for Pilot Studies. Educational and Psychological Measurement Vol.70, Iss.3; p.394-400
These tables provide the sample sizes (per group) needed to compare the means of 2
groups, assuming that the data are normally distributed, that the variances
in the groups are similar, and that the sample sizes for the groups are the same.
Javascript Programs
- Power = 1 - β
- α = Probability of Type I Error
- es = Effect size = difference between means / population or within-group Standard Deviation
- Column headers 1T and 2T represent 1 tail and 2 tail
- Sample size is that for each of the 2 groups
Effect Size 0.05 to 1
This subpanel presents 4 R programs related to sample size for 2 unpaired means
# Program 1: Estimating sample size ----
# Pgm 1: Sample Size

# Sample size per group for comparing two unpaired means.
#
# alpha : probability of Type I error
# power : required power (1 - beta)
# delta : effect size = |difference between means| / within-group SD
# tails : 1 or 2 (number of tails used for alpha)
#
# Returns the per-group sample size, rounded up. All arguments are
# vectorised, so a whole data-frame column can be passed at once.
CalSSiz <- function(alpha, power, delta, tails = 2) {
  za <- abs(qnorm(alpha / tails)) # z for alpha
  zb <- abs(qnorm(1 - power))     # z for beta
  # normal-approximation sample size plus the za^2 / 4 correction term
  ceiling(2 * ((za + zb) / delta)^2 + za^2 / 4)
}

# data entry
dat <- "
Alpha Power Diff SD
0.05 0.8 0.5 1.0
0.01 0.8 0.5 1.0
0.05 0.9 0.5 1.0
0.01 0.9 0.5 1.0
"
# read.table(text = ) avoids leaving an unclosed textConnection behind
df <- read.table(text = dat, header = TRUE) # conversion to data frame

# Calculations
delta <- abs(df$Diff / df$SD) # effect size
df$SSiz1Tail <- CalSSiz(df$Alpha, df$Power, delta, tails = 1) # 1 tail
df$SSiz2Tail <- CalSSiz(df$Alpha, df$Power, delta, tails = 2) # 2 tail
df # show data frame with input data and results

# The results are as follows. Sample size is for each group
# > df # show data frame with input data and results
#   Alpha Power Diff SD SSiz1Tail SSiz2Tail
# 1  0.05   0.8  0.5  1        51        64
# 2  0.01   0.8  0.5  1        82        96
# 3  0.05   0.9  0.5  1        70        86
# 4  0.01   0.9  0.5  1       106       121

# Program 2: Power ----
# Pgm2: Power

# Power of a comparison of two unpaired means.
#
# alpha : probability of Type I error
# delta : effect size = |difference between means| / within-group SD
# n     : sample size per group
# tails : 1 or 2 (number of tails used for alpha)
#
# This is the algebraic inverse of the formula in CalSSiz:
#   n = 2 * ((za + zb) / delta)^2 + za^2 / 4
#   =>  zb = delta * sqrt(n / 2 - za^2 / 8) - za
# The correction term is therefore za^2 / 8 (not za / 8).
CalPower <- function(alpha, delta, n, tails = 2) {
  za <- abs(qnorm(alpha / tails)) # z for alpha
  zb <- delta * sqrt(n / 2 - za^2 / 8) - za
  pnorm(zb)
}

# data entry
dat <- "
Alpha Diff N1 SD1 N2 SD2
0.05 0.5 64 1.0 64 1.0
0.01 0.5 96 1.0 96 1.0
0.01 0.5 64 1.0 96 1.0
0.05 0.5 86 1.0 86 1.0
0.01 0.5 121 1.0 121 1.0
0.01 0.5 86 1.0 121 1.0
"
df <- read.table(text = dat, header = TRUE) # conversion to data frame

# Calculations
# pooled within-group standard deviation
sd_pooled <- sqrt(
  ((df$N1 - 1) * df$SD1^2 + (df$N2 - 1) * df$SD2^2) / (df$N1 + df$N2 - 2)
)
# the formula assumes equal groups; unequal groups are represented by
# their mean size
n_mean <- (df$N1 + df$N2) / 2
delta <- abs(df$Diff / sd_pooled) # effect size
df$Power1Tail <- CalPower(df$Alpha, delta, n_mean, tails = 1) # power 1 tail
df$Power2Tail <- CalPower(df$Alpha, delta, n_mean, tails = 2) # power 2 tail
df # show data frame with input data and results

# The results are as follows (power shown rounded to 4 decimal places)
# > df # show data frame with input data and results
#   Alpha Diff  N1 SD1  N2 SD2 Power1Tail Power2Tail
# 1  0.05  0.5  64   1  64   1     0.8787     0.8015
# 2  0.01  0.5  96   1  96   1     0.8672     0.8046
# 3  0.01  0.5  64   1  96   1     0.7908     0.7100
# 4  0.05  0.5  86   1  86   1     0.9475     0.9033
# 5  0.01  0.5 121   1 121   1     0.9383     0.9009
# 6  0.01  0.5  86   1 121   1     0.8938     0.8394

# Program 3: Confidence Interval ----
# Pgm3: Confidence Interval

# Subroutine to calculate the confidence interval of the difference
# between two unpaired means.
#
# alpha    : 1 - confidence level (e.g. 0.05 for 95%)
# n1, sd1  : sample size and standard deviation of group 1
# n2, sd2  : sample size and standard deviation of group 2
#
# Returns c(se, ci1, ci2):
#   se  = standard error of the difference (pooled variance)
#   ci1 = 1 tail half-width, t(1 - alpha)     * se
#   ci2 = 2 tail half-width, t(1 - alpha / 2) * se
CalCI <- function(alpha, n1, sd1, n2, sd2) {
  degfm <- n1 + n2 - 2
  pooled_var <- ((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / degfm
  se <- sqrt(pooled_var * (1 / n1 + 1 / n2))
  ci1 <- qt(1 - alpha, degfm) * se
  ci2 <- qt(1 - alpha / 2, degfm) * se
  c(se, ci1, ci2)
}

# The main program for confidence interval, using the subroutine CalCI
# data entry
dat <- "
Pc N1 SD1 N2 SD2
95 100 18.5 100 16.8
99 100 18.5 100 16.8
95 50 8.5 30 5.2
99 50 8.5 30 5.2
95 50 400 60 380
"
df <- read.table(text = dat, header = TRUE) # conversion to data frame

# calculations
alpha <- 1 - df$Pc / 100 # percent confidence converted to alpha
# one CalCI call per row; the result is a 3 x nrow(df) matrix whose rows
# are SE, 1 tail CI and 2 tail CI. seq_len() keeps this safe for 0 rows.
res <- vapply(
  seq_len(nrow(df)),
  function(i) CalCI(alpha[i], df$N1[i], df$SD1[i], df$N2[i], df$SD2[i]),
  numeric(3)
)
df$SE <- res[1, ]  # Standard Error
df$CI1 <- res[2, ] # Confidence interval 1 tail
df$CI2 <- res[3, ] # Confidence interval 2 tail
df

# The results are as follows
# > df # Data frame with input data and results
#   Pc  N1   SD1  N2   SD2        SE        CI1        CI2
# 1 95 100  18.5 100  16.8  2.498980   4.129778   4.928032
# 2 99 100  18.5 100  16.8  2.498980   5.860928   6.499565
# 3 95  50   8.5  30   5.2  1.719553   2.862410   3.423366
# 4 99  50   8.5  30   5.2  1.719553   4.084128   4.540204
# 5 95  50 400.0  60 380.0 74.526406 123.645653 147.724266

# Program 4: Pilot study ----
# Please Note: This program shares the same subroutine as Program 3 (CalCI)
# to calculate SE and CI, but the subroutine is not duplicated here
# Program 4. Pilot study

# Parameters
pc <- 95    # % confidence
sd <- 1.0   # within group or population SD
intv <- 5   # interval
maxN <- 100 # maximum sample size

# Builds the four display columns for one tail from a vector of CI widths:
# the CI itself, the decrease from the previous row, the decrease per
# additional case, and that decrease as a % of the previous CI.
# The first row has no previous row, so its change columns are "0".
PilotCols <- function(ci, step) {
  prev <- ci[-length(ci)]
  drop <- prev - ci[-1]
  per_case <- drop / step
  list(
    ci = sprintf("%#.4f", ci),
    diff = c("0", sprintf("%#.4f", drop)),
    dec = c("0", sprintf("%#.4f", per_case)),
    pct = c("0", sprintf("%#.1f", per_case / prev * 100))
  )
}

# Calculations
alpha <- 1 - pc / 100
# sample sizes intv, 2 * intv, ... up to the first value >= maxN
# (always at least one row)
SSiz <- intv * seq_len(max(1, ceiling(maxN / intv)))
# 3 x length(SSiz) matrix: rows are SE, 1 tail CI and 2 tail CI
res <- vapply(SSiz, function(n) CalCI(alpha, n, sd, n, sd), numeric(3))
# the half-width is doubled to give the full width of the interval
tail1 <- PilotCols(res[2, ] * 2, intv)
tail2 <- PilotCols(res[3, ] * 2, intv)

df <- data.frame(
  SSiz,                    # sample size
  CI1 = tail1$ci,          # confidence interval 1 tail
  Diff1 = tail1$diff,      # difference in CI from previous row 1 tail
  DecCase1 = tail1$dec,    # decrease in CI per case increase 1 tail
  PDCase1 = tail1$pct,     # % decrease in CI per case increase 1 tail
  CI2 = tail2$ci,          # confidence interval 2 tail
  Diff2 = tail2$diff,      # difference in CI from previous row 2 tail
  DecCase2 = tail2$dec,    # decrease in CI per case increase 2 tail
  PDCase2 = tail2$pct      # % decrease in CI per case increase 2 tail
)
df # display results in data frame

# The results are as follows. Please note:
> df # display results in data frame SSiz CI1 Diff1 DecCase1 PDCase1 CI2 Diff2 DecCase2 PDCase2 1 5 2.3522 0 0 0 2.9169 0 0 0 2 10 1.5510 0.8012 0.1602 6.8 1.8791 1.0378 0.2076 7.1 3 15 1.2423 0.3087 0.0617 4.0 1.4959 0.3832 0.0766 4.1 4 20 1.0663 0.1760 0.0352 2.8 1.2803 0.2156 0.0431 2.9 5 25 0.9488 0.1175 0.0235 2.2 1.1374 0.1430 0.0286 2.2 6 30 0.8632 0.0856 0.0171 1.8 1.0337 0.1037 0.0207 1.8 7 35 0.7973 0.0659 0.0132 1.5 0.9540 0.0797 0.0159 1.5 8 40 0.7444 0.0528 0.0106 1.3 0.8903 0.0637 0.0127 1.3 9 45 0.7009 0.0435 0.0087 1.2 0.8379 0.0524 0.0105 1.2 10 50 0.6642 0.0367 0.0073 1.0 0.7938 0.0441 0.0088 1.1 11 55 0.6328 0.0315 0.0063 0.9 0.7560 0.0378 0.0076 1.0 12 60 0.6054 0.0274 0.0055 0.9 0.7231 0.0329 0.0066 0.9 13 65 0.5813 0.0241 0.0048 0.8 0.6942 0.0289 0.0058 0.8 14 70 0.5598 0.0214 0.0043 0.7 0.6685 0.0257 0.0051 0.7 15 75 0.5406 0.0192 0.0038 0.7 0.6454 0.0231 0.0046 0.7 16 80 0.5232 0.0174 0.0035 0.6 0.6246 0.0208 0.0042 0.6 17 85 0.5074 0.0158 0.0032 0.6 0.6057 0.0189 0.0038 0.6 18 90 0.4930 0.0144 0.0029 0.6 0.5883 0.0173 0.0035 0.6 19 95 0.4797 0.0133 0.0027 0.5 0.5724 0.0159 0.0032 0.5 20 100 0.4674 0.0123 0.0025 0.5 0.5578 0.0147 0.0029 0.5 |