6.1 Example Calculation

We start by showing what has been implemented — the resampling based approach:

# load the convey package
library(convey)

# load the survey library
library(survey)

# load the laeken library
library(laeken)

# load the synthetic EU statistics on income & living conditions
data(eusilc)

# make all column names lowercase
names(eusilc) <- tolower(names(eusilc))

# construct a survey.design
# using our recommended setup
des_eusilc <-
  svydesign(
    ids = ~ rb030 ,
    strata = ~ db040 ,
    weights = ~ rb050 ,
    data = eusilc
  )

# create bootstrap-based design object
des_eusilc_rep <- as.svrepdesign( des_eusilc , type = "bootstrap" )

# immediately run the convey_prep function on it
des_eusilc <- convey_prep( des_eusilc )
des_eusilc_rep <- convey_prep(des_eusilc_rep)

# estimate gini by gender, with and without covariance matrices
ginis.rep <- svyby( ~ py010n , ~rb090 , des_eusilc_rep , svygini , na.rm = TRUE , covmat = TRUE )
ginis.rep.nocov <- svyby( ~ py010n , ~rb090 , des_eusilc_rep , svygini , na.rm = TRUE , covmat = FALSE )

# variance of the gini difference: naive
svycontrast( ginis.rep.nocov , quote( `male` - `female` ) )
## Warning in vcov.svyby(stat): Only diagonal elements of vcov() available
##             nlcon     SE
## contrast -0.15006 0.0065
# variance of the gini difference: accounting for covariance
svycontrast( ginis.rep , quote( `male` - `female` ) )
##             nlcon     SE
## contrast -0.15006 0.0068

Notice that the warning Only diagonal elements of vcov() available means that the covariance terms are null for the vcov( ginis.rep.nocov ) result.

While not implemented yet, a linearization-based approach can be applied using:

# estimate gini by gender, with and without covariance matrices
ginis.lin.nocov <- svyby( ~ py010n , ~rb090 , des_eusilc , svygini , na.rm = TRUE , covmat = FALSE )
gini.female <- svygini( ~ py010n , subset( des_eusilc , rb090 == "female" ), na.rm = TRUE , influence = TRUE )
gini.male   <- svygini( ~ py010n , subset( des_eusilc , rb090 == "male" ), na.rm = TRUE , influence = TRUE )

# estimate full variance-covariance matrix
linmat <- rep( 0 , nrow( des_eusilc$variables ) )
linmat[ attr( gini.female , "index" ) ] <- attr( gini.female , "influence" )
linmat[ attr( gini.male , "index" ) ] <- attr( gini.male , "influence" )
linmat <- linmat * des_eusilc$prob
lintot <- svyby( linmat , ~rb090 , des_eusilc , svytotal , covmat = TRUE )

# compare the variance-covariance matrices
vcov( ginis.lin.nocov ) # naive, with null covariance terms
## Warning in vcov.svyby(ginis.lin.nocov): Only diagonal elements of vcov()
## available
##                male       female
## male   2.954413e-05 0.000000e+00
## female 0.000000e+00 2.292063e-05
vcov( lintot ) # full, with non-null covariance terms
##                male       female
## male   2.954413e-05 2.764584e-10
## female 2.764584e-10 2.292063e-05
# SE of the gini difference: naive
SE( svycontrast( ginis.lin.nocov , quote( `male` - `female` ) ) )
## Warning in vcov.svyby(stat): Only diagonal elements of vcov() available
##    contrast 
## 0.007243256
# SE of the gini difference: accounting for covariance
SE( svycontrast( lintot , quote( `male` - `female` ) ) )
##    contrast 
## 0.007243218

Notice that the warning Only diagonal elements of vcov() available means that the covariance terms are null for the vcov( ginis.lin.nocov ) result.