Loading data and feature-selected variables

## load LASSO feature-selected variables
## (the `coefficient` column holds the term names: it is compared against
## "(Intercept)" below and its values are later used as column names)
coefs <- read.csv("../data/lasso_only_numeric.csv",
                  stringsAsFactors = FALSE)

## drop the intercept coefficient; base subsetting is used so this step does
## not depend on dplyr being attached (without it `filter` resolves to
## stats::filter and fails). Rows with a missing name are dropped as well,
## and row names are renumbered.
term_names <- coefs[["coefficient"]]
is_predictor <- !is.na(term_names) & term_names != "(Intercept)"
coefs <- coefs[is_predictor, , drop = FALSE]
rownames(coefs) <- NULL

## keep at most the first 20 variable names; head() avoids the NA padding
## that `[1:20]` produces when fewer than 20 rows remain
## NOTE(review): presumably the CSV rows are already ordered by importance
## -- confirm against the script that writes lasso_only_numeric.csv
top20vars <- head(coefs[["coefficient"]], 20)


## load cleaned data
vih_data <- read.csv("../data/cleandata.csv", stringsAsFactors = FALSE)

## select the CD4 increase variable as the response for lm; `[[` matches the
## column name exactly (no partial matching), and a missing column is
## reported here instead of as "object 'output' not found" inside lm()
if (!"Delta_CD4_year1" %in% names(vih_data)) {
  stop("column 'Delta_CD4_year1' not found in ../data/cleandata.csv",
       call. = FALSE)
}
vih_data[["output"]] <- vih_data[["Delta_CD4_year1"]]

## flag selected variables that are absent from the data rather than
## silently fitting the model on fewer predictors
missing_vars <- setdiff(top20vars[!is.na(top20vars)], names(vih_data))
if (length(missing_vars) > 0) {
  warning("selected variables not found in the data: ",
          paste(missing_vars, collapse = ", "),
          call. = FALSE)
}

## build the modelling frame for lm: selected predictors plus the response
## (columns keep their order in vih_data; drop = FALSE guarantees a data
## frame even if only one column matches)
keep_cols <- names(vih_data) %in% c(top20vars, "output")
input <- vih_data[, keep_cols, drop = FALSE]

Fitting the linear model

## fit a linear model of `output` on every other column of `input`
lModel <- lm(
  formula = output ~ .,
  data = input
)

## predictions for the same rows the model was fitted on
preds <- predict(object = lModel, newdata = input)

## print the coefficient table and overall fit statistics
summary(lModel)
## 
## Call:
## lm(formula = output ~ ., data = input)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -81.384 -22.534   3.531  24.990  74.294 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         236.17815   87.78879   2.690 0.009916 ** 
## LeucocitosS0         -6.42580    3.16210  -2.032 0.047938 *  
## HematocritoS0        -1.39398    1.25261  -1.113 0.271550    
## CreatininaS0        -34.28025   24.06836  -1.424 0.161112    
## CD4porcentajeS0     -19.33717    2.58284  -7.487 1.71e-09 ***
## CocCD4_CD8_S08      -16.58811  105.91933  -0.157 0.876237    
## CD4porcentajeS12      5.58676    3.71513   1.504 0.139472    
## CocCD4_CD8_S12       50.72623  161.39470   0.314 0.754715    
## GlucosaS24           -0.87249    0.52366  -1.666 0.102482    
## Acido_uricoS24       -3.75077    4.41956  -0.849 0.400457    
## CreatininaS24        88.97258   37.70871   2.359 0.022601 *  
## TGO_S24              -0.18963    0.16837  -1.126 0.265895    
## CD4_S24              -0.05159    0.09466  -0.545 0.588387    
## CocCD4_CD8_S24       72.04397  138.84589   0.519 0.606334    
## CocCD4_CD8_S39      161.35314   78.03988   2.068 0.044332 *  
## CD4porcentajeS52      8.87100    3.61355   2.455 0.017933 *  
## CD8_S52               0.18229    0.02039   8.939 1.27e-11 ***
## CD8porcentajeS52     -4.28224    1.13940  -3.758 0.000481 ***
## CocCD4_CD8_S52       61.31527  146.59432   0.418 0.677700    
## Num_eventos_6_meses   3.95161    5.54243   0.713 0.479463    
## CV_S052              -0.41452    0.42214  -0.982 0.331264    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.8 on 46 degrees of freedom
## Multiple R-squared:  0.9076, Adjusted R-squared:  0.8675 
## F-statistic:  22.6 on 20 and 46 DF,  p-value: < 2.2e-16