# Home page of the S&W textbook: http://wps.aw.com/aw_stock_ie_2/
# Exercises 2: data
#
# This script loads two whitespace-separated numeric data files, explores the
# test score / STR variables, runs a small simulation of sample means of
# Bernoulli draws, and fits the simple regression Test_score ~ STR with
# heteroskedasticity-robust standard errors.

# Base directory of the course material; both data files live underneath it.
data_dir = "C:/HY-Data/JPLUOTO/OPETUS/EKONOMETRIAN_PERUSKURSSI"

# ---------------------------------------------------------------------------
# Data: CPS04 (4 values per observation, stored row by row in the file)
# ---------------------------------------------------------------------------
datacps04 = matrix(scan(file.path(data_dir, "LUENNOT/DATA/CPS04.txt")),
                   ncol = 4, byrow = TRUE)
colnames(datacps04) = c("AHE", "bachelor", "female", "age")
AHE = datacps04[, "AHE"]
age = datacps04[, "age"]

# ---------------------------------------------------------------------------
# Data: test score data (13 values per observation, stored row by row)
# ---------------------------------------------------------------------------
datamat = matrix(scan(file.path(data_dir, "test_data.txt")),
                 ncol = 13, byrow = TRUE)
# datamat already has 13 columns; the reshape is kept so both names exist.
datam = matrix(datamat, ncol = 13)
colnames(datam) = c("enrl_tot", "teachers", "calw_pct", "meal_pct", "computer",
                    "testscr", "comp_stu", "expn_stu", "str", "avginc",
                    "el_pct", "read_scr", "math_scr")
Test_score = datam[, "testscr"]
STR = datam[, "str"]

plot(STR, Test_score)
hist(Test_score)
mean(Test_score)
var(Test_score)
cor(STR, Test_score)
cov(STR, Test_score)

# Conditional distribution
hist(Test_score[STR < 20])
mean(Test_score[STR < 20])
mean(Test_score[STR < 20]) - mean(Test_score[STR >= 20])

## Simple random sampling
1:100
sample(1:100, 10, prob = rep(1/100, 100))

## Law of large numbers
## For each sample size n, draw 1000 samples of n Bernoulli(p) variables and
## look at the distribution of the sample mean and of its standardized version.
p = 0.78
for (n in c(2, 5, 25, 100)) {
  ymean = replicate(1000, mean(rbinom(n, 1, p)))
  # The n = 25 histogram uses a finer binning, as in the original exercise.
  if (n == 25) hist(ymean, breaks = 25) else hist(ymean)
  # Autoprinting is disabled inside a loop, so print explicitly.
  print(mean(ymean))
  print(var(ymean))
  hist((ymean - p) / sqrt(p * (1 - p) / n))
}

###############################
# Distributions of Test_score and STR
hist(Test_score)
hist(STR)
mean(Test_score)
sd(Test_score)
mean(STR)
sd(STR)
quantile(STR, c(0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 0.9))
quantile(Test_score, c(0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 0.9))

# Linear regression (fitted once and reused below)
model = lm(Test_score ~ STR)
plot(STR, Test_score)
abline(model)
summary(model)
cor(STR, Test_score)

# OLS estimates computed by hand from sample moments
slope = cov(STR, Test_score) / var(STR)              # Slope
intercept = mean(Test_score) - slope * mean(STR)     # Intercept
slope
intercept

# Fitted values from the estimated line (instead of hard-coded coefficients)
Yhat = intercept + slope * STR
sum((Yhat - mean(Test_score))^2) / sum((Test_score - mean(Test_score))^2)  # R2
cor(STR, Test_score)^2                                                      # Or

# Heteroskedasticity-corrected standard errors
library(sandwich)
library(lmtest)
# NOTE: vcovHC() uses type = "HC3" unless another type is requested.
model$newse = vcovHC(model)
coeftest(model, vcov. = model$newse)

# Simulated distribution of Test_score at STR = 15: fitted value plus normal
# noise scaled by the residual standard error (residual df taken from the fit).
resid_se = sqrt(sum(resid(model)^2) / df.residual(model))
Y = intercept + slope * 15 + resid_se * rnorm(10000)
plot(density(Y))