## Sat Oct 3 10:33:05 PDT 2009
## A more relaxed week 7: review of R for those who would rather
## nail down the basics and postpone the fancy bits about proportional
## hazard for another time.
##
## This file has a bunch of R exercises with the answers. This is purely
## a review of material that we have covered earlier. The intention is
## to provide some drills in the hopes of developing some intuition
## and thereby making R code less mysterious.
##
## Read this file into your ~/213/Week7/exercise7.r buffer by telling emacs:
## C-x i to "include a file" and then editing the suggested path to:
## ~carlm/213/PropHazII/reviewR.r
##
## The answers are in:
## http://www.demog.berkeley.edu/213/PropHazII/reviewRanswers.r
##
########################################################################
########################################################################
## Questions AND ANSWERS
########################################################################

## 1) create a vector called "vector1" of integers between 7 and 32
## can you think of two ways of doing this?
vector1 <- 7:32
## or
vector1 <- seq(from = 7, to = 32)
## or
vector1 <- integer(0)
for (i in 7:32) {
  ## offset the index so that the first value (7) lands in position 1;
  ## indexing by i itself would leave NA in positions 1 through 6
  vector1[i - 6] <- i
}

## 2) create a vector called "vector2" consisting of all of the
## elements of vector1 divided by 3
vector2 <- vector1 / 3

## 3) what will this expression produce and why?
vector1 * c(1, 0)
## [1] 7 0 9 0 11 0 ...
## R recycles the shorter vector so every other element will be
## multiplied by 0

## 4) create a vector cs.vector1 whose elements are the cumulative sum
## of the elements of vector1. In other words the ith element of
## cs.vector1 contains the sum of the 1..i elements of vector1.
cs.vector1 <- cumsum(vector1)
## or
cs.vector1 <- vector("numeric", length(vector1))
for (i in seq_along(vector1)) {
  cs.vector1[i] <- sum(vector1[1:i])
}

## 5) print to the screen, vector 1 in reverse order:
rev(vector1)
## or
vector1[length(vector1):1]
## or
sort(vector1, decreasing = TRUE) ## since vector1 is in ascending order
## or (again relying on vector1 being in ascending order)
vector1[order(vector1, decreasing = TRUE)]

########################################################################
## 6) "modes" of vectors
########################################################################
## Vectors in R all have a "mode" the possible modes are "numeric",
## "character" and "logical". Identify the mode of each of the
## following expressions:
1:7
(1:7) * T
(1:7) > T
(1:7)
(-7:T)
T:F
"1":"7"
(1:7) > "T"
####
## Answers: wrap each expression in mode(); the value R prints is shown
## in the trailing comment.
mode(1:7)           ## [1] "numeric"
mode((1:7) * T)     ## [1] "numeric"
mode((1:7) > T)     ## [1] "logical"
mode((1:7))         ## [1] "numeric"
mode((-7:T))        ## [1] "numeric"
mode(T:F)           ## [1] "numeric"
mode("1":"7")       ## [1] "numeric"
mode((1:7) > "T")   ## [1] "logical"

########################################################################
## 7) Using square brackets [ ] to select elements from objects:
########################################################################
## First create rv2 as below then write the expressions asked for below
rv2 <- rnorm(100) ## creates a vector rv2 which has 100 random normals

## a) the last 5 elements of rv2
## (length - 4):length spans exactly 5 positions; subtracting 5 would
## select 6 elements
rv2[(length(rv2) - 4):length(rv2)]
## or
tail(rv2, 5)

## b) the largest 5 elements of rv2
sort(rv2)[(length(rv2) - 4):length(rv2)]
## or
sort(rv2, decreasing = TRUE)[1:5]

## c) the elements of rv2 which are greater than 2
rv2[rv2 > 2]

## d) the index (that is the position in the vector) of the 5 largest
## elements of rv2
## compares every element with the 5th largest value; positions come
## back in vector order, not in order of size
seq_along(rv2)[rv2 >= sort(rv2, decreasing = TRUE)[5]]
## or, with positions ordered from largest to 5th largest
order(rv2, decreasing = TRUE)[1:5]

########################################################################
## 8) Arrays/Matrices:
########################################################################
## A vector with a dimension "attribute" is an
## array. If the dimension attribute has length 2, it is also a
## "matrix".
## write three different expressions that will create a 20X5 matrix from a
## vector such as rv2 above
## (1) using the matrix() function:
matrix(rv2, nrow = 20)
## (2) using the array() function
array(rv2, c(20, 5))
## (3) by setting the dimension attribute
## (unlike the two expressions above, this changes rv2 itself)
dim(rv2) <- c(20, 5)

########################################################################
## 9) Using square bracket selection with matrices:
########################################################################
## use mat1 to do what is asked for below:
mat1 <- array(NA, c(10, 8))

## change the 8th column of mat1 to be all 8s
mat1[, 8] <- 8

## change the 2nd row of mat1 to be 2s BUT do not change the element
## in the 8th column
mat1[2, 1:7] <- 2

## change the 10th row of mat1 to 2,1,2,1 ... including the 8th
## column
## the length-2 vector 2:1 is recycled across the 8 columns
mat1[10, ] <- 2:1

## change every element whose row index is greater than its column
## index to 5
mat1[row(mat1) > col(mat1)] <- 5
## or
mat1[lower.tri(mat1)] <- 5
## or
for (i in seq_len(nrow(mat1))) {
  for (j in seq_len(ncol(mat1))) {
    if (i > j) {
      mat1[i, j] <- 5
    }
  }
}

########################################################################
## 10) Loops
########################################################################
## write a for loop that creates a vector containing the numbers
## between 1 and 1000 which are divisible by 27 (HINT: remember the
## modulo operator "%%")
res <- vector()
for (i in 1:1000) {
  if (i %% 27 == 0) {
    res <- c(res, i)
  }
}

## write a while loop that does the same thing
res <- vector()
i <- 0
## keep going only while the next multiple of 27 is still within 1000
while (i + 27 <= 1000) {
  i <- i + 27
  res <- c(res, i)
}

## create a 10X9 matrix of NA and then use two for loops to populate
## each element of the matrix with product of the row and the column
## number
mat <- matrix(NA, 10, 9)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    mat[i, j] <- i * j
  }
}

########################################################################
## 11) Using apply() to operate on rows and columns
########################################################################
## Do the following pointless tasks
## using the matrix that you produced
## in the previous exercise (a 10X9 matrix where element i,j = i*j)

## use apply to find the column sums
apply(mat, 2, sum)

## use apply (and selection with [ ]) to find the column sums of rows
## 5-9
apply(mat[5:9, ], 2, sum)

## Divide each element of mat by its ROW mean. In other words,
## write an expression that evaluates to a matrix with the same
## dimensions as mat and with each element being mat[i,j]/mean(mat[i,])
## apply(mat, 1, mean) has one entry per row; R recycles it down each
## column, so element [i,j] is divided by the mean of row i
mat / apply(mat, 1, mean)

## write an expression that evaluates to a matrix with each element
## divided by the mean of the elements in its COLUMN (Why is this so
## much trickier than it looks)
## The vector of column means has one entry per column, but recycling
## still runs down the columns, so mat / apply(mat, 2, mean) would pair
## elements with the wrong means (and, since 90 is a multiple of 9,
## without any warning). Transposing first lines the means up with the
## rows of t(mat); transposing back restores the original shape.
t(t(mat) / apply(mat, 2, mean))

########################################################################
## 12) factors and logical expressions, the %in% operator and tapply()
########################################################################
## read in the ACS05 data from a few weeks ago
library(foreign)
acs <- read.dta(file = '/data/commons/carlm/ACS05/ipumsACS05.dta')
## keep only the people living in the 1000 lowest-numbered households
acs.small <- acs[acs$serial %in% sort(unique(acs$serial))[1:1000], ]

## What does this mean?
is.numeric(acs$age)
## and this:
levels(acs$age)

## Would this work to create a numeric age variable?
acs.small$Age <- as.numeric((acs.small$age))
## Hint: nope what's wrong with it?
## as.numeric() on a factor returns the internal level codes, which
## start at 1. NOTE(review): subtracting 1 presumably works because the
## first level of age corresponds to age 0 -- confirm with levels(acs$age)
acs.small$Age <- as.numeric((acs.small$age)) - 1

## write a logical expression that evaluates to true if the
## observation (in acs.small) is under 18 and not a Child of the Head.
acs.small$Age < 18 & acs.small$relate != "Child"

## use the expression you just wrote to find the number of people in
## the small sample who are under 18 and NOT listed as Child of Head
sum(acs.small$Age < 18 & acs.small$relate != "Child")

## the %in% operator to find the intersection of two vectors
## EXAMPLE: How many households include both a parent and a child of
## the Head
sum(unique(acs$serial[acs$relate == "Child"]) %in%
      unique(acs$serial[acs$relate == "Parent"]))

## How many Children (of head) live in households that also contain a
## parent of the head?
sum((acs$serial[acs$relate == "Child"]) %in%
      unique(acs$serial[acs$relate == "Parent"]))

## How many households contain at least 2 children of the Head
## (the table shows how many households have 1, 2, 3, ... children)
sel <- acs$relate == "Child"
table(tapply(acs$pernum[sel], acs$serial[sel], length))
## OR
table(table(acs$serial[acs$relate == "Child"]))
## and the count of households with at least 2 children, as one number
sum(table(acs$serial[acs$relate == "Child"]) >= 2)

## How many households include at least 2 children and a parent of the
## Head?
## Collect the serial of each child's hh where there is also a
## Parent of Head residing.
## The %in% test is made on the children's serials, so its result must
## index that same vector; using it to index all of acs$serial would
## select unrelated rows.
child.serial <- acs$serial[acs$relate == "Child"]
chWp <- child.serial[child.serial %in%
                       unique(acs$serial[acs$relate == "Parent"])]
sum(table(chWp) >= 2)

## use logical expressions and clever selection with [ ] to find the
## important demographic information asked for below

## Example -- find the mean age difference between Head and Spouse in
## each household that has both
## NOTE(review): the element-wise subtraction assumes heads and spouses
## come out in the same household order, with one of each per selected
## household -- confirm against the data
sp.serial <- unique(acs.small$serial[acs.small$relate == "Spouse"])
sel <- acs.small$serial %in% sp.serial
ageOfHead <- acs.small$Age[sel & acs.small$relate == "Head/Householder"]
ageOfSpouse <- acs.small$Age[sel & acs.small$relate == "Spouse"]
mean(ageOfHead - ageOfSpouse)

## How many same sex couples are there in the full acs sample
## (same head/spouse pairing assumption as in the example above)
sp.serial <- unique(acs$serial[acs$relate == "Spouse"])
sel <- acs$serial %in% sp.serial
sexOfHead <- acs$sex[sel & acs$relate == "Head/Householder"]
sexOfSpouse <- acs$sex[sel & acs$relate == "Spouse"]
sum(sexOfHead == sexOfSpouse)

## What is the average number of children in a female headed household
## serials of the households whose Head is female
sh.serial <- unique(acs$serial[acs$relate == "Head/Householder" &
                                 acs$sex == "Female"])
## children of the Head living in those households
sel <- acs$serial %in% sh.serial & acs$relate == "Child"
## children per household, averaged over the households that appear in
## the selection, i.e. those with at least one child of the Head
mean(tapply(acs$pernum[sel], acs$serial[sel], length))
## averaged over ALL female headed households instead (those without
## children count as zero)
sum(sel) / length(sh.serial)

## Assuming that we have a numeric variable called Age
## use tapply() to find the mean age of males and females in acs.small
tapply(acs.small$Age, acs.small$sex, mean)

## use tapply() to find the mean age of spouses
## (this returns the mean age for every value of relate; the entry
## named "Spouse" is the answer)
tapply(acs.small$Age, acs.small$relate, mean)

## Use tapply() in addition to clever selection tricks and logical
## expressions to find these interesting demographic fun facts:

## What is the average size of a household that includes both Head and
## Spouse
sp.serial <- unique(acs$serial[acs$relate == "Spouse"])
sel <- acs$serial %in% sp.serial
## one row per person, so the number of rows per serial is the hh size
mean(tapply(acs$pernum[sel], acs$serial[sel], length))

## Find the median age of the oldest children (of the head) in each hh
## in the acs.small sample
sel <- acs.small$relate == "Child"
median(tapply(acs.small$Age[sel], acs.small$serial[sel], max))