######################################################################
## Mon Sep 19 13:00:04 PDT 2005
##
## ~carlm/213/DataStructures/demonstration.r
## Demog 213 demonstration: contains illuminating and thought
## provoking examples of the construction and use of matrices, arrays,
## data frames, lists and factors.
##
## includes a bit on apply(), tapply() to make this week's homework doable
## You can load this file into your emacs buffer by typing:
## C-x i ~carlm/213/DataStructures/demonstration.r
######################################################################

## Chapter 3 Objects, Modes and Attributes
## All *data* objects in R are either vectors or "lists": all vectors
## have a mode (which applies equally to each of its elements) and a
## length -- which indicates how many elements it has.  You can change
## the mode and length of a vector very easily.

small.vector <- 9
mode(small.vector)
length(small.vector)

## growing the length pads the new elements with NA
length(small.vector) <- 10
small.vector

## changing the mode converts every element (9 becomes "9")
mode(small.vector) <- "character"
small.vector

## vectors with "ATTRIBUTES" are often called by other names
## there are lots of attributes, one that we use a lot is the
## dimensionality, or "dim"

dim(small.vector)                 # NULL: a plain vector has no dim
dim(small.vector) <- c(5, 2)      # same 10 elements, now a 5 x 2 array
small.vector
is.array(small.vector)
dim(small.vector) <- NULL         # drop the attribute: a plain vector again
is.array(small.vector)
small.vector

## Constructing matrices and arrays from vectors: the
## array() and matrix() functions are used to construct arrays and
## matrices.
## Which brings us to Chapter 5 : Arrays and Matrices
## more traditional ways of creating matrices are with the:
## matrix(), array(), cbind() and rbind() functions

small.vector <- 1:10
dim(small.vector) <- c(2, 5)
small.vector
dim(small.vector) <- NULL

vect1 <- 1:42
## NOTE(review): 62 values do not fit a 7 x 6 matrix and 60 values do not
## fill 7 rows evenly, so both calls below emit a warning -- presumably a
## deliberate demonstration of truncation / recycling; confirm.
mat1 <- matrix(1:62, nrow = 7, ncol = 6)
mat1
matrix(1:60, nrow = 7, byrow = TRUE)

## it is not necessary to specify both nrow and ncol
matrix(vect1, nrow = 7)
## Notice the difference when the byrow=TRUE argument is added
matrix(vect1, nrow = 7, byrow = TRUE)
matrix(data = vect1, nrow = 7, byrow = TRUE)

## array takes a *vector* of dimensions as its 'dim' argument
mat1 <- array(42:1, dim = c(6, 7))
mat1
## 42 values recycled to fill a 6 x 7 x 2 array
mat1 <- array(42:1, dim = c(6, 7, 2))
mat1

## cbind combines things as columns of a matrix
## (1:5 is recycled against 1:7 with a warning; 9999 fills a whole column)
mat2 <- cbind(1:5, 1:7, 9999)
mat2
## rbind combines things as rows:
mat3 <- rbind(1, 2, 3, 4, 5, 1:7)
mat3

## square brackets are used to select into and out of vectors and arrays
mat3[6, 7] <- 43
mat3
mat3[6, 7] == 43
mat3[3, ] <- 0
mat3
mat3[, 7]
mat3[5:6, 6:7]
mat3[5:6, 6:7] <- -88
mat3
mat3[5:6, 6:7] <- 1:2             # 1:2 is recycled down each column
mat3

## matrix and vector arithmetic
## in class 08

even <- c(2, 4, 6, 8)
## three ways of indexing the columns of a row:
mat3[3, seq(1, ncol(mat3), by = 2)]        # positions 1, 3, 5, 7
mat3[4, seq_len(ncol(mat3)) %% 2 == 1]     # logical mask, same columns
mat3[4, seq_len(ncol(mat3)) %% 2]          # numeric 1/0 index: 0s are dropped,
                                           # so column 1 is returned repeatedly

Amat <- matrix(1:42, nrow = 6)
Amat
Bmat <- matrix(seq(0, 70, length.out = 36), nrow = 6)
Bmat

## matrix multiplication (standard)
Bmat %*% Amat
dim(Bmat)
dim(Amat)

## what about these ?
Bmat * Amat[, 1:6]                # elementwise product, same dimensions
Bmat / Amat[, 1:6]                # elementwise quotient
Bmat * Amat[, 3]                  # a plain vector is recycled down the columns
## two arrays with different dims are non-conformable: this is an error.
## try() keeps the demonstration (the message is still shown) without
## aborting the rest of the file when it is source()d.
try(Bmat * Amat[, 1:2])
as.vector(Bmat) * Amat[1:2, 3]    # plain vectors: the shorter one is recycled

## Back to Chapter 4 -- "Factors" Ordered and unordered

whatzit <- c("tomorrow", "and", "tomorrow", "and", "tomorrow", "creeps", "in",
             "and", "etc")
whatzit
mode(whatzit)
length(whatzit)
attributes(whatzit)
whatzitnow <- factor(whatzit)
whatzitnow
mode(whatzitnow)
length(whatzitnow)
attributes(whatzitnow)

whatzitnow == "tomorrow"
whatzit == "tomorrow"
whatzitnow == 3
as.numeric(whatzit)               # not numbers: all NA, with a warning
as.numeric(whatzitnow)            # the integer codes behind the factor levels

##-------------------
## A dataframe is a collection of data vectors arranged so that the
## i'th element of each vector corresponds to the same observation
## For example suppose we had a file wherein each row contained the
## first name, last name, parish, sex, year and age at death of a
## person who lived in a certain part of Croatia during the 18th and
## 19th centuries.  Each row contains information on a particular person, each
## column contains a particular piece of information for each person
## in the dataset.  A dataframe can be thought of (and treated) like a
## matrix but there are a couple of additional things one can do with a
## dataframe that one cannot do with a matrix.
## ---
## read.table() constructs a dataframe from a file.  It can be told what
## to expect as a delimiter (sep) and whether or not the first row
## contains the variable names (header).  By default, read.table()
## assumes that the '#' character indicates a comment.  It is odd to
## encounter comments in a data file, it is much less odd to encounter
## the '#' in a variable name.  The as.is=F argument makes
## read.table() convert all character data into type
## "factor".  We'll encounter factors later.
## as.is=F was the default when this file was written.  Since R 4.0.0
## read.table() defaults to stringsAsFactors = FALSE (that is,
## as.is = TRUE), so as.is = FALSE is passed explicitly below to get
## factors.  When you want character vectors to be read as character
## vectors you would use as.is=TRUE
## ---

### READING an ascii file into a data.frame

## comment.char = "" turns off '#' comment handling so that a '#' inside
## the data is read as data.
croats <- read.table("/hdir/0/carlm/213/DataStructures/croats.dat",
                     sep = "\t", header = TRUE, comment.char = "",
                     as.is = FALSE)

## examine the structure of this new object, croats
mode(croats)
length(croats)
class(croats)
dim(croats)
names(croats)
mode(croats$fname)
class(croats$fname)
class(croats$lname)
class(croats$parish)
class(croats$sex)
class(croats$year)
class(croats$agedt)

## with factors, you can do tapply -- a very useful function
## NOTE(review): dividing by 12 presumably converts agedt from months to
## years -- confirm against the data file.
tapply(croats$agedt, croats$parish, mean)
tapply(croats$agedt, croats$parish, mean) / 12
tapply(croats$agedt, list(croats$parish, croats$sex), mean) / 12

## median agedt by five equal-width bins of year, crossed with sex
tapply(croats[, "agedt"],
       list(cut(croats[, "year"], breaks = 5), croats[, "sex"]),
       median)

## ratio of mean to standard deviation within each parish-by-sex cell
tapply(croats$agedt, list(croats$parish, croats$sex), mean) /
  tapply(croats$agedt, list(croats$parish, croats$sex), sd)

## apply is VERY different from tapply -- it operates on arrays
## by dimension rather than by subsets defined by factors
## (MARGIN 2 means "by column"; columns 5:6 are year and agedt according to
## the field order described for this file -- verify with names(croats))
apply(croats[, 5:6], 2, mean)

## or if you like to name those arguments -- note the odd capitalization
## also note, perhaps, the subsetting tricks used:
apply(X = croats[(croats[, "year"] < 1800), 5:6], MARGIN = 2, FUN = mean)