############################### # A couple of examples showing # how to use a few R functions ############################### # # ------------------------------------------------- # # ------------------------------------------------- # ------------------------------------------------- # Example 1 # ------------------------------------------------- ## file store the table contained in path ## "header = T" means that there is a name for each col ## so R do not consider the first line as data pdbs.info = read.table("/gn23/elevy/pqs/in_scop/pdb_stat_info_2.txt", header=T) ## get the homodimers dimers = which(pdbs.info[,2]==2) ## complexes containing at least 1 contact between id. chains homo.contact= which(pdbs.info[,4] >= 1) ## get homodimers homodimers = intersect(dimers, homo.contact) ## print 100 first homodimers names pdbs.info[homodimers[1:100],1] # ------------------------------------------------- # Example 2 # ------------------------------------------------- ## shows the histogram of the "number of chains / complex" hist(pdbs.info[,2]) # ------------------------------------------------- # Example 3 # ------------------------------------------------- ## Read a table storing gene expression data expression = read.table("yeast.timeseries.mod", header=T) ## Take a sub array (remove the 1st line & column ## as.matrix casts the table as a matrix ## this is necessary for next operations exp.sub = as.matrix(expression[2:1523, 2:53]) ## remove the lines containing *only* 1 value (SD=0) exp.sub.filter = remove.val.on.line(mat=exp.sub) ##replaces all 99 values by 0 exp.sub[which(exp.sub==99)]=0 ## Calculates the correlation between each couple of gene exp.cor = cor(t(exp.sub.filter)) ## Clusters the covariance matrix exp.clust = hclust(dist(exp.cor), met="ward") ## Split the visualisation window in 2 par(mfrow=c(1,2)) # 1 line / 2 cols ## look at the NON clustered data image(exp.sub.filter) ## look at the clustered data ## exp.clust is the result from the clustering ## it contains different objects that u can access ## using exp.cluster$NAME. ## order contains the order of the clustered rows image(exp.sub.filter[exp.clust$order,]) # ------------------------------------------------- # Example 4 # ------------------------------------------------- ## The clustering algorithm has constructed a tree and ## we ask to cut it so that we get 100 groups groups.def = cutree(exp.clust, c(100)) ## t is of the form ## ## group_num (group_num of the fist gene in exp.sub.filter) ## group_num ## group_num ## . ## . plot.1.group(grp=36) # plot defaut group, ie 7 plot.1.group(grp=37) # plot defaut group, ie 7 plot.1.group(grp=100) # plot defaut group, ie 7 # ------------ Functions ------------- # ## Now we look if these genes really have the same expression pattern plot.1.group = function(expression=exp.sub.filter, grp=7, groups = groups.def){ ## Get the genes that are in the group "grp" group.grp = which(groups==grp) plot(y = expression[group.grp[1],], x = 1:(length(expression[group.grp[1],])),t = "l") for (i in 2:(length(group.grp))){ lines(y = expression[group.grp[i],], x = 1:(length(expression[group.grp[i],])),col = i) } } ## this function takes a matrix as argument and ## remove the lines which SD is equal to 0 remove.val.on.line = function(mat){ # The matrix I'll return result = c() # number of lines n.lines = dim(mat)[1] # for each line for( i in 1:n.lines){ if( sd(mat[i,]) != 0){ result = rbind(result,mat[i,]) } } return(result) }