# UFO sightings visualisation example.
#
# Reads the sightings table, keeps rows whose date field has 8 characters,
# derives a two-letter state code from the location column, and then:
#   1. draws a US choropleth of sighting counts per state, once for the whole
#      data set and once per time period (saved as PDF files);
#   2. tabulates reported shapes against time period and saves a mosaic plot
#      of the most frequent shapes as EPS.

#install.packages('GeoXp')
#install.packages('maps')
library(ggplot2)
library(maps)
library(vcd)

# NOTE(review): hard-coded, machine-specific working directory. It is kept
# because every relative input/output path below depends on it.
setwd('/users/daf/Current/courses/BigData/VisualizationExample')

# ---- Helpers ----

# Return the last `n` characters of each element of the character vector `x`.
substrRight <- function(x, n){
  substr(x, nchar(x)-n+1, nchar(x))
}

# Count sightings per state.
#
# sightings:     data frame with a `goodstates` column (and a `datetypes`
#                column when `period` is given).
# state_abbrevs: character vector of state codes to count, in output order.
# period:        NULL to count every row, or a single period index; rows
#                whose `datetypes` is NA are never counted for a period.
#
# Returns an integer vector aligned with `state_abbrevs`.
count_sightings_by_state <- function(sightings, state_abbrevs, period = NULL) {
  in_period <- if (is.null(period)) {
    rep(TRUE, nrow(sightings))
  } else {
    !is.na(sightings$datetypes) & sightings$datetypes == period
  }
  vapply(
    state_abbrevs,
    function(abbrev) sum(in_period & sightings$goodstates %in% abbrev),
    integer(1),
    USE.NAMES = FALSE
  )
}

# Join per-state counts onto the map polygons.
#
# merge() does not keep the vertex order produced by map_data(), so the
# result is re-sorted on the `order` column; otherwise geom_polygon would
# connect the outline points in the wrong sequence.
merge_state_counts <- function(map_df, crossref) {
  merged <- merge(map_df, crossref, by.x='region', by.y='V1')
  merged[order(merged$order), ]
}

# Build the choropleth: one polygon per map group, filled by `statecounts`.
plot_state_counts <- function(map_counts, title) {
  ggplot(data=map_counts, aes(x=long, y=lat)) +
    geom_polygon(data=map_counts, aes(fill=statecounts, group=group)) +
    scale_fill_continuous() +
    ggtitle(title)
}

# ---- Load and clean the sightings ----

d1 <- read.delim('chimps_16154-2010-10-20_14-33-35/ufo_awesome.tsv', header=FALSE)
# reads the whole thing

# this will turn the dates into r dates
wd <- as.character(d1$V1)
# Keep only rows whose date field is exactly 8 characters (YYYYMMDD). The
# explicit is.na() guard stops missing values from becoming NA row indices,
# which would otherwise insert all-NA rows into d1.
has_full_date <- !is.na(wd) & nchar(wd) == 8
d1 <- d1[has_full_date, ]
d1$gooddates <- as.Date(as.character(d1$V1), format='%Y%m%d')

# The state code is taken to be the last two characters of column V3.
d1$goodstates <- substrRight(as.character(d1$V3), 2)

# ---- Map data and state list ----

#load us map data
all_states <- map_data("state")
#wdat<-subset(all_states, region %in% c('wisconsin'))
#wdat<-all_states[match(all_states$region, 'wisconsin', nomatch=FALSE)]
all_states$statefactors <- factor(all_states$region)

# Column 1 is merged against the map's `region`; column 2 is compared with
# the two-letter codes in d1$goodstates.
statedat <- read.csv('statelist', header=FALSE)
state_abbrevs <- as.character(statedat[[2]])

# ---- All sightings by state ----

statecounts <- count_sightings_by_state(d1, state_abbrevs)
crossref <- data.frame(statedat[1], statecounts)
allstateswcounts <- merge_state_counts(all_states, crossref)
# so now we have a count of sightings by state
#plot with ggplot
p <- plot_state_counts(allstateswcounts, "All UFO sightings over period")
print(p)
ggsave('allUFO.pdf', plot=p)
# this gives all counts by state

# ---- Sightings by state within time periods ----

# now we should look at counts in time intervals
d1$numdates <- as.numeric(d1$gooddates)
#summary(d1$numdates)
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#-208000   10850   12400   11360   13680   14850       8
# so we'll do periods of 1000 days from 10000 to 14000
d1$datecats <- cut(d1$numdates, c(-Inf, 10000, 11000, 12000, 13000, 14000, Inf))
# Use the codes of the cut() factor directly. Wrapping it in factor() again
# would drop empty intervals and renumber the remaining periods.
d1$datetypes <- as.integer(d1$datecats)

# One title per interval of `datecats`, in level order. The boundary dates
# come from as.Date(<break>, origin='1970-01-01').
period_titles <- c(
  "UFO sightings over up to 19 May 1997",
  "UFO sightings 19 May 1997 to 13 Feb 2000",   #as.Date(11000,origin='1970-01-01')
  "UFO sightings 13 Feb 2000 to 09 Nov 2002",   #as.Date(12000,origin='1970-01-01')
  "UFO sightings 09 Nov 2002 to 5 Aug 2005",    #as.Date(13000,origin='1970-01-01')
  "UFO sightings 5 Aug 2005 to 1 May 2008",     #as.Date(14000,origin='1970-01-01')
  "UFO sightings after 1 May 2008"
)

for (k in seq_along(period_titles)) {
  statecounts <- count_sightings_by_state(d1, state_abbrevs, period = k)
  crossref <- data.frame(statedat[1], statecounts)
  allstateswcounts <- merge_state_counts(all_states, crossref)
  # so now we have a count of sightings by state for period k
  p <- plot_state_counts(allstateswcounts, period_titles[k])
  print(p)
  ggsave(paste0('UFO', k, '.pdf'), plot=p)
}

# ---- Shapes against time ----

# now we want to look at shapes against time
# but what shapes are common?
# The leading space in every label is deliberate: it is part of the value
# compared against column V4.
shapevec <- c(' chevron', ' cigar', ' circle', ' cone', ' crescent', ' cross',
              ' cylinder', ' delta', ' diamond', ' disk', ' dome', ' egg',
              ' fireball', ' flare', ' flash', ' formation', ' hexagon',
              ' light', ' other', ' oval', ' pyramid', ' rectangle', ' round',
              ' sphere', ' teardrop', ' triangle', ' unknown')
cleand1 <- d1[d1$V4 %in% shapevec, ]
cleand1$V4 <- factor(cleand1$V4)   # reset the levels
cd1tab <- with(cleand1, table(V4, datetypes))

# most important are
isvec <- c(' disk', ' light', ' circle', ' triangle', ' sphere', ' oval',
           ' other', ' unknown')
cd2 <- d1[d1$V4 %in% isvec, ]
cd2$V4 <- factor(cd2$V4)   # reset the levels
cd2tab <- with(cd2, table(V4, datetypes))

# Write the shape-by-period mosaic plot as EPS.
setEPS()
postscript("shapemosaic.eps")
mosaic(cd2tab)
dev.off()