Rstudio's Shiny enabled me to try out ideas on the Web.
** Creating Mazes
The most visitors came from these two posts:
- The hottest areas in CS for 2013 - per Google Research grants
- Anand-vs-Carlsen: What the numbers tell?
Ram
Targeted at anyone interested in Data Science, Data Mining, Data Analytics and Machine Learning
# @ Given a data frame slice, this function determines who had more wins in that slice | |
moreWins <- function(df) { | |
valid.rows <- sum(!is.na(df$GK.won)) # count of 0 or 1 values | |
if(valid.rows==0) { | |
return("00") | |
} | |
gk.wins <- sum(na.omit(df$GK.won == "1")) #how many times did GK win | |
gk.losses <- sum(na.omit(df$GK.won == "0")) #how many times did GK lose | |
if(gk.wins > gk.losses) { | |
return("GK") | |
} | |
else if(gk.wins == gk.losses) { | |
return("DR") | |
} | |
else { | |
return("AK") | |
} | |
} | |
yearlywinner.df <- ddply(df, .(year), moreWins) | |
names(yearlywinner.df)[2] <- "winner.1" | |
five.year.winner.df <- ddply(df, .(five.year), moreWins) | |
names(five.year.winner.df)[2] <- "winner.5" | |
decade.winner.df <- ddply(df, .(decade), moreWins) | |
names(decade.winner.df)[2] <- "winner.10" |
terminal.cells <- read.csv(problemfile, header=T, stringsAsFactors=FALSE) | |
terminal.cells$tcell <- (terminal.cells$Y -1)* side + terminal.cells$X # cell serial number | |
num.colors <- length(unique(terminal.cells$color)) | |
colorpalette <- unique(terminal.cells$palette) | |
#The problemfile | |
#X,Y,color, palette | |
#1,5,1,red | |
#2,4,1,red | |
#1,4,2,yellow | |
#5,5,2,yellow | |
#1,2,3,blue | |
#4,4,3,blue | |
#2,2,4,green | |
#4,3,4,green |
init(side, num.colors) #df, const.type.vec have been initialized | |
df <- populate.Amatrix() #see github link for this function | |
rhs.vector <- create_rhs_vector(rhs.vector, terminal.cells) | |
const.type.vec <- createConstraintTypeVector(const.type.vec) | |
length(rhs.vector); length(const.type.vec) | |
# actual problem definition | |
lpff <- make.lp(nrow=n.row, ncol=n.col) | |
defineIP() | |
lpff | |
solve(lpff) | |
sol <- get.variables(lpff) | |
defineIP <- function() { | |
set.objfn(lpff, rep(1, n.col)) | |
set.constr.type(lpff, const.type.vec, 1:n.row) #horiz, vert, corners | |
set.rhs(lpff, b=rhs.vector, constraints=1:n.row) # assign rhs values | |
#Set all the columns at once | |
for (col in 1:n.col) { | |
set.column(lpff, col, df[ ,col]) | |
set.type(lpff, col, "binary") | |
} | |
#assemble it all | |
dimnames(lpff) <- setRowAndColNames() | |
write.lp(lpff, "flowfreeIP.lp", "lp")#write it out | |
} |
library("ggmap") | |
library(maptools) | |
library(maps) | |
visited <- c("SFO", "Chennai", "London", "Melbourne", "Johannesbury, SA") | |
ll.visited <- geocode(visited) | |
visit.x <- ll.visited$lon | |
visit.y <- ll.visited$lat | |
#> dput(visit.x) | |
#c(-122.389979, 80.249583, -0.1198244, 144.96328, 28.06084) | |
#> dput(visit.y) | |
#c(37.615223, 13.060422, 51.5112139, -37.814107, -26.1319199) |
#USING MAPS | |
map("world", fill=TRUE, col="white", bg="lightblue", ylim=c(-60, 90), mar=c(0,0,0,0)) | |
points(visit.x,visit.y, col="red", pch=16) |
#Using GGPLOT, plot the Base World Map | |
mp <- NULL | |
mapWorld <- borders("world", colour="gray50", fill="gray50") # create a layer of borders | |
mp <- ggplot() + mapWorld | |
#Now Layer the cities on top | |
mp <- mp+ geom_point(aes(x=visit.x, y=visit.y) ,color="blue", size=3) | |
mp |
library(ggplot2) | |
library(reshape2) | |
data(movies) | |
movieGenres <- movies[c(18:23)] #subset to 6 genres | |
cor(movieGenres) # 6x6 cor matrix | |
#ggplot likes the data 'melted' one value per row | |
m <-melt(cor(movieGenres)) | |
p <- ggplot(data=m, aes(x=Var1, y=Var2, fill=value)) + geom_tile() | |
#set up a coloring scheme using colorRampPalette | |
red=rgb(1,0,0); green=rgb(0,1,0); blue=rgb(0,0,1); white=rgb(1,1,1) | |
RtoWrange<-colorRampPalette(c(red, white ) ) | |
WtoGrange<-colorRampPalette(c(white, green) ) | |
p <- p + scale_fill_gradient2(low=RtoWrange(100), mid=WtoGrange(100), high="gray") | |
library(XML) | |
#Recursive Function to visit the XML tree (depth first) | |
visitNode <- function(node) { | |
if (is.null(node)) { | |
#leaf node reached. Turn back | |
return() | |
} | |
print(paste("Node: ", xmlName(node))) | |
num.children = xmlSize(node) | |
if(num.children == 0 ) { | |
# Add your code to process the leaf node here | |
print( paste(" ", xmlValue(node))) | |
} | |
#Go one level deeper | |
for (i in 1 : num.children) { | |
visitNode(node[[i]]) #the i-th child of node | |
} | |
} | |
xmlfile <- "books.xml" | |
#read the XML tree into memory | |
xtree <- xmlInternalTreeParse(xmlfile) | |
root <- xmlRoot(xtree) | |
visitNode(root) |
# Genotype AA = 1, Aa = 2 and aa = 3 | |
# Gamete A =1 and a = 0 | |
#Uniformly distribute AA, Aa and aa | |
start.pop <- sample(1:3, kStartPop, replace=TRUE) |
#Given a parent individual, get one of their Alleles in the gamete | |
getGamete <- function(indiv) { | |
if (indiv == 1) return(1) #AA | |
if (indiv == 3) return(0) #aa | |
#if Parent is Aa, the gamete is one binomial trial with prob.big.A | |
if (indiv == 2) return(rbinom(1, size=1, prob.big.A)) #Aa | |
} | |
#Two parental Gametes combine to form a zygote | |
combineGametes <- function(mg,dg) { | |
if ((mg == 1) && (dg == 1)) return(1) #AA | |
else if ((mg == 0) && (dg == 0)) return(3) #aa | |
else return(2) #Aa | |
} | |
#Given two individuals, get an offspring for next generation | |
getOffspring <- function(mom, dad) { | |
momGam <- getGamete(mom) | |
dadGam <- getGamete(dad) | |
return(combineGametes(momGam, dadGam)) | |
} |
getNextGen <- function(x) { | |
nextgen <- list() | |
for(i in seq(1, kStartPop, by=2)) { | |
firstborn <- getOffspring(x[i], x[i+1]) | |
secondborn <- getOffspring(x[i], x[i+1]) | |
nextgen[i] <- firstborn | |
nextgen[i+1] <- secondborn | |
} | |
return(unlist(nextgen)) | |
} |
simulationOneTrial <- function(start.pop, knumGenerations, trial.index) { | |
df.allele <- NULL | |
df.gen <- NULL | |
#Keep track of the individuals in each generation | |
df.gen <- rbind(df.gen, start.pop) | |
x <- start.pop | |
for ( gen in 1:knumGenerations) { | |
#print(unlist(nex)) | |
#print(paste("Gen:", gen)) | |
numA <- calcAllelesInGeneration(x) | |
df.gen <- rbind(df.gen, x) | |
df.allele <- rbind(df.allele, c(gen, trial.index, numA)) | |
nex <- getNextGen(x) | |
x <- sample(nex) #shuffle the population for breeding | |
} |
#Bookkeeping | |
calcAllelesInGeneration <- function(x) { | |
AA = sum(x==1) | |
AB = sum(x==2) | |
BB = sum(x==3) | |
#print(unlist(x)) | |
#print(c(AA, AB, BB)) | |
num.A <- (2 * AA) + AB | |
num.B <- (2 * BB) + AB | |
return(c(num.A, num.B)) | |
} | |
###plotting function | |
plotAllelesWithTime <- function(df, num.trials) { | |
colorRange<-colorRampPalette(c(rgb(0,0,1), rgb(1,0.7,0) )) | |
p <- ggplot(df, aes(x= Generation, y= value, group=Trial, color=factor(Trial))) + geom_line() | |
p <- p + scale_colour_manual(values = colorRange(num.trials), | |
name="Trial") | |
p <- p + labs(title = "Allele A Frequencies Across Generations (2011) ") | |
p <- p + ylab("Number of \"A\" Allele in the Population") | |
return(p) | |
} |
# R-Rows C-cols in the center | |
seedAreaCenter <- function(r, c){ | |
for(x in as.integer((areaW-c)/2): as.integer((areaW + c)/2)){ | |
for(y in as.integer((areaH-r)/2): as.integer((areaH + r)/2)){ | |
area_df[x,y] <<- 1 | |
} | |
} | |
} | |
#Seed a central area and let it grow | |
seedAreaCenter(7,7) |
# RING of R-Rows C-cols in the center of width w cells | |
seedCenterRing <- function(r, c, wide){ | |
for(x in as.integer((areaW-c)/2): as.integer((areaW + c)/2)){ | |
for(y in as.integer((areaH-r)/2): as.integer((areaH + r)/2)){ | |
area_df[x,y] <<- 1 | |
} | |
} | |
#scoop out the inner ring | |
for(x in as.integer((areaW-c)/2 + wide): as.integer((areaW + c)/2 - wide)){ | |
for(y in as.integer((areaH-r)/2+wide): as.integer((areaH + r)/2- wide )){ | |
area_df[x,y] <<- 0 | |
} | |
} | |
} |
# w-cols to the R and L of center column | |
seedColumns <- function(c, w){ | |
startLCol = as.integer( (areaW-c) / 2 ) | |
startRCol = as.integer( (areaW+c) / 2 ) | |
for(y in 1:areaH) { | |
clist = c((startLCol-w):startLCol, startRCol:(startRCol+w)) | |
lapply(clist, function(x) area_df[x,y] <<- 1) | |
} | |
} | |
#seed two vertical columns of width w | |
seedColumns(12,2) | |
# Are any of the 4 direct N E W S adjacent cells occupied? | |
isAnyOfNEWSCellsOccupied <- function(m,n) { | |
canOccupy <- FALSE | |
NEWSdir = c(2,4,5,7) | |
#traverse the vector of 4 elements | |
for(k in 1:4) { | |
xCheck = m + adjacells[NEWSdir[k],1] | |
yCheck = n + adjacells[NEWSdir[k],2] | |
if(!(outOfBounds(xCheck, yCheck))) { | |
if(area_df[xCheck, yCheck] > 0) { | |
print(paste("area df of ", xCheck, yCheck, area_df[xCheck, yCheck])) | |
print(paste("(",m,",",n,")" ,"NEWS neighbor of (", yCheck, "-", xCheck)) | |
canOccupy <- TRUE | |
} | |
} | |
} #end of looping through k | |
return(canOccupy) | |
} |
store_iteration_stats<- function(kNumSettlers, found.home, max.look.around) { | |
#how many settlers found homes | |
print(c(found.home," settled out of ", kNumSettlers, (found.home/kNumSettlers)*100, "%" ) ) | |
#avg # of steps per new settler | |
print(c("num of look arounds max'ed:", max.look.around)) | |
} |
# Make multiple runs (Replication of simulation) and take the average of stats | |
st <- data.frame() | |
st_row<- vector() | |
for(i in 1:kNumReplications) { | |
area_df <- resetIteration() | |
seedAreaWithPioneers(numPioneers,seeding.opt) | |
simstats <- accommodateSettlers(kNumSettlers, settling.option) | |
found.home <- simstats[1] | |
max.look.around <- simstats[2] | |
#compute for this iterations | |
st_row <- store_iteration_stats(i, kNumSettlers, found.home, max.look.around) | |
st <- rbind(st,st_row) | |
} |