library(ggplot2)
library(scales)
library(plyr)

# k values to keep from the simulation output; reused by the plots below.
k.vals <- c(1, 2, 4, 6, 8, 10)

# Load the simulation results. results.tsv is checked in as a .gz file, so
# decompress it first if the read below fails.
# NOTE(review): every relative path in this script depends on this setwd().
setwd("~/github/2014-paper-oneswarm-journal/java_sim/eclipse/oneswarm-timing-attack-simulation/")
data <- read.delim("results.tsv")

# `count` is mis-named in the simulator: it is really a proportion. Average it
# within each (frac, k) cell and keep the number of rows behind each average.
data <- ddply(
  data, .(frac, k), summarise,
  prob = mean(count),
  size = length(count)
)

# Restrict to the k values of interest, then treat k as a categorical series.
data <- data[data$k %in% k.vals, ]
data$k <- factor(data$k)
 

# N is the denominator of the attacker fraction (x / N); `neighbors` is the
# number of trials used for the binomial comparison further down.
N <- 1000
neighbors <- 39

# x = 1, 2, ..., N (kept as doubles, as produced by seq(..., by = 1)).
x <- seq(1, N, by = 1)

# x(x-1) / (N(N-1)): the probability that two draws without replacement from
# N items, x of which are marked, are both marked. Plotted as the "OR" curve.
tor <- (x * (x - 1)) / (N * (N - 1))

# Extend the simulation results with two synthetic series: an end point
# (frac = 1, prob = 1) for every k, and the closed-form curve `tor` under the
# extra factor level "O.R.".
data2 <- rbind(
  data,
  data.frame(frac = 1, k = k.vals, prob = 1, size = 1),
  data.frame(frac = x / N, k = as.factor("O.R."), prob = tor, size = 1)
)

# Legend entries: one "k >= v" expression per k series, then the O.R. curve.
# Built from k.vals (sorted, matching the order of the numeric factor levels)
# so the label count stays in step with the plotted series.
hyper.labels <- c(lapply(sort(k.vals), function(v) bquote(k >= .(v))), "OR")

# Simulation data alone.
# The plot is print()ed explicitly: a bare ggplot expression is only drawn by
# top-level autoprinting, so under source() the PDF would otherwise be empty.
pdf("../../journal/figures/hyper.pdf", 4, 3)
print(
  ggplot(data2, aes(frac, prob, group = k, color = k, linetype = k)) +
    theme_bw(12) +
    xlab("Fraction of Peers that are Attackers") +
    ylab("Prob. of Attack Success") +
    geom_line() +
    scale_x_continuous(limits = c(0, .75), breaks = pretty_breaks(7)) +
    theme(legend.position = c(.7, .5)) +
    scale_linetype(name = "", labels = hyper.labels) +
    scale_color_hue(name = "", labels = hyper.labels)
)
dev.off()


######################

# comparison against binomial


# phyper(q, m, n, k, lower.tail = TRUE, log.p = FALSE)
 # p(x) = choose(m, x) choose(n, k-x) / choose(m+n, k)
#  q 	 vector of quantiles representing the number of attackers drawn without replacement from an urn which contains both non-attackers and attackers.
#  m 	the number of attackers in the urn.
#  n 	the number of non-attackers in the urn.
#  k 	the number of peers drawn from the urn.
#  lower.tail 	logical; if TRUE (default), probabilities are P[X ≤ x], otherwise, P[X > x].
#
#  setwd("/Users/brian/svns/oneswarm-analysis/branches/journal/figures/")

# 
# y1=phyper(q=0,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# y2=phyper(q=1,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# y4=phyper(q=4,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# y6=phyper(q=6,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# y8=phyper(q=8,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# y10=phyper(q=10,m=x,n=N-x,k=neighbors,lower.tail=FALSE)
# 
# Upper binomial tail for every (k, frac) pair: pbinom is called once per row
# of the grid with size = neighbors and lower.tail = FALSE.
# NOTE(review): with lower.tail = FALSE, pbinom(q = k, ...) is P[X > k], while
# the series are labelled "k>=". Confirm against the simulator whether
# q = k - 1 was intended.
binomial <- mdply(
  expand.grid(q = k.vals, prob = x / N),
  pbinom,
  size = neighbors,
  lower.tail = FALSE
)
# mdply returns the grid columns (q, prob) followed by the result (V1).
colnames(binomial) <- c("k", "frac", "prob")

# Shared series label ("k>=1", "k>=2", ...) with levels in k.vals order, so
# both tables can be merged and plotted with a consistent legend order.
binomial$name <- factor(paste0("k>=", binomial$k), levels = paste0("k>=", k.vals))
data$name <- factor(paste0("k>=", data$k), levels = paste0("k>=", k.vals))

# Line up prediction and simulation per (frac, k, name); the clashing `prob`
# columns become prob.bin and prob.sim.
mrg <- merge(
  binomial, data,
  by = c("frac", "k", "name"),
  suffixes = c(".bin", ".sim")
)

# Error of the binomial prediction relative to the simulated value.
mrg$diff <- (mrg$prob.sim - mrg$prob.bin) / mrg$prob.sim

# Relative error of the binomial prediction, restricted to points where the
# simulation exceeds the prediction by at least 0.02.
# - The plot is print()ed explicitly: a bare ggplot expression is only drawn
#   by top-level autoprinting, so under source() the PDF would be empty.
# - geom_line() has no shape aesthetic, so the former aes(shape = name)
#   mapping was dropped.
# - `labels` is spelled out instead of relying on partial matching of `label`.
diff.labels <- lapply(k.vals, function(v) bquote(k >= .(v)))

pdf("../../journal/figures/sim-binomial-difference.pdf", width = 5, height = 3)
print(
  ggplot(
    subset(mrg, (prob.sim - prob.bin) >= .02),
    aes(x = frac, y = diff, group = name, color = name, linetype = name)
  ) +
    theme_bw(14) +
    xlab("Fraction of Peers that are Attackers") +
    ylab("Error in predicting  success") +
    geom_line() +
    scale_y_continuous(
      labels = percent,
      breaks = pretty_breaks(10),
      limits = c(0, 1)
    ) +
    scale_x_continuous(limits = c(0, .4), breaks = pretty_breaks(4)) +
    theme(legend.position = c(.85, .6)) +
    scale_linetype(name = "", labels = diff.labels) +
    scale_color_hue(name = "", labels = diff.labels)
)
dev.off()
#  
# # http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_%28ggplot2%29/
# multiplot(ggplot(data,aes(x=frac,y=prob,group=name,color=name,linetype=name)) + 	theme_bw(10)+ xlab("Fraction of Peers that are Attackers") +  ylab("Error in predicting attack success") + geom_line(aes(shape=name))+scale_y_continuous(label=percent,breaks=pretty_breaks(10),limits=c(0,1))+scale_x_continuous(limits=c(0,.5)),
# ggplot(binomial,aes(x=frac,y=prob,group=name,color=name,linetype=name)) + 	theme_bw(10)+ xlab("Fraction of Peers that are Attackers") +  ylab("Error in predicting attack success") + geom_line(aes(shape=name))+scale_y_continuous(label=percent,breaks=pretty_breaks(10),limits=c(0,1))+scale_x_continuous(limits=c(0,.5)))

library(binom)
# 95% Bayesian interval around each simulated proportion. The success count
# passed to binom.confint is reconstructed as round(prob.sim * size).
d <- cbind(
  mrg,
  with(mrg, binom.confint(
    x = round(prob.sim * size, 0),
    n = size,
    conf.level = .95,
    method = c("bayes"),
    tol = 1e-12
  ))
)

# pass: the binomial prediction lies inside the simulation's interval.
# Both bounds are tested against prob.bin; the upper bound was previously
# compared with prob.sim, i.e. with the value the interval was built around,
# so over-predictions by the binomial were never rejected.
d$pass <- with(d, prob.bin >= lower & prob.bin <= upper)

# Diagnostic plot, one facet per k series: interval bounds coloured by `pass`,
# the simulated curve in black and the binomial prediction in grey.
# print() is explicit so the plot is also drawn when the script is source()d,
# and `labels` is spelled out instead of relying on partial matching.
print(
  ggplot(d, aes(x = frac, y = prob.sim, group = name, color = pass)) +
    theme_bw(10) +
    geom_line(aes(y = lower, color = pass)) +
    geom_line(aes(y = upper, color = pass)) +
    geom_line(color = "black") +
    geom_line(aes(y = prob.bin), color = "grey") +
    scale_y_continuous(
      labels = percent,
      breaks = pretty_breaks(10),
      limits = c(0, 1)
    ) +
    scale_x_continuous(limits = c(0, .6), breaks = pretty_breaks(3)) +
    theme(legend.position = c(.6, .1)) +
    facet_wrap(~name, nrow = 3)
)

# Range of attacker fractions, per interval method and k, over which the
# binomial is not significantly different from the Monte Carlo simulation.
# Reported separately for fractions below and above 0.15.
ddply(
  d[which(d$pass & d$frac < .15), ],
  .(method, k),
  summarize,
  frac.min = min(frac),
  frac.max = max(frac)
)
ddply(
  d[which(d$pass & d$frac > .15), ],
  .(method, k),
  summarize,
  frac.min = min(frac),
  frac.max = max(frac)
)

# Spot check of a single point: 30 attackers out of 1000, k = 6.
mrg[which(mrg$frac == 30 / 1000 & mrg$k == 6), ]
