Remember to change “root” below to the directory where you keep this project!
# load packages
pkgs = c("ape", "phangorn", "seqinr", "phytools", "apTreeshape", "DMwR",
"phyloTop", "outliers", "e1071", "ROCR",
"igraph", "plotly", "dplyr", "tidyr", "Matrix",
"geosphere", "leaflet")
pkgs_ui = setdiff(pkgs, rownames(installed.packages()))
if (length(pkgs_ui) > 0) install.packages(pkgs_ui, verbose=F)
sapply(pkgs, require, character.only=T)
# set paths
root = "~/projects/ai4all-sfu_bio"
seq_dir = paste(root, "/data/FASTA.fa", sep="") #original genetic sequence
meta_dir = paste(root, "/data/meta.csv", sep="")
align_dir = paste(root, "/data/alignment.fa", sep="")
aux_dir = paste(root, "/data/Aux_Data.csv", sep="")
df_dir = paste(root, "/data/df.csv", sep="")
tree_rax_dir = paste(root, "/data/FinalH3N2.tree", sep="")
result_dir_ = paste(root, "/result", sep=""); dir.create(result_dir_, showWarnings=F)
# use a previous result; set to "" if starting anew
result_time_ = "2018-07-09_20-09-04"
# options
nseq = 200 #number of sequences to sample; 2*nseq < total # of sequences
## ape phangorn seqinr phytools apTreeshape DMwR
## TRUE TRUE TRUE TRUE FALSE TRUE
## phyloTop outliers e1071 ROCR igraph plotly
## TRUE TRUE TRUE TRUE TRUE TRUE
## dplyr tidyr Matrix geosphere leaflet
## TRUE TRUE TRUE TRUE TRUE
# load sequence alignments; to align sequences, install mafft @ (https://mafft.cbrc.jp/alignment/software/)
if (!file.exists(align_dir)) system(paste("mafft ", seq_dir, " > ", align_dir, sep=""))
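An aside: if mafft is not installed, the system() call above may quietly leave you with a missing or empty alignment; a small guard like the sketch below catches that early.
# sketch (optional): fail early when mafft is missing and no alignment exists yet
if (Sys.which("mafft") == "" && !file.exists(align_dir))
  stop("mafft not found; install it from https://mafft.cbrc.jp/alignment/software/")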
align = read.dna(align_dir, format="fasta")
strains_id = rownames(align)
strains = sapply(strsplit(strains_id,"[-]"), function(x) x[1])
dates = as.Date(sapply(strsplit(strains_id,"[-]"), function(x) x[2]))
#sample for more recent strains
set.seed(17)
result_time_ = ifelse(result_time_!="" & !dir.exists(paste(result_dir_, "/", result_time_, sep="")), "", result_time_)
result_time = ifelse(result_time_=="", gsub(" ", "_", gsub("[:]", "-", Sys.time())), result_time_)
result_dir = paste(result_dir_, "/", result_time, sep=""); dir.create(result_dir, showWarnings=F)
ind_dir = paste(result_dir, "/ind.csv", sep="")
if (result_time_ == "") {
sam_ind = rownames(align)[sample(c(1:min(2*nseq,nrow(align))), nseq, replace=F, prob=NULL)]
write.csv(sam_ind, file=ind_dir, row.names=F)
}
sam_ind = read.csv(ind_dir, stringsAsFactors=F)[,1]
# sam_ind
Calculate the distance between sequences.
align_n = align[sam_ind,]
align_n = as.phyDat(align_n)
dm = dist.ml(align_n, model="JC69")
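As a quick sanity check (an aside, not part of the pipeline): under JC69, the corrected distance for an observed proportion p of differing sites is d = -(3/4) ln(1 - 4p/3), so the maximum-likelihood distances above should closely track this formula applied to the raw Hamming proportions.
# aside: compare dist.ml's JC69 distances with the closed-form correction
p_raw = as.matrix(dist.hamming(align_n)) # proportion of differing sites (phangorn)
d_jc69 = -3/4 * log(1 - 4*p_raw/3)       # JC69 correction; compare with as.matrix(dm)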
Infer the tree using UPGMA.
tree_UPGMA = upgma(dm)
plot(tree_UPGMA, show.tip.label=F, main="UPGMA")
Infer the tree using neighbour joining.
tree_NJ = NJ(dm)
plot(tree_NJ, main="Phylogeny: Neighbour Joining", show.tip.label=F)
tree = ladderize(tree_NJ)
plot(tree, main="Phylogeny: Neighbour Joining", show.tip.label=F)
is.rooted(tree_NJ) # is the tree rooted?
is.binary(tree_NJ) # are the tree's branches binary?
# save to use in a later section
tree_dir = paste(result_dir, "/tree_NJ.Rdata", sep="")
if (!file.exists(tree_dir)) save(tree_NJ, file=tree_dir)
## [1] FALSE
## [1] TRUE
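Since the NJ tree turns out to be unrooted, note (as an aside, this is not needed below) that phangorn can root it, e.g. by midpoint rooting:
# aside: midpoint rooting places the root halfway along the longest tip-to-tip path
tree_rooted = midpoint(tree_NJ)
is.rooted(tree_rooted) # TRUE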
But which of these trees is a better fit for your data? Using the parsimony() function, you can compare their respective parsimony scores (lower is better).
parsimony(tree_UPGMA, align_n)
parsimony(tree_NJ, align_n)
## [1] 598
## [1] 561
optim.parsimony() tries to find the maximum parsimony tree using either Nearest Neighbor Interchange (NNI) rearrangements or subtree pruning and regrafting (SPR).
tree_optim = optim.parsimony(tree_NJ,align_n)
tree_pratchet = pratchet(align_n)
plot(tree_optim, main="Phylogeny: Maximum Parsimony NNI", show.tip.label=F)
plot(tree_pratchet, main="Phylogeny: Maximum Parsimony (Ratchet)", show.tip.label=F)
## Final p-score 559 after 2 nni operations
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
## [1] "Best pscore so far: 559"
Maximum likelihood methods allow you to include the full data from your sequence alignment in a statistical framework that estimates model parameters.
The first thing we need to do is create a special type of object of class “pml”. This object mainly contains our tree and data, but also the parameters of an evolutionary model, their values (usually set to some default), and the likelihood of the model.
fit = pml(tree_NJ,align_n)
## negative edges length changed to 0!
print(fit)
plot(fit, main="Phylogeny: Neighbour Joining (PML)", show.tip.label=F)
##
## loglikelihood: -7200.111
##
## unconstrained loglikelihood: -5500.084
##
## Rate matrix:
## a c g t
## a 0 1 1 1
## c 1 0 1 1
## g 1 1 0 1
## t 1 1 1 0
##
## Base frequencies:
## 0.25 0.25 0.25 0.25
The function optim.pml() will optimize the tree topology and branch lengths under your selected model of nucleotide evolution.
fitJC = optim.pml(fit, model="JC", rearrangement="stochastic")
logLik(fitJC)
plot(fitJC, main="Phylogeny: Neighbour Joining Optimized (PML)", show.tip.label=F)
## optimize edge weights: -7200.111 --> -7175.895
## optimize edge weights: -7175.895 --> -7175.895
## optimize topology: -7175.895 --> -7165.78
## optimize topology: -7165.78 --> -7165.78
## optimize topology: -7165.78 --> -7164.921
## 4
## optimize edge weights: -7164.921 --> -7164.921
## optimize topology: -7164.921 --> -7164.921
## 0
## [1] "Ratchet iteration 1 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 2 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 3 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 4 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 5 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 6 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 7 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 8 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 9 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 10 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 11 , best pscore so far: -7164.92050417452"
## [1] "Ratchet iteration 12 , best pscore so far: -7164.92050394874"
## [1] "Ratchet iteration 13 , best pscore so far: -7164.92050394874"
## [1] "Ratchet iteration 14 , best pscore so far: -7164.92050383717"
## [1] "Ratchet iteration 15 , best pscore so far: -7164.92050383717"
## [1] "Ratchet iteration 16 , best pscore so far: -7164.92050383717"
## [1] "Ratchet iteration 17 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 18 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 19 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 20 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 21 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 22 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 23 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 24 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 25 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 26 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 27 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 28 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 29 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 30 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 31 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 32 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 33 , best pscore so far: -7164.92050374767"
## [1] "Ratchet iteration 34 , best pscore so far: -7164.920503434"
## [1] "Ratchet iteration 35 , best pscore so far: -7164.920503434"
## [1] "Ratchet iteration 36 , best pscore so far: -7164.920503434"
## [1] "Ratchet iteration 37 , best pscore so far: -7164.92049494019"
## [1] "Ratchet iteration 38 , best pscore so far: -7164.92049494019"
## [1] "Ratchet iteration 39 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 40 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 41 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 42 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 43 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 44 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 45 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 46 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 47 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 48 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 49 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 50 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 51 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 52 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 53 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 54 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 55 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 56 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 57 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 58 , best pscore so far: -7164.92049435348"
## [1] "Ratchet iteration 59 , best pscore so far: -7164.92049435348"
## optimize edge weights: -7164.92 --> -7164.92
## optimize topology: -7164.92 --> -7164.92
## 0
## optimize edge weights: -7164.92 --> -7164.92
## 'log Lik.' -7164.92 (df=397)
Note that the object returned by optim.pml() is not a phylogeny, but an optimized object of class “pml” (which contains an optimized phylogeny with edge lengths). Let’s plot this phylogeny:
tree_JC = fitJC$tree
plot(tree_JC, type="phylogram", lab4ut="horizontal", edge.width = 2, show.tip.label=F)
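An aside: JC69 is the simplest substitution model; richer models fitted to the same data can be compared via AIC (lower is better). The GTR fit below is shown only as an illustration and is not used later.
# aside: fit a richer model (GTR) on the same data and compare via AIC
fitGTR = optim.pml(fit, model="GTR", rearrangement="none")
AIC(fitJC)
AIC(fitGTR)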
# define which tree to use for ancestral sequence reconstruction; since we saved tree_NJ earlier, let's use that one
tree_type = "NJ"
tree = tree_NJ
if (file.exists(tree_dir)) tree = get(load(tree_dir))
# convert alignment format
align_npd = phyDat(align_n)
# parsimony reconstruction: based on the Fitch algorithm for bifurcating trees (note: there is often no unique solution)
anc = ancestral.pars(tree, align_npd, "ACCTRAN")
plotAnc(tree, anc, cex.pie=.5, show.tip.label=F)
title(paste0(tree_type, ": ACCTRAN"))
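ACCTRAN is only one way of resolving the ambiguity; as a quick variation (not used later), you can also compute the MPR reconstruction and compare the pie charts:
# variation: MPR reconstruction, for comparison with ACCTRAN
anc_mpr = ancestral.pars(tree, align_npd, "MPR")
plotAnc(tree, anc_mpr, cex.pie=.5, show.tip.label=F)
title(paste0(tree_type, ": MPR"))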
Align the known influenza virus strains with the ancestral sequences; a known strain must have a sampling date preceding its child strains, and we assume there are at least 2 layers in the tree.
# get sequences ready (combine ancestral with known strains)
seqa_dir = paste(result_dir, "/FASTA_anc.fa", sep="")
seq_all_dir = paste(result_dir, "/FASTA_all.fa", sep="")
# merge ancestral and unknown sequences
if (!file.exists(seq_all_dir)) {
sequ = read.dna(seq_dir, format="fasta")
sequ_anc = as.DNAbin(lapply(anc, function(x) colnames(x)[apply(x,1,function(y) which.max(y))] ))
write.dna(sequ_anc, file=seqa_dir, format="fasta")
sequ_anc = read.dna(seqa_dir, format="fasta", as.matrix=F)
sequ_all = append(sequ_anc, sequ)
sequ_all = sequ_all[!(duplicated(names(sequ_all)))] # leaf sequences not included in distance calculation
write.dna(sequ_all, file=seq_all_dir, format="fasta")
}
# align sequences, install mafft @ (https://mafft.cbrc.jp/alignment/software/)
align_anc_dir = paste(result_dir, "/alignment_anc.fa", sep="")
if (!file.exists(align_anc_dir)) system(paste("mafft ", seq_all_dir, " > ", align_anc_dir, sep=""))
align_anc = read.dna(align_anc_dir, format="fasta")
# get distances ready
dm_anc_dir = paste(result_dir, "/dm_anc.Rdata", sep="")
if (!file.exists(dm_anc_dir)) {
dm_anc_ = dist.ml(phyDat(align_anc),model="JC69")
dm_anc = as.matrix(dm_anc_, sparse=T)
tree_inds = !rownames(dm_anc)%in%strains_id | rownames(dm_anc)%in%sam_ind
dm_anc = dm_anc[tree_inds, !tree_inds]
save(dm_anc, file=dm_anc_dir)
}
dm_anc = get(load(dm_anc_dir))
strains_kn = sapply(strsplit(rownames(dm_anc),"[-]"), function(x) x[1])
strains_unkn = sapply(strsplit(colnames(dm_anc),"[-]"), function(x) x[1])
dates_unkn = as.Date(sapply(strsplit(colnames(dm_anc),"[-]"), function(x) x[2]))
names(dates_unkn) = strains_unkn
evo_month = 444 # max number of months needed for a parent strain to evolve into a child strain (max = 444)
# load metadata
meta = read.csv(meta_dir, stringsAsFactors=F)[,-1]
rownames(meta) = meta[,"strain"]
# label nodes starting from root using depth first search
edges = tree$edge[order(tree$edge[,2]),] #first nseq nodes are leaves
edges[1:nseq,2] = tree$tip.label
edges[,2] = sapply(strsplit(edges[,2], "[-]"), function(x) x[1])
nodes = unique(as.vector(edges))
node = unique(edges[!edges[,1]%in%edges[,2],1]) #root as the starting node
Try it yourself :)
Given the starting node, all nodes, and edges, try labelling all the non-leaf nodes with virus strains (i.e. the numbers inside ‘nodes’ and ‘edges’ should be replaced by strain names, e.g. KY583561), following the constraints below; a rough sketch of one possible approach is shown after the list.
- a parent node should be a strain with a sampling date earlier than its child nodes
- a parent node should be the strain most similar to the inferred parent strain (labelled as numbers in ‘rownames(dm_anc)’), i.e. the one with the shortest distance in the dm_anc matrix (ancestral sequences x known sequences)
- tip: you can use ‘as.Date’ to convert character dates into date variables, so that you can use ‘<’ and ‘>’ to compare earlier and later dates!
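If you get stuck, here is a rough sketch of one possible approach (not the official solution; ‘dist_anc’ and ‘todo’ are names made up for this sketch, ties are broken arbitrarily, and empty candidate sets are not handled):
# sketch: label internal nodes top-down from the root; each numeric node gets
# the known strain nearest to its inferred ancestral sequence, restricted to
# strains sampled after the node's already-labelled parent
dist_anc = dm_anc
colnames(dist_anc) = strains_unkn          # index distances by bare strain name
todo = node                                # breadth-first queue, starting at the root
while (length(todo) > 0) {
  cur = todo[1]; todo = todo[-1]
  kids = edges[edges[,1]==cur, 2]
  if (cur %in% rownames(dist_anc)) {       # still a number, so it needs a label
    d = dist_anc[cur,]
    par = edges[edges[,2]==cur, 1]         # parent (already a strain name, if any)
    if (length(par)==1 && par %in% names(dates_unkn))
      d = d[dates_unkn > dates_unkn[par]]  # a child must postdate its parent
    best = names(which.min(d))
    edges[edges==cur] = best               # replace the number everywhere
    nodes[nodes==cur] = best
  }
  todo = c(todo, kids)
}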
Check your answers:
edges_t = get(load(paste(result_dir,"/edges.Rdata", sep=""))) # this is what 'edges' should look like
nodes_t = get(load(paste(result_dir,"/nodes.Rdata", sep=""))) # this is what 'nodes' should look like
Prepare the node list ‘virus’ and the edge list ‘vphy’.
edges = edges_t
nodes = nodes_t
# viral strains
virus = meta[nodes, c("strain", "subtype", "date", "lng", "lat")]
# viral phylogeny
vphy = data.frame(id=seq_len(nrow(edges)),
start_lng=meta[edges[,1], "lng"], start_lat=meta[edges[,1], "lat"],
end_lng=meta[edges[,2], "lng"], end_lat=meta[edges[,2], "lat"])
Plot the map. Edges are coloured based on the child strain, from red (ancestral) to yellow (newer strains).
# heat colours: red (early) to yellow (recent)
colours = sapply(heat.colors(length(unique(virus$date))), function(x) gsub("FF$","",x))
names(colours) = sort(unique(virus$date))
# phylogeny edges
geo_lines = gcIntermediate(vphy[,c("start_lng", "start_lat")],
vphy[,c("end_lng", "end_lat")],
n=200, addStartEnd=T, sp=T, breakAtDateLine=T)
# map
leaflet() %>%
addMiniMap(tiles = providers$Esri.OceanBasemap, width = 120, height=80) %>%
addProviderTiles(providers$Esri.OceanBasemap) %>%
addCircleMarkers(data=virus, lng=~lng, lat=~lat,
color = as.vector(colours[virus[,"date"]]), weight=2, radius=2,
label=paste(virus[,"subtype"], " (",
virus[,"strain"], ") ",
virus[,"date"], sep="")) %>%
addPolylines(data=geo_lines, color = as.vector(colours[meta[edges[,2],"date"]]),
weight = 1, opacity = 0.5, fill = FALSE, fillOpacity = 0.5, label = NULL)
Keep in mind that our data set contains mostly strains found in the USA, so most of the strains on our phylogeny will be in the USA; this could be because the USA has the most flu strains, or, more practically, simply because the USA collects the most data!
Let’s see where the root strain comes from.
# display root and leaf nodes
meta[unique(edges[!edges[,1]%in%edges[,2],1]),] # root strain
# meta[unique(edges[!edges[,2]%in%edges[,1],2]),] # leaf strains
We assume that only the strongest influenza virus strains will survive to cause influenza in the coming season; therefore, we try to predict which strains, out of all the strains we have collected, will survive.
# load the data
Auxdata = read.csv(aux_dir)[,2:3]
df = read.csv(df_dir,sep=",",stringsAsFactors=F)[-1,-1]
nrow(df)
names(df) = c("Clade","number_of_tips","number_of_tips_prune","sackin",
"colless","Variance","I2","B1","avgLadder","ILnumber","pitchforks",
"maxHeight","MaxWidth","DelW","Stairs1","Stairs2","Cherries","BS","descinm","getstattest","skewness","kurtosis","tips_pairwise_distance","tips_pairwise_distance_max",
"diameter", "WienerIndex", "betweenness", "closeness", "eigenvector","Labels")
tree = read.tree(tree_rax_dir)
#prepare the dataset
# convert the feature columns (2 .. ncol-1) to numeric in one pass
num_cols = names(df)[2:(ncol(df)-1)]
df[num_cols] = lapply(df[num_cols], as.numeric)
df$Labels = as.factor(df$Labels)
scaled.df = scale(df[,3:(ncol(df)-1)])
df1 = cbind(as.data.frame(df[,1:2]),scaled.df)
df = cbind(df1,df$Labels)
names(df) = c("Clade","NumberofTipsMain","NumberofTips","sackin",
"colless","Variance","I2","B1","avgLadder","ILnumber","pitchforks",
"maxHeight","MaxWidth","DelW","Stairs1","Stairs2","Cherries","BS","descinm","getstattest","skewness","kurtosis","MeanPairwiseDist","MaxPairwiseDist",
"diameter", "WienerIndex", "betweenness", "closeness", "eigenvector","Labels")
#remove rows containing NA's
df = df[complete.cases(df), ]
#remove the outliers
df_outlier = df[,3:(ncol(df)-1)]
outlier.scores = lofactor(df_outlier, k=5)
# plot(density(outlier.scores))
# pick top 5 as outliers
outliers = order(outlier.scores, decreasing=T)[1:5]
# who are outliers? remove them
#print(outliers)
df = df[-outliers,]
## [1] 191
We train the prediction model on strains sampled before 2015 and test on the latest strains (2015 and later); clades rooted at nodes 10563 to 11927 are used for testing.
clades = df[,1]
test_ind = c()
for (i in seq_along(clades)) {
  t = extract.clade(tree, as.numeric(clades[i]))
  ind = match(t$tip.label, Auxdata[,1])
  dates = as.Date(Auxdata[ind, 2])
  if (min(dates) >= as.Date("2015-01-01")) test_ind = c(test_ind, i)
}
train = df[-test_ind, ]
test = df[test_ind, ]
dim(train)
dim(test)
train = train[,3:ncol(train)]
test = test[,3:ncol(test)]
## [1] 140 30
## [1] 42 30
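Before fitting anything, it is worth checking the class balance of the two splits (an aside):
# aside: class balance of the train/test splits
table(train$Labels)
table(test$Labels)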
Try it yourself :)
Tune the hyper-parameters to choose the best combination.
tune.out = tune(svm, Labels ~ .,
                data = train, kernel = "radial",
                ranges = list(gamma = 2^c(-5:5), cost = 2^c(-5:5)),
                coef0 = 0, degree = 3, nu = 0.5,
                class.weights = c("0"=0.45, "1"=0.55))
tune.out
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost
## 0.125 1
##
## - best performance: 0.2428571
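An aside: the model refitted with the winning parameters is stored inside the tune() result, so you do not have to retype them:
# aside: the best model found during tuning
bestmod = tune.out$best.model
summary(bestmod)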
Build the model (SVM).
svm.fit = svm(Labels ~ ., data = train,
              kernel = "radial", degree = 3, gamma = 0.03125,
              coef0 = 3, cost = 2, nu = 0.5,
              class.weights = c("0"=0.45, "1"=0.55))
svm.prob = predict(svm.fit, newdata = test)
summary(svm.prob)
## 0 1
## 30 12
Finally, we calculate metrics to evaluate how well the prediction model did.
table(test$Labels,svm.prob)
agreement = svm.prob == test$Labels
table(agreement)
prop.table(table(agreement))
#computing the true positive rate (sensitivity)
P = length(which(test$Labels==1))
TP = which(test$Labels==1)
TP = length(which(svm.prob[TP]==1))
TPR = TP/P
TPR
#computing the true negative rate
N = length(which(test$Labels==0))
TN = which(test$Labels==0)
TN = length(which(svm.prob[TN]==0))
TNR = TN/N
TNR
#computing the false positive rate
N = length(which(test$Labels==0))
FP = which(test$Labels==0)
FP = length(which(svm.prob[FP]==1))
FPR = FP/N
FPR
#computing the false negative rate
P = length(which(test$Labels==1))
FN = which(test$Labels==1)
FN = length(which(svm.prob[FN]==0))
FNR = FN/P
FNR
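Equivalently (a compact sketch; ‘cm’ is just a local name used here), all four rates can be read straight off the confusion matrix:
# sketch: the same four rates from the confusion matrix in one shot
cm = table(truth=test$Labels, pred=svm.prob)
TPR = cm["1","1"] / sum(cm["1",]) # sensitivity = TP / (TP + FN)
TNR = cm["0","0"] / sum(cm["0",]) # specificity = TN / (TN + FP)
FPR = cm["0","1"] / sum(cm["0",]) # = 1 - TNR
FNR = cm["1","0"] / sum(cm["1",]) # = 1 - TPR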
#compute the AUC
svmmodel.predict = predict(svm.fit, newdata = test,decision.values=TRUE)
svmmodel.probs = attr(svmmodel.predict,"decision.values")
svmmodel.class = predict(svm.fit,test,type="class")
svmmodel.labels = test$Labels
#roc analysis for test data
svmmodel.prediction = prediction(svmmodel.probs,svmmodel.labels)
svmmodel.performance = performance(svmmodel.prediction,"tpr","fpr")
svmmodel.auc = performance(svmmodel.prediction,"auc")@y.values[[1]]
## svm.prob
## 0 1
## 0 26 2
## 1 4 10
## agreement
## FALSE TRUE
## 6 36
## agreement
## FALSE TRUE
## 0.1428571 0.8571429
## [1] 0.7142857
## [1] 0.9285714
## [1] 0.07142857
## [1] 0.2857143
Plot the ROC curve and compute the AUC (area under the curve).
plot(svmmodel.performance,type="l", col="blue")
round(svmmodel.auc,2)
legend("bottomright", legend=c("AUC=0.90"), fill=c("blue"), cex=0.7)
## [1] 0.89
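A diagonal reference line (chance level, AUC = 0.5) makes the ROC plot easier to read; you can add one like this:
# add a chance-level reference line to the ROC plot
abline(a=0, b=1, lty=2, col="grey")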