# https://youtu.be/JUFLIS1y3rI
# https://www.youtube.com/user/philippedebrouwer
# http://www.de-brouwer.com/publications/r-book/
# https://www.linkedin.com/in/philippedebrouwer/
# http://www.de-brouwer.com/assets/r-book/MBA_R/startR.R

# +----------------------------------------------------------------------------+
# |                             the R code for the                              |
# |                                 Big R-book                                  |
# |                         by Philippe J.S. De Brouwer                         |
# +----------------------------------------------------------------------------+
# | Copyright: (C) Philippe De Brouwer, 2020                                    |
# | Owners of the book can use this code for private and commercial             |
# | use, except re-publishing.                                                   |
# +----------------------------------------------------------------------------+
# | This document is a true copy of all the R code that appears in the book.    |
# | It is provided for you so that when you work with the book, you can copy    |
# | and paste any code segment that you want to test without typing everything  |
# | again.                                                                       |
# | No warranties or promises are given.                                         |
# | Have an interesting time and learn faster than me!                          |
# | Philippe J.S. De Brouwer                                                     |
# +----------------------------------------------------------------------------+
# | For your convenience, blocks are separated by: '# ----------------------'.  |
# +----------------------------------------------------------------------------+

# rm(list = ls())

# R as a calculator ####
3+2
3*2
3^2
sin(pi)
cos(pi)

# Elementary types ####
1
2L
"a"
3+2i
TRUE

# Basic types ####

## vector ####
x <- c(1,2)
y <- c(1:10)
x + y
x * y
x | y
x & y

## Matrices ####
M <- matrix(c(1:4), nrow = 2, byrow = FALSE)
M
M[1,2]
M[, 2]
M[1, ]   # a row (note: M[3, ] would be out of bounds for this 2 x 2 matrix)
v <- c(2, 2)
# Note: an assignment such as M[3,5] <- 10 would fail here: a matrix cannot
# be extended by assigning outside its dimensions, and a non-square matrix
# could not be inverted with solve() anyway.
# Invert the matrix:
M_inv <- solve(M)
# And the solution is:
p <- M_inv %*% v

# Data Frames ####
mtcars
# ------- addressing data
mtcars$mpg
# ------- manipulate data
d <- mtcars
d$lp100km <- round(235.215 / mtcars$mpg, 1)
d$wt  <- d$wt / 2
d$wt2 <- mtcars$wt/2 * 1000

# Functions ####
mean(mtcars$mpg)
myFUN <- function(x) {
  round(235.215 / x, 1)
}

# Linear regression model ####
lm1 <- lm(formula = mpg ~ hp, data = mtcars)
# Visualise the relation:
plot(mtcars$hp, mtcars$mpg, col = "red",
     main = "MPG i.f.o. HP for {mtcars}",
     abline(lm1, col = 'blue', lwd = 3),
     cex = 1.3, pch = 16,
     xlab = "HP", ylab = "MPG")

## Multiple linear regression ####
lm2 <- lm(mpg ~ gear + wt + cyl * am, data = mtcars)
summary(lm2)
print(lm2)
plot(lm2)

# decision trees ####
## example of a regression tree with rpart on the dataset mtcars
library(rpart)
library(rpart.plot)
# Fit the tree:
t <- rpart(mpg ~ cyl + disp + hp + drat + wt + qsec + am + gear,
           data = mtcars, na.action = na.rpart, method = "anova",
           control = rpart.control(
             minsplit  = 20,     # minimum nbr. of observations required for split
             minbucket = 20/3,   # minimum nbr. of observations in a terminal node
                                 # (the default = minsplit/3)
             cp = 0.0001,        # complexity parameter set to a very small value;
                                 # this will grow a large (over-fit) tree
             maxcompete  = 4,    # nbr. of competitor splits retained in output
             maxsurrogate = 5,   # nbr. of surrogate splits retained in output
             usesurrogate = 2,   # how to use surrogates in the splitting process
             xval = 7,           # nbr. of cross validations
             surrogatestyle = 0, # controls the selection of a best surrogate
             maxdepth = 30       # maximum depth of any node of the final tree
             )
           )
# Print the tree:
print(t)
summary(t)
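# Not in the book: a quick, illustrative check of how the fitted tree is
# used for prediction (here simply on a few rows of the training data):
predict(t, newdata = mtcars[1:5, ])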
# plot(t) ; text(t)  # This would produce the standard plot from rpart.
# Instead we use:
prp(t, type = 5, extra = 1, box.palette = "Blues", digits = 4,
    shadow.col = 'darkgray', branch = 0.5)
plotcp(t)
# Prune the tree:
t1 <- prune(t, cp = 0.05)
# Finally, plot the pruned tree:
prp(t1, type = 5, extra = 1, box.palette = "Reds", digits = 4,
    shadow.col = 'darkgray', branch = 0.5)

# Ensemble models ####
## random forest ####
#install.packages("randomForest")
library(randomForest)
frm <- mpg ~ cyl + disp + hp + drat + wt + qsec + am + gear
set.seed(1879)
# Fit the random forest:
forestCars <- randomForest(frm, data = mtcars)
# Show an overview:
print(forestCars)
# Plot the random forest overview:
plot(forestCars)
# Show the summary of fit:
summary(forestCars)
# visualization of the RF:
getTree(forestCars, 1, labelVar = TRUE)
# Show the purity of the nodes:
imp <- importance(forestCars)
imp

# Neural nets ####
# install.packages("neuralnet")
library(neuralnet)
nn0 <- neuralnet(mpg ~ wt + qsec + am + hp + disp + cyl + drat + gear + carb,
                 data = mtcars, hidden = c(0), linear.output = TRUE)
plot(nn0, rep = "best", intercept = TRUE, information = FALSE,
     show.weights = TRUE)
# Fit the ANN with three hidden layers of resp. 9, 3 and 2 neurons.
# (Older versions of neuralnet do not accept a formula with a dot as in
# 'y ~ .'; if yours does not, spell out the formula as for nn0 above.)
nn1 <- neuralnet(mpg ~ ., data = mtcars, hidden = c(9, 3, 2),
                 linear.output = TRUE)
plot(nn1, rep = "best", information = FALSE)
# A classification example (from the chapter on the Titanic data; here 'd'
# must be the prepared Titanic data frame, not the mtcars copy above):
nn.titanic <- neuralnet(Survived ~ Pclass + Sex + Age + SibSp + Parch +
                          Fare + Cabin + Embarked,
                        data = d, hidden = c(15, 7, 4), linear.output = FALSE)

# PREDICT: works for lm, glm, rpart, nn, ... ####

# SVM ####
library(e1071)
svmCars1 <- svm(cyl ~ ., data = mtcars)
summary(svmCars1)
# split mtcars in two subsets (not necessary but easier later):
x <- subset(mtcars, select = -cyl)
y <- mtcars$cyl
# fit the model again as a classification model:
svmCars2 <- svm(cyl ~ ., data = mtcars, type = 'C-classification')
# create predictions
pred <- predict(svmCars2, x)
# show the confusion matrix:
table(pred, y)
svmTune <- tune(svm, train.x = x, train.y = y,
                kernel = "radial",
                ranges = list(cost = 10^(-1:2), gamma = c(.5, 1, 2)))
print(svmTune)

library(ggplot2)
library(ggrepel)  # provides geom_label_repel()
ggplot(mtcars, aes(wt, mpg, color = factor(cyl))) +
  geom_point(size = 5) +
  geom_label_repel(aes(label = rownames(mtcars)),
                   box.padding = 0.2, point.padding = 0.25,
                   segment.color = 'grey60')

# K-MEANS ####
# normalize weight and mpg
d <- data.frame(matrix(NA, nrow = nrow(mtcars), ncol = 1))
d <- d[,-1]  # d is an empty data frame with 32 rows
rngMpg <- range(mtcars$mpg, na.rm = TRUE)
rngWt  <- range(mtcars$wt,  na.rm = TRUE)
d$mpg_n <- (mtcars$mpg - rngMpg[1]) / rngMpg[2]
d$wt_n  <- (mtcars$wt  - rngWt[1])  / rngWt[2]
# Here is the k-means clustering itself.
# Note the nstart parameter (the number of random starting sets)
carCluster <- kmeans(d, 3, nstart = 15)
print(carCluster)
table(carCluster$cluster, mtcars$cyl)
# Note that the rows are the clusters (1, 2, 3) and the number of
# cylinders are the columns (4, 6, 8).
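# Not in the book: an illustrative way to interpret the clusters is to look
# at the cluster centres (in the normalised coordinates) and at the average
# raw mpg and wt per cluster:
carCluster$centers
aggregate(mtcars[, c("mpg", "wt")],
          by = list(cluster = carCluster$cluster), FUN = mean)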
# Additionally, we customize colours and legend title.
# First, we need color names:
library(tidyverse)
my_colors <- if_else(carCluster$cluster == 1, "darkolivegreen3",
                     if_else(carCluster$cluster == 2, "coral", "cyan3"))
# We already loaded the libraries as follows:
#library(ggplot2); library(ggrepel)
# Now we can create the plot:
ggplot(mtcars, aes(wt, mpg, fill = factor(carCluster$cluster))) +
  geom_point(size = 5, colour = my_colors) +
  scale_fill_manual('Clusters',
                    values = c("darkolivegreen3", "coral", "cyan3")) +
  geom_label_repel(aes(label = rownames(mtcars)),
                   box.padding = 0.2, point.padding = 0.25,
                   segment.color = 'grey60')

# PRINCIPAL COMPONENTS ####
# Normalize the whole mtcars dataset:
d <- data.frame(matrix(NA, nrow = nrow(mtcars), ncol = 1))
d <- d[,-1]  # d is an empty data frame with 32 rows
for (k in 1:ncol(mtcars)) {
  rng <- range(mtcars[, k], na.rm = TRUE)
  d[, k] <- (mtcars[, k] - rng[1]) / rng[2]
}
colnames(d) <- colnames(mtcars)
rownames(d) <- rownames(mtcars)
# The PCA analysis:
pca1 <- prcomp(d)
summary(pca1)
# Note also:
class(pca1)
# Make the clustering:
carCluster <- kmeans(d, 4, nstart = 10)
d_cluster  <- cbind(d, cluster = factor(carCluster$cluster))
# Load the graph libraries:
library(ggplot2)
library(ggfortify)  # provides autoplot() for prcomp objects
library(ggrepel)
# Plot the results:
autoplot(pca1, data = d_cluster, label = FALSE, shape = 18, size = 5,
         alpha = 0.6, colour = 'cluster',
         loadings = TRUE, loadings.colour = 'blue',
         loadings.label = TRUE, loadings.label.size = 5) +
  geom_label_repel(aes(label = rownames(mtcars)),
                   box.padding = 0.2, point.padding = 0.25,
                   segment.color = 'grey60')

# We use the same data as defined in the previous section: d is the
# normalised dataset mtcars.
pca1 <- prcomp(d)  # unchanged
# Extract the first 3 PCs:
PCs <- data.frame(pca1$x[,1:3])
# Then use this, for example, in the kmeans function:
carCluster <- kmeans(PCs, 4, nstart = 10, iter.max = 1000)

# Reminder of previous code:
d <- data.frame(matrix(NA, nrow = nrow(mtcars), ncol = 1))
d <- d[,-1]  # d is an empty data frame with 32 rows
for (k in 1:ncol(mtcars)) {
  rng <- range(mtcars[, k], na.rm = TRUE)
  d[, k] <- (mtcars[, k] - rng[1]) / rng[2]
}
colnames(d) <- colnames(mtcars)
rownames(d) <- rownames(mtcars)
pca1 <- prcomp(d)                 # runs the PCA
PCs  <- data.frame(pca1$x[,1:3])  # extracts the first 3 PCs
# Now, PCs holds the three major PCs.
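# Not in the book: an illustrative check of how much of the variance the
# first three PCs capture (cumulative proportion from summary.prcomp):
summary(pca1)$importance["Cumulative Proportion", 1:3]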
# -- New code below:
# Plot those three PCs:
plot(PCs, col = carCluster$cluster, pch = 16, cex = 3)

library(plot3D)
scatter3D(x = PCs$PC1, y = PCs$PC2, z = PCs$PC3,
          phi = 45, theta = 45, pch = 16, cex = 1.5,
          bty = "f", clab = "cluster",
          colvar = as.integer(carCluster$cluster),
          col = c("darkolivegreen3", "coral", "cyan3", "gray"),
          colkey = list(at = c(1, 2, 3, 4), addlines = TRUE,
                        length = 0.5, width = 0.5,
                        labels = c("1", "2", "3", "4"))
          )
text3D(x = PCs$PC1, y = PCs$PC2, z = PCs$PC3,
       labels = rownames(d), add = TRUE, colkey = FALSE, cex = 1.2)

library(plotly)
plot_ly(x = PCs$PC1, y = PCs$PC2, z = PCs$PC3,
        type = "scatter3d", mode = "markers",
        color = factor(carCluster$cluster))

library(tidyverse)  # provides if_else
library(ggplot2)    # 2D plotting
library(ggfortify)
library(cluster)    # provides fanny (the fuzzy clustering)
library(ggrepel)    # provides geom_label_repel (de-clutter labels)
carCluster <- fanny(d, 4)
my_colors <- if_else(carCluster$cluster == 1, "coral",
               if_else(carCluster$cluster == 2, "darkolivegreen3",
                 if_else(carCluster$cluster == 3, "cyan3", "darkorchid1")))
# Autoplot with visualization of 4 clusters
autoplot(carCluster, label = FALSE, frame = TRUE, frame.type = 'norm',
         shape = 16,
         loadings = TRUE, loadings.colour = 'blue',
         loadings.label = TRUE, loadings.label.size = 5,
         loadings.label.vjust = 1.2, loadings.label.hjust = 1.3) +
  geom_point(size = 5, alpha = 0.7, colour = my_colors) +
  geom_label_repel(aes(label = rownames(mtcars)),
                   box.padding = 0.2, point.padding = 0.25,
                   segment.color = 'grey40') +
  theme_classic()

# Compute hierarchical clustering
library(tidyverse)
cars_hc <- mtcars %>%
  scale %>%                       # scale the data
  dist(method = "euclidean") %>%  # dissimilarity matrix
  hclust(method = "ward.D2")      # hierarchical clustering
plot(cars_hc)

# k nearest neighbours:
library(class)
# The signature of knn() is (train, test and cl still to be supplied):
# knn(train, test, cl, k = 1, l = 0, prob = FALSE, use.all = TRUE)

library(tidyverse)
library(modelr)
d <- mtcars
lm1 <- lm(mpg ~ wt + cyl, data = d)
# The signature of add_predictions() is:
# add_predictions(data, model, var = "pred", type = NULL)
library(modelr)
# Use the data defined above:
d1 <- d %>% add_predictions(lm1)
# d1 has now an extra column "pred"
head(d1)
# The signature of add_residuals() is:
# add_residuals(data, model, var = "resid")
d2 <- d1 %>% add_residuals(lm1)
# d2 has now an extra column "resid"
head(d2)

# The signature of bootstrap() is:
# bootstrap(data, n, id = ".id")
set.seed(1872)    # make sure that results can be replicated
library(modelr)   # provides bootstrap
library(purrr)    # provides map, map_df, etc.
library(ggplot2)  # provides ggplot
d <- mtcars
boot <- bootstrap(d, 10)
# Now, we can leverage tidyverse functions such as map to create
# multiple models on the 10 datasets:
models <- map(boot$strap, ~ lm(mpg ~ wt + cyl, data = .))
# The function tidy of broom (also part of the tidyverse) creates a
# dataset based on the list of models. broom is not loaded, because it
# also provides a function bootstrap().
tidied <- map_df(models, broom::tidy, .id = "id")
# Visualize the results with ggplot2:
p <- ggplot(tidied, aes(estimate)) +
  geom_histogram(bins = 5, col = 'red', fill = 'khaki3', alpha = 0.5) +
  ylab('Count') +
  xlab('Estimate of the coefficient in the plot-title') +
  facet_grid(. ~ term, scales = "free")
p
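# Not in the book: an illustrative summary of the bootstrapped estimates
# per coefficient, e.g. as a rough 95% interval (dplyr is part of the
# tidyverse, which is already loaded):
library(dplyr)
tidied %>%
  group_by(term) %>%
  summarise(mean  = mean(estimate),
            lower = quantile(estimate, 0.025),
            upper = quantile(estimate, 0.975))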
# load modelr:
library(modelr)
# Fit a model:
lm1 <- lm(mpg ~ wt + qsec + am, data = mtcars)
# MSE (mean square error):
mse(lm1, mtcars)
# RMSE (root mean square error):
rmse(lm1, mtcars)
# MAE (mean absolute error):
mae(lm1, mtcars)
# Quantiles of absolute error:
qae(lm1, mtcars)
# R-square (variance of predictions divided by the variance of the
# response variable):
rsquare(lm1, mtcars)

set.seed(1871)
# Split the data:
rs <- mtcars %>% resample_partition(c(train = 0.6, test = 0.4))
# Train the model on the training set:
lm2 <- lm(mpg ~ wt + qsec + am, data = rs$train)
# Compare the RMSE on the training set with the testing set:
rmse(lm2, rs$train); rmse(lm2, rs$test)
# Note that this can also be done with the pipe operator:
lm2 %>% rmse(rs$train)
lm2 %>% rmse(rs$test)

# Fit the model:
lm1 <- lm(mpg ~ wt + qsec + am, data = mtcars)
# Add the predictions and residuals:
df <- mtcars %>%
  add_predictions(lm1) %>%
  add_residuals(lm1)
# The predictions are now available in $pred:
head(df$pred)
# The residuals are now available in $resid:
head(df$resid)
# It is now easy to do something with those predictions and
# residuals, e.g. the following 3 lines all do the same:
sum((df$pred - df$mpg)^2) / nrow(mtcars)
sum((df$resid)^2) / nrow(mtcars)
mse(lm1, mtcars)   # Check if this yields the same

d <- data_grid(mtcars, wt = seq_range(wt, 10), qsec, am) %>%
  add_predictions(lm1)
plot(d$wt, d$pred)

# The returns of the S&P 500 index are in the dataset SP500 of the
# package MASS:
library(MASS)
# Create the sample:
SP500_sample <- sample(SP500, size = 100)
# Change plotting to 4 plots in one output:
par(mfrow = c(2,2))
# The histogram of the complete dataset:
hist(SP500, main = "(a) Histogram of all data", fr = FALSE,
     breaks = c(-9:5), ylim = c(0,0.4))
# The histogram of the sample:
hist(SP500_sample, main = "(b) Histogram of the sample",
     fr = FALSE, breaks = c(-9:5), ylim = c(0,0.4))
# The boxplot of the complete dataset:
boxplot(SP500, main = "(c) Boxplot of all data", ylim = c(-9,5))
# The boxplot of the sample:
boxplot(SP500_sample, main = "(d) Boxplot of the sample",
        ylim = c(-9,5))
# Reset the plot parameters:
par(mfrow = c(1,1))
mean(SP500)
mean(SP500_sample)
sd(SP500)
sd(SP500_sample)
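# Not in the book: the deviation between the sample mean and the mean of
# all data is typically of the order of one standard error, sd / sqrt(n):
sd(SP500) / sqrt(length(SP500_sample))
abs(mean(SP500) - mean(SP500_sample))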
# Bootstrap generates a number of re-ordered datasets:
boot <- bootstrap(mtcars, 3)
# The datasets are now in boot$strap[[n]], with n between 1 and 3.
# e.g. the 3rd set is addressed as follows:
class(boot$strap[[3]])
nrow(boot$strap[[3]])
mean(as.data.frame(boot$strap[[3]])$mpg)
# It is also possible to coerce the selections into a data-frame:
df <- as.data.frame(boot$strap[[3]])
class(df)

set.seed(1871)
library(purrr)  # to use the function map()
boot <- bootstrap(mtcars, 150)
lmodels <- map(boot$strap, ~ lm(mpg ~ wt + hp + am:vs, data = .))
# The function tidy of broom turns a model object into a tibble:
df_mods <- map_df(lmodels, broom::tidy, .id = "id")
# Create the plots of histograms of estimates for the coefficients:
par(mfrow = c(2,2))
hist(subset(df_mods, term == "wt")$estimate, col = "khaki3",
     main = '(a) wt', xlab = 'estimate for wt')
hist(subset(df_mods, term == "hp")$estimate, col = "khaki3",
     main = '(b) hp', xlab = 'estimate for hp')
hist(subset(df_mods, term == "am:vs")$estimate, col = "khaki3",
     main = '(c) am:vs', xlab = 'estimate for am:vs')
hist(subset(df_mods, term == "(Intercept)")$estimate, col = "khaki3",
     main = '(d) intercept', xlab = 'estimate for the intercept')
par(mfrow = c(1,1))

d <- mtcars     # get data
set.seed(1871)  # set the seed for the random generator
idx.train <- sample(1:nrow(d), round(0.75 * nrow(d)))
d.train <- d[idx.train,]   # the rows selected for the training set
d.test  <- d[-idx.train,]  # the remaining rows form the test set

set.seed(1870)
sample_cars <- mtcars %>%
  resample(sample(1:nrow(mtcars), 5))  # random 5 cars
# This is a resample object (indexes shown, not data):
sample_cars
# Turn it into data:
as.data.frame(sample_cars)
# or into a tibble:
as_tibble(sample_cars)
# or use the indices to get to the data:
mtcars[as.integer(sample_cars),]

library(modelr)
rs <- mtcars %>% resample_partition(c(train = 0.6, test = 0.4))
# address the datasets with:
as.data.frame(rs$train)
# as.data.frame(rs$test)
# Check execution:
lapply(rs, nrow)

# 0. Store training and test dataset for further use (optional):
d_train <- as.data.frame(rs$train)
d_test  <- as.data.frame(rs$test)
# 1. Fit the model on the training dataset:
lm1 <- lm(mpg ~ wt + hp + am:vs, data = rs$train)
# 2. Calculate the desired performance measure (e.g.
#    root mean square error (rmse)):
rmse_trn <- lm1 %>% rmse(rs$train)
rmse_tst <- lm1 %>% rmse(rs$test)
print(rmse_trn)
print(rmse_tst)
# Alternatively, work via explicit predictions and residuals:
# 2. Add predictions and residuals:
x_trn <- add_predictions(d_train, model = lm1) %>%
  add_residuals(model = lm1)
x_tst <- add_predictions(d_test, model = lm1) %>%
  add_residuals(model = lm1)
# 3. Calculate the desired risk metrics (via the residuals):
RMSE_trn <- sqrt(sum(x_trn$resid^2) / nrow(d_train))
RMSE_tst <- sqrt(sum(x_tst$resid^2) / nrow(d_test))
print(RMSE_trn)
print(RMSE_tst)

# Monte Carlo cross validation
cv_mc <- crossv_mc(data = mtcars,  # the dataset to split
                   n    = 50,      # n random train/test partitions
                   test = 0.25,    # validation set is 25%
                   id   = ".id")   # unique identifier for each model
# Example of use:
# Access the 2nd test dataset:
d <- data.frame(cv_mc$test[2])
# Access mpg in that data frame:
data.frame(cv_mc$test[2])$mpg
# More cryptic notations are possible to obtain the same:
mtcars[cv_mc[[2]][[2]][2]$idx, 1]

set.seed(1868)
library(modelr)  # sample functions
library(purrr)   # to use the function map()
cv_mc <- crossv_mc(mtcars, n = 50, test = 0.40)
mods <- map(cv_mc$train, ~ lm(mpg ~ wt + hp + am:vs, data = .))
RMSE <- map2_dbl(mods, cv_mc$test, rmse)
hist(RMSE, col = "khaki3")

library(magrittr)  # to access the %T>% pipe
crossv <- mtcars %>% crossv_mc(n = 50, test = 0.40)
RMSE <- crossv %$%
  map(train, ~ lm(mpg ~ wt + hp + am:vs, data = .)) %>%
  map2_dbl(crossv$test, rmse) %T>%
  hist(col = "khaki3", main = "Histogram of RMSE", xlab = "RMSE")

library(modelr)
# k-fold cross validation
cv_k <- crossv_kfold(data = mtcars,
                     k  = 5,      # number of folds
                     id = ".id")  # unique identifier for each fold
cv_k$test

set.seed(1868)
library(modelr)
library(magrittr)  # to access the %T>% pipe
crossv <- mtcars %>% crossv_kfold(k = 5)
RMSE <- crossv %$%
  map(train, ~ lm(mpg ~ wt + hp + am:vs, data = .)) %>%
  map2_dbl(crossv$test, rmse) %T>%
  hist(col = "khaki3", main = "Histogram of RMSE", xlab = "RMSE")

# Install quantmod:
if(!any(grepl("quantmod", installed.packages()))){
  install.packages("quantmod")}
# Load the library:
library(quantmod)
options("getSymbols.yahoo.warning" = FALSE)
# Download historic data of the Google share price:
getSymbols("GOOG", src = "yahoo")           # get Google's history
getSymbols(c("GS", "GOOG"), src = "yahoo")  # to load more than one
setSymbolLookup(HSBC = 'yahoo', GOOG = 'yahoo')
setSymbolLookup(DEXJPUS = 'FRED')
setSymbolLookup(XPTUSD = list(name = "XPT/USD", src = "oanda"))
# Save the settings in a file:
saveSymbolLookup(file = "qmdata.rda")
# Use this in new sessions calling:
loadSymbolLookup(file = "qmdata.rda")
# We can also download a list of symbols as follows:
getSymbols(c("HSBC", "GOOG", "DEXJPUS", "XPTUSD"))

stockList <- stockSymbols()  # get all symbols
nrow(stockList)              # number of symbols
colnames(stockList)          # information in this list

getFX("EUR/PLN", from = "2019-01-01")
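# Not in the book: getFX() stores the series in an object named after the
# currency pair without the slash, so the result above can be inspected as:
head(EURPLN)
plot(EURPLN, main = "EUR/PLN")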
getSymbols("HSBC", src = "yahoo")  # get HSBC's data from Yahoo
# 1. The bar chart:
barChart(HSBC)
# 2. The line chart:
lineChart(HSBC)
# Note: the lineChart is also the default, so this yields the same as plot(HSBC).
# 3. The candle chart:
candleChart(HSBC, subset = 'last 1 years', theme = "white", multi.col = TRUE)
candleChart(HSBC, subset = '2018::2018-01')
candleChart(HSBC, subset = 'last 1 months')

getSymbols(c("HSBC"))
chartSeries(HSBC, subset = 'last 4 months')
addBBands(n = 20, sd = 2, maType = "SMA", draw = 'bands', on = -1)

# Subsetting xts objects (myxtsdata and xtsdata are placeholders for any
# xts time series):
myxtsdata["2008-01-01/2010-12-31"]  # between 2 date-stamps
# All data before or after a certain time-stamp:
xtsdata["/2007"]  # from start of data until end of 2007
xtsdata["2009/"]  # from 2009 until the end of the data
# Select the data between different hours:
xtsdata["T07:15/T09:45"]

HSBC['2017']                 # returns HSBC's OHLC data for 2017
HSBC['2017-08']              # returns HSBC's OHLC data for August 2017
HSBC['2017-06::2018-01-15']  # from June 2017 to Jan 15 2018
HSBC['::']                   # returns all data
HSBC['2017::']               # returns all data in HSBC, from 2017 onward
my.selection <- c('2017-01', '2017-03', '2017-11')
HSBC[my.selection]

last(HSBC)              # returns the last quotes
last(HSBC, 5)           # returns the last 5 quotes
last(HSBC, '6 weeks')   # the last 6 weeks
last(HSBC, '-1 weeks')  # all but the last week
last(HSBC, '6 months')  # the last 6 months
last(HSBC, '3 years')   # the last 3 years
# these functions can also be combined:
last(first(HSBC, '3 weeks'), '5 days')

periodicity(HSBC)
unclass(periodicity(HSBC))
to.weekly(HSBC)
to.monthly(HSBC)
periodicity(to.monthly(HSBC))
ndays(HSBC); nweeks(HSBC); nyears(HSBC)
getFX("USD/EUR")
periodicity(USDEUR)
to.monthly(USDEUR)
periodicity(to.monthly(USDEUR))

endpoints(HSBC, on = "years")
# Find the maximum closing price each year:
apply.yearly(HSBC, FUN = function(x) {max(Cl(x))})
# The same thing - only more general:
subHSBC <- HSBC['2012::']
period.apply(subHSBC, endpoints(subHSBC, on = 'years'),
             FUN = function(x) {max(Cl(x))})
# The following line does the same but is faster:
as.numeric(period.max(Cl(subHSBC), endpoints(subHSBC, on = 'years')))

seriesHi(HSBC)
has.Cl(HSBC)
tail(Cl(HSBC))

Lag(Cl(HSBC))
Lag(Cl(HSBC), c(1, 5, 10))  # One, five and ten period lags
Next(OpCl(HSBC))
# Open to close one, two and three-day lags:
Delt(Op(HSBC), Cl(HSBC), k = 1:3)

dailyReturn(HSBC)
weeklyReturn(HSBC)
monthlyReturn(HSBC)
quarterlyReturn(HSBC)
yearlyReturn(HSBC)
allReturns(HSBC)  # all previous returns
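# Not in the book: an illustrative use of these returns, e.g. the
# annualised volatility estimated from the daily returns (assuming
# roughly 252 trading days per year):
sd(dailyReturn(HSBC), na.rm = TRUE) * sqrt(252)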
# First, we create a quantmod object.
# At this point, we do not need to load data.
setSymbolLookup(SPY = 'yahoo', VXN = list(name = '^VIX', src = 'yahoo'))
qmModel <- specifyModel(Next(OpCl(SPY)) ~ OpCl(SPY) + Cl(VIX))
head(modelData(qmModel))

getSymbols('HSBC', src = 'yahoo')  # Yahoo carries the adjusted price (Google does not)
lineChart(HSBC)
HSBC.tmp <- HSBC["2010/"]  # see: subsetting for xts objects
# use 70% of the data to train the model:
n <- floor(nrow(HSBC.tmp) * 0.7)
HSBC.train <- HSBC.tmp[1:n]                   # training data
HSBC.test  <- HSBC.tmp[(n+1):nrow(HSBC.tmp)]  # test data
# head(HSBC.train)
# Making sure that whenever we re-run this the latest data is pulled in:
m.qm.tr <- specifyModel(Next(Op(HSBC.train)) ~ Ad(HSBC.train) +
                          Hi(HSBC.train) - Lo(HSBC.train) + Vo(HSBC.train))
D <- modelData(m.qm.tr)
# Add the additional column:
D$diff.HSBC <- D$Hi.HSBC.train - D$Lo.HSBC.train
# Note that the last value is NA:
tail(D, n = 3L)
# Since the last value is NA, let us remove it:
D <- D[-nrow(D),]
colnames(D) <- c("Next.Op", "Ad", "Hi", "Lo", "Vo", "Diff")
m1 <- lm(D$Next.Op ~ D$Ad + D$Diff + D$Vo)
summary(m1)
m2 <- lm(D$Next.Op ~ D$Ad + D$Diff)
summary(m2)
qqnorm(m2$residuals)
qqline(m2$residuals, col = 'blue', lwd = 2)

m.qm.tst <- specifyModel(Next(Op(HSBC.test)) ~ Ad(HSBC.test) +
                           Hi(HSBC.test) - Lo(HSBC.test) + Vo(HSBC.test))
D.tst <- modelData(m.qm.tst)
D.tst$diff.HSBC.test <- D.tst$Hi.HSBC.test - D.tst$Lo.HSBC.test
#tail(D.tst)  # the last value is NA
D.tst <- D.tst[-nrow(D.tst),]  # remove the last value that is NA
colnames(D.tst) <- c("Next.Op", "Ad", "Hi", "Lo", "Vo", "Diff")
a   <- coef(m2)['(Intercept)']
bAd <- coef(m2)['D$Ad']
bD  <- coef(m2)['D$Diff']
est <- a + bAd * D.tst$Ad + bD * D.tst$Diff
# -- Mean squared prediction error (MSPE), reported as its square root:
#sqrt(mean(((predict(m2, newdata = D.tst) - D.tst$Next.Op)^2)))
sqrt(mean(((est - D.tst$Next.Op)^2)))
# -- Mean absolute error (MAE):
mean((abs(est - D.tst$Next.Op)))
# -- Mean absolute percentage error (MAPE):
mean((abs(est - D.tst$Next.Op)) / D.tst$Next.Op)
# -- Squared sum of residuals:
print(sum(residuals(m2)^2))
# -- Confidence intervals for the model:
print(confint(m2))
# Compare the coefficients in a refit on the test data:
m3 <- lm(D.tst$Next.Op ~ D.tst$Ad + D.tst$Diff)
summary(m3)
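# Not in the book: put the coefficients of the model fitted on the
# training data (m2) next to those of the refit on the test data (m3):
cbind(train = coef(m2), test = coef(m3))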
# MCDA ####
M0 <- matrix(c(
  1.6, -0.83, 1.4, 4.7, 1, 0.9, 1.1,
  1.8, -0.83, 1.0, 4.7, 1, 0.9, 0.8,
  1.8, -0.83, 1.2, 4.7, 1, 0.9, 0.6,
  1.6, -1.24, 1.4, 2.8, 1, 0.9, 0.8,
  0.9, -0.83, 1.4, 4.7, 1, 0.7, 0.8,
  0.9, -0.83, 0.8, 4.7, 1, 0.7, 0.6,
  0.7,  1.02, 0.2, 2.0, 3, 1.1, 1.3,
  1.1,  0.52, 1.0, 1.3, 3, 0.6, 0.9,
  1.2, -0.83, 1.3, 4.7, 1, 0.8, 0.5,
  0.9,  0.18, 0.9, 7.3, 1, 0.8, 0.6
  ), byrow = TRUE, ncol = 7)
colnames(M0) <- c("tlnt", "stab", "cost", "infl", "trvl", "infr", "life")
# We use the IATA code of a nearby airport as abbreviation,
# so, instead of:
# rownames(M0) <- c("Bangalore", "Mumbai", "Delhi", "Manila", "Hyderabad",
#                   "Sao Paulo", "Dublin", "Krakow", "Chennai", "Buenos Aires")
# ... we use this:
rownames(M0) <- c("BLR", "BOM", "DEL", "MNL", "HYD",
                  "GRU", "DUB", "KRK", "MAA", "EZE")
M0  # inspect the matrix

# Political stability is a number between -2.5 and 2.5.
# So, we make it all positive by adding 2.5:
M0[,2] <- M0[,2] + 2.5
# Lower wage inflation is better, so invert the data:
M0[,4] <- 1 / M0[,4]

# Then we define a function:
# mcda_rescale_dm
# Rescales a decision matrix M
# Arguments:
#    M -- decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
# Returns
#    M -- normalised decision matrix
mcda_rescale_dm <- function (M) {
  colMaxs <- function(M) apply(M, 2, max, na.rm = TRUE)
  colMins <- function(M) apply(M, 2, min, na.rm = TRUE)
  M <- sweep(M, 2, colMins(M), FUN = "-")
  M <- sweep(M, 2, colMaxs(M) - colMins(M), FUN = "/")
  M
}
# Use this function:
M <- mcda_rescale_dm(M0)
# Show the new decision matrix:
knitr::kable(round(M, 2))

# mcda_get_dominated
# Finds the alternatives that are dominated by others
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
# Returns
#    Dom -- prefM -- a preference matrix with 1 in position ij
#           if alternative i is dominated by alternative j.
mcda_get_dominated <- function(M) {
  Dom <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  dominatedOnes <- c()
  for (i in 1:nrow(M)) {
    for (j in 1:nrow(M)) {
      isDom <- TRUE
      for (k in 1:ncol(M)) {
        isDom <- isDom && (M[i,k] >= M[j,k])
      }
      if(isDom && (i != j)) {
        Dom[j,i] <- 1
        dominatedOnes <- c(dominatedOnes, j)
      }
    }
  }
  colnames(Dom) <- rownames(Dom) <- rownames(M)
  Dom
}

# mcda_get_dominants
# Finds the alternatives that dominate others
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
# Returns
#    Dom -- prefM -- a preference matrix with 1 in position ij
#           if alternative i dominates alternative j.
mcda_get_dominants <- function (M) {
  M <- t(mcda_get_dominated(M))
  class(M) <- "prefM"
  M
}
Dom <- mcda_get_dominants(M)
print(Dom)

# mcda_del_dominated
# Removes the dominated alternatives from a decision matrix
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
# Returns
#    A decision matrix without the dominated alternatives
mcda_del_dominated <- function(M) {
  Dom <- mcda_get_dominated(M)
  M[rowSums(Dom) == 0,]
}
M1 <- mcda_del_dominated(M)
knitr::kable(round(M1, 2))
M1 <- mcda_rescale_dm(M1)

# First, we load diagram:
require(diagram)
# plot.prefM
# Specific function to handle objects of class prefM for the
# generic function plot()
# Arguments:
#    PM  -- prefM -- preference matrix
#    ... -- additional arguments passed to plotmat()
#           of the package diagram.
plot.prefM <- function(PM, ...) {
  X <- t(PM)
  # We want arrows to mean '... is better than ...';
  # plotmat uses the opposite convention, because it expects flows.
  plotmat(X,
          box.size    = 0.1,
          cex.txt     = 0,
          lwd         = 5 * X,  # lwd proportional to preference
          self.lwd    = 3,
          lcol        = 'blue',
          self.shiftx = c(0.06, -0.06, -0.06, 0.06),
          box.lcol    = 'blue',
          box.col     = 'khaki3',
          box.lwd     = 2,
          relsize     = 0.9,
          box.prop    = 0.5,
          endhead     = FALSE,
          main        = "",
          ...)
}
# We pass the argument 'curve = 0' to the function plotmat, since otherwise
# the arrow from BLR to MAA would be hidden behind the box of EZE.
plot(Dom, curve = 0)

# mcda_wsm
# Calculates the Weighted Sum Method (WSM) score for a decision matrix M
# and weights w.
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
#    w -- numeric vector of weights for the criteria
# Returns
#    a vector with a score for each alternative
mcda_wsm <- function(M, w) {
  X <- M %*% w
  colnames(X) <- 'pref'
  X
}

# The criteria: "tlnt" "stab" "cost" "infl" "trvl" "infr" "life"
w <- c( 0.125,  0.2,   0.2,   0.2,   0.175, 0.05,  0.05)
w <- w / sum(w)  # the sum was 1 already, but just to be sure.
# Now we can execute our function mcda_wsm():
mcda_wsm(M1, w)
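# Not in the book: rank the remaining alternatives by their weighted-sum
# score (highest score first):
scores <- mcda_wsm(M1, w)
scores[order(scores[, 'pref'], decreasing = TRUE), , drop = FALSE]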
# mcda_wsm_score
# Returns the scores for each of the alternatives for each of
# the criteria, weighted by their weights.
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
#    w -- numeric vector of weights for the criteria
# Returns
#    a score-matrix of class scoreM
mcda_wsm_score <- function(M, w) {
  X <- sweep(M, MARGIN = 2, w, `*`)
  class(X) <- 'scoreM'
  X
}

# plot.scoreM
# Specific function for an object of class scoreM for the
# generic function plot().
# Arguments:
#    M -- scoreM -- score matrix
# Returns:
#    plot
plot.scoreM <- function (M) {
  # 1. order the rows according to rowSums
  M <- M[order(rowSums(M), decreasing = TRUE),]
  # 2. use a bar-plot on the transposed matrix
  barplot(t(M),
          legend = colnames(M),
          xlab   = 'Score',
          col    = rainbow(ncol(M))
          )
}
# With the normalised decision matrix M1 and the weights w, we calculate
# the score matrix:
sM <- mcda_wsm_score(M1, w)
# Then we plot the result:
plot(sM)

# mcda_electre
# Pushes the preference matrices PI.plus, PI.min and
# PI.indif into the enclosing (here: the global) environment.
# Arguments:
#    M -- normalized decision matrix with alternatives in rows,
#         criteria in columns and higher numbers are better.
#    w -- numeric vector of weights for the criteria
# Returns nothing but leaves as side effect:
#    PI.plus  -- the matrix of preference
#    PI.min   -- the matrix of non-preference
#    PI.indif -- the indifference matrix
mcda_electre <- function(M, w) {
  # initializations
  PI.plus  <<- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  PI.min   <<- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  PI.indif <<- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  # calculate the preference matrix
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      for (k in 1:ncol(M)) {
        if (M[i,k] >  M[j,k]) {PI.plus[i,j]  <<- PI.plus[i,j]  + w[k]}
        if (M[j,k] >  M[i,k]) {PI.min[i,j]   <<- PI.min[i,j]   + w[k]}
        if (M[j,k] == M[i,k]) {PI.indif[j,i] <<- PI.indif[j,i] + w[k]}
      }
    }
  }
}

# mcda_electre1
# Calculates the preference matrix for the ELECTRE method
# Arguments:
#    M      -- decision matrix (colnames are criteria, rownames are alternatives)
#    w      -- vector of weights
#    Lambda -- the cutoff for the levels of preference
#    r      -- the vector of maximum inverse preferences allowed
#    index  -- one of ['C1', 'C2']
# Returns:
#    object of class prefM (preference matrix)
mcda_electre1 <- function(M, w, Lambda, r, index = 'C1') {
  # get PI.plus, PI.min and PI.indif
  mcda_electre(M, w)
  # initializations
  CM <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  PM <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  colnames(PM) <- rownames(PM) <- rownames(M)
  # calculate the preference matrix
  if (index == 'C1') {
    # for similarity index C1
    for (i in 1:nrow(M)){
      for (j in 1:nrow(M)) {
        CM[i,j] <- (PI.plus[i,j] + PI.indif[i,j]) /
                   (PI.plus[i,j] + PI.indif[i,j] + PI.min[i,j])
        if((CM[i,j] > Lambda) && all((M[j,] - M[i,]) <= r) &&
           (PI.plus[i,j] > PI.min[i,j]))
          PM[i,j] = 1
      }
    }
  } else {
    # for similarity index C2
    for (i in 1:nrow(M)){
      for (j in 1:nrow(M)) {
        if (PI.min[i,j] != 0)
             {CM[i,j] <- (PI.plus[i,j]) / (PI.min[i,j])}
        else {CM[i,j] = 1000 * PI.plus[i,j]}  # to avoid dividing by 0
        if((CM[i,j] > Lambda) && all((M[j,] - M[i,]) <= r) &&
           (PI.plus[i,j] > PI.min[i,j]))
          {PM[i,j] = 1}
      }
    }
  }
  for (i in 1:nrow(PM)) PM[i,i] = 0
  class(PM) <- 'prefM'
  PM
}

# If we did not push the values PI.plus, PI.min, and PI.indif into the
# environment with <<-, we would let mcda_electre() end with an explicit
# return value:
# list(PI.plus = PI.plus, PI.min = PI.min, PI.indif = PI.indif)
# We would then write:
# X <- mcda_electre(M, w)
# and address the matrices as, e.g.:
# X$PI.min[i,j]
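# A minimal, self-contained sketch of that alternative (not in the book;
# the name mcda_electre_lst is ours): the same logic as mcda_electre(),
# but with local assignment and a list as explicit return value:
mcda_electre_lst <- function(M, w) {
  PI.plus  <- matrix(0, nrow = nrow(M), ncol = nrow(M))
  PI.min   <- matrix(0, nrow = nrow(M), ncol = nrow(M))
  PI.indif <- matrix(0, nrow = nrow(M), ncol = nrow(M))
  for (i in 1:nrow(M)) {
    for (j in 1:nrow(M)) {
      for (k in 1:ncol(M)) {
        if (M[i,k] >  M[j,k]) PI.plus[i,j]  <- PI.plus[i,j]  + w[k]
        if (M[j,k] >  M[i,k]) PI.min[i,j]   <- PI.min[i,j]   + w[k]
        if (M[j,k] == M[i,k]) PI.indif[j,i] <- PI.indif[j,i] + w[k]
      }
    }
  }
  list(PI.plus = PI.plus, PI.min = PI.min, PI.indif = PI.indif)
}
# e.g.: X <- mcda_electre_lst(M, w); X$PI.min[1, 2]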
# The criteria: "tlnt" "stab" "cost" "infl" "trvl" "infr" "life"
w <- c( 0.125,  0.2,   0.2,   0.2,   0.175, 0.05,  0.05)
w <- w / sum(w)  # the sum was 1 already, but just to be sure.
r <- c(0.3, 0.5, 0.5, 0.5, 1, 0.9, 0.5)
eM <- mcda_electre1(M1, w, Lambda = 0.6, r = r)
print(eM)
plot(eM)

# The criteria: "tlnt" "stab" "cost" "infl" "trvl" "infr" "life"
w <- c( 0.125,  0.2,   0.2,   0.2,   0.175, 0.05,  0.05)
w <- w / sum(w)  # the sum was 1 already, but just to be sure.
r <- c(0.3, 0.5, 0.5, 0.5, 1, 0.9, 0.5)
eM <- mcda_electre1(M1, w, Lambda = 1.25, r = r, index = 'C2')
plot(eM)

# The criteria: "tlnt" "stab" "cost" "infl" "trvl" "infr" "life"
w <- c( 0.125,  0.2,   0.2,   0.2,   0.175, 0.05,  0.05)
w <- w / sum(w)  # the sum was 1 already, but just to be sure.
r <- c(1, 1, 1, 1, 1, 1, 1)
eM <- mcda_electre1(M1, w, Lambda = 0.0, r = r)
print(eM)
plot(eM)

mcda_electre2 <- function (M1, w) {
  r <- rep(1, ncol(M1))
  mcda_electre1(M1, w, Lambda = 0.0, r = r)
}

# A complete ranking would satisfy:
# sum(rowSums(prefM)) == A * A - A
# with prefM the preference matrix and A the number of alternatives.

library(ggplot2)
library(latex2exp)
d <- seq(from = -3, to = +3, length.out = 100)
## error function family:
erf     <- function(x) 2 * pnorm(x * sqrt(2)) - 1
# (see Abramowitz and Stegun 29.2.29)
erfc    <- function(x) 2 * pnorm(x * sqrt(2), lower = FALSE)
erfinv  <- function (x) qnorm((1 + x)/2) / sqrt(2)
erfcinv <- function (x) qnorm(x/2, lower = FALSE) / sqrt(2)
## Gudermannian function
gd <- function(x) asin(tanh(x))

f1 <- function(x) erf( sqrt(pi) / 2 * x)
f2 <- function(x) tanh(x)
f3 <- function(x) 2 / pi * gd(pi / 2 * x)
f4 <- function(x) x / sqrt(1 + x^2)
f5 <- function(x) 2 / pi * atan(pi / 2 * x)
f6 <- function(x) x / (1 + abs(x))
df <- data.frame(d = d, y = f1(d), Function = "erf( sqrt(pi) / 2 * d)")
df <- rbind(df, data.frame(d = d, y = f2(d), Function = "tanh(d)"))
df <- rbind(df, data.frame(d = d, y = f3(d), Function = "2 / pi * gd(pi / 2 * d)"))
df <- rbind(df, data.frame(d = d, y = f4(d), Function = "d / sqrt(1 + d^2)"))
df <- rbind(df, data.frame(d = d, y = f5(d), Function = "2 / pi * atan(pi / 2 * d)"))
df <- rbind(df, data.frame(d = d, y = f6(d), Function = "d / (1 + abs(d))"))
fn <- ""
fn[1] <- "erf \\left(\\frac{\\sqrt{\\pi} d}{2}\\right)"
fn[2] <- "tanh(d)"
fn[3] <- "\\frac{2}{\\pi} gd\\left( \\frac{\\pi d}{2} \\right)"
fn[4] <- "\\frac{d}{\\sqrt{1 + d^2}}"
fn[5] <- "\\frac{2}{\\pi} atan\\left(\\frac{\\pi d}{2}\\right)"
fn[6] <- "\\frac{d}{1 + |d|}"
ggplot(data = df, aes(x = d, y = y, color = Function)) +
  geom_line(aes(col = Function), lwd = 2) +
  guides(color = guide_legend(title = NULL)) +
  scale_color_discrete(labels = lapply(sprintf('$\\pi(d) = %s$', fn), TeX)) +
  theme(legend.justification = c(1, 0),
        legend.position = c(1, 0),  # south east
        legend.box.margin = ggplot2::margin(rep(20, times = 4)),
        legend.key.size = unit(1.5, "cm")  # increase vertical space between legend items
        ) +
  ylab(TeX('Preference --- $\\pi$'))

f_curve <- function(f) {
  g <- Vectorize(f)
  s <- deparse(f)[2]
  curve(g, xlab = '', ylab = '', col = 'red', lwd = 3,
        from = -1, to = +1,
        main = bquote(bold(.(s)))
        # main = s
        )
}
gaus <- function(x) exp(-(x-0)^2 / (0.5)^2)
f1 <- function(x) ifelse(x < 0, 0, - 3/2 * x^5 + 5/2 * x^3)
f2 <- function(x) ifelse(x < 0, 0, sin(pi * x / 2))
f3 <- function(x) min(1, max(1.5*x - 0.2, 0))
f4 <- function(x) ifelse(x < 0, 0, x)
f5 <- function(x) ifelse(x < 0, 0, 1 - gaus(x))
f6 <- function(x) 1 + tanh(6*(x - 0.6))
par(mfrow = c(3,2))
f_curve(f1)
f_curve(f2)
f_curve(f3)
f_curve(f4)
f_curve(f5)
f_curve(f6)
par(mfrow = c(1,1))

# mcda_promethee
# delivers the preference flow matrices for the Promethee method
# Arguments:
#    M -- decision matrix
#    w -- weights
#    piFUNs -- a list of preference functions,
#              if not provided, max(0, d) is assumed.
# Returns (as side effect)
#    phi_plus <<- rowSums(PI.plus)
#    phi_min  <<- rowSums(PI.min)
#    phi_     <<- phi_plus - phi_min
#
mcda_promethee <- function(M, w, piFUNs = 'x') {
  if (identical(piFUNs, 'x')) {
    # no preference functions provided: use max(0, d) for all criteria,
    # created via a factory function:
    makeFUN <- function(x) {x; function(x) max(0, x) }
    P <- list()
    for (k in 1:ncol(M)) P[[k]] <- makeFUN(k)
  } else {
    # in all other cases we assume a list of preference functions:
    P <- piFUNs
  }
  # initializations
  PI.plus <<- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  PI.min  <<- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  # calculate the preference matrix
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      for (k in 1:ncol(M)) {
        if (M[i,k] > M[j,k]) {
          PI.plus[i,j] = PI.plus[i,j] + w[k] * P[[k]](M[i,k] - M[j,k])
        }
        if (M[j,k] > M[i,k]) {
          PI.min[i,j] = PI.min[i,j] + w[k] * P[[k]](M[j,k] - M[i,k])
        }
      }
    }
  }
  # note the <<- which pushes the results to the enclosing environment
  phi_plus <<- rowSums(PI.plus)
  phi_min  <<- rowSums(PI.min)
  phi_     <<- phi_plus - phi_min
}

# mcda_promethee1
# Calculates the preference matrix for the Promethee I method
# Arguments:
#    M -- decision matrix
#    w -- weights
#    piFUNs -- a list of preference functions,
#              if not provided, max(0, d) is assumed.
# Returns:
#    prefM object -- the preference matrix
#
mcda_promethee1 <- function(M, w, piFUNs = 'x') {
  # mcda_promethee adds phi_min, phi_plus & phi_ to this environment:
  mcda_promethee(M, w, piFUNs)
  # Now, calculate the preference relations:
  pref <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      if (phi_plus[i] == phi_plus[j] && phi_min[i] == phi_min[j]) {
        pref[i,j] <- 0
      } else if ((phi_plus[i] >  phi_plus[j] && phi_min[i] < phi_min[j]) ||
                 (phi_plus[i] >= phi_plus[j] && phi_min[i] < phi_min[j])) {
        pref[i,j] <- 1
      } else {
        pref[i,j] = NA
      }
    }
  }
  rownames(pref) <- colnames(pref) <- rownames(M)
  class(pref) <- 'prefM'
  pref
}

# We reuse the decision matrix M1 and weights w as defined above:
m <- mcda_promethee1(M1, w)
plot(m)

# Make shortcuts for some of the functions that we will use:
gauss_val <- function(d) 1 - exp(-(d - 0.1)^2 / (2 * 0.5^2))
x         <- function(d) max(0, d)
minmax    <- function(d) min(1, max(0, 2*(d - 0.5)))
step      <- function(d) ifelse(d > 0.5, 1, 0)
# Create a list of 7 functions (one per criterion):
f <- list()
f[[1]] <- gauss_val
f[[2]] <- x
f[[3]] <- x
f[[4]] <- gauss_val
f[[5]] <- step
f[[6]] <- x
f[[7]] <- minmax
# Use the functions in mcda_promethee1:
m <- mcda_promethee1(M1, w, f)
# Plot the results:
plot(m)

library(latex2exp)
f_curve <- function(f) {
  g <- Vectorize(f)
  curve(g, xlab = '', ylab = '', col = 'red', lwd = 3,
        from = -1, to = +1,
        main = TeX(toString(deparse(f)[2])))
}
gaus <- function(x) exp(-(x-0)^2 / 0.5)
f1 <- function(x) - 3/2 * x^5 + 5/2 * x^3
f2 <- function(x) sin(pi * x / 2)
f3 <- function(x) min(1, max(2*x, -1))
f4 <- function(x) x
f5 <- function(x) ifelse(x < 0, gaus(x) - 1, 1 - gaus(x))
f6 <- function(x) tanh(3 * x)
par(mfrow = c(3,2))
f_curve(f1)
f_curve(f2)
f_curve(f3)
f_curve(f4)
f_curve(f5)
f_curve(f6)
par(mfrow = c(1,1))

# mcda_promethee2
# Calculates the Promethee II preference matrix
# Arguments:
#    M -- decision matrix
#    w -- weights
#    piFUNs -- a list of preference functions,
#              if not provided, max(0, d) is assumed.
# Returns:
#    prefM object -- the preference matrix
#
mcda_promethee2 <- function(M, w, piFUNs = 'x') {
  # Promethee II
  mcda_promethee(M, w, piFUNs)
  pref <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      pref[i,j] <- max(phi_[i] - phi_[j], 0)
    }
  }
  rownames(pref) <- colnames(pref) <- rownames(M)
  class(pref) <- 'prefM'
  pref
}
m <- mcda_promethee2(M1, w)
plot(m)
# We can consider the rowSums as a "score":
rowSums(m)
# So, consider the prefM as a score-matrix (scoreM):
plot.scoreM(m)

pca1 <- prcomp(M1)
summary(pca1)
# plot for the prcomp object shows the variance explained by each PC
plot(pca1, type = 'l')
# biplot shows a projection in the 2D plane (PC1, PC2)
biplot(pca1)

library(ggplot2)
library(ggfortify)
library(cluster)
# Autoplot with labels colored
autoplot(pca1, data = M1, label = TRUE, shape = FALSE,
         colour = 'cost', label.size = 6,
         loadings = TRUE, loadings.colour = 'blue',
         loadings.label = TRUE, loadings.label.size = 6
         )
# Autoplot with visualization of 2 clusters
autoplot(fanny(M1, 2), label = TRUE, frame = TRUE,
         shape = FALSE, label.size = 6,
         loadings = TRUE, loadings.colour = 'blue',
         loadings.label = TRUE, loadings.label.size = 6)

# Use the weights as defined above:
w
# Calculate the coordinates of the decision vector:
dv1 <- sum(w * pca1$rotation[,1])  # decision vector PC1 component
dv2 <- sum(w * pca1$rotation[,2])  # decision vector PC2 component
p <- autoplot(pam(M1, 2), frame = TRUE, frame.type = 'norm',
              label = TRUE, shape = FALSE,
              label.colour = 'blue', label.face = 'bold', label.size = 6,
              loadings = TRUE, loadings.colour = 'dodgerblue4',
              loadings.label = TRUE, loadings.label.size = 6,
              loadings.label.colour = 'dodgerblue4',
              loadings.label.vjust = 1.2, loadings.label.hjust = 1.3
              )
p <- p + scale_y_continuous(breaks = round(seq(from = -1, to = +1, by = 0.2), 2))
p <- p + scale_x_continuous(breaks = round(seq(from = -1, to = +1, by = 0.2), 2))
p <- p + geom_segment(aes(x = 0, y = 0, xend = dv1, yend = dv2),
                      size = 2, arrow = arrow(length = unit(0.5, "cm")))
p <- p + ggplot2::annotate("text", x = dv1 + 0.2, y = dv2 - 0.01,
                           label = "decision vector",
                           colour = "black", fontface = 2)
p

### Outrank
# outrank
# Arguments:
#    M -- the decision matrix (formulated for a maximum problem)
#    w -- the weights to be used for each criterion
outrank <- function (M, w) {
  order      <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  order.inv  <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  order.pref <- matrix(data = 0, nrow = nrow(M), ncol = nrow(M))
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      for (k in 1:ncol(M)) {
        if (M[i,k] > M[j,k]) { order[i,j]     = order[i,j]     + w[k] }
        if (M[j,k] > M[i,k]) { order.inv[i,j] = order.inv[i,j] + w[k] }
      }
    }
  }
  for (i in 1:nrow(M)){
    for (j in 1:nrow(M)) {
      if (order[i,j] > order[j,i]){
        order.pref[i,j] = 1
        order.pref[j,i] = 0
      } else if (order[i,j] < order[j,i]) {
        order.pref[i,j] = 0
        order.pref[j,i] = 1
      } else {
        order.pref[i,j] = 0
        order.pref[j,i] = 0
      }
    }
  }
  class(order.pref) <- 'prefM'
  order.pref
}
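# Not in the book's code: illustrative usage of outrank() on the reduced
# decision matrix M1 with the weights w, plotted with plot.prefM():
oM <- outrank(M1, w)
rownames(oM) <- colnames(oM) <- rownames(M1)  # add the alternative names
print(oM)
plot(oM)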