---
title: "Divsersity in Action"
output:
flexdashboard::flex_dashboard:
theme: cosmo
orientation: rows
vertical_layout: fill
#storyboard: true
social: menu
source: embed
---
```{r}
# (C) Philippe J.S. De Brouwer -- 2019
# demo: http://rpubs.com/phdb/diversity_dash01
```
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(ggplot2)
library(knitr)
library(gridExtra)
#install.packages('plotly')
library(plotly)
N <- 150
set.seed(2020)
d0 <- data.frame("ID" = 1:N,
"age" = round(rlnorm(N, log(30), log(1.25))),
"continent" = ifelse(runif(N) < 0.3, "America", ifelse(runif(N) < 0.7, "Europe","Other")),
"gender" = ifelse(runif(N) < 0.4, "F", "M"),
"grade" = 0,
"team" = ifelse(runif(N) < 0.6, "bigTeam", ifelse(runif(N) < 0.6,
"mediumTeam", ifelse(runif(N) < 0.8, "smallTeam", "XsmallTeam"))),
"dependants" = round(rlnorm(N,log(0.65),log(1.5))),
"performance" = ifelse(runif(N) < 0.1, "L", ifelse(runif(N) < 0.6, "M",
ifelse(runif(N) < 0.7, "H", "XH"))),
"salary" = 0,
"timestamp" = as.Date("2020-01-01")
)
d1 <- d0 %>%
mutate(age = ifelse((age < 18), age + 10, age)) %>%
mutate(grade = ifelse(runif(N) * age < 20, 0, ifelse(runif(N) * age < 25, 1, ifelse(runif(N) * age < 30, 2, 3)))) %>%
mutate(salary = round(exp(0.75*grade)*4000 + rnorm(N,0,2500))) %>%
mutate(lastPromoted = round(exp(0.05*(3-grade))*1 + abs(rnorm(N,0,5))) -1)
```
Overview
========
Row
-------------------------------------
```{r}
# our diversity function
diversity <- function(x, prior = NULL) {
if (min(x) <= 0) {return(0);} # the log will fail for 0
# if the numbers are higher than 1, then not probabilities but
# populations are given, so we rescale to probabilities:
if (sum(x) != 1) {x <- x / sum(x)}
N <- length(x)
if(!is.null(prior)) {
for (i in (1:N)) {
a <- (1 - 1 / (N * prior[i])) / (1 - prior[i])
b <- (1 - N * prior[i]^2) / (N * prior[i] * (1 - prior[i]))
x[i] <- a * x[i]^2 + b * x[i]
}
}
f <- function(x) x * log(x)
x1 <- mapply(FUN = f, x)
- sum(x1) / log(N)
}
# the gauges for the different dimensions
```
### Gender
```{r}
# ranges:
rGreen <- c(0.900001, 1)
rAmber <- c(0.800001, 0.9)
rRed <- c(0, 0.8)
iGender <- round(diversity(table(d1$gender)),3)
gauge(iGender, min = 0, max = 1, gaugeSectors(
success = rGreen, warning = rAmber, danger = rRed
))
kable(table(d1$gender))
```
### Age
```{r}
# consider each band of ten years as a group
iAge <- round(diversity(table(round(d1$age/10))),3)
gauge(iAge, min = 0, max = 1, gaugeSectors(
success = rGreen, warning = rAmber, danger = rRed
))
kable(table(round(d1$age/10)*10))
```
### Roots
```{r}
iRoots <- round(diversity(table(d1$continent)),3)
gauge(iRoots, min = 0, max = 1, gaugeSectors(
success = rGreen, warning = rAmber, danger = rRed
))
kable(table(d1$continent))
```
### Dependants
```{r}
# we only monitor if someone has dependants
xdep <- ifelse(d1$dependants >= 1, 1, 0)
iDep <- round(diversity(table(xdep)),3)
gauge(iDep, min = 0, max = 1, gaugeSectors(
success = rGreen, warning = rAmber, danger = rRed
))
kable(table(d1$dependants))
```
Row
-------------------------------------
### Info
The diversity indeces show how diverse our workforce is. They are calculated similar to entropy: $I = -\frac{1}{\log(N)} \sum_i^N {p_i \log p_i}$, where there are $N$ possible and mutually exclusive states $i$. They range from $0$ to $1$.
### Average Diversity Index
```{r}
x <- mean(c(iGender, iAge, iDep, iRoots))
valueBox(x,
icon = ifelse(x > 0.9, "fa-smile" , ifelse(x > 0.8, "fa-meh", "fa-sad-tear")),
color = ifelse(x > 0.9, "success" , ifelse(x > 0.8, "warning", "danger"))
)
```
Gender
======================================
Row {.tabset}
-------------------------------------
### Composition
```{r}
p2 <- ggplot(data = d1, aes(x=gender, fill=gender)) +
geom_bar(stat="count", width=0.7) +
facet_grid(rows=d1$grade) +
ggtitle('workforce composition i.f.o. salary grade (level in the company)')
ggplotly(p2)
```
### Salary
```{r}
p1 <- ggplot(data = d1, aes(x=gender, y=salary, fill=gender)) +
geom_boxplot() +
facet_grid(rows=d1$grade) +
ggtitle('The salary gap per salary grade (level in the company)')
ggplotly(p1)
```
### Promotions
```{r}
d1$promoted = ifelse(d1$lastPromoted <= 2,1,0)
p <- ggplot(data = d1, aes(x=gender, fill=gender, y=promoted)) +
stat_summary(fun.y="mean", geom="bar") +
facet_grid(rows=d1$grade) +
ggtitle('promotion propensity per grae')
p
ggplotly(p)
```
Age
===
Row {.tabset}
-------------------------------------
### Histogram
```{r}
qplot(d1$age, geom="histogram", binwidth=5,
fill=I("steelblue"), col=I("black")
) +
xlab('Age') +
ggtitle('Histogram for Age')
```
Roots {.tabset}
=====
Row {.tabset}
-------------------------------------
### rworldmap
```{r warning=FALSE}
# the default R-approach:
install.packages('rworldmap')
library(rworldmap)
nbrPerCountry = read.table(text="
country value
Poland 100
Ukraine 65
UK 2
USA 1
China 3
Germany 0
France 1
Italy 20
Greece 25
Spain 13
Portugal 7
Mexico 55
Belarus 5
Russia 7
Vietnam 1
India 25
Belgium 2
Chile 6
Congo 1
", header=T)
x <- joinCountryData2Map(nbrPerCountry, joinCode="NAME", nameJoinColumn="country")
mapCountryData(x, nameColumnToPlot="value", catMethod="fixedWidth")
```
### leaflet
```{r}
#install.packages('maps')
library(maps)
#install.packages('sp')
library(sp)
#install.packages('leaflet')
library(leaflet)
map <- leaflet() %>%
addProviderTiles(providers$OpenStreetMap) %>%
setView(lng = 0, lat = 0, zoom = 2)
map
```
Dependants
==========
Row {.tabset}
-------------------------------------
### Histogram
```{r}
qplot(d1$dependants, geom="histogram", binwidth=1,
fill=I("steelblue"), col=I("black")
) +
xlab('Dependants') +
ggtitle('Histogram for number of dependants')
```