library(tidyverse)
# Location of the splits file (an XML document with an .lss extension).
splitsfile <- "~/Downloads/RSA 2018 - DDSS - Presentation (5).lss"

# Parse the XML and turn the whole document into nested R lists in one go.
splits <- XML::xmlToList(XML::xmlParse(splitsfile))

# Sanity check: print the class of the first attempt's "ended" attribute.
class(
  splits[["AttemptHistory"]][1][["Attempt"]][[".attrs"]][["ended"]]
)
# Build `chunk2`: one row per entry of AttemptHistory with the attempt id, the
# start/end timestamps (still as text) and the recorded RealTime string
# (NA when the attempt has no RealTime child).
#
# Iterating over the list itself (instead of 1:length(...)) keeps this working
# when the history is empty, and bind_rows() avoids rbind-ing tibbles by hand.
chunk2 <- bind_rows(lapply(unname(splits[["AttemptHistory"]]), function(attempt) {
  # XML::xmlToList() yields a bare named character vector of attributes for an
  # element without children, and a list carrying the attributes under
  # ".attrs" for an element with children. Detect that shape directly rather
  # than inferring it from the presence of RealTime/PauseTime.
  attrs <- if (is.list(attempt) && ".attrs" %in% names(attempt)) {
    attempt[[".attrs"]]
  } else {
    attempt
  }

  tibble(
    attemptID = attrs[["id"]],
    start = attrs[["started"]],
    end = attrs[["ended"]],
    # Typed NA so the column is character in every row.
    runtime = if ("RealTime" %in% names(attempt)) {
      attempt[["RealTime"]]
    } else {
      NA_character_
    }
  )
}))
# Turn each "hours:minutes:seconds" runtime string into a number of seconds,
# then store the column as a difftime measured in seconds. Missing runtimes
# stay NA.
chunk2$runtime <- as.difftime(
  unlist(lapply(chunk2$runtime, function(stamp) {
    parts <- unlist(stringr::str_split(stamp, ":"))
    hours <- as.double(parts[1])
    minutes <- as.double(parts[2])
    seconds <- as.double(parts[3])
    hours * 60 * 60 + minutes * 60 + seconds
  })),
  units = "secs"
)
# Parse the start/end timestamp strings (month-day-year, then h:m:s) into
# date-times.
chunk2 <- chunk2 %>%
  mutate(
    start = lubridate::mdy_hms(start),
    end = lubridate::mdy_hms(end)
  )
# Build `chunk`: one row per (segment, attempt) that has a recorded RealTime,
# with the attempt id, the RealTime string and the segment name.
chunk <- do.call(rbind, lapply(splits[["Segments"]], function(segments) {
  segments.df <- do.call(rbind, lapply(segments[["SegmentHistory"]], function(segment) {
    # History entries without a RealTime child contribute nothing (NULL is
    # dropped by rbind).
    if ("RealTime" %in% names(segment)) {
      data.frame(
        # [[ ]] strips the "id" name so it does not leak into the row names.
        attemptID = segment$.attrs[["id"]],
        RealTime = segment$RealTime,
        # NOTE(review): `segment$S` relies on `$` partial matching and is NULL
        # (so the column is silently dropped) unless an element name starts
        # with "S" -- confirm whether this column is still wanted.
        time = segment$S,
        # Keep ids/times as character on R < 4.0 as well.
        stringsAsFactors = FALSE
      )
    }
  }))

  # A segment with no timed history yields NULL here; skip it instead of
  # failing on rep(..., nrow(NULL)).
  if (is.null(segments.df)) {
    return(NULL)
  }

  segments.df$name <- rep(segments$Name, nrow(segments.df))
  segments.df
}))
# Parse each "hours:minutes:seconds" segment time into seconds and store the
# column as a difftime expressed in minutes.
#
# The parsed numbers are seconds, so they must be tagged as "secs" first.
# Tagging them as "mins" directly (as this code used to) turns any later
# `units(chunk$RealTime) <- "mins"` into a no-op and leaves seconds plotted on
# axes labelled "Time in minutes".
chunk$RealTime <- unlist(lapply(chunk$RealTime, function(t) {
  dt <- unlist(stringr::str_split(t, ":"))
  as.double(dt[1]) * 60 * 60 + as.double(dt[2]) * 60 + as.double(dt[3])
})) %>% as.difftime(units = "secs")
# Real unit conversion: seconds -> minutes.
units(chunk$RealTime) <- "mins"
# Fix the section order to the order of first appearance in the data.
chunk$name <- factor(chunk$name, levels = unique(chunk$name))

# Add `n`: how many timed attempts each section has.
chunk <- ungroup(mutate(group_by(chunk, name), n = n()))

# Named lookup from section name to a "<section> - <n> attempts" caption.
labels <- setNames(
  unique(paste0(chunk$name, " - ", chunk$n, " attempts")),
  unique(chunk$name)
)

# Express section times in minutes.
units(chunk$RealTime) <- "mins"

# Quick look at both tables.
glimpse(chunk)
glimpse(chunk2)
# Line chart of every section's time across runs, one coloured line per
# section.
chunk %>%
  # attemptID comes from XML attributes as text, so a discrete axis would sort
  # it lexicographically ("10" before "2"). Order the levels numerically so
  # the runs appear in sequence.
  mutate(attemptID = factor(
    attemptID,
    levels = sort(unique(as.integer(as.character(attemptID))))
  )) %>%
  ggplot() +
  geom_line(aes(x = attemptID, y = RealTime, group = name, color = name)) +
  labs(x = "Run", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_color_discrete(guide = guide_legend(title = "Section", title.position = "top")) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(color = "gray65", size = rel(0.5), linetype = "dashed"),
    panel.grid.minor.y = element_line(color = "gray85", size = rel(0.5), linetype = "dashed"),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
# Per-section panels: points plus a smoothed trend of section time over runs.
chunk %>%
  filter(attemptID %in% 7:20) %>% # keep only runs 7-20; the others are incomplete
  filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  # attemptID is text, so a discrete axis would place "10".."20" before
  # "7".."9" and geom_smooth() would fit against the wrong run order. Order
  # the levels numerically.
  mutate(attemptID = factor(
    attemptID,
    levels = sort(unique(as.integer(as.character(attemptID))))
  )) %>%
  ggplot(aes(x = attemptID, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  labs(x = "Run", y = "Time in minutes", title = "Presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_color_discrete(guide = guide_legend(title = "Section", title.position = "top")) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(color = "gray65", size = rel(0.5), linetype = "dashed"),
    panel.grid.minor.y = element_line(color = "gray85", size = rel(0.5), linetype = "dashed"),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
# Stacked line chart: section times accumulated on top of each other per run.
chunk %>%
  filter(attemptID %in% 7:20) %>% # keep only runs 7-20; the others are incomplete
  filter(!(name == "Introduction VMOS and Swot" & attemptID %in% c("9", "14", "18"))) %>% # filter outliers
  # attemptID is text; order the levels numerically so the discrete axis runs
  # 7, 8, 9, 10, ... instead of 10, 11, ..., 7, 8, 9.
  mutate(attemptID = factor(
    attemptID,
    levels = sort(unique(as.integer(as.character(attemptID))))
  )) %>%
  # Grouping by the reversed factor controls the stacking order.
  ggplot(aes(x = attemptID, y = RealTime, group = forcats::fct_rev(name), color = name)) +
  geom_line(position = "stack") +
  labs(x = "Run", y = "Time in minutes", title = "Stacked presentation section times") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_color_discrete(guide = guide_legend(title = "Section", title.position = "top")) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(color = "gray65", size = rel(0.5), linetype = "dashed"),
    panel.grid.minor.y = element_line(color = "gray85", size = rel(0.5), linetype = "dashed"),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
# Overall run time per attempt number (completed runs only).
chunk2 %>%
  # Convert to minutes with a real unit conversion. as.difftime() returns a
  # difftime input unchanged, so `as.difftime(runtime / 60, units = "mins")`
  # left minute values tagged as "secs" (visible in the glimpse() output).
  mutate(runtime = as.difftime(as.numeric(runtime, units = "mins"), units = "mins")) %>%
  filter(!is.na(runtime)) %>%
  mutate(attemptID = as.integer(attemptID)) %>%
  # filter(runtime > 20) %>%
  glimpse() %>%
  ggplot() +
  geom_line(aes(x = attemptID, y = runtime, group = 1)) +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Run", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  # The x axis starts at attempt 3; earlier attempts fall outside the limits.
  scale_x_continuous(expand = c(0, 0), limits = c(3, NA)) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(color = "gray65", size = rel(0.5), linetype = "dashed"),
    panel.grid.minor.y = element_line(color = "gray85", size = rel(0.5), linetype = "dashed"),
    axis.ticks = element_blank()
  )
# Compare the recorded runtime with the duration derived from the timestamps.
## Observation: the recorded runtime does not look accurate, and it is NA for
## runs that were not completed.
chunk2 %>%
  mutate(
    runtime.derived = end - start,
    diff = runtime.derived - runtime
  )
# Overall run time against the date/time each attempt started (completed runs
# only).
chunk2 %>%
  # Convert to minutes with a real unit conversion. as.difftime() returns a
  # difftime input unchanged, so `as.difftime(runtime / 60, units = "mins")`
  # left minute values tagged as "secs" (visible in the glimpse() output).
  mutate(runtime = as.difftime(as.numeric(runtime, units = "mins"), units = "mins")) %>%
  filter(!is.na(runtime)) %>%
  mutate(attemptID = as.integer(attemptID)) %>%
  # filter(runtime > 20) %>%
  glimpse() %>%
  ggplot() +
  geom_line(aes(x = start, y = runtime, group = 1)) +
  geom_point(aes(x = start, y = runtime, group = 1)) +
  # annotate("segment", x=-Inf, xend=Inf, y=0, yend=0) +
  labs(x = "Date", y = "Time in minutes", title = "Overall run time") +
  scale_y_continuous(expand = c(0, 0)) + # , limits=c(0,NA)) +
  scale_x_datetime(expand = c(0, 0)) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(color = "gray65", size = rel(0.5), linetype = "dashed"),
    panel.grid.minor.y = element_line(color = "gray85", size = rel(0.5), linetype = "dashed"),
    axis.ticks = element_blank()
  )
# Per-section panels against the attempt start date: points plus a smoothed
# trend. Attempt start times are brought in from chunk2 via attemptID.
chunk %>%
  left_join(chunk2, by = "attemptID") %>%
  filter(
    # keep only runs 7-20; the others are incomplete
    attemptID %in% 7:20,
    # drop the outlier runs of the introduction section
    name != "Introduction VMOS and Swot" | !(attemptID %in% c("9", "14", "18"))
  ) %>%
  ggplot(aes(x = start, y = RealTime, group = name, color = name)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~name) +
  labs(
    x = "Date",
    y = "Time in minutes",
    title = "Presentation section times"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )
# Stacked section times against the attempt start date. Attempt start times
# are brought in from chunk2 via attemptID.
chunk %>%
  left_join(chunk2, by = "attemptID") %>%
  filter(
    # keep only runs 7-20; the others are incomplete
    attemptID %in% 7:20,
    # drop the outlier runs of the introduction section
    name != "Introduction VMOS and Swot" | !(attemptID %in% c("9", "14", "18"))
  ) %>%
  # Grouping by the reversed factor controls the stacking order.
  ggplot(
    aes(x = start, y = RealTime, group = forcats::fct_rev(name), color = name)
  ) +
  geom_line(position = "stack") +
  labs(
    x = "Date",
    y = "Time in minutes",
    title = "Stacked presentation section times"
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  scale_color_discrete(
    guide = guide_legend(title = "Section", title.position = "top")
  ) +
  theme_classic() +
  theme(
    axis.title.y = element_text(hjust = 0),
    axis.title.x = element_text(hjust = 0),
    panel.grid.major.y = element_line(
      color = "gray65", size = rel(0.5), linetype = "dashed"
    ),
    panel.grid.minor.y = element_line(
      color = "gray85", size = rel(0.5), linetype = "dashed"
    ),
    axis.ticks = element_blank(),
    legend.position = "bottom"
  )