# Show a short banner ahead of the plotting walkthrough.
print(x = "Before we dive into plots...")
[1] "Before we dive into plots..."
This is the IPython Notebook for R (a native R kernel lets you run R code): https://github.com/takluyver/IRkernel
# Read the batting statistics from a local CSV file and preview the first rows.
# NOTE(review): the path is specific to the author's machine; point it at your
# own copy of Batting.csv before running.
batting_table <- read.csv(file = "/home/marianne/Downloads/Batting.csv")
head(batting_table, n = 6L)
playerID yearID stint teamID lgID G G_batting AB R H X2B X3B HR RBI SB CS 1 aardsda01 2004 1 SFN NL 11 11 0 0 0 0 0 0 0 0 0 2 aardsda01 2006 1 CHN NL 45 43 2 0 0 0 0 0 0 0 0 3 aardsda01 2007 1 CHA AL 25 2 0 0 0 0 0 0 0 0 0 4 aardsda01 2008 1 BOS AL 47 5 1 0 0 0 0 0 0 0 0 5 aardsda01 2009 1 SEA AL 73 3 0 0 0 0 0 0 0 0 0 6 aardsda01 2010 1 SEA AL 53 4 0 0 0 0 0 0 0 0 0 BB SO IBB HBP SH SF GIDP G_old 1 0 0 0 0 0 0 0 11 2 0 0 0 0 1 0 0 45 3 0 0 0 0 0 0 0 2 4 0 1 0 0 0 0 0 5 5 0 0 0 0 0 0 0 NA 6 0 0 0 0 0 0 0 NA
# Per-column summary (quantiles, factor counts, NA counts) of the raw table.
summary(object = batting_table)
playerID yearID stint teamID lgID mcguide01: 31 Min. :1871 Min. :1.000 CHN : 4720 AA : 1890 henderi01: 29 1st Qu.:1931 1st Qu.:1.000 PHI : 4621 AL :44369 newsobo01: 29 Median :1970 Median :1.000 PIT : 4575 FL : 470 johnto01 : 28 Mean :1962 Mean :1.077 SLN : 4535 NL :49944 kaatji01 : 28 3rd Qu.:1995 3rd Qu.:1.000 CIN : 4393 PL : 147 ansonca01: 27 Max. :2013 Max. :5.000 CLE : 4318 UA : 332 (Other) :97717 (Other):70727 NA's: 737 G G_batting AB R Min. : 1.00 Min. : 0.00 Min. : 0.0 Min. : 0.00 1st Qu.: 13.00 1st Qu.: 7.00 1st Qu.: 9.0 1st Qu.: 0.00 Median : 35.00 Median : 32.00 Median : 61.0 Median : 5.00 Mean : 51.65 Mean : 49.13 Mean :154.1 Mean : 20.47 3rd Qu.: 81.00 3rd Qu.: 81.00 3rd Qu.:260.0 3rd Qu.: 31.00 Max. :165.00 Max. :165.00 Max. :716.0 Max. :192.00 NA's :1406 NA's :6413 NA's :6413 H X2B X3B HR Min. : 0.00 Min. : 0.0 Min. : 0.000 Min. : 0.000 1st Qu.: 1.00 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.: 0.000 Median : 12.00 Median : 2.0 Median : 0.000 Median : 0.000 Mean : 40.37 Mean : 6.8 Mean : 1.424 Mean : 3.002 3rd Qu.: 66.00 3rd Qu.:10.0 3rd Qu.: 2.000 3rd Qu.: 3.000 Max. :262.00 Max. :67.0 Max. :36.000 Max. :73.000 NA's :6413 NA's :6413 NA's :6413 NA's :6413 RBI SB CS BB Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.00 Median : 5.00 Median : 0.000 Median : 0.000 Median : 4.00 Mean : 18.47 Mean : 3.265 Mean : 1.385 Mean : 14.21 3rd Qu.: 28.00 3rd Qu.: 2.000 3rd Qu.: 1.000 3rd Qu.: 21.00 Max. :191.00 Max. :138.000 Max. :42.000 Max. :232.00 NA's :6837 NA's :7713 NA's :29867 NA's :6413 SO IBB HBP SH Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.000 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000 Median : 11.00 Median : 0.00 Median : 0.000 Median : 1.000 Mean : 21.95 Mean : 1.28 Mean : 1.136 Mean : 2.564 3rd Qu.: 31.00 3rd Qu.: 1.00 3rd Qu.: 1.000 3rd Qu.: 3.000 Max. :223.00 Max. :120.00 Max. :51.000 Max. :67.000 NA's :14251 NA's :42977 NA's :9233 NA's :12751 SF GIDP G_old Min. : 0.0 Min. 
: 0.00 Min. : 0.00 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 11.00 Median : 0.0 Median : 1.00 Median : 34.00 Mean : 1.2 Mean : 3.33 Mean : 50.99 3rd Qu.: 2.0 3rd Qu.: 5.00 3rd Qu.: 82.00 Max. :19.0 Max. :36.00 Max. :165.00 NA's :42446 NA's :32521 NA's :5189
library(ggplot2)

# Keep only rows with yearID >= 2004; which() drops rows where the comparison
# is NA, matching what subset() does.
recent_rows <- which(batting_table$yearID >= 2004)
batting_table <- batting_table[recent_rows, ]

# Batting average = hits divided by at-bats, computed row by row.
batting_table$Avg <- batting_table$H / batting_table$AB

# First look at the distribution of averages, in bins 0.05 wide.
ggplot(data = batting_table) +
  geom_histogram(mapping = aes(x = Avg), binwidth = 0.05)
Let us filter out entries where players were at bat fewer than 10 times.
# Drop rows with fewer than 10 at-bats (rows where AB is NA are dropped too).
batting_table <- batting_table[which(batting_table$AB >= 10), ]

# Rebuild the histogram on the filtered data and keep it in `hist` so that it
# can be reused later; note that this name masks base::hist in the session.
hist <- ggplot(data = batting_table) +
  geom_histogram(mapping = aes(x = Avg), binwidth = 0.05)
hist
Let me share this plot, so that others can contribute! I want to install the latest version of the plotly package and load it (silently).
# Attach devtools quietly, install plotly from the ropensci/plotly GitHub
# repository with its messages suppressed, then attach plotly quietly as well.
suppressPackageStartupMessages(library("devtools"))
suppressMessages(devtools::install_github("ropensci/plotly"))
suppressPackageStartupMessages(library("plotly"))
Open a Plotly connection (using your own credentials if you prefer).
# Open a Plotly connection and pass the ggplot2 histogram through it.
# Credentials are taken from the plotly_username / plotly_api_key environment
# variables so that a personal API key never has to be written into the
# notebook. When a variable is unset, the shared demo account is used instead.
py <- plotly(
  Sys.getenv("plotly_username", unset = "ggplot2examples"),
  Sys.getenv("plotly_api_key", unset = "3gazttckd7")
) # Instantiate plotly object

# Hand the ggplot object `hist` to the connection's ggplotly converter and keep
# whatever it returns in collab_hist.
collab_hist <- py$ggplotly(hist)
packageStartupMessage in packageStartupMessage(gettextf("Loading required package: %s", : Loading required package: IRdisplay
Let me click "data and graph", then "Save & edit", "Copy", fit the distribution with a Gaussian, add two vertical lines, an annotation, give a title, etc., and "Share"!
Declarative Plotly syntax in R: https://plot.ly/~mkcor/305.r (Get the JSON https://plot.ly/~mkcor/305.json and more...)
Now retrieve the plot's DNA, edit it in R, send it back online...
# Fetch figure 305 belonging to user "mkcor" through the Plotly connection and
# print its nested structure. The recorded output shows a list with two
# top-level elements, `layout` and `data`.
enhanc_hist <- py$get_figure("mkcor", 305)
str(enhanc_hist)
List of 2 $ layout:List of 25 ..$ boxmode : chr "overlay" ..$ paper_bgcolor: chr "#fff" ..$ height : num 347 ..$ titlefont :List of 3 .. ..$ color : chr "" .. ..$ family: chr "" .. ..$ size : num 0 ..$ hovermode : chr "x" ..$ font :List of 3 .. ..$ color : chr "#444" .. ..$ family: chr "\"Open sans\", verdana, arial, sans-serif" .. ..$ size : num 12 ..$ autosize : logi TRUE ..$ title : chr "Histogram of Batting Averages in MLB" ..$ plot_bgcolor : chr "#fff" ..$ dragmode : chr "pan" ..$ smith : logi FALSE ..$ width : num 874 ..$ bargap : num 0.2 ..$ xaxis :List of 33 .. ..$ showexponent : chr "all" .. ..$ showticklabels: logi TRUE .. ..$ domain : num [1:2] 0 1 .. ..$ gridcolor : chr "#eee" .. ..$ linecolor : chr "#444" .. ..$ mirror : logi FALSE .. ..$ autotick : logi TRUE .. ..$ linewidth : num 1 .. ..$ nticks : num 0 .. ..$ title : chr "Avg" .. ..$ ticks : chr "" .. ..$ rangemode : chr "normal" .. ..$ overlaying : logi FALSE .. ..$ zeroline : logi FALSE .. ..$ type : chr "linear" .. ..$ autorange : logi TRUE .. ..$ zerolinewidth : num 1 .. ..$ ticklen : num 5 .. ..$ titlefont :List of 3 .. .. ..$ color : chr "" .. .. ..$ family: chr "" .. .. ..$ size : num 0 .. ..$ tickcolor : chr "#444" .. ..$ showline : logi FALSE .. ..$ showgrid : logi FALSE .. ..$ tickfont :List of 3 .. .. ..$ color : chr "" .. .. ..$ family: chr "" .. .. ..$ size : num 0 .. ..$ tickwidth : num 1 .. ..$ tick0 : num 0 .. ..$ tickangle : chr "auto" .. ..$ gridwidth : num 1 .. ..$ dtick : num 0.1 .. ..$ zerolinecolor : chr "#444" .. ..$ range : num [1:2] -0.002 0.648 .. ..$ position : num 0 .. ..$ anchor : chr "y" .. ..$ exponentformat: chr "B" ..$ bargroupgap : num 0 ..$ hidesources : logi FALSE ..$ showlegend : logi TRUE ..$ separators : chr ".," ..$ barmode : chr "group" ..$ boxgap : num 0.3 ..$ legend :List of 9 .. ..$ bordercolor: chr "#444" .. ..$ yanchor : chr "top" .. ..$ traceorder : chr "normal" .. ..$ xanchor : chr "left" .. ..$ bgcolor : chr "#fff" .. ..$ borderwidth: num 0 .. 
..$ y : num 1 .. ..$ x : num 1.02 .. ..$ font :List of 3 .. .. ..$ color : chr "" .. .. ..$ family: chr "" .. .. ..$ size : num 0 ..$ yaxis :List of 33 .. ..$ showexponent : chr "all" .. ..$ showticklabels: logi TRUE .. ..$ domain : num [1:2] 0 1 .. ..$ gridcolor : chr "#eee" .. ..$ linecolor : chr "#444" .. ..$ mirror : logi FALSE .. ..$ autotick : logi TRUE .. ..$ linewidth : num 1 .. ..$ nticks : num 0 .. ..$ title : chr "counts" .. ..$ ticks : chr "" .. ..$ rangemode : chr "nonnegative" .. ..$ overlaying : logi FALSE .. ..$ zeroline : logi TRUE .. ..$ type : chr "linear" .. ..$ autorange : logi FALSE .. ..$ zerolinewidth : num 1 .. ..$ ticklen : num 5 .. ..$ titlefont :List of 3 .. .. ..$ color : chr "" .. .. ..$ family: chr "" .. .. ..$ size : num 0 .. ..$ tickcolor : chr "#444" .. ..$ showline : logi FALSE .. ..$ showgrid : logi TRUE .. ..$ tickfont :List of 3 .. .. ..$ color : chr "" .. .. ..$ family: chr "" .. .. ..$ size : num 0 .. ..$ tickwidth : num 1 .. ..$ tick0 : num 0 .. ..$ tickangle : chr "auto" .. ..$ gridwidth : num 1 .. ..$ dtick : num 5000 .. ..$ zerolinecolor : chr "#444" .. ..$ range : num [1:2] 0 23000 .. ..$ position : num 0 .. ..$ anchor : chr "x" .. ..$ exponentformat: chr "B" ..$ annotations :List of 2 .. ..$ :List of 23 .. .. ..$ yanchor : chr "auto" .. .. ..$ text : chr "Mendoza Line" .. .. ..$ bordercolor: chr "" .. .. ..$ arrowsize : num 1 .. .. ..$ textangle : num 0 .. .. ..$ borderwidth: num 1 .. .. ..$ ay : num -19.5 .. .. ..$ ax : num -48 .. .. ..$ font :List of 3 .. .. .. ..$ color : chr "" .. .. .. ..$ family: chr "" .. .. .. ..$ size : num 0 .. .. ..$ arrowcolor : chr "" .. .. ..$ xref : chr "x" .. .. ..$ arrowhead : num 1 .. .. ..$ bgcolor : chr "rgba(0,0,0,0)" .. .. ..$ borderpad : num 1 .. .. ..$ showarrow : logi TRUE .. .. ..$ opacity : num 1 .. .. ..$ xanchor : chr "auto" .. .. ..$ arrowwidth : num 0 .. .. ..$ yref : chr "y" .. .. ..$ align : chr "center" .. .. ..$ tag : chr "" .. .. ..$ y : num 19708 .. .. 
..$ x : num 0.194 .. ..$ :List of 24 .. .. ..$ yanchor : chr "auto" .. .. ..$ uid : chr "5d7224" .. .. ..$ text : chr "R<sup>2</sup> = 0.9506<br>y = 807 + 2.09e+4exp((-(x - 0.248)^2) / (20.0547^2))" .. .. ..$ arrowsize : num 1 .. .. ..$ textangle : num 0 .. .. ..$ borderwidth: num 1 .. .. ..$ ay : num -94 .. .. ..$ ax : num 111 .. .. ..$ font : Named num 10 .. .. .. ..- attr(*, "names")= chr "size" .. .. ..$ arrowcolor : chr "#636363" .. .. ..$ xref : chr "x" .. .. ..$ arrowhead : num 3 .. .. ..$ bgcolor : chr "rgb(255, 255, 255)" .. .. ..$ borderpad : num 1 .. .. ..$ showarrow : logi TRUE .. .. ..$ opacity : num 0.8 .. .. ..$ xanchor : chr "auto" .. .. ..$ y : num 4136 .. .. ..$ arrowwidth : num 2 .. .. ..$ yref : chr "y" .. .. ..$ align : chr "left" .. .. ..$ tag : chr "" .. .. ..$ bordercolor: chr "" .. .. ..$ x : num 0.365 ..$ boxgroupgap : num 0.3 ..$ margin :List of 6 .. ..$ b : num 80 .. ..$ l : num 80 .. ..$ r : num 80 .. ..$ pad : num 0 .. ..$ t : num 100 .. ..$ autoexpand: logi TRUE $ data :List of 4 ..$ :List of 13 .. ..$ textfont: Named list() .. ..$ error_x : Named list() .. ..$ name : chr "Batting Averages" .. ..$ uid : chr "d2c52d" .. ..$ xsrc : chr "mkcor:304:367dfe" .. ..$ xbins : Named num [1:3] -0.002 0.644 0.05 .. .. ..- attr(*, "names")= chr [1:3] "start" "end" "size" .. ..$ marker :List of 1 .. .. ..$ line: Named list() .. ..$ x : num [1:67863] 0.28 0.314 0.328 0.322 0.326 ... .. ..$ line : Named list() .. ..$ type : chr "histogram" .. ..$ autobinx: logi FALSE .. ..$ ybins : Named list() .. ..$ error_y : Named list() ..$ :List of 13 .. ..$ textfont: Named list() .. ..$ error_x : Named list() .. ..$ name : chr "Good" .. ..$ uid : chr "a3a1f2" .. ..$ ysrc : chr "mkcor:304:095ab6" .. ..$ xsrc : chr "mkcor:304:5a03a8" .. ..$ marker :List of 1 .. .. ..$ line: Named list() .. ..$ mode : chr "lines" .. ..$ y : chr [1:2] "0" "25000" .. ..$ x : chr [1:2] "0.3" "0.3" .. ..$ line : Named chr "rgb(44, 160, 44)" .. .. ..- attr(*, "names")= chr "color" .. 
..$ type : chr "scatter" .. ..$ error_y : Named list() ..$ :List of 13 .. ..$ textfont: Named list() .. ..$ error_x : Named list() .. ..$ name : chr "Bad" .. ..$ uid : chr "99885f" .. ..$ ysrc : chr "mkcor:304:9ff07b" .. ..$ xsrc : chr "mkcor:304:f2e76e" .. ..$ marker :List of 1 .. .. ..$ line: Named list() .. ..$ mode : chr "lines" .. ..$ y : chr [1:2] "0" "25000" .. ..$ x : chr [1:2] "0.2" "0.2" .. ..$ line : Named chr "rgb(255, 0, 0)" .. .. ..- attr(*, "names")= chr "color" .. ..$ type : chr "scatter" .. ..$ error_y : Named list() ..$ :List of 14 .. ..$ opacity : num 0.5 .. ..$ textfont: Named list() .. ..$ error_x : Named list() .. ..$ uid : chr "5d7224" .. ..$ fit :List of 12 .. .. ..$ outputxrange: chr [1:2] "" "" .. .. ..$ rms : num 1853 .. .. ..$ iterations : num 10 .. .. ..$ fitname : chr "Batting Averages - fit" .. .. ..$ funcStr : chr "a + h*exp( -(x-x0)^2/(2*w^2) )" .. .. ..$ parent : chr "d2c52d" .. .. ..$ weight : chr "" .. .. ..$ params :List of 4 .. .. .. ..$ :List of 4 .. .. .. .. ..$ value: num 807 .. .. .. .. ..$ hold : logi FALSE .. .. .. .. ..$ name : chr "a" .. .. .. .. ..$ err : num 692 .. .. .. ..$ :List of 4 .. .. .. .. ..$ value: num 20903 .. .. .. .. ..$ hold : logi FALSE .. .. .. .. ..$ name : chr "h" .. .. .. .. ..$ err : num 1702 .. .. .. ..$ :List of 4 .. .. .. .. ..$ value: num 0.0547 .. .. .. .. ..$ hold : logi FALSE .. .. .. .. ..$ name : chr "w" .. .. .. .. ..$ err : num 0.00557 .. .. .. ..$ :List of 4 .. .. .. .. ..$ value: num 0.248 .. .. .. .. ..$ hold : logi FALSE .. .. .. .. ..$ name : chr "x0" .. .. .. .. ..$ err : num 0.00493 .. .. ..$ corr : num 0.975 .. .. ..$ ninterp : chr "50" .. .. ..$ inputxrange : chr [1:2] "" "" .. .. ..$ uid : chr "5d7224" .. ..$ yaxis : chr "y" .. ..$ error_y : Named list() .. ..$ marker :List of 1 .. .. ..$ line: Named list() .. ..$ xaxis : chr "x" .. ..$ y : num [1:50] 812 818 833 864 926 ... .. ..$ x : num [1:50] 0.023 0.0352 0.0475 0.0597 0.072 ... .. ..$ line :List of 2 .. .. 
..$ color: chr "rgb(55, 126, 184)" .. .. ..$ width: num 4 .. ..$ type : chr "scatter" .. ..$ name : chr "Batting Averages - fit"
# Data for second trace (exact-name lookup of the `data` element).
enhanc_hist[["data"]][[2]]
$textfont named list() $error_x named list() $name [1] "Good" $uid [1] "a3a1f2" $ysrc [1] "mkcor:304:095ab6" $xsrc [1] "mkcor:304:5a03a8" $marker $marker$line named list() $mode [1] "lines" $y [1] "0" "25000" $x [1] "0.3" "0.3" $line color "rgb(44, 160, 44)" $type [1] "scatter" $error_y named list()
Say we have more ambition.
# Rename the second trace and move its vertical line from 0.3 to 0.35.
# The trace's x values are stored as character strings, so the numeric 0.35 is
# coerced to "0.35" on assignment.
enhanc_hist[["data"]][[2]][["name"]] <- "Very Good"
enhanc_hist[["data"]][[2]][["x"]][1:2] <- 0.35
enhanc_hist[["data"]][[2]][["x"]]
[1] "0.35" "0.35"
# Send the edited traces back through the Plotly connection, passing the
# retrieved layout along via `kwargs`. The recorded output shows the call
# returning a list that includes the `url` of the resulting plot.
py$plotly(enhanc_hist$data, kwargs=list(layout=enhanc_hist$layout))
$url [1] "https://plot.ly/~ggplot2examples/210" $message [1] "" $warning [1] "" $filename [1] "from api (58) (1)" $error [1] ""
Visit the above URL.
ggplot2 also uses geom_bar to make histograms...
# Same distribution drawn with geom_bar, one panel per season (yearID).
bar <- ggplot(data = batting_table) +
  geom_bar(mapping = aes(x = Avg), binwidth = 0.05) +
  facet_wrap(~ yearID)
bar

# Pass the faceted plot through the Plotly connection; the recorded output shows
# the converter warning "You may want to use geom_histogram."
py$ggplotly(bar)
simpleWarning in gg2list(gg): You may want to use geom_histogram.
Join the fun! https://github.com/ropensci/plotly
# CSS styling within IPython notebook: download the stylesheet text from the
# plotly/python-user-guide repository and pass it to display_html().
# NOTE(review): getURL() is presumably RCurl::getURL and display_html() is
# presumably from IRdisplay; neither package is attached in the visible code,
# so confirm both are loaded.
display_html(getURL("https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css"))