#!/usr/bin/env python
# coding: utf-8

# # Rmagic Functions Extension

# ## Line magics

# IPython has an `rmagic` extension that contains some magic functions for working with R via rpy2.
# This extension can be loaded using the `%load_ext` magic as follows:

# In[85]:


get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
# The magic name must be 'precision'; an empty magic name cannot be resolved by IPython.
get_ipython().run_line_magic('precision', '3')


# ## A simple example: scatter-plot

# A typical use case one imagines is having some numpy arrays, wanting to compute some statistics of interest on these
# arrays and return the result back to python. Let's suppose we just want to fit a simple linear model to a scatterplot.

# In[86]:


# First we generate and plot the data in Python
import numpy as np
import pylab

X = np.array([0, 1, 2, 3, 4])
Y = np.array([3, 5, 4, 6, 7])
pylab.scatter(X, Y)


# ## Pushing Python variables into R, and executing R-commands in IPython
#
# We can accomplish this by first pushing variables to R, fitting a model and returning the results.
# The line magic %Rpush copies its arguments to variables of the same name in rpy2.
# The %R line magic evaluates the string in rpy2 and returns the results.
# In this case, the coefficients of a linear model.

# In[87]:


get_ipython().run_line_magic('Rpush', 'X Y')
get_ipython().run_line_magic('R', 'lm(Y~X)$coef')


# In[88]:


# Note: %Rpush is equivalent to calling %R with just -i and no trailing code.
A = np.arange(20)
get_ipython().run_line_magic('R', '-i A')
get_ipython().run_line_magic('R', 'mean(A)')


# In[89]:


# We can check that the computation in R is correct fairly easily:
Xr = X - X.mean()
Yr = Y - Y.mean()
slope = (Xr*Yr).sum() / (Xr**2).sum()
intercept = Y.mean() - X.mean() * slope
(intercept, slope)


# In[90]:


# It is also possible to return more than one value with %R.
get_ipython().run_line_magic('R', 'resid(lm(Y~X)); coef(lm(X~Y))')


# ### Capturing literal output from R into IPython

# In[91]:


# Unfortunately, the literal output from R does not work directly (yet):
get_ipython().run_line_magic('R', 'summary(lm(Y~X))')


# Therefore we have to use a trick:
# * We first "capture" the output of the R-command
# * And then we display it, line-by-line
#
# Note: "%%R" executes multiple R command lines

# In[92]:


get_ipython().run_cell_magic('R', '', 'X = c(1,2,3,4,5)\nY = c(1,2,3,6,5)\ns = capture.output(summary(lm(Y~X)))\n')


# In[93]:


# and here we pass the string into Python, and print it line-by-line
a = get_ipython().run_line_magic('R', 's')
for line in a[2:-1]:
    print(line)


# ## Getting R-variables back into IPython
#
# There are two line magics, %Rpull and %Rget. Both are useful after some R code has been executed and there are variables
# in the rpy2 namespace that one would like to retrieve. The main difference is that one returns the value (%Rget),
# while the other pulls it to self.shell.user_ns (%Rpull).
#
# Imagine we've stored the results of some calculation in the variable "a" in rpy2's namespace.
# By using the %R magic, we can obtain these results and store them in b.
# We can also pull them directly to user_ns with %Rpull. They are both views on the same data.

# In[94]:


b = get_ipython().run_line_magic('R', 'a=resid(lm(Y~X))')
get_ipython().run_line_magic('Rpull', 'a')
print(a)
assert id(b.data) == id(a.data)
get_ipython().run_line_magic('R', '-o a')


# %Rpull is equivalent to calling %R with just -o
#

# In[95]:


get_ipython().run_line_magic('R', 'd=resid(lm(Y~X)); e=coef(lm(Y~X))')
get_ipython().run_line_magic('R', '-o d -o e')
get_ipython().run_line_magic('Rpull', 'e')
print(d)
print(e)
import numpy as np
np.testing.assert_almost_equal(d, a)


# In[96]:


# The magic %Rget retrieves one variable from R.
get_ipython().run_line_magic('Rget', 'A')


# ## Plotting and capturing output
#
# R's console (i.e.
# its stdout() connection) is captured by ipython, as are any plots which are published as PNG files like the notebook
# with arguments --pylab inline. As a call to %R may produce a return value (see above) we must ask what happens to a
# magic like the one below. The R code specifies that something is published to the notebook.
# If anything is published to the notebook, that call to %R returns None.

# In[97]:


v1 = get_ipython().run_line_magic('R', 'plot(X,Y); print(summary(lm(Y~X))); vv=mean(X)*mean(Y)')
print('v1 is:', v1)
v2 = get_ipython().run_line_magic('R', 'mean(X)*mean(Y)')
print('v2 is:', v2)


# ## What value is returned from %R?
#
# Some calls have no particularly interesting return value, the magic %R will not return anything in this case.
# The return value in rpy2 is actually NULL so %R returns None.

# Also, if the return value of a call to %R (in line mode) has just been printed to the console,
# then its value is also not returned.

# In[98]:


v = get_ipython().run_line_magic('R', 'plot(X)')
assert v is None


# But, if the last value did not print anything to console, the value is returned:
#

# In[99]:


v = get_ipython().run_line_magic('R', 'print(summary(X)); X')
print('v:', v)


# The return value can be suppressed by a trailing ';' or an -n argument.
#

# In[100]:


get_ipython().run_line_magic('R', '-n X')


# In[101]:


get_ipython().run_line_magic('R', 'X;')


# ## Cell level magic
#
# Often, we will want to do more than a simple linear regression model. There may be several lines of R code that we want to
# use before returning to python. This is the cell-level magic.
#
#
# For the cell level magic, inputs can be passed via the -i or --inputs argument in the line. These variables are copied
# from the shell namespace to R's namespace using rpy2.robjects.r.assign. It would be nice not to have to copy these into R:
# rnumpy ( http://bitbucket.org/njs/rnumpy/wiki/API ) has done some work to limit or at least make transparent
# the number of copies of an array.
# This seems like a natural thing to try to build on. Arrays can be output from R via the -o or --outputs argument
# in the line. All other arguments are sent to R's png function, which is the graphics device used to create the plots.
#
# We can redo the above calculations in one ipython cell. We might also want to add some output such as a summary
# from R or perhaps the standard plotting diagnostics of the lm.

# In[102]:


get_ipython().run_cell_magic('R', '-i X,Y -o XYcoef', 'XYlm = lm(Y~X)\nXYcoef = coef(XYlm)\nprint(summary(XYlm))\npar(mfrow=c(2,2))\nplot(XYlm)\n')


# ## Passing data back and forth
#
# Currently, data is passed through RMagics.pyconverter when going from python to R and RMagics.Rconverter when
# going from R to python. These currently default to numpy.ndarray. Future work will involve writing better converters,
# most likely involving integration with http://pandas.sourceforge.net.
#
# Passing ndarrays into R seems to require a copy, though once an object is returned to python,
# this object is NOT copied, and it is possible to change its values.
#

# In[103]:


seq1 = np.arange(10)


# In[104]:


get_ipython().run_cell_magic('R', '-i seq1 -o seq2', 'seq2 = rep(seq1, 2)\nprint(seq2)\n')


# In[105]:


seq2[::2] = 0
seq2


# In[106]:


get_ipython().run_cell_magic('R', '', 'print(seq2)\n')


# Once the array data has been passed to R, modifying its contents does not modify R's copy of the data.

# In[107]:


seq1[0] = 200
get_ipython().run_line_magic('R', 'print(seq1)')


# But, if we pass data as both input and output, then the value of "data" in user_ns will be overwritten and the
# new array will be a view of the data in R's copy.
# In[108]:


print(seq1)
get_ipython().run_line_magic('R', '-i seq1 -o seq1')
print(seq1)
seq1[0] = 200
get_ipython().run_line_magic('R', 'print(seq1)')
seq1_view = get_ipython().run_line_magic('R', 'seq1')
assert id(seq1_view.data) == id(seq1.data)


# ## Exception handling
#
# Exceptions are handled by passing back rpy2's exception and the line that triggered it.

# In[109]:


try:
    get_ipython().run_line_magic('R', '-n nosuchvar')
except Exception as err:
    # Python 3 exceptions have no `.message` attribute; printing the exception
    # itself shows its string form. The handler variable is not named `e` so that
    # leaving this block does not unbind the `e` pulled from R earlier.
    print(err)


# ## R Graphics

# In[110]:


get_ipython().run_line_magic('R', 'plot(X,Y)')