#!/usr/bin/env python
# coding: utf-8

# # Rmagic Functions Extension

# ## Line magics

# IPython has an `rmagic` extension that contains some magic functions for working with R via rpy2. This extension can be loaded using the `%load_ext` magic as follows:

# In[85]:


# Load the rpy2 IPython extension; it provides the %R, %%R, %Rpush, %Rpull
# and %Rget magics used throughout this notebook.
get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
# Display floats with three decimal places. The magic name goes in the first
# argument and its parameters in the second; an empty magic name is not a
# registered magic and makes the call fail.
get_ipython().run_line_magic('precision', '3')


# ## A simple example: scatter-plot
# A typical use case one imagines is having some numpy arrays, wanting to compute some statistics of interest on these
#  arrays and return the result back to python. Let's suppose we just want to fit a simple linear model to a scatterplot.

# In[86]:


# First we generate and plot the data in Python

import numpy as np
import pylab
# Five sample points; the cells below fit the linear model Y ~ X to them,
# so X acts as the predictor and Y as the response.
X = np.array([0,1,2,3,4])
Y = np.array([3,5,4,6,7])
# Scatter-plot the raw data.
pylab.scatter(X, Y)


# ## Pushing Python variables into R, and executing R-commands in IPython
# 
# We can accomplish this by first pushing variables to R, fitting a model and returning the results. The line magic %Rpush copies its arguments to variables of the same name in rpy2. The %R line magic evaluates the string in rpy2 and returns the results. In this case, the coefficients of a linear model.

# In[87]:


# Copy the Python arrays X and Y into R variables of the same name.
get_ipython().run_line_magic('Rpush', 'X Y')
# Fit the linear model Y ~ X in R and return its coefficients.
get_ipython().run_line_magic('R', 'lm(Y~X)$coef')


# In[88]:


# Note: %Rpush is equivalent to calling %R with just -i and no trailing code.
A = np.arange(20)
# Push A into R through the -i flag; no R code follows on this line.
get_ipython().run_line_magic('R', '-i A')
# Evaluate mean(A) in R on the pushed variable.
get_ipython().run_line_magic('R', 'mean(A)')


# In[89]:


# Cross-check R's fit against the closed-form least-squares estimates,
# computed directly with NumPy.

# Centre both variables on their means.
Xr = X - np.mean(X)
Yr = Y - np.mean(Y)
# slope = sum(dx * dy) / sum(dx ** 2); the intercept follows from the means.
slope = np.sum(Xr * Yr) / np.sum(Xr ** 2)
intercept = np.mean(Y) - slope * np.mean(X)
(intercept, slope)


# In[90]:


# It is also possible to return more than one value with %R.
# Two R expressions separated by ';': the residuals of Y ~ X, followed by the
# coefficients of the reverse regression X ~ Y.
get_ipython().run_line_magic('R', 'resid(lm(Y~X)); coef(lm(X~Y))')


# ### Capturing literal output from R into IPython

# In[91]:


# Unfortunately, the literal output from R does not work directly (yet):
# Evaluate summary() of the Y ~ X fit through the line magic.
get_ipython().run_line_magic('R', 'summary(lm(Y~X))')


# Therefore we have to use a trick:
# * We first "capture" the output of the R-command
# * And then we display it, line-by-line
# 
# Note: "%%R" executes multiple R-commandlines 

# In[92]:


# Cell magic running three lines of R: redefine X and Y inside R, then use
# capture.output() to store the printed summary of the Y ~ X fit in the R
# variable `s` instead of sending it to the console.
get_ipython().run_cell_magic('R', '', 'X = c(1,2,3,4,5)\nY = c(1,2,3,6,5)\ns = capture.output(summary(lm(Y~X)))\n')


# In[93]:


# Fetch the captured summary `s` from R and print it entry by entry,
# skipping the first two entries and the last one.
a = get_ipython().run_line_magic('R', 's')
for summary_line in a[2:-1]:
    print(summary_line)


# ## Getting R-variables back into IPython
# 
# There are two  line magics, %Rpull and %Rget. Both are useful after some R code has been executed and there are variables
# in the rpy2 namespace that one would like to retrieve. The main difference is that one returns the value (%Rget), while the other pulls it to self.shell.user_ns (%Rpull).
# 
# Imagine we've stored the results of some calculation in the variable "a" in rpy2's namespace. By using the %R magic, we can obtain these results and store them in b. We can also pull them directly to user_ns with %Rpull. They are both views on the same data.

# In[94]:


# Evaluate the assignment in R; the value handed back by %R is kept in b.
b = get_ipython().run_line_magic('R', 'a=resid(lm(Y~X))')
# Pull the R variable `a` into the Python namespace under the same name.
get_ipython().run_line_magic('Rpull', 'a')
print(a)
# Check that b and a are views on the same data.
# NOTE(review): comparing id() of the `.data` attribute depends on how that
# buffer object is created -- confirm it holds on the Python/NumPy in use.
assert id(b.data) == id(a.data)
# The same pull expressed as %R with only the -o flag.
get_ipython().run_line_magic('R', '-o a')


# %Rpull is equivalent to calling %R with just -o
# 

# In[95]:


# Compute the residuals (d) and coefficients (e) of Y ~ X inside R.
get_ipython().run_line_magic('R', 'd=resid(lm(Y~X)); e=coef(lm(Y~X))')
# Pull both variables into Python with one -o flag per name ...
get_ipython().run_line_magic('R', '-o d -o e')
# ... and pull e once more with %Rpull, the equivalent form.
get_ipython().run_line_magic('Rpull', 'e')
print(d)
print(e)
import numpy as np
# d comes from the same R expression, resid(lm(Y~X)), as the previously pulled
# `a`, so the two are expected to agree.
np.testing.assert_almost_equal(d, a)


# In[96]:


# The magic %Rget retrieves one variable from R.
# A was pushed into R earlier with `%R -i A`.
get_ipython().run_line_magic('Rget', 'A')


# ## Plotting and capturing output
# 
# R's console (i.e. its stdout() connection) is captured by IPython, as are any plots, which are published as PNG files, just as in a notebook started with the argument --pylab inline. Since a call to %R may also produce a return value (see above), we must ask what happens with a magic like the one below, whose R code publishes something to the notebook. If anything is published to the notebook, that call to %R returns None.

# In[97]:


# This call plots and prints from R, so something is published to the
# notebook; v1 is therefore expected to be None.
v1 = get_ipython().run_line_magic('R', 'plot(X,Y); print(summary(lm(Y~X))); vv=mean(X)*mean(Y)')
print('v1 is:', v1)
# The same product computed without plotting or printing: here %R is expected
# to return the value.
v2 = get_ipython().run_line_magic('R', 'mean(X)*mean(Y)')
print('v2 is:', v2)


# ## What value is returned from %R?
# 
# Some calls have no particularly interesting return value, the magic %R will not return anything in this case. The return value in rpy2 is actually NULL so %R returns None.

# Also, if the return value of a call to %R (in line mode) has just been printed to the console, then its value is also not returned.

# In[98]:


# plot() has no interesting return value, so %R is expected to give back None.
v = get_ipython().run_line_magic('R', 'plot(X)')
assert v is None


# But, if the last value did not print anything to console, the value is returned:
# 

# In[99]:


# Two R statements: the first prints a summary to the console, the last one
# (X) prints nothing, so its value is what %R returns.
v = get_ipython().run_line_magic('R', 'print(summary(X)); X')
print('v:', v)


# The return value can be suppressed by a trailing ';' or an -n argument.
# 

# In[100]:


# Suppress the return value with the -n flag.
get_ipython().run_line_magic('R', '-n X')


# In[101]:


# Suppress the return value with a trailing ';' in the R code.
get_ipython().run_line_magic('R', 'X;')


# ## Cell level magic
# 
# Often, we will want to do more than a simple linear regression model. There may be several lines of R code that we want to 
# use before returning to python. This is the cell-level magic.
# 
# 
# For the cell level magic, inputs can be passed via the -i or --inputs argument in the line. These variables are copied 
# from the shell namespace to R's namespace using rpy2.robjects.r.assign. It would be nice not to have to copy these into R: rnumpy ( http://bitbucket.org/njs/rnumpy/wiki/API ) has done some work to limit or at least make transparent the number of copies of an array. This seems like a natural thing to try to build on. Arrays can be output from R via the -o or --outputs argument in the line. All other arguments are sent to R's png function, which is the graphics device used to create the plots.
# 
# We can redo the above calculations in one ipython cell. We might also want to add some output such as a summary
#  from R or perhaps the standard plotting diagnostics of the lm.

# In[102]:


# Cell magic: push X and Y into R (-i) and pull XYcoef back afterwards (-o).
# The R code fits Y ~ X, stores the coefficients in XYcoef, prints the model
# summary, and draws the lm diagnostic plots on a 2x2 grid.
get_ipython().run_cell_magic('R', '-i X,Y -o XYcoef', 'XYlm = lm(Y~X)\nXYcoef = coef(XYlm)\nprint(summary(XYlm))\npar(mfrow=c(2,2))\nplot(XYlm)\n')


# ## Passing data back and forth
# 
# Currently, data is passed through RMagics.pyconverter when going from python to R and RMagics.Rconverter when 
# going from R to python. These currently default to numpy.ndarray. Future work will involve writing better converters, most likely involving integration with http://pandas.sourceforge.net.
# 
# Passing ndarrays into R seems to require a copy, though once an object is returned to python, this object is NOT copied, and it is possible to change its values.
# 

# In[103]:


# A ten-element integer range to send to R.
seq1 = np.arange(10)


# In[104]:


# Push seq1 into R (-i), build seq2 there by repeating it twice, print it,
# and pull seq2 back into Python (-o).
get_ipython().run_cell_magic('R', '-i seq1 -o seq2', 'seq2 = rep(seq1, 2)\nprint(seq2)\n')


# In[105]:


# Zero every other element of the array that came back from R, in place.
seq2[::2] = 0
seq2


# In[106]:


# Print seq2 from R to see whether the change made in Python is visible there.
get_ipython().run_cell_magic('R', '', 'print(seq2)\n')


# Once the array data has been passed to R, modifying its contents in Python does not modify R's copy of the data.

# In[107]:


# Change the Python array after it has already been pushed to R ...
seq1[0] = 200
# ... then print R's seq1 to compare it with the modified Python array.
get_ipython().run_line_magic('R', 'print(seq1)')


# But, if we pass data as both input and output, then the value of "data" in user_ns will be overwritten and the
# new array will be a view of the data in R's copy.

# In[108]:


print(seq1)
# Pass seq1 as both input (-i) and output (-o): it is pushed into R and
# pulled straight back, rebinding the Python name seq1.
get_ipython().run_line_magic('R', '-i seq1 -o seq1')
print(seq1)
# Modify the pulled-back array in place, then print R's seq1 to compare.
seq1[0] = 200
get_ipython().run_line_magic('R', 'print(seq1)')
# Fetch seq1 once more through %R and check it shares data with the pulled array.
# NOTE(review): comparing id() of the `.data` attribute depends on how that
# buffer object is created -- confirm it holds on the Python/NumPy in use.
seq1_view = get_ipython().run_line_magic('R', 'seq1')
assert(id(seq1_view.data) == id(seq1.data))


# ## Exception handling
# 
# Exceptions are handled by passing back rpy2's exception and the line that triggered it.

# In[109]:


# Evaluating an undefined R variable is expected to raise; report the error
# text instead of letting the exception propagate.
try:
    get_ipython().run_line_magic('R', '-n nosuchvar')
except Exception as err:
    # Python 3 exceptions have no `.message` attribute, so reading it would
    # raise AttributeError inside the handler. Printing the exception itself
    # shows its text on both Python 2 and 3. The name `err` (rather than `e`)
    # avoids clobbering the variable `e` pulled from R in an earlier cell.
    print(err)


# ## R Graphics

# In[110]:


# Draw Y against X with R's plot(), using the X and Y currently defined in R.
get_ipython().run_line_magic('R', 'plot(X,Y)')