%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
from optimus import Optimus
op = Optimus("spark")
C:\Users\argenisleon\Anaconda3\lib\site-packages\statsmodels\iolib\foreign.py:651: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  _type_converters = {253 : np.long, 252 : int}
C:\Users\argenisleon\Anaconda3\lib\site-packages\patsy\constraint.py:13: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.9 it will stop working
  from collections import Mapping
C:\Users\argenisleon\Anaconda3\lib\site-packages\statsmodels\stats\_lilliefors.py:163: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  size = np.array(sorted(cv_data), dtype=np.float)
You are using PySparkling of version 2.4.10, but your PySpark is of version 3.1.1. Please make sure Spark and PySparkling versions are compatible.
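The deprecation warnings above come from NumPy 1.20 removing its aliases for Python builtins; they are harmless here but easy to fix in your own code. A minimal sketch of the replacements the warnings themselves recommend:

```python
import numpy as np

# `np.long` is deprecated; the builtin `int` (or an explicit fixed-width
# type such as np.int64) is the recommended replacement.
value = int(42)

# `np.float` is deprecated; use the builtin `float`, or np.float64 if you
# specifically want the NumPy scalar type.
sizes = np.array([1.0, 2.0, 3.0], dtype=np.float64)
```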
op.load.csv("data/foo.csv")
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-0ff0ad0df6d0> in <module>
----> 1 op.load.csv("data/foo.csv")

~\Documents\Optimus\optimus\engines\spark\io\load.py in csv(path, sep, header, infer_schema, encoding, null_value, n_rows, error_bad_lines, *args, **kwargs)
    103                     sdf = sdf.limit(n_rows)
    104                 # print(type(sdf))
--> 105                 df = SparkDataFrame(sdf)
    106                 df.meta = Meta.set(df.meta, "file_name", file_name)
    107             except IOError as error:

TypeError: Can't instantiate abstract class SparkDataFrame with abstract methods _base_to_dfd, encoding, visualize
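The final TypeError is Python's standard complaint when a subclass of `abc.ABC` is instantiated without overriding all of its abstract methods; here `SparkDataFrame` apparently leaves `_base_to_dfd`, `encoding`, and `visualize` unimplemented. A minimal reproduction of the same class of error, using hypothetical stand-in names rather than Optimus's actual class hierarchy:

```python
from abc import ABC, abstractmethod

class BaseDataFrame(ABC):
    # Hypothetical stand-in for Optimus's abstract base class.
    @abstractmethod
    def encoding(self): ...

    @abstractmethod
    def visualize(self): ...

class SparkLikeDataFrame(BaseDataFrame):
    pass  # abstract methods left unimplemented, as in the traceback

try:
    SparkLikeDataFrame()
except TypeError as exc:
    # TypeError: Can't instantiate abstract class SparkLikeDataFrame ...
    print(exc)
```

This suggests the installed Optimus version is out of sync with its own engine code; upgrading or reinstalling Optimus so that `SparkDataFrame` implements its base class's abstract methods is the usual remedy.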