In [1]:
import pandas as pd
import numpy as np
In [2]:
# df1: purely numeric frame. col3 is NaN in every row, col4 holds zeros,
# and the last row is NaN across all four columns.

df1_columns = ["col1", "col2", "col3", "col4"]
df1_rows = [
    [1, np.nan, np.nan, 0],
    [np.nan, 2, np.nan, 0],
    [np.nan, np.nan, np.nan, 0],
    [np.nan, np.nan, np.nan, np.nan],
]
df1 = pd.DataFrame(data=df1_rows, columns=df1_columns)
df1
Out[2]:
col1 col2 col3 col4
0 1.0 NaN NaN 0.0
1 NaN 2.0 NaN 0.0
2 NaN NaN NaN 0.0
3 NaN NaN NaN NaN
In [3]:
# dtype of each column (Series) in df1 -- all four come out as float64
df1.dtypes
Out[3]:
col1    float64
col2    float64
col3    float64
col4    float64
dtype: object
In [4]:
# bool_only is left at its default (None). df1 holds only floats -- no column has
# boolean values -- so the result matches the documentation.
# Row 3 is entirely NaN and still evaluates to True: per the pandas docs, with
# skipna = False NA values are treated as True because they are not equal to zero.


df1.any(axis = "columns", skipna = False)
Out[4]:
0    True
1    True
2    True
3    True
dtype: bool
In [5]:
# df2: same layout as df1, except that the zeros in col4 are replaced by
# True / False; col3 is still NaN in every row and the last row is all NaN.

df2_columns = ["col1", "col2", "col3", "col4"]
df2_rows = [
    [1, np.nan, np.nan, True],
    [np.nan, 2, np.nan, True],
    [np.nan, np.nan, np.nan, False],
    [np.nan, np.nan, np.nan, np.nan],
]
df2 = pd.DataFrame(data=df2_rows, columns=df2_columns)
df2
Out[5]:
col1 col2 col3 col4
0 1.0 NaN NaN True
1 NaN 2.0 NaN True
2 NaN NaN NaN False
3 NaN NaN NaN NaN
In [6]:
# The col4 Series as a whole is reported with dtype object ...
print(df2.dtypes)
print("\n")

# ... while its individual elements still have their respective Python types
print("col4 elements:", list(map(type, df2["col4"])))
col1    float64
col2    float64
col3    float64
col4     object
dtype: object


col4 elements: [<class 'bool'>, <class 'bool'>, <class 'bool'>, <class 'float'>]
In [7]:
# Result we currently get with bool_only = None (the default): the reduction comes
# back as an object Series holding 1 / NaN instead of booleans.
df2.any(axis = "columns", skipna = False)
Out[7]:
0      1
1    NaN
2    NaN
3    NaN
dtype: object
In [8]:
# With bool_only = False the result is a bool Series; the last row (index 3), which
# is entirely NaN (np.nan, not None), now returns True.
df2.any(axis = "columns", skipna = False, bool_only = False)
Out[8]:
0    True
1    True
2    True
3    True
dtype: bool
In [9]:
# No row (and no column) of df2 is completely boolean, so my understanding is that
# this should result in an empty Series, but it gives exactly the same result as the
# bool_only = False option.
# NOTE(review): bool_only is documented as selecting boolean *columns*, not rows;
# the axis = "index" call with bool_only = True does return an empty Series, so the
# keyword looks like it is being ignored for axis = "columns" -- confirm against
# the pandas version in use.

df2.any(axis = "columns", skipna = False, bool_only = True)
Out[9]:
0    True
1    True
2    True
3    True
dtype: bool
In [10]:
# Same call with the default bool_only = None, now reducing along axis = "index"
# (one result per column); the result is again an object Series.
df2.any(axis = "index", skipna = False)
Out[10]:
col1       1
col2     NaN
col3     NaN
col4    True
dtype: object
In [11]:
# Per-column reduction with bool_only = False: a bool Series, True for every column
# (including col3, which contains only NaN).
df2.any(axis = "index", skipna = False, bool_only = False)
Out[11]:
col1    True
col2    True
col3    True
col4    True
dtype: bool
In [12]:
# Per-column reduction with bool_only = True: the result is an empty Series
# (df2.dtypes shows no column with bool dtype -- col4 is object).
df2.any(axis = "index", skipna = False, bool_only = True)
Out[12]:
Series([], dtype: bool)
In [13]:
# df3: every column mixes np.nan, 0 and booleans. Rows 0-1 are NaN except for a
# boolean in col4, row 2 is all zeros, row 3 all NaN, row 4 all False, row 5 all True.

df3_columns = ["col1", "col2", "col3", "col4"]
df3_rows = [
    [np.nan, np.nan, np.nan, True],
    [np.nan, np.nan, np.nan, False],
    [0, 0, 0, 0],
    [np.nan, np.nan, np.nan, np.nan],
    [False, False, False, False],
    [True, True, True, True],
]
df3 = pd.DataFrame(data=df3_rows, columns=df3_columns)
df3
Out[13]:
col1 col2 col3 col4
0 NaN NaN NaN True
1 NaN NaN NaN False
2 0 0 0 0
3 NaN NaN NaN NaN
4 False False False False
5 True True True True
In [14]:
# With NaN, integers and booleans mixed in each column, every column of df3 is
# reported with dtype object.
df3.dtypes
Out[14]:
col1    object
col2    object
col3    object
col4    object
dtype: object
In [15]:
# Row-wise reduction with the default bool_only = None: the result is an object
# Series holding NaN / 0 / False / True rather than plain booleans.
df3.any(axis = "columns", skipna = False)
Out[15]:
0      NaN
1      NaN
2        0
3      NaN
4    False
5     True
dtype: object
In [16]:
# Row-wise reduction with bool_only = False: a bool Series; the rows containing NaN
# come out True, the all-zero and all-False rows come out False.
df3.any(axis = "columns", skipna = False, bool_only = False)
Out[16]:
0     True
1     True
2    False
3     True
4    False
5     True
dtype: bool
In [17]:
# I expected this to be empty, but it gives exactly the same result as the
# bool_only = False call above -- this I can't really understand.
df3.any(axis = "columns", skipna = False, bool_only = True)
Out[17]:
0     True
1     True
2    False
3     True
4    False
5     True
dtype: bool
In [18]:
# Per-column reduction (axis = "index") with the default bool_only = None: again an
# object Series (NaN for col1-col3, True for col4).
df3.any(axis = "index", skipna = False)
Out[18]:
col1     NaN
col2     NaN
col3     NaN
col4    True
dtype: object
In [19]:
# Per-column reduction with bool_only = False: a bool Series, True for every column.
df3.any(axis = "index", skipna = False, bool_only = False)
Out[19]:
col1    True
col2    True
col3    True
col4    True
dtype: bool
In [20]:
# No column (Series) of df3 has all-boolean elements, so this returns an empty Series?
# But then why does the same bool_only = True call along axis = "columns" (In [17])
# not return an empty Series as well? This is baffling!
df3.any(axis = "index", skipna = False, bool_only = True)
Out[20]:
Series([], dtype: bool)