#!/usr/bin/env python
# coding: utf-8

# 
# The 'city' column is nominal (unordered), so it is one-hot encoded.
# The 'size' column is ordinal (ordered), so it is ordinal encoded.

# ### Nominal OneHotEncoding 

# In[144]:


import pandas as pd


# In[186]:


# Toy dataset: a numeric 'sales' column plus two categorical columns.
# 'city' has no natural order (nominal); 'size' does (ordinal).
d = dict(
    sales=[100000, 222000, 10000000, 525000, 111111, 200000, 75000, 9000, 109000, 10000],
    city=['Tampa', 'Tampa', 'Orlando', 'Jacksonville', 'Miami', 'Miami', 'Orlando', 'Jacksonville', 'Jacksonville', 'Orlando'],
    size=['Small', 'Medium', 'Large', 'Medium', 'Medium', 'Large', 'Small', 'Small', 'Medium', 'Small'],
)


# In[187]:


# One row per sale; the dict keys become the column names.
df = pd.DataFrame(data=d)


# In[188]:


# Preview the first five rows.
df.head(5)


# In[189]:


# Distinct city labels, in order of first appearance.
df.city.unique()


# In[190]:


from sklearn.preprocessing import OneHotEncoder


# In[191]:


# The encoder is bound to `ohe_city` because every later cell refers to it by
# that name (binding it to `ohe` left those cells failing with a NameError).
# `sparse_output` replaces the `sparse` keyword, which was renamed in
# scikit-learn 1.2 and removed in 1.4.
ohe_city = OneHotEncoder(handle_unknown='ignore', sparse_output=False)


# **In this code:**
# 
# - handle_unknown='ignore' specifies that a category not seen during fit does not raise an error at transform time; its one-hot columns are all zeros.
# - sparse_output=False specifies that the output should be a dense array rather than a sparse matrix.

# In[192]:


# Fit the encoder on the 'city' column and transform it in one step.
# The double brackets pass a one-column DataFrame, since the encoder expects 2-D input.
ohe_transform_city = ohe_city.fit_transform(df[['city']])
ohe_transform_city


# In[193]:


# Column labels for the encoded array; input_features=['city'] makes each
# label carry the source column name, e.g. 'city_Miami'.
feature_names_city = ohe_city.get_feature_names_out(input_features=['city'])
feature_names_city


# In[194]:


# Wrap the encoded array in a DataFrame labelled with those feature names.
ohe_df_city = pd.DataFrame(ohe_transform_city, columns=feature_names_city)
ohe_df_city


# In[195]:


# Replace the original 'city' column with its one-hot columns: drop 'city'
# from df and append the encoded frame column-wise (rows align on the index).
df_encoded = pd.concat([df.drop(columns=['city']), ohe_df_city], axis=1)
df_encoded


# In[196]:


# Show the frame without 'city_Tampa'. One indicator column is redundant
# because the remaining ones determine it. The result is only displayed, not
# assigned, so df_encoded itself is left unchanged.
df_encoded.drop('city_Tampa', axis=1)


# ### OR

# In[197]:


# One-call alternative: let pandas expand 'city' into indicator columns
# itself; columns not listed in `columns` are passed through unchanged.
df_encoded = pd.get_dummies(data=df, columns=['city'])
df_encoded

# This is the more straightforward route.


# ### Ordinal Encoding

# In[211]:


# Starting point for the ordinal step: the original, un-encoded frame.
df


# In[212]:


# Labels present in 'size'.
df['size'].unique()


# In[213]:


# Explicit category order, lowest to highest. The position in this list
# becomes the encoded value: Small -> 0, Medium -> 1, Large -> 2.
sizes = ['Small', 'Medium', 'Large']


# In[214]:


from sklearn.preprocessing import OrdinalEncoder


# In[219]:


# `categories` takes one list per input column, hence the nested list.
enc = OrdinalEncoder(categories=[sizes])


# In[221]:


# Preview the encoded values without modifying df.
enc.fit_transform(df[['size']])


# In[217]:


df.head()


# In[222]:


# Store the codes in a clearly named new column. The earlier name 'size '
# (with a trailing space) was nearly indistinguishable from 'size' and made
# lookups such as df['size'] silently hit the wrong column. The original
# 'size' strings are kept, so this cell can be re-run safely.
df['size_encoded'] = enc.fit_transform(df[['size']])


# In[224]:


# Show all ten rows with the new encoded column alongside the original labels.
df.head(10)


# ##### Now convert the nominal data (city)

# In[206]:


# Indicator columns for 'city' only; passing a one-column frame means the
# result contains nothing but the city_* columns.
df_encoded = pd.get_dummies(df.loc[:, ['city']])


# In[225]:


df_encoded


# In[226]:


# Place the indicator columns next to the existing frame (rows align on the index).
final_df = pd.concat(objs=(df, df_encoded), axis='columns')
final_df


# In[232]:


# Names of every column still holding raw strings (object dtype)
string_size_columns = final_df.select_dtypes(include='object').columns

# Remove all of those string columns, leaving only the non-object columns
final_df = final_df.drop(string_size_columns, axis='columns')


# In[233]:


final_df


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]: