In [57]:
import pandas as pd
import numpy as np

In [58]:
sides = pd.DataFrame(data = {"shape" : ["triangle", "square", "rectangle", "pentagon", "hexagon"],
"sides" : [3, 4, 4, 5, 6]})
sides

Out[58]:
shape sides
0 triangle 3
1 square 4
2 rectangle 4
3 pentagon 5
4 hexagon 6
In [59]:
# here I have interchanged the pentagon and hexagon positions

angles = pd.DataFrame(data = {"shape" : ["triangle", "square", "rectangle", "hexagon", "pentagon"],
"angles_in_degrees" : [180, 360, 360, 720, 540]})
angles

Out[59]:
angles_in_degrees shape
0 180 triangle
1 360 square
2 360 rectangle
3 720 hexagon
4 540 pentagon
In [60]:
"""simple merge - I allow pandas to decide what works best here - It identifies the shape name and merges
angles for pentagon and hexagon correctly"""

pd.merge(left = sides, right = angles)

Out[60]:
shape sides angles_in_degrees
0 triangle 3 180
1 square 4 360
2 rectangle 4 360
3 pentagon 5 540
4 hexagon 6 720
In [61]:
"""as present in your notebook:
merged = pd.merge(left = combined_service_cat, right = years, left_index = True, right_index = True)

And we get a different results since we said merge on both indexes. Since the values are different for index 3 and 4 in both
dataframes we get a different result"""

pd.merge(left = sides, right = angles, left_index = True, right_index = True)

Out[61]:
shape_x sides angles_in_degrees shape_y
0 triangle 3 180 triangle
1 square 4 360 square
2 rectangle 4 360 rectangle
3 pentagon 5 720 hexagon
4 hexagon 6 540 pentagon
In [62]:
"""simple concat - I didnt assign 'axis' parameter a value - so this results in 2nd dataframe stacked below the 1st dataframe

Please Note: This is what the solution for this project actually wants! DETE below TAFE or vice-versa"""

pd.concat([sides, angles])

Out[62]:
angles_in_degrees shape sides
0 NaN triangle 3.0
1 NaN square 4.0
2 NaN rectangle 4.0
3 NaN pentagon 5.0
4 NaN hexagon 6.0
0 180.0 triangle NaN
1 360.0 square NaN
2 360.0 rectangle NaN
3 720.0 hexagon NaN
4 540.0 pentagon NaN
In [63]:
"""as present in your notebook:
combined_service_cat = pd.concat([combined_service_cat, years], axis = 1)

I assign 'axis' parameter a value - so the results 2nd dataframe is now next to the 1st one.
However the column name and values are now duplicated for 'shape'
"""
pd.concat([sides, angles], axis = 1)

Out[63]:
shape sides angles_in_degrees shape
0 triangle 3 180 triangle
1 square 4 360 square
2 rectangle 4 360 rectangle
3 pentagon 5 720 hexagon
4 hexagon 6 540 pentagon
In [64]:
dummy_years = pd.DataFrame({"years": ["1-2", 1, 5, 10, "11-12", np.NaN]})
dummy_years

Out[64]:
years
0 1-2
1 1
2 5
3 10
4 11-12
5 NaN
In [65]:
pattern = r"(?P<First_Year>[0-9][0-9]?)-?(?P<Second_Year>[1-9][0-9]?)?"

# I added transform astype(str) to your code so now it's posible extract all years
dummy_extract = dummy_years["years"].astype(str).str.extractall(pattern)
dummy_extract

Out[65]:
First_Year Second_Year
match
0 0 1 2
1 0 1 NaN
2 0 5 NaN
3 0 10 NaN
4 0 11 12
In [66]:
dummy_new = dummy_extract.reset_index("match")
dummy_new

Out[66]:
match First_Year Second_Year
0 0 1 2
1 0 1 NaN
2 0 5 NaN
3 0 10 NaN
4 0 11 12
In [67]:
# Now we suppose we only want the first_year column, although in my proyect I calculated a new column as
# an arithmetic mean of the other two. That's why I extracted all years in my proyect.
dummy_new = dummy_new.drop(["match", "Second_Year"], axis = 1)

dummy_new

Out[67]:
First_Year
0 1
1 1
2 5
3 10
4 11
In [68]:
merge = pd.merge(left = dummy_years , right = dummy_new, left_index = True, right_index = True)

merge

Out[68]:
years First_Year
0 1-2 1
1 1 1
2 5 5
3 10 10
4 11-12 11