# Notebook presentation helpers: widen the page and add a button that hides/shows code cells.
# `display` and `HTML` come from the public `IPython.display` module; importing `display`
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Script that toggles the visibility of every `div.input` element, plus a form button that
# calls it. The toggle is also registered to run once when the document is ready.
# NOTE(review): as a bare expression this is only rendered when it is the last expression
# of a notebook cell — confirm the cell boundaries in the original notebook.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
# Import libraries
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import squarify
import plotly.express as px
import seaborn as sns
# Load the subject-enrollment workbook (column names are taken from zero-based row 16),
# then discard the UKPRN identifier column and the row labelled 18 (the "Total" row)
df = (
    pd.read_excel("scottish_universities_subject_enrollments.xlsx", header=16)
    .drop(columns=["UKPRN"])
    .drop(index=[18])
)
# Show the cleaned wide table
df
HE provider | Abbreviation | 01 Medicine and dentistry | 02 Subjects allied to medicine | 03 Biological and sport sciences | 04 Psychology | 05 Veterinary sciences | 06 Agriculture, food and related studies | 07 Physical sciences | 08 General and others in sciences | ... | 15 Social sciences | 16 Law | 17 Business and management | 18 Communications and media | 19 Language and area studies | 20 Historical, philosophical and religious studies | 21 Creative arts and design | 22 Education and teaching | 23 Combined and general studies | Total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abertay University | AU | 0 | 260 | 310 | 400 | 0 | 160 | 75 | 0 | ... | 380 | 205 | 435 | 0 | 0 | 0 | 0 | 0 | 0 | 3665 |
1 | Edinburgh Napier University | ENU | 0 | 1880 | 520 | 250 | 180 | 30 | 0 | 0 | ... | 470 | 265 | 1905 | 255 | 365 | 0 | 810 | 0 | 30 | 9365 |
2 | Glasgow Caledonian University | GCU | 90 | 3990 | 105 | 445 | 0 | 70 | 170 | 0 | ... | 785 | 285 | 2675 | 325 | 40 | 0 | 315 | 0 | 0 | 12185 |
3 | Glasgow School of Art | GSoA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 15 | 0 | 0 | 0 | 5 | 225 | 1010 | 0 | 0 | 1605 |
4 | Heriot-Watt University | HWU | 0 | 0 | 330 | 300 | 0 | 60 | 440 | 5 | ... | 265 | 25 | 1600 | 0 | 290 | 0 | 630 | 0 | 0 | 7605 |
5 | Queen Margaret University, Edinburgh | QMU | 0 | 1165 | 60 | 235 | 0 | 0 | 0 | 0 | ... | 175 | 0 | 590 | 385 | 0 | 0 | 360 | 125 | 0 | 3105 |
6 | Robert Gordon University | RGU | 0 | 2325 | 155 | 0 | 0 | 0 | 125 | 0 | ... | 560 | 380 | 1715 | 465 | 0 | 0 | 680 | 0 | 0 | 8080 |
7 | Royal Conservatoire of Scotland | RCS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 805 | 100 | 0 | 900 |
8 | SRUC | SRUC | 0 | 0 | 0 | 0 | 60 | 900 | 0 | 50 | ... | 0 | 0 | 40 | 0 | 0 | 0 | 0 | 0 | 0 | 1150 |
9 | The University of Aberdeen | UoA | 1070 | 370 | 880 | 685 | 0 | 5 | 375 | 0 | ... | 1200 | 875 | 710 | 0 | 600 | 565 | 210 | 505 | 5 | 9510 |
10 | The University of Dundee | UoD | 1225 | 2335 | 300 | 435 | 0 | 0 | 135 | 0 | ... | 590 | 615 | 615 | 35 | 315 | 350 | 1180 | 415 | 10 | 10255 |
11 | The University of Edinburgh | UoE | 1145 | 580 | 1605 | 620 | 745 | 75 | 1530 | 10 | ... | 2595 | 895 | 1385 | 0 | 2365 | 2340 | 1200 | 520 | 60 | 22395 |
12 | The University of Glasgow | UoG | 1925 | 840 | 1295 | 545 | 730 | 45 | 1045 | 0 | ... | 1800 | 865 | 920 | 35 | 1675 | 1520 | 560 | 935 | 0 | 18555 |
13 | The University of St Andrews | UoStA | 575 | 75 | 460 | 475 | 0 | 0 | 835 | 35 | ... | 1350 | 0 | 360 | 95 | 1030 | 1110 | 0 | 0 | 100 | 7790 |
14 | The University of Stirling | UoS | 0 | 1045 | 985 | 665 | 0 | 45 | 5 | 20 | ... | 1070 | 390 | 1145 | 440 | 520 | 490 | 70 | 420 | 0 | 8100 |
15 | The University of Strathclyde | UoSc | 0 | 1015 | 430 | 420 | 0 | 0 | 915 | 15 | ... | 940 | 895 | 2040 | 130 | 615 | 325 | 135 | 810 | 95 | 13960 |
16 | The University of the West of Scotland | UoWoS | 0 | 2855 | 1150 | 515 | 0 | 0 | 250 | 0 | ... | 1750 | 190 | 2455 | 285 | 0 | 0 | 540 | 325 | 0 | 11900 |
17 | University of the Highlands and Islands | UoH&I | 40 | 570 | 315 | 105 | 0 | 135 | 120 | 75 | ... | 1030 | 0 | 925 | 40 | 155 | 305 | 755 | 65 | 0 | 5510 |
18 rows × 27 columns
# Reshape from one column per subject (wide) to one row per provider/subject pair (long).
# The subject columns are listed explicitly so "Total" is not melted with them.
subject_columns = [
    '01 Medicine and dentistry',
    '02 Subjects allied to medicine',
    '03 Biological and sport sciences',
    '04 Psychology',
    '05 Veterinary sciences',
    '06 Agriculture, food and related studies',
    '07 Physical sciences',
    '08 General and others in sciences',
    '09 Mathematical sciences',
    '10 Engineering and technology',
    '11 Computing',
    '12 Geographical and environmental studies (natural sciences)',
    '13 Architecture, building and planning',
    '12 Geographical and environmental studies (social sciences)',
    '14 Humanities and liberal arts (non-specific)',
    '15 Social sciences',
    '16 Law',
    '17 Business and management',
    '18 Communications and media',
    '19 Language and area studies',
    '20 Historical, philosophical and religious studies',
    '21 Creative arts and design',
    '22 Education and teaching',
    '23 Combined and general studies',
]
melted_df = df.melt(
    id_vars=['HE provider', 'Abbreviation'],
    value_vars=subject_columns,
    var_name='Subject',
    value_name='Number of students',
)
The plot below orders the university and subsequently the subject area by number of student enrollments. Also, the darker the shade of blue and the larger the cube, the greater the number of students.
From the plot, we can see that The University of Edinburgh has the largest number of student enrollments (22,390) and the Royal Conservatoire of Scotland has the smallest number of student enrollments (905).
We can also see that "02 Subjects allied to medicine" at Glasgow Caledonian University has the greatest number of student enrollments out of the subject areas (3990), followed by "10 Engineering and technology" at The University of Strathclyde (3725).
Student enrollments in Computing subjects account for approx. 35% of Abertay University's total enrollments (1295/3655). However, this is not the highest number of Computing student enrollments at a single university: Edinburgh Napier University has 1440 (approx. 15% of its student enrollments).
# Keep only provider/subject pairs with a non-zero student count (the treemap doesn't like zeros)
has_students = melted_df['Number of students'] != 0
melted_df = melted_df[has_students]
# Treemap nested provider -> subject; the student count drives both tile size and colour
fig = px.treemap(
    melted_df,
    path=['HE provider', 'Subject'],
    values='Number of students',
    color='Number of students',
    color_continuous_scale='PuBu',
    width=1200,
    height=600,
)
fig.show()
Here we can see which subjects have the greatest number of student enrollments and which universities are contributing the most.
From below, we can see that "17 Business and management" is the top subject with 19,515 student enrollments, followed closely by "02 Subjects allied to medicine" with 19,305 enrollments.
In terms of student enrollment, "11 Computing" is the fifth largest of all subject areas (10,245). The universities contributing most to this are Edinburgh Napier University (1440), Abertay University (1295), and Glasgow Caledonian University (1195).
# Same treemap with the hierarchy flipped: subject first, then provider
treemap_options = dict(
    values='Number of students',
    color='Number of students',
    color_continuous_scale='PuBu',
    width=1200,
    height=600,
)
fig2 = px.treemap(melted_df, path=['Subject', 'HE provider'], **treemap_options)
fig2.show()
# Export a static PNG.
# NOTE(review): this writes `fig`, not the `fig2` shown just above — confirm that is intended.
fig.write_image("scottish_unis.png")
Which universities and higher education institutes are females studying at in Scotland? Is there a relationship between the number of female students and what universities are most well known for?
(e.g. I associate engineering and tech with the University of Strathclyde, sports with the University of Stirling, traditional subjects with the University of St. Andrews).
# Load the per-provider sex breakdown; column names are taken from zero-based row 17
df_sex = pd.read_excel("scottish_universities_sex.xlsx", header=17)
# Discard, in place, every row that contains at least one missing value
df_sex.dropna(axis=0, how="any", inplace=True)
# Print the column dtypes and non-null counts
df_sex.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 18 entries, 0 to 17 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 UKPRN 18 non-null float64 1 HE provider 18 non-null object 2 Abbreviation 18 non-null object 3 Female 18 non-null float64 4 Male 18 non-null float64 5 Other 18 non-null float64 6 Not known 18 non-null object 7 Total 18 non-null float64 dtypes: float64(5), object(3) memory usage: 1.3+ KB
# The count columns load as floats; cast each to a 64-bit integer
# (any remaining missing value becomes 0 first)
for count_col in ("UKPRN", "Female", "Male", "Other", "Total"):
    df_sex[count_col] = df_sex[count_col].fillna(0).astype("int64")
df_sex
UKPRN | HE provider | Abbreviation | Female | Male | Other | Not known | Total | |
---|---|---|---|---|---|---|---|---|
0 | 10007783 | The University of Aberdeen | AU | 5585 | 3910 | 20 | 0 | 9510 |
1 | 10007849 | Abertay University | ENU | 1740 | 1925 | 0 | 0 | 3665 |
2 | 10007852 | The University of Dundee | GCU | 6670 | 3585 | 5 | 0 | 10255 |
3 | 10007772 | Edinburgh Napier University | GSoA | 5320 | 4040 | 0 | 0 | 9365 |
4 | 10007790 | The University of Edinburgh | HWU | 13560 | 8815 | 20 | 0 | 22395 |
5 | 10007762 | Glasgow Caledonian University | QMU | 7120 | 5060 | 5 | 0 | 12185 |
6 | 10002681 | Glasgow School of Art | RGU | 1095 | 480 | 30 | 0 | 1605 |
7 | 10007794 | The University of Glasgow | RCS | 10830 | 7690 | 35 | 0 | 18555 |
8 | 10007764 | Heriot-Watt University | SRUC | 3115 | 4490 | 0 | 0 | 7605 |
9 | 10005337 | Queen Margaret University, Edinburgh | UoA | 2325 | 775 | 5 | 0 | 3105 |
10 | 10005500 | Robert Gordon University | UoD | 5115 | 2965 | 0 | 0 | 8080 |
11 | 10005561 | Royal Conservatoire of Scotland | UoE | 520 | 380 | 0 | 0 | 900 |
12 | 10007803 | The University of St Andrews | UoG | 4710 | 3075 | 5 | 0 | 7790 |
13 | 10005700 | SRUC | UoStA | 715 | 435 | 5 | 0 | 1150 |
14 | 10007804 | The University of Stirling | UoS | 5260 | 2830 | 15 | 0 | 8100 |
15 | 10007805 | The University of Strathclyde | UoSc | 6885 | 7050 | 20 | 0 | 13960 |
16 | 10007114 | University of the Highlands and Islands | UoWoS | 3325 | 2175 | 10 | 0 | 5510 |
17 | 10007800 | The University of the West of Scotland | UoH&I | 7605 | 4290 | 0 | 0 | 11900 |
# The "Not known" column is not used in the analysis; remove it
df_sex.drop(columns=["Not known"], inplace=True)
female_counts = df_sex["Female"]
# Female share of each provider's own student body, to one decimal place
df_sex["% of females at corresp. HE"] = ((female_counts / df_sex["Total"]) * 100).round(1)
# Each provider's share of all female students in the table, to one decimal place
df_sex["% of all females in HE"] = ((female_counts / female_counts.sum()) * 100).round(1)
# Show providers ranked from highest to lowest female share
df_sex.sort_values("% of females at corresp. HE", ascending=False)
UKPRN | HE provider | Abbreviation | Female | Male | Other | Total | % of females at corresp. HE | % of all females in HE | |
---|---|---|---|---|---|---|---|---|---|
9 | 10005337 | Queen Margaret University, Edinburgh | UoA | 2325 | 775 | 5 | 3105 | 74.9 | 2.5 |
6 | 10002681 | Glasgow School of Art | RGU | 1095 | 480 | 30 | 1605 | 68.2 | 1.2 |
2 | 10007852 | The University of Dundee | GCU | 6670 | 3585 | 5 | 10255 | 65.0 | 7.3 |
14 | 10007804 | The University of Stirling | UoS | 5260 | 2830 | 15 | 8100 | 64.9 | 5.7 |
17 | 10007800 | The University of the West of Scotland | UoH&I | 7605 | 4290 | 0 | 11900 | 63.9 | 8.3 |
10 | 10005500 | Robert Gordon University | UoD | 5115 | 2965 | 0 | 8080 | 63.3 | 5.6 |
13 | 10005700 | SRUC | UoStA | 715 | 435 | 5 | 1150 | 62.2 | 0.8 |
4 | 10007790 | The University of Edinburgh | HWU | 13560 | 8815 | 20 | 22395 | 60.5 | 14.8 |
12 | 10007803 | The University of St Andrews | UoG | 4710 | 3075 | 5 | 7790 | 60.5 | 5.1 |
16 | 10007114 | University of the Highlands and Islands | UoWoS | 3325 | 2175 | 10 | 5510 | 60.3 | 3.6 |
0 | 10007783 | The University of Aberdeen | AU | 5585 | 3910 | 20 | 9510 | 58.7 | 6.1 |
5 | 10007762 | Glasgow Caledonian University | QMU | 7120 | 5060 | 5 | 12185 | 58.4 | 7.8 |
7 | 10007794 | The University of Glasgow | RCS | 10830 | 7690 | 35 | 18555 | 58.4 | 11.8 |
11 | 10005561 | Royal Conservatoire of Scotland | UoE | 520 | 380 | 0 | 900 | 57.8 | 0.6 |
3 | 10007772 | Edinburgh Napier University | GSoA | 5320 | 4040 | 0 | 9365 | 56.8 | 5.8 |
15 | 10007805 | The University of Strathclyde | UoSc | 6885 | 7050 | 20 | 13960 | 49.3 | 7.5 |
1 | 10007849 | Abertay University | ENU | 1740 | 1925 | 0 | 3665 | 47.5 | 1.9 |
8 | 10007764 | Heriot-Watt University | SRUC | 3115 | 4490 | 0 | 7605 | 41.0 | 3.4 |
# Reshape to one row per provider/sex pair.
# NOTE(review): in the rendered df_sex table the Abbreviation values do not line up with the
# HE provider names (e.g. "The University of Aberdeen" is paired with "AU"). Abbreviation is
# carried along here but is not used by the treemap — verify the spreadsheet.
melted_df_sex = df_sex.melt(
    id_vars=['HE provider', 'Abbreviation'],
    value_vars=['Female', 'Male', 'Other'],
    var_name='Sex',
    value_name='Number of students',
)
import plotly.express as px
# Keep only rows with a non-zero count (the treemap doesn't like zeros)
nonzero_rows = melted_df_sex['Number of students'] != 0
melted_df_sex = melted_df_sex[nonzero_rows]
# Treemap nested provider -> sex; the student count drives both tile size and colour
fig_sex = px.treemap(
    melted_df_sex,
    path=['HE provider', 'Sex'],
    values='Number of students',
    color='Number of students',
    color_continuous_scale='PuBu',
    width=1200,
    height=600,
)
fig_sex.show()
Filters:
Scottish universities with the greatest proportion of female students:
The top two universities are traditionally non-tech/non-engineering/non-computing universities (Glasgow School of Art however does offer architectural and design courses which would require computing skills). The University of Dundee's top subjects are "Subjects allied to medicine", "Medicine and dentistry", and "Creative arts and design".
Scottish universities with the smallest proportion of female students:
The subjects with the greatest student enrollment (irrespective of sex) at these universities are Engineering and Technology (Heriot-Watt University and The University of Strathclyde) and Computing (Abertay University).
# Independent copy of just the provider labels, the Computing count and the overall total
computing_cols = ["HE provider", "Abbreviation", "11 Computing", "Total"]
computing = df.loc[:, computing_cols].copy()
# Show providers from fewest to most Computing students
computing.sort_values(by="11 Computing")
HE provider | Abbreviation | 11 Computing | Total | |
---|---|---|---|---|
8 | SRUC | SRUC | 0 | 1150 |
7 | Royal Conservatoire of Scotland | RCS | 0 | 900 |
5 | Queen Margaret University, Edinburgh | QMU | 0 | 3105 |
3 | Glasgow School of Art | GSoA | 30 | 1605 |
9 | The University of Aberdeen | UoA | 160 | 9510 |
17 | University of the Highlands and Islands | UoH&I | 280 | 5510 |
10 | The University of Dundee | UoD | 365 | 10255 |
13 | The University of St Andrews | UoStA | 395 | 7790 |
14 | The University of Stirling | UoS | 440 | 8100 |
4 | Heriot-Watt University | HWU | 445 | 7605 |
6 | Robert Gordon University | RGU | 600 | 8080 |
15 | The University of Strathclyde | UoSc | 630 | 13960 |
12 | The University of Glasgow | UoG | 910 | 18555 |
16 | The University of the West of Scotland | UoWoS | 1000 | 11900 |
11 | The University of Edinburgh | UoE | 1060 | 22395 |
2 | Glasgow Caledonian University | GCU | 1195 | 12185 |
0 | Abertay University | AU | 1295 | 3665 |
1 | Edinburgh Napier University | ENU | 1440 | 9365 |
# Keep only the universities that have Computing students. Filtering on the value, rather
# than on hard-coded row labels, stays correct if the spreadsheet rows change.
computing = computing[computing["11 Computing"] != 0].copy()
computing_counts = computing["11 Computing"]
sum_computing_students = computing_counts.sum()

# Customise color: map each count onto the PuBu colormap, scaled between the smallest
# and the largest count
cmap = matplotlib.cm.PuBu
mini = computing_counts.min()
maxi = computing_counts.max()
norm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi)
colors = [cmap(norm(value)) for value in computing_counts]

# Plot
fig, ax = plt.subplots(figsize=(10, 6))
squarify.plot(sizes=computing_counts,
              # Labels must come from the same filtered frame as the sizes. Taking them
              # from the unfiltered `df` pairs tiles with the wrong universities.
              label=computing["Abbreviation"],
              alpha=.8,
              color=colors,
              pad=False,
              text_kwargs={'color':'black', 'weight':'bold', 'size':12})
plt.axis('off')
# The total in the title is computed from the plotted data instead of being hard-coded
plt.title(
    "Proportion of students enrolled in Computing UG degrees at Scottish universities "
    f"(total: {int(sum_computing_students):,})",
    size=14,
)
plt.show()
Filters:
Total number of students enrolled in Computing UG degrees 2019/2020: 10,245
Scottish universities with the greatest number of students enrolled in Computing UG degree subjects:
Scottish universities with the smallest number of students enrolled in Computing UG degree subjects:
Scottish universities which do not offer UG degree in Computing subjects:
# Sum the long-format counts for the overall total, and the Computing column for the
# Computing total
students_col = "Number of students"
total_students = melted_df[students_col].sum()
total_computing_students = computing["11 Computing"].sum()
# Computing's share of all students, as a percentage rounded to two decimal places
percent_of_cs = round((total_computing_students / total_students) * 100, 2)

print("Total number of UG full-time students:", total_students)
print("Total number of UG full-time Computing students:", total_computing_students)
print("Percentage of UG full-time students studying Computing:", percent_of_cs)
Total number of UG full-time students: 155640 Total number of UG full-time Computing students: 10245 Percentage of UG full-time students studying Computing: 6.58
# Display the long-format enrollment table (rows with a zero student count were filtered out earlier)
melted_df
HE provider | Abbreviation | Subject | Number of students | |
---|---|---|---|---|
2 | Glasgow Caledonian University | GCU | 01 Medicine and dentistry | 90 |
9 | The University of Aberdeen | UoA | 01 Medicine and dentistry | 1070 |
10 | The University of Dundee | UoD | 01 Medicine and dentistry | 1225 |
11 | The University of Edinburgh | UoE | 01 Medicine and dentistry | 1145 |
12 | The University of Glasgow | UoG | 01 Medicine and dentistry | 1925 |
... | ... | ... | ... | ... |
423 | The University of Aberdeen | UoA | 23 Combined and general studies | 5 |
424 | The University of Dundee | UoD | 23 Combined and general studies | 10 |
425 | The University of Edinburgh | UoE | 23 Combined and general studies | 60 |
427 | The University of St Andrews | UoStA | 23 Combined and general studies | 100 |
429 | The University of Strathclyde | UoSc | 23 Combined and general studies | 95 |
254 rows × 4 columns
# Total the student counts per subject and then per provider, and show the first 50 groups.
# The count column is selected explicitly so the text column "Abbreviation" is never
# aggregated. On pandas >= 2.0 the group-by sum no longer defaults to numeric columns only.
melted_df.groupby(["Subject", "HE provider"])[["Number of students"]].sum().head(50)
Number of students | ||
---|---|---|
Subject | HE provider | |
01 Medicine and dentistry | Glasgow Caledonian University | 90 |
The University of Aberdeen | 1070 | |
The University of Dundee | 1225 | |
The University of Edinburgh | 1145 | |
The University of Glasgow | 1925 | |
The University of St Andrews | 575 | |
University of the Highlands and Islands | 40 | |
02 Subjects allied to medicine | Abertay University | 260 |
Edinburgh Napier University | 1880 | |
Glasgow Caledonian University | 3990 | |
Queen Margaret University, Edinburgh | 1165 | |
Robert Gordon University | 2325 | |
The University of Aberdeen | 370 | |
The University of Dundee | 2335 | |
The University of Edinburgh | 580 | |
The University of Glasgow | 840 | |
The University of St Andrews | 75 | |
The University of Stirling | 1045 | |
The University of Strathclyde | 1015 | |
The University of the West of Scotland | 2855 | |
University of the Highlands and Islands | 570 | |
03 Biological and sport sciences | Abertay University | 310 |
Edinburgh Napier University | 520 | |
Glasgow Caledonian University | 105 | |
Heriot-Watt University | 330 | |
Queen Margaret University, Edinburgh | 60 | |
Robert Gordon University | 155 | |
The University of Aberdeen | 880 | |
The University of Dundee | 300 | |
The University of Edinburgh | 1605 | |
The University of Glasgow | 1295 | |
The University of St Andrews | 460 | |
The University of Stirling | 985 | |
The University of Strathclyde | 430 | |
The University of the West of Scotland | 1150 | |
University of the Highlands and Islands | 315 | |
04 Psychology | Abertay University | 400 |
Edinburgh Napier University | 250 | |
Glasgow Caledonian University | 445 | |
Heriot-Watt University | 300 | |
Queen Margaret University, Edinburgh | 235 | |
The University of Aberdeen | 685 | |
The University of Dundee | 435 | |
The University of Edinburgh | 620 | |
The University of Glasgow | 545 | |
The University of St Andrews | 475 | |
The University of Stirling | 665 | |
The University of Strathclyde | 420 | |
The University of the West of Scotland | 515 | |
University of the Highlands and Islands | 105 |
# Independent copy of the long-format rows for the "11 Computing" subject
is_computing = melted_df["Subject"] == "11 Computing"
computing = melted_df.loc[is_computing].copy()
# Show providers from fewest to most Computing students
computing.sort_values(by="Number of students")
HE provider | Abbreviation | Subject | Number of students | |
---|---|---|---|---|
183 | Glasgow School of Art | GSoA | 11 Computing | 30 |
189 | The University of Aberdeen | UoA | 11 Computing | 160 |
197 | University of the Highlands and Islands | UoH&I | 11 Computing | 280 |
190 | The University of Dundee | UoD | 11 Computing | 365 |
193 | The University of St Andrews | UoStA | 11 Computing | 395 |
194 | The University of Stirling | UoS | 11 Computing | 440 |
184 | Heriot-Watt University | HWU | 11 Computing | 445 |
186 | Robert Gordon University | RGU | 11 Computing | 600 |
195 | The University of Strathclyde | UoSc | 11 Computing | 630 |
192 | The University of Glasgow | UoG | 11 Computing | 910 |
196 | The University of the West of Scotland | UoWoS | 11 Computing | 1000 |
191 | The University of Edinburgh | UoE | 11 Computing | 1060 |
182 | Glasgow Caledonian University | GCU | 11 Computing | 1195 |
180 | Abertay University | AU | 11 Computing | 1295 |
181 | Edinburgh Napier University | ENU | 11 Computing | 1440 |
# Read in data: computing enrolments per provider; column names are taken from zero-based row 8
df = pd.read_excel("computing_student_numbers.xlsx", header=8)
# Wide to long data: one row per provider/academic-year pair
melted_df = pd.melt(df, id_vars=['HE provider'], var_name="Academic Year", value_name='Number of students')
# Drop Total rows. As before, a blank provider is treated as a total row, but only the provider
# column is inspected. A blanket fillna("Total") would also write the string "Total" into any
# missing enrolment counts and corrupt the numeric column that is plotted.
provider = melted_df["HE provider"]
melted_df = melted_df[provider.notna() & (provider != "Total")].copy()
# Order rows by academic year so the x-axis categories appear in sequence
melted_df.sort_values(by="Academic Year", inplace=True)

plt.figure(figsize=(16, 6))
g = sns.lineplot(x="Academic Year", y="Number of students", hue="HE provider", palette="Paired", data=melted_df)
plt.title("Number of first year enrolments in Computer Science degrees in Scotland")
plt.ylabel("Enrolments")
# Put the legend out of the figure
g.legend(loc='center right', bbox_to_anchor=(1.3, 0.5), ncol=1)
plt.show()