import pandas as pd
import os as os
import numpy as np
from ebmdatalab import bq, maps, charts
pd.set_option('display.float_format', lambda x: '%.2f' % x)
sql = '''
WITH
bnf_tab AS (
SELECT
DISTINCT chemical,
chemical_code
FROM
ebmdatalab.hscic.bnf )
SELECT
SUBSTR(presc.bnf_code, 0, 9) AS chemical_code,
chemical,
SUM(case when month = "2019-03-01" then items else 0 END) as items_2019,
SUM(case when month = "2020-03-01" then items else 0 END) as items_2020
FROM
ebmdatalab.hscic.normalised_prescribing AS presc
LEFT JOIN
bnf_tab
ON
chemical_code=SUBSTR(presc.bnf_code,0,9)
WHERE
month BETWEEN TIMESTAMP('2019-03-01')
AND TIMESTAMP('2020-03-01')
GROUP BY
chemical_code,
chemical
ORDER BY
items_2020 DESC
'''
df_chemical = bq.cached_read(sql, csv_path=os.path.join('..','data','df_chemical.csv'))
df_chemical.head(5)
chemical_code | chemical | items_2019 | items_2020 | |
---|---|---|---|---|
0 | 0212000B0 | Atorvastatin | 3697616 | 4382962 |
1 | 0602010V0 | Levothyroxine sodium | 2707630 | 3050351 |
2 | 0103050P0 | Omeprazole | 2606609 | 2973978 |
3 | 0206020A0 | Amlodipine | 2509145 | 2866241 |
4 | 0301011R0 | Salbutamol | 1792093 | 2776025 |
df_march_diff = df_chemical.copy()
df_march_diff["increase"] = (df_march_diff.items_2020 - df_march_diff.items_2019).fillna(0)
df_march_diff["per_diff"] = 100*((df_march_diff.items_2020 - df_march_diff.items_2019)/df_march_diff.items_2019)
df_march_diff.head(5)
chemical_code | chemical | items_2019 | items_2020 | increase | per_diff | |
---|---|---|---|---|---|---|
0 | 0212000B0 | Atorvastatin | 3697616 | 4382962 | 685346 | 18.53 |
1 | 0602010V0 | Levothyroxine sodium | 2707630 | 3050351 | 342721 | 12.66 |
2 | 0103050P0 | Omeprazole | 2606609 | 2973978 | 367369 | 14.09 |
3 | 0206020A0 | Amlodipine | 2509145 | 2866241 | 357096 | 14.23 |
4 | 0301011R0 | Salbutamol | 1792093 | 2776025 | 983932 | 54.90 |
df_march_diff.sort_values("increase", ascending=False).head(26)
chemical_code | chemical | items_2019 | items_2020 | increase | per_diff | |
---|---|---|---|---|---|---|
4 | 0301011R0 | Salbutamol | 1792093 | 2776025 | 983932 | 54.90 |
0 | 0212000B0 | Atorvastatin | 3697616 | 4382962 | 685346 | 18.53 |
14 | 0302000C0 | Beclometasone dipropionate | 926886 | 1429334 | 502448 | 54.21 |
2 | 0103050P0 | Omeprazole | 2606609 | 2973978 | 367369 | 14.09 |
3 | 0206020A0 | Amlodipine | 2509145 | 2866241 | 357096 | 14.23 |
6 | 0103050L0 | Lansoprazole | 2168631 | 2515835 | 347204 | 16.01 |
1 | 0602010V0 | Levothyroxine sodium | 2707630 | 3050351 | 342721 | 12.66 |
5 | 0205051R0 | Ramipril | 2416608 | 2687812 | 271204 | 11.22 |
7 | 0204000H0 | Bisoprolol fumarate | 2024810 | 2278618 | 253808 | 12.53 |
12 | 0407010H0 | Paracetamol | 1465480 | 1708362 | 242882 | 16.57 |
13 | 0403030Q0 | Sertraline hydrochloride | 1354400 | 1588133 | 233733 | 17.26 |
9 | 0601022B0 | Metformin hydrochloride | 1839377 | 2044905 | 205528 | 11.17 |
8 | 0906040G0 | Colecalciferol | 2002353 | 2153331 | 150978 | 7.54 |
24 | 0603020T0 | Prednisolone | 589864 | 735107 | 145243 | 24.62 |
37 | 0208020Z0 | Apixaban | 456303 | 578660 | 122357 | 26.81 |
19 | 0205052N0 | Losartan potassium | 841604 | 960710 | 119106 | 14.15 |
17 | 0403010B0 | Amitriptyline hydrochloride | 1155943 | 1253092 | 97149 | 8.40 |
21 | 0403040X0 | Mirtazapine | 784122 | 878032 | 93910 | 11.98 |
54 | 0302000K0 | Budesonide | 303376 | 396757 | 93381 | 30.78 |
15 | 0407010F0 | Co-codamol (Codeine phosphate/paracetamol) | 1255617 | 1348040 | 92423 | 7.36 |
45 | 0302000N0 | Fluticasone propionate (Inhalation) | 389084 | 478602 | 89518 | 23.01 |
20 | 0209000C0 | Clopidogrel | 829796 | 915861 | 86065 | 10.37 |
16 | 0403030D0 | Citalopram hydrobromide | 1177608 | 1253828 | 76220 | 6.47 |
26 | 0704010U0 | Tamsulosin hydrochloride | 621205 | 696692 | 75487 | 12.15 |
29 | 0205052C0 | Candesartan cilexetil | 584322 | 659461 | 75139 | 12.86 |
10 | 0209000A0 | Aspirin | 1854585 | 1928371 | 73786 | 3.98 |
high_volume_diff = df_march_diff.loc[(df_march_diff["items_2020"] >= 50000)].sort_values("per_diff", ascending=False)
high_volume_diff.head(26)
chemical_code | chemical | items_2019 | items_2020 | increase | per_diff | |
---|---|---|---|---|---|---|
201 | 214800001 | DUMMY CHEMICAL SUBSTANCE 214800001 | 16947 | 74021 | 57074 | 336.78 |
170 | 0301011AB | Beclometdiprop/formoterol/glycopyrronium | 44292 | 102306 | 58014 | 130.98 |
163 | 0208020AA | Edoxaban | 49447 | 109435 | 59988 | 121.32 |
4 | 0301011R0 | Salbutamol | 1792093 | 2776025 | 983932 | 54.90 |
129 | 0601023AN | Empagliflozin | 97381 | 150548 | 53167 | 54.60 |
14 | 0302000C0 | Beclometasone dipropionate | 926886 | 1429334 | 502448 | 54.21 |
109 | 0302000V0 | Fluticasone furoate (Inhalation) | 125770 | 190100 | 64330 | 51.15 |
224 | 0301011V0 | Terbutaline sulphate | 41821 | 60974 | 19153 | 45.80 |
200 | 0301040W0 | Umeclidinium bromide/vilanterol | 51199 | 74370 | 23171 | 45.26 |
228 | 0601023AQ | Dulaglutide | 40536 | 58702 | 18166 | 44.81 |
235 | 238030680 | DUMMY CHEMICAL SUBSTANCE 238030680 | 38758 | 55909 | 17151 | 44.25 |
138 | 0604011G0 | Estradiol | 96276 | 137885 | 41609 | 43.22 |
98 | 0601011A0 | Insulin aspart | 159936 | 212354 | 52418 | 32.77 |
252 | 0601023AM | Canagliflozin | 37901 | 50224 | 12323 | 32.51 |
141 | 0601023AK | Alogliptin | 104687 | 136923 | 32236 | 30.79 |
54 | 0302000K0 | Budesonide | 303376 | 396757 | 93381 | 30.78 |
125 | 210109007 | DUMMY CHEMICAL SUBSTANCE 210109007 | 122514 | 158535 | 36021 | 29.40 |
68 | 0501030I0 | Doxycycline hyclate | 249319 | 321744 | 72425 | 29.05 |
66 | 0303020G0 | Montelukast | 256996 | 326129 | 69133 | 26.90 |
37 | 0208020Z0 | Apixaban | 456303 | 578660 | 122357 | 26.81 |
159 | 0601023AG | Dapagliflozin | 89667 | 113493 | 23826 | 26.57 |
83 | 0212000AA | Rosuvastatin calcium | 201901 | 254981 | 53080 | 26.29 |
24 | 0603020T0 | Prednisolone | 589864 | 735107 | 145243 | 24.62 |
122 | 0601012V0 | Insulin glargine | 130806 | 161726 | 30920 | 23.64 |
45 | 0302000N0 | Fluticasone propionate (Inhalation) | 389084 | 478602 | 89518 | 23.01 |
145 | 0304010W0 | Promethazine hydrochloride | 103469 | 126182 | 22713 | 21.95 |
df_abx_a = df_march_diff[df_march_diff["chemical_code"].str.startswith("050")].sort_values("increase", ascending=False)
df_abx = df_abx_a.loc[(df_abx_a["items_2020"] >= 5)]
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
df_abx
chemical_code | chemical | items_2019 | items_2020 | increase | per_diff | |
---|---|---|---|---|---|---|
68 | 0501030I0 | Doxycycline hyclate | 249319 | 321744 | 72425 | 29.05 |
27 | 0501013B0 | Amoxicillin | 667132 | 694726 | 27594 | 4.14 |
92 | 0501011P0 | Phenoxymethylpenicillin (Penicillin V) | 206107 | 224298 | 18191 | 8.83 |
195 | 0501050A0 | Azithromycin | 64255 | 77958 | 13703 | 21.33 |
108 | 0501050B0 | Clarithromycin | 178630 | 191549 | 12919 | 7.23 |
185 | 0503021C0 | Aciclovir | 79217 | 87733 | 8516 | 10.75 |
378 | 0501015P0 | Pivmecillinam hydrochloride | 17535 | 21850 | 4315 | 24.61 |
333 | 0504010T0 | Quinine bisulfate | 24764 | 28479 | 3715 | 15.00 |
216 | 0501021L0 | Cefalexin | 60915 | 64358 | 3443 | 5.65 |
414 | 0501080D0 | Co-trimoxazole(Trimethoprim/sulfamethoxazole) | 14131 | 17483 | 3352 | 23.72 |
160 | 0501013K0 | Co-amoxiclav (Amoxicillin/clavulanic acid) | 109668 | 112440 | 2772 | 2.53 |
518 | 0501130H0 | Methenamine hippurate | 7654 | 10049 | 2395 | 31.29 |
630 | 0501070AE | Fosfomycin trometamol | 3921 | 5591 | 1670 | 42.59 |
591 | 0501070X0 | Rifaximin | 5681 | 6794 | 1113 | 19.59 |
701 | 0503021A0 | Valaciclovir | 3372 | 4000 | 628 | 18.62 |
736 | 0501070I0 | Colistimethate sodium | 2916 | 3445 | 529 | 18.14 |
763 | 0501120P0 | Ofloxacin | 2883 | 3063 | 180 | 6.24 |
831 | 0501120X0 | Levofloxacin | 2178 | 2338 | 160 | 7.35 |
1248 | 0501070U0 | Vancomycin hydrochloride | 277 | 409 | 132 | 47.65 |
828 | 0501100H0 | Dapsone | 2226 | 2353 | 127 | 5.71 |
852 | 0501030V0 | Tetracycline | 2063 | 2165 | 102 | 4.94 |
1204 | 0501120Y0 | Moxifloxacin | 438 | 476 | 38 | 8.68 |
1507 | 0501070W0 | Linezolid | 87 | 124 | 37 | 42.53 |
1722 | 0503040A0 | Zanamivir | 10 | 39 | 29 | 290.00 |
1445 | 0501110G0 | Tinidazole | 132 | 156 | 24 | 18.18 |
938 | 0501090R0 | Rifampicin | 1491 | 1503 | 12 | 0.80 |
1533 | 0503010H0 | Tenofovir disoproxil | 100 | 111 | 11 | 11.00 |
1693 | 0504080A0 | Atovaquone | 35 | 46 | 11 | 31.43 |
1440 | 0501090S0 | Rifampicin combined preparations | 151 | 161 | 10 | 6.62 |
1748 | 0505060I0 | Ivermectin | 25 | 33 | 8 | 32.00 |
1784 | 0505030A0 | Albendazole | 19 | 27 | 8 | 42.11 |
1480 | 0504010F0 | Chloroquine phosphate | 129 | 136 | 7 | 5.43 |
1806 | 0504010N0 | Pyrimethamine | 15 | 22 | 7 | 46.67 |
1892 | 0501070F0 | Chloramphenicol | 5 | 11 | 6 | 120.00 |
1842 | 0501070AC | Fidaxomicin | 12 | 17 | 5 | 41.67 |
1523 | 0503031B0 | Entecavir | 110 | 115 | 5 | 4.55 |
1864 | 0502020A0 | Ketoconazole | 9 | 14 | 5 | 55.56 |
2013 | 0503010Z0 | Abacavir and lamivudine | 1 | 5 | 4 | 400.00 |
1885 | 0501022B0 | Ertapenem sodium | 8 | 12 | 4 | 50.00 |
1861 | 0501013C0 | Amoxicillin sodium | 10 | 14 | 4 | 40.00 |
1831 | 0503021D0 | Inosine pranobex | 15 | 18 | 3 | 20.00 |
1733 | 0501040N0 | Neomycin sulfate | 33 | 36 | 3 | 9.09 |
1967 | 0501090N0 | Pyrazinamide | 4 | 7 | 3 | 75.00 |
1920 | 0501021H0 | Ceftazidime pentahydrate | 7 | 9 | 2 | 28.57 |
2007 | 0501120Q0 | Norfloxacin | 3 | 5 | 2 | 66.67 |
2012 | 0503010AC | Darunavir | 3 | 5 | 2 | 66.67 |
1843 | 0502010A0 | Posaconazole | 16 | 17 | 1 | 6.25 |
1932 | 0501100C0 | Clofazimine | 7 | 8 | 1 | 14.29 |
1706 | 0502010D0 | Voriconazole | 42 | 43 | 1 | 2.38 |
1850 | 0501090Q0 | Rifabutin | 16 | 16 | 0 | 0.00 |
1757 | 0501080J0 | Sulfadiazine | 31 | 31 | 0 | 0.00 |
1980 | 0502030A0 | Amphotericin | 6 | 6 | 0 | 0.00 |
1894 | 0503010AA | Emtricitabine and tenofovir disoproxil | 12 | 11 | -1 | -8.33 |
1830 | 0501022A0 | Meropenem | 21 | 19 | -2 | -9.52 |
1997 | 0503010AN | Efavirenz/emtricitabine/tenofovir disoproxil | 8 | 5 | -3 | -37.50 |
1525 | 0501013E0 | Ampicillin | 118 | 113 | -5 | -4.24 |
1498 | 0501021C0 | Cefixime | 134 | 129 | -5 | -3.73 |
1800 | 0505020L0 | Levamisole hydrochloride | 30 | 24 | -6 | -20.00 |
2011 | 0503010AE | Raltegravir | 14 | 5 | -9 | -64.29 |
1719 | 0503022B0 | Valganciclovir hydrochloride | 50 | 41 | -9 | -18.00 |
1995 | 0501014S0 | Piperacillin sodium/tazobactam sodium | 14 | 5 | -9 | -64.29 |
1537 | 0501040U0 | Tobramycin | 122 | 109 | -13 | -10.66 |
1337 | 0501040H0 | Gentamicin sulfate | 291 | 278 | -13 | -4.47 |
1911 | 0501070M0 | Fusidic acid | 25 | 10 | -15 | -60.00 |
1257 | 0501021K0 | Cefuroxime axetil | 410 | 394 | -16 | -3.90 |
1131 | 0501021A0 | Cefaclor | 724 | 697 | -27 | -3.73 |
1178 | 0503021E0 | Famciclovir | 592 | 559 | -33 | -5.57 |
1776 | 0501011J0 | Benzylpenicillin sodium (Penicillin G) | 65 | 28 | -37 | -56.92 |
1723 | 0501070T0 | Teicoplanin | 76 | 39 | -37 | -48.68 |
1320 | 0503010Q0 | Lamivudine | 339 | 298 | -41 | -12.09 |
1619 | 0501021G0 | Ceftriaxone sodium | 108 | 66 | -42 | -38.89 |
1455 | 0504040M0 | Mepacrine hydrochloride | 196 | 150 | -46 | -23.47 |
1735 | 0504010L0 | Mefloquine hydrochloride | 83 | 36 | -47 | -56.63 |
1231 | 0501090H0 | Ethambutol hydrochloride | 475 | 426 | -49 | -10.32 |
1432 | 0501070N0 | Sodium fusidate | 235 | 166 | -69 | -29.36 |
886 | 0501021M0 | Cefradine | 1935 | 1851 | -84 | -4.34 |
1665 | 0504010U0 | Proguanil hydrochloride with atovaquone | 137 | 53 | -84 | -61.31 |
1325 | 0502050B0 | Griseofulvin | 380 | 295 | -85 | -22.37 |
1222 | 0501030F0 | Demeclocycline hydrochloride | 530 | 444 | -86 | -16.23 |
1630 | 0501021B0 | Cefadroxil | 169 | 61 | -108 | -63.91 |
1052 | 0501030Z0 | Doxycycline monohydrate | 1208 | 972 | -236 | -19.54 |
601 | 0501060D0 | Clindamycin hydrochloride | 6823 | 6568 | -255 | -3.74 |
875 | 0501030P0 | Minocycline hydrochloride | 2288 | 1936 | -352 | -15.38 |
982 | 0501090K0 | Isoniazid | 1633 | 1247 | -386 | -23.64 |
1620 | 0501013L0 | Co-fluampicil(Flucloxacillin/ampicillin) | 746 | 66 | -680 | -91.15 |
1116 | 0501050N0 | Erythromycin stearate | 1463 | 751 | -712 | -48.67 |
576 | 0502010C0 | Itraconazole | 8810 | 7309 | -1501 | -17.04 |
237 | 0502030B0 | Nystatin | 55812 | 54252 | -1560 | -2.80 |
1069 | 0503040B0 | Oseltamivir phosphate | 2601 | 899 | -1702 | -65.44 |
293 | 0501050C0 | Erythromycin | 37889 | 35717 | -2172 | -5.73 |
402 | 0501050H0 | Erythromycin ethylsuccinate | 20685 | 18138 | -2547 | -12.31 |
180 | 0501030L0 | Lymecycline | 93736 | 91125 | -2611 | -2.79 |
617 | 0505010D0 | Mebendazole | 8975 | 5967 | -3008 | -33.52 |
354 | 0501030T0 | Oxytetracycline | 28759 | 25657 | -3102 | -10.79 |
327 | 0502050C0 | Terbinafine hydrochloride | 32872 | 29564 | -3308 | -10.06 |
274 | 0502010B0 | Fluconazole | 45563 | 42104 | -3459 | -7.59 |
61 | 0501130R0 | Nitrofurantoin | 343045 | 339497 | -3548 | -1.03 |
265 | 0501110C0 | Metronidazole | 49444 | 45051 | -4393 | -8.88 |
287 | 0501120L0 | Ciprofloxacin | 43977 | 38881 | -5096 | -11.59 |
96 | 0504010Y0 | Quinine sulfate | 223215 | 213430 | -9785 | -4.38 |
144 | 0501080W0 | Trimethoprim | 141992 | 129198 | -12794 | -9.01 |
76 | 0501012G0 | Flucloxacillin sodium | 302816 | 282352 | -20464 | -6.76 |
sql2 = '''
SELECT
month,
SUM(items) AS items,
SUM(actual_cost) AS cost
FROM
ebmdatalab.hscic.normalised_prescribing
WHERE
month BETWEEN TIMESTAMP('2015-01-01')
AND TIMESTAMP('2020-03-01') #2014 seems to be duplicated so setting argument to eliminate
GROUP BY
month
ORDER BY
items DESC
'''
df_overall = bq.cached_read(sql2, csv_path=os.path.join('..','data','df_overall.csv'))
df_overall.head(5)
month | items | cost | |
---|---|---|---|
0 | 2020-03-01 00:00:00+00:00 | 99835832 | 790848491.37 |
1 | 2019-10-01 00:00:00+00:00 | 99722084 | 758784655.33 |
2 | 2018-10-01 00:00:00+00:00 | 97798558 | 719422788.30 |
3 | 2020-01-01 00:00:00+00:00 | 96246794 | 719143800.18 |
4 | 2015-12-01 00:00:00+00:00 | 96043609 | 761761833.10 |
df_overall.groupby("month").sum().plot(kind='line', title="Trens in items and cost per month since 2015")
<matplotlib.axes._subplots.AxesSubplot at 0x7fd83d7f2370>
df_march_2020 = df_overall.loc[(df_overall["month"] == "2020-03-01 00:00:00+00:00")]
df_march_2019 = df_overall.loc[(df_overall["month"] == "2019-03-01 00:00:00+00:00")]
df_march_2020
month | items | cost | |
---|---|---|---|
0 | 2020-03-01 00:00:00+00:00 | 99835832 | 790848491.37 |
df_march_2019
month | items | cost | |
---|---|---|---|
33 | 2019-03-01 00:00:00+00:00 | 91320874 | 672356539.25 |