# Exploratory analysis relating Chicago community-area socioeconomic indicators
# to residential energy use (electricity in kWh, natural gas in therms).
#
# The script downloads two CSV exports from the City of Chicago data portal,
# reduces them to one row per community, joins them, and draws a series of
# Gadfly plots.

using DataFrames
using DataStructures
using Gadfly
using Color

# Palette used by the density plots at the end of the script.
colors = distinguishable_colors(8)

# Black-on-white theme shared by every plot below.
gajomidark = Theme(default_color = color("black"),
                   major_label_color = color("black"), major_label_font_size = 18px,
                   minor_label_color = color("black"), minor_label_font_size = 14px,
                   panel_stroke = color("black"),
                   default_point_size = 0.5mm,
                   line_width = 1mm,
                   grid_line_width = 0px,
                   highlight_width = 0px)

set_default_plot_size(20cm, 15cm)

download("https://data.cityofchicago.org/api/views/8yq3-m6wp/rows.csv?accessType=DOWNLOAD","energy.csv")
download("https://data.cityofchicago.org/api/views/kn9c-c2s2/rows.csv?accessType=DOWNLOAD","socio.csv")

# ---- socioeconomic indicators, one row per community ----
# NOTE: the table was previously bound to `fulleconsummary`/`econsummary` but read
# back as `fullecondata`/`econdata`; a single name is now used for each table.
fullecondata = readtable("socio.csv")
map(display, names(fullecondata));

fullnames = [:COMMUNITY_AREA_NAME, :PERCENT_HOUSEHOLDS_BELOW_POVERTY, :PER_CAPITA_INCOME]
shortnames = [:community, :percent_poverty, :per_capita_income]
econdata = sort!(names!(fullecondata[:, fullnames], shortnames),
                 cols = :percent_poverty, rev = true)

plot(econdata, x = "percent_poverty", y = "per_capita_income", label = :community,
     Geom.label, Geom.point, gajomidark)

# ---- energy usage, one row per (census block, building type) record ----
fullenergydetail = readtable("energy.csv")
map(display, names(fullenergydetail));

fullnames = [:COMMUNITY_AREA_NAME, :CENSUS_BLOCK, :TOTAL_POPULATION, :BUILDING_TYPE,
             :THERMS_TOTAL_SQFT, :TOTAL_KWH, :TOTAL_THERMS]
shortnames = [:community, :block, :population, :building_type, :sqft, :kwh, :therms]
energydetail = complete_cases!(names!(fullenergydetail[:, fullnames], shortnames))

# Keep only residential records with a positive population whose census block
# occurs exactly once in the table.
dupcounts = counter(energydetail[:block])
rows = (&)(array(energydetail[:building_type] .== "Residential"),
           array(energydetail[:population] .> 0),
           array(map(i -> dupcounts[i], energydetail[:block]) .== 1))
# The filtered table is named `energydata`, which is what everything below reads.
energydata = delete!(energydetail[rows, :], [:block, :building_type])

# Element-wise ratio of column `numerator` to column `denominator` of `data`.
per(denominator::Symbol,numerator::Symbol,data::DataFrame) = data[numerator]./data[denominator]

#add stats
energydata[:kwh_per_capita] = per(:population, :kwh, energydata)
energydata[:therms_per_capita] = per(:population, :therms, energydata)
energydata[:kwh_per_sqft] = per(:sqft, :kwh, energydata)
energydata[:therms_per_sqft] = per(:sqft, :therms, energydata)
energydata

# Average every statistic within each community; `aggregate` appends "_mean" to
# the aggregated column names, which is why the plots below use `*_mean`.
stats = [:kwh_per_capita, :therms_per_capita, :kwh_per_sqft, :therms_per_sqft]
energysummary = aggregate(energydata[[:community, stats...]], :community, mean)

# Joined table: socioeconomic indicators plus mean energy statistics. It is
# bound to `fulldata` (the name the plots read) rather than `summary`, which
# would also shadow `Base.summary`.
fulldata = join(econdata, energysummary, on = :community)

# Labelled scatter of column `ycol` against column `xcol`, log-scaled y axis.
communityscatter(data::DataFrame, xcol::Symbol, ycol::Symbol) =
    plot(data, x = xcol, y = ycol, label = :community,
         Geom.label, Geom.point, Scale.y_log10, gajomidark)

set_default_plot_size(25cm, 10cm)
hstack(
    communityscatter(fulldata, :percent_poverty, :therms_per_sqft_mean),
    communityscatter(fulldata, :per_capita_income, :therms_per_sqft_mean)
)

set_default_plot_size(25cm, 10cm)
# Panel order (income first, then poverty) is kept as in the original script.
hstack(
    communityscatter(fulldata, :per_capita_income, :kwh_per_sqft_mean),
    communityscatter(fulldata, :percent_poverty, :kwh_per_sqft_mean)
)

set_default_plot_size(25cm, 10cm)
hstack(
    communityscatter(fulldata, :percent_poverty, :kwh_per_capita_mean),
    communityscatter(fulldata, :per_capita_income, :kwh_per_capita_mean)
)

set_default_plot_size(25cm, 10cm)
hstack(
    communityscatter(fulldata, :percent_poverty, :therms_per_capita_mean),
    communityscatter(fulldata, :per_capita_income, :therms_per_capita_mean)
)

# Gas versus electricity use, shaded by poverty rate (the colour function maps a
# larger scale position p to a darker grey).
set_default_plot_size(20cm, 15cm)
plot(fulldata, x = :therms_per_sqft_mean, y = :kwh_per_sqft_mean,
     color = :percent_poverty, label = :community,
     Geom.label, Geom.point,
     Scale.ContinuousColorScale(p -> RGB(1-p,1-p,1-p)),
     Scale.y_log10, Scale.x_log10, gajomidark)

plot(fulldata, x = :therms_per_capita_mean, y = :kwh_per_capita_mean,
     color = :percent_poverty, label = :community,
     Geom.label, Geom.point,
     Scale.ContinuousColorScale(p -> RGB(1-p,1-p,1-p)),
     Scale.y_log10, Scale.x_log10, gajomidark)

# An ugly hack to get Gadfly to plot the bulk statistics alongside select
# communities: relabel a copy of every record as "All Chicago", then append the
# records of the selected communities under their real names, so that colouring
# by :community draws one density curve for the whole city plus one per
# selected community.
unlabeledall = deepcopy(energydata)
unlabeledall[:community] = utf8("All Chicago")
indexes = findin(energydata[:community], ["Oakland","Washington Park","Riverdale"])
# Alternative selection previously tried: ["Lincoln Park","Hyde Park","Washington Park"]
labeledsubset = deepcopy(energydata[indexes, :])
superhackyappendeddata = append!(unlabeledall, labeledsubset);

# plots[i] holds the kwh and therms density plots for the i-th normalizer.
plots = [[plot(superhackyappendeddata, x = units*"_per_"*normalizer, color = :community,
               Scale.discrete_color_manual("black", colors[6:end]...),
               Geom.density, Scale.x_log10, gajomidark)
          for units in ["kwh","therms"]]
         for normalizer in ["capita","sqft"]]
vstack(hstack(plots[1]...), hstack(plots[2]...))