using CSV, DataFramesMeta, Statistics, Dates # DataFrame(column=data) df = DataFrame(id=1:8, first_name=["Michael", "Dwight", "Angela", "Jim", "Pam", "Oscar", "Meredith", "Creed"], last_name=["Scott", "Schrute", "Martin", "Halpert", "Beesly", "Nunez", "Palmer", "Bratton"], department=["Management & Admin", "Sales", "Accounting", "Sales", "Management & Admin", "Accounting", "Purchasing", "Purchasing"], salary=[5100, 4200, 3750, 4300, 2200, 3400, 3300, 3200]) # Parse dates as Date objects dates = ["1-2-2006", "1-29-2006", "2-1-2006", "2-14-2006", "3-1-2006", "3-20-2006"] dates = parse.(Date, dates, dateformat"m-d-y") db_sales = DataFrame(id=1:6, transaction_date=dates, employee_id=[4, 2, 4, 2, 4, 2], quantity=[100, 500, 600, 200, 400, 250], customer=["Dunmore High School", "Harper Collins", "Blue Cross of Pennsylvania", "Apex Technology", "Blue Cross of Pennsylvania", "Stone, Cooper, and Grandy: Attorneys at Law"]) db_sales |> print #DataFrames subset(df, :id => ByRow(x -> x >= 1), :first_name => ByRow(x -> x =="Michael") ) #With DataFramesMeta it's easier to layer on complexity like do 'or' statements a little easier, plus it's more terse @rsubset(df, (:id == 2) | (:first_name =="Michael") ) #DataFrames subset(df, :department => ByRow(x -> occursin("Admin", x))) #DataFramesMeta @rsubset(df, occursin("Admin", :department)) gd = groupby(df, :department) #DataFrames combine(gd, :salary => mean => :"Average Salary", :department => length => :count) #DataFramesMeta @combine(gd, :"Average Salary" = mean(:salary), :count = length(:department)) @by(df, :department, :"Average Salary" = mean(:salary), :count=length(:salary)) #DataFrames transform(gd, :salary => mean => :"Average Salary") #DataFramesMeta @transform(gd, :"Average Salary"=mean(:salary)) leftjoin(df, db_sales, on=[:id=>:employee_id], makeunique=true) sales = @chain db_sales begin groupby(:employee_id) @combine(:total_quantity=sum(:quantity), :number_of_customers=length(:customer)) end result = @chain df begin @select(:id, :first_name, :last_name, :department) @rsubset(:department=="Sales") leftjoin(sales, on=[:id=>:employee_id]) @orderby(sort(:total_quantity, rev=true)) # order by descending end