# Builds two JSON files from the local METADATA checkout and the GitHub API:
#   * DAG-Julia-Pkgs.json -- package dependency graph with per-package metadata
#   * Julians.json        -- co-contribution network of GitHub users
# Requires the GITHUB_AUTH environment variable to hold a GitHub token.

# install the packages this script depends on
for dep in ["HTTP","JSON","GitHub","LightGraphs","ProgressMeter"]
    Pkg.add(dep)
end

using HTTP
using JSON
using GitHub
using LightGraphs
using ProgressMeter

# authenticate with GitHub to increase query limits
mytoken = ENV["GITHUB_AUTH"]
myauth = GitHub.authenticate(mytoken)

# find all packages in METADATA
pkgs = readdir(Pkg.dir("METADATA"))
filterfunc = p -> isdir(joinpath(Pkg.dir("METADATA"), p)) && p ∉ [".git",".test"]
pkgs = filter(filterfunc, pkgs)

# assign each package an id (its index in `pkgs`, used as the graph vertex)
pkgdict = Dict{String,Int}()
for (i,pkg) in enumerate(pkgs)
    push!(pkgdict, pkg => i)
end

# build DAG: one edge from each package to every package that depends on it
DAG = DiGraph(length(pkgs))
@showprogress 1 "Building graph..." for pkg in pkgs
    children = Pkg.dependents(pkg)
    for c in children
        add_edge!(DAG, pkgdict[pkg], pkgdict[c])
    end
end

# find (indirect) descendents
descendents = []
for pkg in pkgs
    gdists = gdistances(DAG, pkgdict[pkg])
    # Keep vertices at a positive, finite distance only.
    # NOTE(review): depending on the LightGraphs version, unreachable vertices
    # appear to be marked either with a negative value or with typemax; the
    # upper bound makes the filter correct under both conventions -- confirm.
    unreachable = typemax(eltype(gdists))
    reachable = find(d -> 0 < d < unreachable, gdists)
    desc = [Dict("id"=>pkgs[v], "level"=>gdists[v]) for v in reachable]
    push!(descendents, desc)
end

# find contributors
pkgcontributors = []
hostnames = []
@showprogress 1 "Finding contributors..." for pkg in pkgs
    url = Pkg.Read.url(pkg)
    m = match(r".*://([a-z.]*)/(.*)\.git.*", url)
    if m === nothing
        # URL does not have the expected "<scheme>://<host>/<repo>.git" shape;
        # record an empty entry instead of aborting the whole run.
        warn("Could not parse repository URL for $pkg: $url")
        push!(pkgcontributors, [])
        push!(hostnames, nothing)
        continue
    end
    hostname = m[1]
    reponame = m[2]
    if hostname == "github.com"
        users, _ = contributors(reponame, auth=myauth)
        usersdata = map(u -> (u["contributor"].login, u["contributions"]), users)
        pkgcontrib = [Dict("id"=>u, "contributions"=>c) for (u,c) in usersdata]
        push!(pkgcontributors, pkgcontrib)
    else
        # contributors are only looked up for GitHub-hosted packages
        push!(pkgcontributors, [])
    end
    push!(hostnames, hostname)
end

# find required Julia version
juliaversion = []
for pkg in pkgs
    versiondir = joinpath(Pkg.dir("METADATA"), pkg, "versions")
    # only directory names that parse as version numbers are considered
    versions = isdir(versiondir) ?
        filter(v -> ismatch(Base.VERSION_REGEX, v), readdir(versiondir)) : String[]
    if !isempty(versions)
        # Sort semantically: a plain string sort would rank "0.9.0" above "0.10.0".
        latestversion = sort(versions, by=VersionNumber)[end]
        reqfile = joinpath(versiondir, latestversion, "requires")
        reqs = Pkg.Reqs.parse(reqfile)
        if haskey(reqs, "julia")
            vinterval = reqs["julia"].intervals[1]
            vmin = vinterval.lower
            vmax = vinterval.upper
            majorminor = "v$(vmin.major).$(vmin.minor)"
            push!(juliaversion, Dict("min"=>string(vmin),
                                     "max"=>string(vmax),
                                     "majorminor"=>majorminor))
        else
            push!(juliaversion, Dict("min"=>"NA", "max"=>"NA", "majorminor"=>"NA"))
        end
    else
        # no versions directory, or no version entries inside it
        push!(juliaversion, Dict("min"=>"BOGUS", "max"=>"BOGUS", "majorminor"=>"BOGUS"))
    end
end

# construct JSON
nodes = [Dict("id"=>pkgs[v],
              "indegree"=>indegree(DAG,v),
              "outdegree"=>outdegree(DAG,v),
              "juliaversion"=>juliaversion[v],
              "descendents"=>descendents[v],
              "contributors"=>pkgcontributors[v]) for v in vertices(DAG)]
links = [Dict("source"=>pkgs[src(e)], "target"=>pkgs[dst(e)]) for e in edges(DAG)]
data = Dict("nodes"=>nodes, "links"=>links)

# write to file
open("DAG-Julia-Pkgs.json", "w") do f
    JSON.print(f, data, 2)
end

# find Julians on Github
julians = []
for pkgcontrib in pkgcontributors
    append!(julians, [julian["id"].value for julian in pkgcontrib])
end
julians = sort(unique(julians))

# assign each Julian an id (its index in `julians`, used as the graph vertex)
juliandict = Dict{String,Int}()
for (i,julian) in enumerate(julians)
    push!(juliandict, julian => i)
end

# build the social network: two users are linked when they contributed to the
# same package; contribdict accumulates each user's contributions over all packages
socialnet = Graph(length(julians))
contribdict = Dict{String,Int}()
for pkgcontrib in pkgcontributors
    ids = [julian["id"].value for julian in pkgcontrib]
    contribs = [julian["contributions"] for julian in pkgcontrib]
    for i in eachindex(ids)
        contribdict[ids[i]] = get(contribdict, ids[i], 0) + contribs[i]
    end
    for i in eachindex(ids), j in 1:i-1
        add_edge!(socialnet, juliandict[ids[i]], juliandict[ids[j]])
    end
end

njulians = nv(socialnet)
nconnections = ne(socialnet)
info("$njulians Julians and $nconnections connections")

# HTTP requests on https://api.github.com
juliansinfo = []
@showprogress 1 "Retrieving Julian info..." for julian in julians
    resp = HTTP.get("https://api.github.com/users/$julian?access_token=$mytoken")
    htmlbody = identity(String(resp.body))
    push!(juliansinfo, JSON.Parser.parse(htmlbody))
end

# geocode every user's free-text location; the four vectors below receive
# exactly one entry per user so that they stay index-aligned with juliansinfo
locnames = []
latitudes = []
longitudes = []
countries = []
@showprogress 1 "Geocoding Julian address..." for julian in juliansinfo
    locname = nothing
    latitude = nothing
    longitude = nothing
    country = nothing

    address = julian["location"]
    if address !== nothing
        address = replace(address, "–", "")
        address = replace(address, " ", "+")
        # NOTE(review): apart from the two replacements above, the address is
        # interpolated into the URL unescaped -- consider URL-encoding it.
        resp = HTTP.get("http://maps.google.com/maps/api/geocode/json?address=$address")
        htmlbody = identity(String(resp.body))
        results = JSON.Parser.parse(htmlbody)["results"]
        if length(results) > 0
            geoinfo = results[1]
            locname = geoinfo["formatted_address"]
            loccoords = geoinfo["geometry"]["location"]
            latitude = loccoords["lat"]
            longitude = loccoords["lng"]
            # Take the first component tagged "country"; `country` stays
            # `nothing` when the result carries no such component.
            for comp in geoinfo["address_components"]
                if "country" ∈ comp["types"]
                    country = comp["long_name"]
                    break
                end
            end
        end
    end

    push!(locnames, locname)
    push!(latitudes, latitude)
    push!(longitudes, longitude)
    push!(countries, country)
end

# construct JSON
usernodes = [Dict("id"=>julian["login"],
                  "name"=>julian["name"],
                  "avatar_url"=>julian["avatar_url"],
                  "contributions"=>contribdict[julian["login"]],
                  "location"=>locnames[i],
                  "latitude"=>latitudes[i],
                  "longitude"=>longitudes[i],
                  "country"=>countries[i]) for (i,julian) in enumerate(juliansinfo)]
userlinks = [Dict("source"=>julians[src(e)], "target"=>julians[dst(e)]) for e in edges(socialnet)]
userdata = Dict("nodes"=>usernodes, "links"=>userlinks)

# write to file
open("Julians.json", "w") do f
    JSON.print(f, userdata, 2)
end