%pwd
'/home/cyrille/minibook/chapter1'
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip
%ls
facebook.zip [...]
!unzip facebook.zip
%ls
facebook facebook.zip [...]
%cd facebook
/home/cyrille/minibook/chapter1/facebook
%bookmark fbdata
%ls
0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
files = !ls -1 -S | grep .edges
files
['1912.edges', '107.edges', '1684.edges', '3437.edges', '348.edges', '0.edges', '414.edges', '686.edges', '698.edges', '3980.edges']
import os
from operator import itemgetter
# Build the list of .edges file names found in the current
# directory, ordered from the largest file to the smallest.
# sorted() is stable (also with reverse=True), so files of equal
# size keep the relative order in which os.listdir() returned them.
files = sorted(
    (name for name in os.listdir('.') if name.endswith('.edges')),
    key=lambda name: os.stat(name).st_size,
    reverse=True,
)
!head -n5 {files[0]}
2290 2363 2346 2025 2140 2428 2201 2506 2425 2557
%lsmagic
Available line magics: %alias %alias_magic %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %install_default_config %install_ext %install_profiles %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %popd %pprint %precision %profile %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode Available cell magics: %%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%latex %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile Automagic is ON, % prefix IS NOT needed for line magics.
%history?
%history -l 5
files = !ls -1 -S | grep .edges files !head -n5 {files[0]} %lsmagic %history?
# how many minutes in a day?
24 * 60
1440
# and in a year?
_ * 365
525600
%%capture output
%ls
output.stdout
0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
%%bash
cd ..
touch _HEY
ls
rm _HEY
cd facebook
_HEY facebook facebook.zip [...]
%%script ghci
putStrLn "Hello world!"
GHCi, version 7.6.3: http://www.haskell.org/ghc/ :? for help Loading package ghc-prim ... linking ... done. Loading package integer-gmp ... linking ... done. Loading package base ... linking ... done. Prelude> Hello world! Prelude> Leaving GHCi.
%%writefile myfile.txt
Hello world!
Writing myfile.txt
!more myfile.txt
Hello world!
!rm myfile.txt
%cd fbdata
%ls
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook 0.circles 1684.circles 3437.circles 3980.circles 686.circles 0.edges 1684.edges 3437.edges 3980.edges 686.edges 107.circles 1912.circles 348.circles 414.circles 698.circles 107.edges 1912.edges 348.edges 414.edges 698.edges
from IPython.display import YouTubeVideo
YouTubeVideo('j9YpkSX7NNM')
from ipywidgets import interact # IPython.html.widgets before IPython 4.0
@interact(x=(0, 10))
def square(x):
    """Return a sentence stating the square of ``x``.

    The function is registered with ``interact`` using ``x=(0, 10)``;
    per the ipywidgets documentation this presents an integer control
    for ``x`` over that range and shows the returned string for the
    currently selected value.
    """
    # %d formats both values as integers.
    return "The square of %d is %d." % (x, x**2)
'The square of 7 is 49.'
%cd fbdata
%cd ..
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook
%%writefile egos.py
import sys
import os
# We retrieve the folder as the first positional argument
# to the command-line call. When no argument is given, `folder`
# is deliberately left unassigned here: it must then already
# exist in the namespace the script runs in (e.g. `%run -i`),
# otherwise the os.listdir() call below raises a NameError.
if len(sys.argv) > 1:
    folder = sys.argv[1]
# We list all files in the specified folder
files = os.listdir(folder)
# identifiers contains the integer prefix of every file name
# (the part before the first dot), duplicates included.
identifiers = [int(name.split('.')[0]) for name in files]
# Finally, we remove duplicates with set(), and sort the list
# with sorted().
ids = sorted(set(identifiers))
Overwriting egos.py
%run egos.py facebook
ids
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]
folder = 'facebook'
%run egos.py
%run -i egos.py
ids
[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]
import networkx
networkx.Graph?
%cd fbdata
(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook /home/cyrille/minibook/chapter1/facebook
import networkx
graph = networkx.read_edgelist('107.edges')
len(graph.nodes()), len(graph.edges())
(1034, 26749)
networkx.is_connected(graph)
True
%timeit networkx.is_connected(graph)
100 loops, best of 3: 5.92 ms per loop
import networkx
def ncomponents(file):
    """Return the number of connected components of an edge-list graph.

    ``file`` is passed unchanged to ``networkx.read_edgelist()``, and
    the graph it returns is handed to
    ``networkx.number_connected_components()``.
    """
    graph = networkx.read_edgelist(file)
    return networkx.number_connected_components(graph)
import glob
def ncomponents_files():
    """Return ``(file name, component count)`` pairs for all .edges files.

    The files are those matching ``*.edges`` in the current working
    directory, visited in the lexicographic order of their names
    (plain string sort, so '3437.edges' comes before '348.edges').
    Each count is obtained by calling ``ncomponents()`` on the file.
    """
    return [(file, ncomponents(file))
            for file in sorted(glob.glob('*.edges'))]
# Print one line per .edges file: the file name left-justified in a
# 12-character column, followed by its number of connected components.
for file, n in ncomponents_files():
    print(file.ljust(12), n, 'component(s)')
0.edges 5 component(s) 107.edges 1 component(s) 1684.edges 4 component(s) 1912.edges 2 component(s) 3437.edges 2 component(s) 348.edges 1 component(s) 3980.edges 4 component(s) 414.edges 2 component(s) 686.edges 1 component(s) 698.edges 3 component(s)
%timeit ncomponents_files()
1 loops, best of 3: 634 ms per loop
%prun -s cumtime ncomponents_files()
2391070 function calls in 1.038 seconds Ordered by: cumulative time ncalls tottime percall cumtime percall filename:lineno(function) 1 0.000 0.000 1.038 1.038 {built-in method exec} 1 0.000 0.000 1.038 1.038 <string>:1(<module>) 10 0.000 0.000 0.995 0.100 <string>:1(read_edgelist) 10 0.000 0.000 0.995 0.100 decorators.py:155(_open_file) 10 0.376 0.038 0.995 0.099 edgelist.py:174(parse_edgelist) 170174 0.279 0.000 0.350 0.000 graph.py:648(add_edge) 170184 0.059 0.000 0.095 0.000 edgelist.py:366(<genexpr>) 10 0.000 0.000 0.021 0.002 connected.py:98(number_connected_components) 35 0.001 0.000 0.021 0.001 connected.py:22(connected_components)