#!/usr/bin/env python
# coding: utf-8

# In[1]:


import ipyparallel as ipp

rc = ipp.Client()
engines = rc[:]


# Approximate π with random $x,y$ samples on a unit square:
#
# $$
# x \in [0, 1],\\
# y \in [0, 1]
# $$
#
# where the probability that any given point $x,y$ is inside the unit circle is
# the relative area of the circle within the square:
#
# $$
# P(x^2 + y^2 \leq 1) = \frac{\pi}{4}
# $$
#
# so we can approximate π by collecting random samples on the unit square and
# counting how many are inside the unit circle:
#
# $$
# \pi \approx \frac{4}{N} \sum_{i=1}^{N} \left[ x_i^2 + y_i^2 \leq 1 \right]
# $$
#
# where the bracket is 1 when the condition holds and 0 otherwise.

# In[2]:


def mc_pi(n):
    """Monte Carlo approximation of π

    Throw darts uniformly distributed on the unit square and count how many
    land inside the quarter of the unit circle it contains.
    The fraction inside the circle approaches π / 4.
    """
    import random

    samples = []
    for i in range(n):
        x = random.random()
        y = random.random()
        in_circle = (x * x) + (y * y) <= 1
        samples.append(in_circle)
    return 4 * sum(samples) / n


# Run our tests for a series of sample sizes in serial and parallel,
# comparing the times of each to see how the parallel performance
# improves over serial.

# In[3]:


n_engines = len(engines)
print(f"Using {n_engines} processes")
for n_samples in [100, 1000, 10_000, 100_000, 1_000_000]:
    if n_samples % n_engines:
        # round up to the next multiple of n_engines
        # so the work divides evenly across engines
        n_samples += n_engines - (n_samples % n_engines)
    samples_per_engine = n_samples // n_engines
    print(f"\nMonte Carlo sampling of π: {n_samples} samples ({samples_per_engine} per engine)")
    print("serial:")
    tr_serial = get_ipython().run_line_magic('timeit', '-o mc_pi(n_samples)')
    print("parallel:")
    tr_parallel = get_ipython().run_line_magic('timeit', '-o sum(engines.apply(mc_pi, samples_per_engine)) / len(engines)')
    print(f"speedup: {tr_serial.average / tr_parallel.average:.2f}x")


# We see in this example, with four local engines, that the parallel implementation
# is slower until about 100,000 samples, where the serial case takes about 30 milliseconds.
#
# The overhead of scheduling and waiting for a single parallel task
# means that for tasks quicker than several milliseconds,
# the overhead overwhelms the benefit of the concurrent workload.
#
# Tasks must take long enough for the overhead of managing parallel and distributed tasks to be worth it.
# In the case of IPython Parallel, that's on the order of tens of milliseconds per task.
# If there are many tasks that can be queued concurrently, the overhead can be pipelined and shared,
# reducing the effective per-task overhead.
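
# To see that fixed per-task cost directly, we can time a task that does no work
# at all (an illustrative extra cell, not part of the benchmark above; it assumes
# the same `engines` view, so everything measured is scheduling and messaging
# overhead):

# In[4]:


# round-trip time for a no-op task: pure IPython Parallel overhead
tr_noop = get_ipython().run_line_magic('timeit', '-o engines.apply_sync(lambda: None)')
print(f"per-task overhead: {tr_noop.average * 1000:.1f} ms")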
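
# And a minimal sketch of that last point about pipelining (an added
# illustration: `n_tasks` and `samples_per_task` are made-up parameters,
# and a load-balanced view is used so each task goes to whichever engine
# is free). Submitting all tasks with `apply_async` before waiting on any
# of them lets their scheduling overhead overlap instead of being paid
# serially.

# In[5]:


import time

view = rc.load_balanced_view()
n_tasks = 50
samples_per_task = 10_000

start = time.perf_counter()
# each apply_async call returns an AsyncResult immediately,
# so all n_tasks submissions are in flight before we wait on any result
async_results = [view.apply_async(mc_pi, samples_per_task) for _ in range(n_tasks)]
# AsyncResult.get() blocks until that task's result is ready
estimates = [ar.get() for ar in async_results]
elapsed = time.perf_counter() - start

print(f"π ≈ {sum(estimates) / len(estimates):.5f}")
print(f"{n_tasks} pipelined tasks in {elapsed * 1000:.1f} ms "
      f"({elapsed / n_tasks * 1000:.2f} ms effective per task)")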