!conda install -c conda-forge msprime
Collecting package metadata (current_repodata.json): done Solving environment: done # All requested packages already installed.
import msprime
from IPython.display import SVG, display
import matplotlib.pyplot as plt
t = msprime.sim_ancestry(samples=2, population_size=100, sequence_length = 1e4, random_seed=1234)
SVG(t.draw_svg(y_axis=True))
tree = t.first()
SVG(t.draw_svg(y_axis=True))
tree.root
6
tree.sample_size
4
tree.time(5)
120.61070914045273
tree.parent(5)
6
tree.parent(6)
-1
tree.children(5)
(0, 3)
tree.children(3)
()
mt = msprime.sim_mutations(t, rate=2e-6, random_seed=5678)
SVG(mt.draw_svg(y_axis=True))
SVG(mt.draw_svg(y_axis=True))
simplified_mt = mt.simplify(samples=[0, 1])
SVG(simplified_mt.draw_svg(y_axis=True))
mt.genotype_matrix()
array([[1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0], [1, 0, 0, 1], [0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0], [0, 1, 1, 0], [1, 0, 0, 1], [0, 0, 0, 1], [1, 0, 0, 1], [0, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]], dtype=int8)
for v in mt.variants():
print(v)
Variant(site=Site(id=0, position=212.0, ancestral_state='G', mutations=[Mutation(id=0, site=0, node=5, derived_state='T', parent=-1, metadata=b'', time=162.73164164891477)], metadata=b''), alleles=('G', 'T'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=1, position=582.0, ancestral_state='A', mutations=[Mutation(id=1, site=1, node=5, derived_state='G', parent=-1, metadata=b'', time=273.4289356037654)], metadata=b''), alleles=('A', 'G'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=2, position=1526.0, ancestral_state='C', mutations=[Mutation(id=2, site=2, node=4, derived_state='A', parent=-1, metadata=b'', time=330.7743973658487)], metadata=b''), alleles=('C', 'A'), genotypes=array([0, 1, 1, 0], dtype=int8)) Variant(site=Site(id=3, position=1624.0, ancestral_state='C', mutations=[Mutation(id=3, site=3, node=5, derived_state='G', parent=-1, metadata=b'', time=215.69834625510288)], metadata=b''), alleles=('C', 'G'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=4, position=2815.0, ancestral_state='A', mutations=[Mutation(id=4, site=4, node=4, derived_state='C', parent=-1, metadata=b'', time=317.3471232076626)], metadata=b''), alleles=('A', 'C'), genotypes=array([0, 1, 1, 0], dtype=int8)) Variant(site=Site(id=5, position=4053.0, ancestral_state='A', mutations=[Mutation(id=5, site=5, node=5, derived_state='T', parent=-1, metadata=b'', time=395.92398096637646)], metadata=b''), alleles=('A', 'T'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=6, position=4187.0, ancestral_state='C', mutations=[Mutation(id=6, site=6, node=5, derived_state='G', parent=-1, metadata=b'', time=356.3635251689442)], metadata=b''), alleles=('C', 'G'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=7, position=4447.0, ancestral_state='T', mutations=[Mutation(id=7, site=7, node=3, derived_state='A', parent=-1, metadata=b'', time=72.15234218743394)], metadata=b''), alleles=('T', 'A'), genotypes=array([0, 0, 0, 1], dtype=int8)) Variant(site=Site(id=8, position=4538.0, ancestral_state='C', mutations=[Mutation(id=8, site=8, node=5, derived_state='A', parent=-1, metadata=b'', time=314.69819193176147)], metadata=b''), alleles=('C', 'A'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=9, position=4796.0, ancestral_state='C', mutations=[Mutation(id=9, site=9, node=4, derived_state='G', parent=-1, metadata=b'', time=275.6542439484776)], metadata=b''), alleles=('C', 'G'), genotypes=array([0, 1, 1, 0], dtype=int8)) Variant(site=Site(id=10, position=7667.0, ancestral_state='A', mutations=[Mutation(id=10, site=10, node=4, derived_state='C', parent=-1, metadata=b'', time=89.62351434550857)], metadata=b''), alleles=('A', 'C'), genotypes=array([0, 1, 1, 0], dtype=int8)) Variant(site=Site(id=11, position=8193.0, ancestral_state='G', mutations=[Mutation(id=11, site=11, node=5, derived_state='C', parent=-1, metadata=b'', time=417.3071787381794)], metadata=b''), alleles=('G', 'C'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=12, position=8413.0, ancestral_state='C', mutations=[Mutation(id=12, site=12, node=3, derived_state='G', parent=-1, metadata=b'', time=51.93671895636832)], metadata=b''), alleles=('C', 'G'), genotypes=array([0, 0, 0, 1], dtype=int8)) Variant(site=Site(id=13, position=8720.0, ancestral_state='G', mutations=[Mutation(id=13, site=13, node=5, derived_state='C', parent=-1, metadata=b'', time=126.33347761384506)], metadata=b''), alleles=('G', 'C'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=14, position=9025.0, ancestral_state='A', mutations=[Mutation(id=14, site=14, node=3, derived_state='T', parent=-1, metadata=b'', time=21.54517968015363)], metadata=b''), alleles=('A', 'T'), genotypes=array([0, 0, 0, 1], dtype=int8)) Variant(site=Site(id=15, position=9560.0, ancestral_state='C', mutations=[Mutation(id=15, site=15, node=5, derived_state='A', parent=-1, metadata=b'', time=423.31534386644086)], metadata=b''), alleles=('C', 'A'), genotypes=array([1, 0, 0, 1], dtype=int8)) Variant(site=Site(id=16, position=9688.0, ancestral_state='G', mutations=[Mutation(id=16, site=16, node=4, derived_state='C', parent=-1, metadata=b'', time=44.08689039854816)], metadata=b''), alleles=('G', 'C'), genotypes=array([0, 1, 1, 0], dtype=int8))
mt.write_vcf(output = "mt.vcf")
mt.write_fasta(output = "mt.fa")
ts = msprime.sim_ancestry(samples=2, population_size=100, sequence_length = 1e4, recombination_rate=1e-6, random_seed = 1234)
SVG(ts.draw_svg(y_axis=False))
mts = msprime.sim_mutations(ts, rate=2e-6, random_seed=5678)
SVG(mts.draw_svg(y_axis=False))
for tree in mts.trees():
print(tree.interval, tree.span)
Interval(left=0.0, right=3829.0) 3829.0 Interval(left=3829.0, right=4884.0) 1055.0 Interval(left=4884.0, right=5107.0) 223.0 Interval(left=5107.0, right=10000.0) 4893.0
tree = mts.at(3000)
SVG(tree.draw_svg(y_axis = False))
tree = mts.at_index(3)
SVG(tree.draw_svg(y_axis = False))
SVG(mts.draw_svg(y_axis=True))
simplified_mts = mts.simplify([0, 1])
SVG(simplified_mts.draw_svg(y_axis=True))
rate_map = msprime.RateMap(position=[0, 5000, 10000], rate=[0, 2e-6])
rate_map
left | right | mid | span | rate |
---|---|---|---|---|
0 | 5000 | 2500 | 5000 | 0.00 |
5000 | 10000 | 7500 | 5000 | 0.00 |
ts = msprime.sim_ancestry(samples=2, population_size=100, sequence_length = 1e4, recombination_rate=rate_map, random_seed = 1234)
SVG(ts.draw_svg(y_axis=False))
model = msprime.JC69()
mts = msprime.sim_mutations(ts, rate = 2e-6, model = model, random_seed = 5678)
for v in mts.variants():
print(v)
Variant(site=Site(id=0, position=714.0, ancestral_state='A', mutations=[Mutation(id=0, site=0, node=3, derived_state='T', parent=-1, metadata=b'', time=68.25016156236117)], metadata=b''), alleles=('A', 'T'), genotypes=array([0, 0, 0, 1], dtype=int8)) Variant(site=Site(id=1, position=1786.0, ancestral_state='T', mutations=[Mutation(id=1, site=1, node=2, derived_state='A', parent=-1, metadata=b'', time=79.27543024570147)], metadata=b''), alleles=('T', 'A'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=2, position=1946.0, ancestral_state='A', mutations=[Mutation(id=2, site=2, node=6, derived_state='C', parent=-1, metadata=b'', time=122.10487666342334)], metadata=b''), alleles=('A', 'C'), genotypes=array([0, 0, 1, 1], dtype=int8)) Variant(site=Site(id=3, position=4306.0, ancestral_state='C', mutations=[Mutation(id=3, site=3, node=2, derived_state='G', parent=-1, metadata=b'', time=102.56540921302496)], metadata=b''), alleles=('C', 'G'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=4, position=5982.0, ancestral_state='T', mutations=[Mutation(id=4, site=4, node=2, derived_state='C', parent=-1, metadata=b'', time=95.61316426426593)], metadata=b''), alleles=('T', 'C'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=5, position=7926.0, ancestral_state='T', mutations=[Mutation(id=5, site=5, node=1, derived_state='G', parent=-1, metadata=b'', time=126.08816531269409)], metadata=b''), alleles=('T', 'G'), genotypes=array([0, 1, 0, 0], dtype=int8)) Variant(site=Site(id=6, position=9206.0, ancestral_state='A', mutations=[Mutation(id=6, site=6, node=0, derived_state='C', parent=-1, metadata=b'', time=183.60210513371226)], metadata=b''), alleles=('A', 'C'), genotypes=array([1, 0, 0, 0], dtype=int8)) Variant(site=Site(id=7, position=9971.0, ancestral_state='C', mutations=[Mutation(id=7, site=7, node=0, derived_state='G', parent=-1, metadata=b'', time=97.05225292158964)], metadata=b''), alleles=('C', 'G'), genotypes=array([1, 0, 0, 0], dtype=int8))
model = msprime.BinaryMutationModel()
mts = msprime.sim_mutations(ts, rate = 2e-6, model = model, random_seed = 5678)
for v in mts.variants():
print(v)
Variant(site=Site(id=0, position=714.0, ancestral_state='0', mutations=[Mutation(id=0, site=0, node=3, derived_state='1', parent=-1, metadata=b'', time=68.25016156236117)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 0, 0, 1], dtype=int8)) Variant(site=Site(id=1, position=1786.0, ancestral_state='0', mutations=[Mutation(id=1, site=1, node=2, derived_state='1', parent=-1, metadata=b'', time=79.27543024570147)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=2, position=1946.0, ancestral_state='0', mutations=[Mutation(id=2, site=2, node=6, derived_state='1', parent=-1, metadata=b'', time=122.10487666342334)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 0, 1, 1], dtype=int8)) Variant(site=Site(id=3, position=4306.0, ancestral_state='0', mutations=[Mutation(id=3, site=3, node=2, derived_state='1', parent=-1, metadata=b'', time=102.56540921302496)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=4, position=5982.0, ancestral_state='0', mutations=[Mutation(id=4, site=4, node=2, derived_state='1', parent=-1, metadata=b'', time=95.61316426426593)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 0, 1, 0], dtype=int8)) Variant(site=Site(id=5, position=7926.0, ancestral_state='0', mutations=[Mutation(id=5, site=5, node=1, derived_state='1', parent=-1, metadata=b'', time=126.08816531269409)], metadata=b''), alleles=('0', '1'), genotypes=array([0, 1, 0, 0], dtype=int8)) Variant(site=Site(id=6, position=9206.0, ancestral_state='0', mutations=[Mutation(id=6, site=6, node=0, derived_state='1', parent=-1, metadata=b'', time=183.60210513371226)], metadata=b''), alleles=('0', '1'), genotypes=array([1, 0, 0, 0], dtype=int8)) Variant(site=Site(id=7, position=9971.0, ancestral_state='0', mutations=[Mutation(id=7, site=7, node=0, derived_state='1', parent=-1, metadata=b'', time=97.05225292158964)], metadata=b''), alleles=('0', '1'), genotypes=array([1, 0, 0, 0], dtype=int8))
demography = msprime.Demography()
demography.add_population(name="A", initial_size=1000)
demography.add_population(name="B", initial_size=1000)
demography.add_population(name="C", initial_size=1000)
demography.add_population_split(time=20000, derived=["A", "B"], ancestral="C")
ts = msprime.sim_ancestry(samples={"A": 2, "B": 2}, demography=demography, random_seed=1234)
demography
id | name | description | initial_size | growth_rate | default_sampling_time | extra_metadata |
---|---|---|---|---|---|---|
0 | A | 1000.0 | 0 | 0 | {} | |
1 | B | 1000.0 | 0 | 0 | {} | |
2 | C | 1000.0 | 0 | 2e+04 | {} |
time | type | parameters | effect |
---|---|---|---|
2e+04 | Population Split | derived=[A, B], ancestral=C | Moves all lineages from derived populations 'A' and 'B' to the ancestral 'C' population. Also set the derived populations to inactive, and all migration rates to and from the derived populations to zero. |
SVG(ts.draw_svg(y_axis=True))
demography = msprime.Demography()
demography.add_population(name="A", initial_size=1000, growth_rate=0.01)
Population(initial_size=1000, growth_rate=0.01, name='A', description='', extra_metadata={}, default_sampling_time=None, initially_active=None, id=0)
demography
id | name | description | initial_size | growth_rate | default_sampling_time | extra_metadata |
---|---|---|---|---|---|---|
0 | A | 1000.0 | 0.01 | 0 | {} |
demography = msprime.Demography()
demography.add_population(name="A", initial_size=100)
demography.add_population(name="B", initial_size=100)
demography.add_population(name="ADMIX", initial_size=100)
demography.add_population(name="ANC", initial_size=100)
demography.add_admixture(
time=10, derived="ADMIX", ancestral=["A", "B"], proportions=[0.25, 0.75])
demography.add_population_split(time=20, derived=["A", "B"], ancestral="ANC")
PopulationSplit(time=20, derived=['A', 'B'], ancestral='ANC')
demography
id | name | description | initial_size | growth_rate | default_sampling_time | extra_metadata |
---|---|---|---|---|---|---|
0 | A | 100.0 | 0 | 0 | {} | |
1 | B | 100.0 | 0 | 0 | {} | |
2 | ADMIX | 100.0 | 0 | 0 | {} | |
3 | ANC | 100.0 | 0 | 20 | {} |
time | type | parameters | effect |
---|---|---|---|
10 | Admixture | derived=ADMIX ancestral=[A, B] proportions=[0.25, 0.75] | Moves all lineages from admixed population 'ADMIX' to ancestral populations. Lineages move to 'A' with proba 0.25; 'B' with proba 0.75. Set 'ADMIX' to inactive, and all migration rates to and from 'ADMIX' to zero. |
20 | Population Split | derived=[A, B], ancestral=ANC | Moves all lineages from derived populations 'A' and 'B' to the ancestral 'ANC' population. Also set the derived populations to inactive, and all migration rates to and from the derived populations to zero. |
demography = msprime.Demography.isolated_model([100, 100])
demography.set_migration_rate(source=0, dest=1, rate=0.1)
demography
id | name | description | initial_size | growth_rate | default_sampling_time | extra_metadata |
---|---|---|---|---|---|---|
0 | pop_0 | 100.0 | 0 | 0 | {} | |
1 | pop_1 | 100.0 | 0 | 0 | {} |
pop_0 | pop_1 | |
---|---|---|
pop_0 | 0 | 0.1 |
pop_1 | 0 | 0 |