import numpy as np
import pandas as pd
# Number of synthetic observations (rows) to generate.
noobs = 100
# One boolean flag per observation, True with probability 0.5. A single
# vectorized draw and comparison replaces the per-element Python loop and
# yields the same bool-dtype array of shape (noobs,).
pure = np.random.random(noobs) < 0.5
pure
array([ True, True, True, False, False, True, False, True, True, False, False, True, False, False, True, True, True, False, False, False, False, False, True, False, False, False, False, True, False, False, True, False, False, True, True, False, True, False, True, True, False, False, True, True, False, True, False, False, True, True, False, False, False, False, False, True, True, True, False, True, False, True, False, False, False, True, True, False, True, True, True, False, False, False, False, True, False, False, True, True, True, True, False, True, True, False, True, False, True, True, False, False, True, True, False, False, False, False, False, True])
# Ages: draws from a normal distribution (mean 4, sd 2) rounded to whole
# numbers. The builtin `int` replaces the `np.int` alias, which was deprecated
# in NumPy 1.20 and removed in 1.24 (it now raises AttributeError).
age = np.around(np.random.normal(4, 2, noobs)).astype(int)
# Negative draws are clamped to zero.
age[age <= 0] = 0
age
array([3, 7, 6, 3, 3, 5, 8, 3, 4, 5, 5, 6, 3, 3, 1, 4, 3, 8, 7, 2, 5, 3, 6, 5, 3, 4, 4, 8, 2, 5, 4, 2, 9, 2, 3, 2, 2, 3, 1, 4, 3, 0, 8, 8, 4, 7, 3, 3, 5, 5, 3, 2, 2, 6, 6, 6, 5, 2, 8, 4, 5, 5, 4, 5, 7, 4, 0, 5, 1, 2, 3, 5, 2, 0, 3, 0, 2, 3, 6, 4, 6, 6, 3, 4, 5, 6, 4, 2, 3, 6, 7, 6, 1, 6, 4, 4, 4, 6, 3, 7])
# Weights: draws from a normal distribution (mean 15, sd 5), with negative
# draws reflected to positive and the result rounded to two decimals.
weight = np.around(np.absolute(np.random.normal(15, 5, noobs)), 2)
weight
array([ 7.94, 5.05, 16.15, 15.5 , 12.17, 14.11, 9.51, 15.6 , 12.36, 19.18, 15.98, 4.85, 19.52, 11.21, 14.08, 14.96, 6.84, 15.76, 17.78, 21.49, 16.26, 17.82, 7.9 , 18.26, 18.56, 11.95, 11.09, 15.57, 6.97, 19.78, 18.58, 19.68, 16.92, 3.19, 21.78, 13.49, 7.29, 11.74, 17.27, 13.89, 15.38, 12.44, 17.66, 12.55, 11.32, 15.95, 10.89, 22.06, 8.11, 12.89, 14.57, 15.37, 20.71, 16.72, 20.06, 16.1 , 16.87, 9.41, 19.5 , 16.18, 18.06, 15.5 , 14.61, 4.82, 10.92, 9.79, 21.48, 16.14, 15.26, 16.36, 15.49, 8.84, 13.85, 17.1 , 16.12, 3.67, 14.98, 8.28, 16.25, 21.81, 21.88, 14.21, 5.05, 19.55, 6.6 , 13.39, 20.9 , 8.02, 19.38, 16.52, 9.02, 13.38, 21.5 , 10.55, 11. , 20.76, 17.4 , 15.6 , 8.37, 9.09])
# Heights: draws from a normal distribution (mean 50, sd 10), sign dropped,
# kept to one decimal place.
height = np.abs(np.random.normal(loc=50, scale=10, size=noobs)).round(1)
height
array([49.6, 63.5, 68.2, 64.7, 59.7, 26.9, 49.2, 49.9, 48.1, 54.2, 57.2, 56.7, 43. , 57.2, 47.7, 59.3, 42.5, 46.8, 39.8, 59.5, 42.8, 45.1, 44.1, 50.4, 61. , 54.9, 46.1, 48. , 40.1, 49. , 48.2, 50.3, 39.9, 47.9, 41.6, 41. , 55.8, 45.2, 55.2, 47.1, 47.6, 55.9, 37.2, 58.6, 48.3, 56. , 47.6, 57. , 34.4, 36.5, 68.4, 39.1, 45.8, 48.3, 61.3, 53.4, 51.3, 48.6, 51.9, 36.9, 41.9, 66.8, 55.5, 44.9, 43.2, 52.3, 17.2, 55.8, 63.5, 55.2, 41.9, 53.4, 36. , 60.3, 28.4, 46.9, 46.7, 41.3, 44.6, 52. , 43. , 63.4, 55.7, 41. , 48.1, 65.1, 52. , 48.2, 63.5, 51.5, 58.3, 63.3, 55.7, 44.6, 43.7, 52.2, 47.5, 51.4, 54.1, 50.3])
# Synthetic lifespan: the sum of three terms, rounded to the nearest integer.
#   1. a baseline drawn from a normal distribution (mean 5, sd 0.1);
#   2. +1.2 for every observation whose `pure` flag is False;
#   3. (40 - weight)^2 / 100, which grows as weight moves away from 40.
# The builtin `int` replaces the `np.int` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (it now raises AttributeError).
lifespan = np.around(
    np.random.normal(5, 0.1, noobs)
    + (~pure).astype(int) * 1.2
    + np.absolute(40.0 - weight) ** 2 / 100.0
).astype(int)
lifespan
array([15, 17, 11, 12, 14, 12, 15, 11, 13, 10, 12, 17, 10, 14, 12, 11, 16, 12, 11, 10, 12, 11, 15, 11, 11, 14, 15, 11, 17, 10, 10, 11, 11, 19, 8, 13, 16, 14, 10, 12, 12, 14, 10, 13, 14, 11, 15, 10, 15, 12, 13, 12, 10, 11, 10, 11, 10, 14, 10, 11, 11, 11, 13, 19, 14, 14, 8, 12, 11, 11, 11, 16, 13, 12, 12, 18, 12, 16, 11, 8, 8, 12, 18, 9, 16, 13, 9, 16, 9, 11, 16, 13, 8, 14, 15, 10, 11, 12, 16, 14])
# Gather the generated arrays into a single table, one column per variable,
# in the order: pure, age, weight, height, lifespan.
df = pd.DataFrame(
    dict(
        pure=pure,
        age=age,
        weight=weight,
        height=height,
        lifespan=lifespan,
    )
)
df
|  | pure | age | weight | height | lifespan |
---|---|---|---|---|---|
0 | True | 3 | 7.94 | 49.6 | 15 |
1 | True | 7 | 5.05 | 63.5 | 17 |
2 | True | 6 | 16.15 | 68.2 | 11 |
3 | False | 3 | 15.50 | 64.7 | 12 |
4 | False | 3 | 12.17 | 59.7 | 14 |
... | ... | ... | ... | ... | ... |
95 | False | 4 | 20.76 | 52.2 | 10 |
96 | False | 4 | 17.40 | 47.5 | 11 |
97 | False | 6 | 15.60 | 51.4 | 12 |
98 | False | 3 | 8.37 | 54.1 | 16 |
99 | True | 7 | 9.09 | 50.3 | 14 |
100 rows × 5 columns
# Write the generated table to disk as CSV. With to_csv's defaults the row
# index is written as well, as the first column of the file.
df.to_csv(path_or_buf="data/dogs.csv")