Step 1: import the packages

In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.version as ver
from biogeme.expressions import Beta

Check the version of Biogeme

In [2]:
# Display the installed Biogeme release so the results below can be tied to it.
ver.getVersion()
Out[2]:
'3.2.8'

Step 2: prepare the data

In [3]:
# Load the tab-separated file 'swissmetro.dat' into a DataFrame.
# The separator is passed by keyword: pandas deprecated positional arguments
# after the file path and made them keyword-only in pandas 2.0, so the
# positional form raises a TypeError on current releases.
df = pd.read_csv('swissmetro.dat', sep='\t')
df
/Users/michelbierlaire/opt/anaconda3/envs/python39/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3441: FutureWarning: In a future version of pandas all arguments of read_csv except for the argument 'filepath_or_buffer' will be keyword-only
  exec(code_obj, self.user_global_ns, self.user_ns)
Out[3]:
GROUP SURVEY SP ID PURPOSE FIRST TICKET WHO LUGGAGE AGE ... TRAIN_TT TRAIN_CO TRAIN_HE SM_TT SM_CO SM_HE SM_SEATS CAR_TT CAR_CO CHOICE
0 2 0 1 1 1 0 1 1 0 3 ... 112 48 120 63 52 20 0 117 65 2
1 2 0 1 1 1 0 1 1 0 3 ... 103 48 30 60 49 10 0 117 84 2
2 2 0 1 1 1 0 1 1 0 3 ... 130 48 60 67 58 30 0 117 52 2
3 2 0 1 1 1 0 1 1 0 3 ... 103 40 30 63 52 20 0 72 52 2
4 2 0 1 1 1 0 1 1 0 3 ... 130 36 60 63 42 20 0 90 84 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10723 3 1 1 1192 4 1 7 1 0 5 ... 148 13 30 93 17 30 0 156 56 2
10724 3 1 1 1192 4 1 7 1 0 5 ... 148 12 30 96 16 10 0 96 70 3
10725 3 1 1 1192 4 1 7 1 0 5 ... 148 16 60 93 16 20 0 96 56 3
10726 3 1 1 1192 4 1 7 1 0 5 ... 178 16 30 96 17 30 0 96 91 2
10727 3 1 1 1192 4 1 7 1 0 5 ... 148 13 60 96 21 30 0 120 70 3

10728 rows × 28 columns

In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()
Out[4]:
GROUP SURVEY SP ID PURPOSE FIRST TICKET WHO LUGGAGE AGE ... TRAIN_TT TRAIN_CO TRAIN_HE SM_TT SM_CO SM_HE SM_SEATS CAR_TT CAR_CO CHOICE
count 10728.000000 10728.000000 10728.0 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 ... 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000 10728.000000
mean 2.630034 0.630034 1.0 596.500000 2.914430 0.470638 2.888423 1.493289 0.678691 2.898490 ... 166.626025 514.335477 70.100671 87.466350 670.340697 20.020507 0.118568 123.795209 78.742077 2.152778
std 0.482818 0.482818 0.0 344.116678 1.147443 0.499160 2.191100 0.708293 0.603388 1.031726 ... 77.353284 1088.931881 37.431633 53.550371 1441.594614 8.161895 0.323295 88.710743 55.263663 0.632293
min 2.000000 0.000000 1.0 1.000000 1.000000 0.000000 1.000000 0.000000 0.000000 1.000000 ... 31.000000 4.000000 30.000000 8.000000 6.000000 10.000000 0.000000 0.000000 0.000000 0.000000
25% 2.000000 0.000000 1.0 298.750000 2.000000 0.000000 1.000000 1.000000 0.000000 2.000000 ... 109.000000 58.000000 30.000000 55.000000 70.000000 10.000000 0.000000 70.000000 40.000000 2.000000
50% 3.000000 1.000000 1.0 596.500000 3.000000 0.000000 3.000000 1.000000 1.000000 3.000000 ... 157.000000 94.000000 60.000000 78.000000 111.000000 20.000000 0.000000 120.000000 76.000000 2.000000
75% 3.000000 1.000000 1.0 894.250000 3.250000 1.000000 3.000000 2.000000 1.000000 4.000000 ... 209.000000 170.000000 120.000000 109.000000 209.000000 30.000000 0.000000 176.000000 112.000000 3.000000
max 3.000000 1.000000 1.0 1192.000000 9.000000 1.000000 10.000000 3.000000 3.000000 6.000000 ... 1049.000000 5040.000000 120.000000 796.000000 6720.000000 30.000000 1.000000 1560.000000 520.000000 3.000000

8 rows × 28 columns

In [5]:
# Wrap the DataFrame in a Biogeme database object named 'swissmetro'.
database = db.Database('swissmetro', df)

Make each variable of the database available as a Python variable of the same name

In [6]:
# Inject every entry of `database.variables` into the notebook's global
# namespace, so that the cells below can refer to data variables by bare name
# (e.g. PURPOSE, CHOICE, TRAIN_TT) when building expressions.
# NOTE(review): presumably the keys are the DataFrame column names — confirm.
globals().update(database.variables)

Remove some observations: keep only the rows where PURPOSE is 1 or 3 and CHOICE is not 0

In [7]:
# Number of observations in the database before any filtering.
database.getSampleSize()
Out[7]:
10728
In [8]:
# The comparisons are combined arithmetically: `*` plays the role of a
# logical AND and `+` of a logical OR, so an observation is flagged when
# PURPOSE is neither 1 nor 3, or when CHOICE is 0.
purpose_not_1_or_3 = (PURPOSE != 1) * (PURPOSE != 3)
choice_is_zero = (CHOICE == 0)
exclude = (purpose_not_1_or_3 + choice_is_zero) > 0

# Drop the flagged observations from the database.
database.remove(exclude)
In [9]:
# Number of observations left once the excluded rows have been removed.
database.getSampleSize()
Out[9]:
6768

Model specification

Parameters to be estimated

In [10]:
# Values of the status flag given as the last argument of Beta.
STATUS_ESTIMATED = 0  # the parameter is estimated from the data
STATUS_FIXED = 1      # the parameter is kept at its starting value

# Arguments of Beta: name, starting value, lower bound, upper bound, status.
# Alternative-specific constants; ASC_SM is the one held fixed at 0, so it
# does not appear among the estimated parameters.
ASC_CAR = Beta('ASC_CAR', 0, None, None, STATUS_ESTIMATED)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, STATUS_ESTIMATED)
ASC_SM = Beta('ASC_SM', 0, None, None, STATUS_FIXED)

# Coefficients shared by the three utility functions.
B_TIME = Beta('B_TIME', 0, None, None, STATUS_ESTIMATED)
B_COST = Beta('B_COST', 0, None, None, STATUS_ESTIMATED)

Definition of new variables

In [11]:
# Costs are multiplied by the indicator (GA == 0), i.e. they are zeroed
# whenever GA is non-zero.
SM_COST = SM_CO * (GA == 0)
TRAIN_COST = TRAIN_CO * (GA == 0)

# Car and train availabilities are multiplied by the indicator (SP != 0).
CAR_AV_SP = CAR_AV * (SP != 0)
TRAIN_AV_SP = TRAIN_AV * (SP != 0)

# Every travel time and cost entering the utilities is divided by the same
# constant.
SCALING_FACTOR = 100
TRAIN_TT_SCALED = TRAIN_TT / SCALING_FACTOR
TRAIN_COST_SCALED = TRAIN_COST / SCALING_FACTOR
SM_TT_SCALED = SM_TT / SCALING_FACTOR
SM_COST_SCALED = SM_COST / SCALING_FACTOR
CAR_TT_SCALED = CAR_TT / SCALING_FACTOR
CAR_CO_SCALED = CAR_CO / SCALING_FACTOR

Specification of the utility functions

In [12]:
# Each utility is an alternative-specific constant plus the time and cost
# terms, with B_TIME and B_COST common to all three alternatives.

# Train
V1 = (
    ASC_TRAIN
    + B_TIME * TRAIN_TT_SCALED
    + B_COST * TRAIN_COST_SCALED
)

# SM
V2 = (
    ASC_SM
    + B_TIME * SM_TT_SCALED
    + B_COST * SM_COST_SCALED
)

# Car
V3 = (
    ASC_CAR
    + B_TIME * CAR_TT_SCALED
    + B_COST * CAR_CO_SCALED
)

Associate the utility functions with the numbering of the alternatives

In [13]:
# Utility of each alternative, keyed by the alternative identifier that is
# compared with CHOICE in the model.
V = {1: V1, 2: V2, 3: V3}

Associate the availability conditions with the alternatives

In [14]:
# Availability expression of each alternative, using the same keys as the
# utility dictionary V.
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

The contribution of each observation to the log likelihood function is the logarithm of the logit probability of the chosen alternative

In [15]:
# Log of the logit probability of the alternative given by CHOICE, built from
# the utilities V and the availability conditions av.
logprob = models.loglogit(V, av, CHOICE)

Create the Biogeme object

In [16]:
# Build the estimation object from the database and the log likelihood
# expression.
biogeme = bio.BIOGEME(database, logprob)
# The model name is used in the results header and as the stem of the
# generated output file (e.g. 01logit~00.html).
biogeme.modelName = '01logit'

Running the estimation

In [17]:
# Estimate the parameters; the returned object is queried in the next cells.
results = biogeme.estimate()

Read the results

In [18]:
# Table of the estimated parameters: value, standard error, t-test and
# p-value, each also reported in a robust ("Rob.") version.
pandasResults = results.getEstimatedParameters()
pandasResults
Out[18]:
Value Std err t-test p-value Rob. Std err Rob. t-test Rob. p-value
ASC_CAR -0.154633 0.043235 -3.576524 0.000348 0.058163 -2.658590 0.007847
ASC_TRAIN -0.701187 0.054874 -12.778150 0.000000 0.082562 -8.492857 0.000000
B_COST -1.083790 0.051830 -20.910405 0.000000 0.068225 -15.885521 0.000000
B_TIME -1.277859 0.056883 -22.464561 0.000000 0.104254 -12.257120 0.000000
In [19]:
# Plain-text report: general fit statistics followed by the parameter
# estimates.
print(results)
Results for model 01logit
Output file (HTML):			01logit~00.html
Nbr of parameters:		4
Sample size:			6768
Excluded data:			3960
Init log likelihood:		-5331.252
Final log likelihood:		-5331.252
Likelihood ratio test (init):		-0
Rho square (init):			0
Rho bar square (init):			-0.00075
Akaike Information Criterion:	10670.5
Bayesian Information Criterion:	10697.78
Final gradient norm:		0.0006288285
ASC_CAR        : -0.155[0.0432 -3.58 0.000348][0.0582 -2.66 0.00785]
ASC_TRAIN      : -0.701[0.0549 -12.8 0][0.0826 -8.49 0]
B_COST         : -1.08[0.0518 -20.9 0][0.0682 -15.9 0]
B_TIME         : -1.28[0.0569 -22.5 0][0.104 -12.3 0]
('ASC_TRAIN', 'ASC_CAR'):	0.00138	0.58	-11.9	0	0.0039	0.812	-11.2	0
('B_COST', 'ASC_CAR'):	0.000485	0.216	-15.5	0	2.86e-05	0.00722	-10.4	0
('B_COST', 'ASC_TRAIN'):	8.22e-06	0.00289	-5.08	3.85e-07	-0.000831	-0.147	-3.34	0.000842
('B_TIME', 'ASC_CAR'):	-0.00144	-0.585	-12.6	0	-0.00482	-0.796	-7.27	3.72e-13
('B_TIME', 'ASC_TRAIN'):	-0.00225	-0.722	-5.56	2.69e-08	-0.0076	-0.883	-3.18	0.00147
('B_TIME', 'B_COST'):	0.00055	0.187	-2.79	0.0052	0.0022	0.309	-1.84	0.0658