Intro¶

This notebook demonstrates an example of how to use the Primavera P6 xer files reader library for data preparation.

The XER file contains the same data presented in the notebook which is shown in the "Optimizing Project Planning and Material transportation" post.

By obtaining the file via URL link, the required data—task names, durations, precedences, and release dates (intervals between each house's earliest starting date)—is transformed into Python objects.

Installing xerparser¶

In [ ]:

import importlib

# Check if xerparser is already installed
try:
    importlib.import_module('xerparser')
    print("xerparser is already installed.")
except ImportError:
    try:
        # Attempt installation
        %pip install xerparser
    except Exception as e:
        print("An error occurred while installing xerparser:", e)

xerparser is already installed.

Getting the XER file¶

In [ ]:

import requests

url = "https://raw.githubusercontent.com/inigmat/exupery/main/files/schedule/MDL4D.xer"

try:
    response = requests.get(url)
    if response.status_code == 200:
        print("Request successful!")
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

Request successful!

Read the downloaded file with checking for errors¶

In [ ]:

from xerparser import Xer, CorruptXerFile

try:
    xer = Xer(response.text)
except CorruptXerFile as e:
    for error in e.errors:
        print(error)

Getting the project data¶

In [ ]:

project = list(xer.projects.values())[0]

We have to get the following data

NbWorkers = 3
NbHouses  = 5

TaskNames = ("masonry","carpentry","plumbing",
             "ceiling","roofing","painting",
             "windows","facade","garden","moving")

Duration =  [35, 15, 40, 15, 5, 10, 5, 10, 5, 5]

ReleaseDate = [31, 0, 90, 120, 90]

Precedences = [("masonry", "carpentry"), ("masonry", "plumbing"), ("masonry", "ceiling"),
               ("carpentry", "roofing"), ("ceiling", "painting"), ("roofing", "windows"),
               ("roofing", "facade"), ("plumbing", "facade"), ("roofing", "garden"),
               ("plumbing", "garden"), ("windows", "moving"), ("facade", "moving"),
               ("garden", "moving"), ("painting", "moving")]

Start with getting the WBS (house numbers)¶

We skip the first two levels using WBS_LVL variable

In [ ]:

WBS_LVL = 2
houses = [obj.name for obj in project.wbs_nodes[WBS_LVL:]]
NbHouses= len(houses)
houses

Out[ ]:

['House 1', 'House 2', 'House 3', 'House 4', 'House 5']

Obtain the labor resources of the project to get the number of workers¶

In [ ]:

workers = {}
for res in project.resources:
  if res.rsrc_type == "RT_Labor":
    workers[res.rsrc_id] = res.resource.name
NbWorkers = len(workers)

Prepare pandas dataframe to check out the tasks data such as 'TASK ID','Name','Type', 'WBS ID', 'Duration', 'Successors (ID, Link, Lag)'¶

In [ ]:

import pandas as pd

tasks_df = pd.DataFrame(columns=['TASK ID','Name','Type', 'WBS ID', 'Duration', 'Successors (ID, Link, Lag)'])

for task in project.tasks:
    task_sucs = []
    for pred_link in task.successors:
        task_suc = pred_link.task.uid
        task_link = pred_link.link
        task_lag = pred_link.lag
        task_sucs.append((task_suc, task_link, task_lag))

    tasks_df = pd.concat([tasks_df, pd.DataFrame(
        {'TASK ID': [task.uid],
         'Name': [task.name],
         'Type': [task.type],
         'WBS ID': [task.wbs_id],
         'Duration': [task.duration],
         'Successors (ID, Link, Lag)': [task_sucs],
         })], ignore_index=True)

In [ ]:

tasks_df.head(15)

Out[ ]:

	TASK ID	Name	Type	WBS ID	Duration	Successors (ID, Link, Lag)
0	104836	House 1 Start	TaskType.TT_Mile	26152	0	[]
1	104837	Masonry	TaskType.TT_Task	26154	35	[(104838, FS, 0), (104839, FS, 0), (104840, FS...
2	104838	Carpentry	TaskType.TT_Task	26154	15	[(104841, FS, 0)]
3	104839	Plumbing	TaskType.TT_Task	26154	40	[(104844, FS, 0), (104845, FS, 0)]
4	104840	Ceiling	TaskType.TT_Task	26154	15	[(104842, FS, 0)]
5	104841	Roofing	TaskType.TT_Task	26154	5	[(104843, FS, 0), (104844, FS, 0), (104845, FS...
6	104842	Painting	TaskType.TT_Task	26154	10	[(104846, FS, 0)]
7	104843	Windows	TaskType.TT_Task	26154	5	[(104846, FS, 0)]
8	104844	Facade	TaskType.TT_Task	26154	10	[(104846, FS, 0)]
9	104845	Garden	TaskType.TT_Task	26154	5	[(104846, FS, 0)]
10	104846	Moving	TaskType.TT_Rsrc	26154	5	[(104883, FF, 0)]
11	104847	House 2 Start	TaskType.TT_Mile	26152	0	[]
12	104848	Masonry	TaskType.TT_Task	26155	35	[(104849, FS, 0), (104850, FS, 0), (104851, FS...
13	104849	Carpentry	TaskType.TT_Task	26155	15	[(104852, FS, 0)]
14	104850	Plumbing	TaskType.TT_Task	26155	40	[(104855, FS, 0), (104856, FS, 0)]

Get the list of task names. Filter by wbs_id due to the repeating structure of activities on each house¶

In [ ]:

def get_task_dict_by_wbs_id(df, wbs_id):
    filtered_df = df[df['WBS ID'] == wbs_id]
    task_dict = dict(zip(filtered_df['TASK ID'], filtered_df['Name']))
    return task_dict

wbs_id = '26154' # wbs_id of activites on House 1 (taken from the dataframe above)
task_dict = get_task_dict_by_wbs_id(tasks_df, wbs_id)
TaskNames = tuple(task_dict.values())

Get the tasks duration¶

In [ ]:

durations = {task_id: tasks_df.loc[tasks_df['TASK ID'] == task_id, 'Duration'].values[0] for task_id in task_dict.keys()}
Duration =  list(durations.values())

Get the precedences of the tasks¶

In [ ]:

prec = []
for task in project.tasks:
  if task.uid in task_dict:
    for pred_link in task.successors:
      suc_uid = pred_link.task.uid
      if suc_uid in task_dict:
        suc_name = pred_link.task.name
        prec.append((task.name,suc_name))
Precedences = prec

Get the release dates. Dates ordered by the house number¶

In [ ]:

lags = {}
for rel in project.relationships:
    if str(rel.predecessor.type) == 'TaskType.TT_Mile':
      lags[rel.successor.wbs.name] = rel.lag
lags

Out[ ]:

{'House 2': 0, 'House 1': 31, 'House 3': 90, 'House 4': 120, 'House 5': 90}

In [ ]:

ReleaseDate_dict = {key: lags.get(key) for key in sorted(lags)}
ReleaseDate = list(ReleaseDate_dict.values())

Finally obtain the project data¶

In [ ]:

print("NbWorkers =", NbWorkers, "\n")
print("NbHouses =", NbHouses, "\n")
print("TaskNames =", TaskNames, "\n")
print("Duration =", Duration, "\n")
print("ReleaseDate =", ReleaseDate, "\n")
print("Precedences =", Precedences, "\n")

NbWorkers = 3 

NbHouses = 5 

TaskNames = ('Masonry', 'Carpentry', 'Plumbing', 'Ceiling', 'Roofing', 'Painting', 'Windows', 'Facade', 'Garden', 'Moving') 

Duration = [35, 15, 40, 15, 5, 10, 5, 10, 5, 5] 

ReleaseDate = [31, 0, 90, 120, 90] 

Precedences = [('Masonry', 'Carpentry'), ('Masonry', 'Plumbing'), ('Masonry', 'Ceiling'), ('Carpentry', 'Roofing'), ('Plumbing', 'Facade'), ('Plumbing', 'Garden'), ('Ceiling', 'Painting'), ('Roofing', 'Windows'), ('Roofing', 'Facade'), ('Roofing', 'Garden'), ('Painting', 'Moving'), ('Windows', 'Moving'), ('Facade', 'Moving'), ('Garden', 'Moving')]