import datetime import petl.interactive as etl print etl.__version__ data = """user_id,date_begin,date_end,nb_days,notes,projet_id user1,2014-07-31,2014-08-07,5.5,5 days + am,cp user2,2014-07-31,2014-08-07,5.5,5 days + pm,cp user3,2014-07-31,2014-08-06,5,5 days,cp """ day = datetime.timedelta(days=1) def split_partial_days(row): if isinstance(row.nb_days, float): # split out partial days into separate row if 'am' in row.notes: # full days yield (row.user_id, row.date_begin, row.date_end - day, int(row.nb_days), row.notes.split('+')[0].strip(), row.projet_id) # partial days yield (row.user_id, row.date_end, row.date_end, row.nb_days - int(row.nb_days), row.notes.split('+')[1].strip(), row.projet_id) if 'pm' in row.notes: # partial days yield (row.user_id, row.date_begin, row.date_begin, row.nb_days - int(row.nb_days), row.notes.split('+')[1].strip(), row.projet_id) # full days yield (row.user_id, row.date_begin + day, row.date_end, int(row.nb_days), row.notes.split('+')[0].strip(), row.projet_id) else: # do nothing yield row tbl = (etl .fromcsv(etl.StringSource(data)) .convert(('date_begin', 'date_end'), etl.dateparser('%Y-%m-%d')) .convert('nb_days', etl.parsenumber) .rowmapmany(split_partial_days, fields=['user_id', 'date_begin', 'date_end', 'nb_days', 'notes', 'projet_id']) ) tbl