data = """type,price,quantity
Apples
Cortland,0.30,24
Red Delicious,0.40,24
Oranges
Navel,0.50,12
"""
import petl.interactive as etl
from petl.io import StringSource
tbl1 = (etl
.fromcsv(StringSource(data))
)
tbl1
type | price | quantity |
---|---|---|
Apples | ||
Cortland | 0.30 | 24 |
Red Delicious | 0.40 | 24 |
Oranges | ||
Navel | 0.50 | 12 |
def make_room_for_category(row):
if len(row) == 1:
return (row[0], 'X', 'X', 'X')
else:
return (None,) + tuple(row)
tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity'])
tbl2
category | type | price | quantity |
---|---|---|---|
Apples | X | X | X |
None | Cortland | 0.30 | 24 |
None | Red Delicious | 0.40 | 24 |
Oranges | X | X | X |
None | Navel | 0.50 | 12 |
tbl3 = tbl2.filldown()
tbl3
category | type | price | quantity |
---|---|---|---|
Apples | X | X | X |
Apples | Cortland | 0.30 | 24 |
Apples | Red Delicious | 0.40 | 24 |
Oranges | X | X | X |
Oranges | Navel | 0.50 | 12 |
tbl4 = tbl3.ne('type', 'X')
tbl4
category | type | price | quantity |
---|---|---|---|
Apples | Cortland | 0.30 | 24 |
Apples | Red Delicious | 0.40 | 24 |
Oranges | Navel | 0.50 | 12 |
class CustomTransformer(object):
def __init__(self, source):
self.source = source
def __iter__(self):
it = iter(self.source)
# construct new header
source_fields = it.next()
out_fields = ('category',) + tuple(source_fields)
yield out_fields
# transform data
current_category = None
for row in it:
if len(row) == 1:
current_category = row[0]
else:
yield (current_category,) + tuple(row)
tbl5 = CustomTransformer(tbl1)
# just so it formats nicely as HTML in the notebook...
etl.wrap(tbl5)
category | type | price | quantity |
---|---|---|---|
Apples | Cortland | 0.30 | 24 |
Apples | Red Delicious | 0.40 | 24 |
Oranges | Navel | 0.50 | 12 |