There is currently no built-in support in petl for resolving conflicts within a table; however, this notebook gives an example of a workaround strategy.
# Show the Python interpreter version this notebook was run with.
import sys
sys.version_info
sys.version_info(major=3, minor=4, micro=2, releaselevel='final', serial=0)
# Show the petl version this notebook was run with.
import petl as etl
etl.__version__
'1.0.6'
# Master data as whitespace-delimited text. A "." stands for a missing value,
# and the row for id 2 has no age column at all.
data_master = b"""id name value age
1 Tressa 1203 42
2 Phil 23997
3 Darius . 78
4 Delinda 96501 64
5 Adelina 96508 50
"""
# Build the master table one transformation at a time.
# Read the text as a table with a single 'lines' field.
tbl_master = etl.fromtext(etl.MemorySource(data_master))
# Break each line into separate values on runs of whitespace.
tbl_master = etl.split(tbl_master, 'lines', r'\s+')
# Skip one row so that the first text line ("id name value age") is the header.
tbl_master = etl.skip(tbl_master, 1)
# Turn the "." placeholders into real missing values.
tbl_master = etl.replaceall(tbl_master, '.', None)
# Keep a copy of the master's age under a separate name: the merge below may
# replace 'age' with a Conflict, and this copy is what resolves it.
tbl_master = etl.addfield(tbl_master, 'master_age', lambda rec: rec.age)
tbl_master
id | name | value | age | master_age |
---|---|---|---|---|
1 | Tressa | 1203 | 42 | 42 |
2 | Phil | 23997 | None | None |
3 | Darius | None | 78 | 78 |
4 | Delinda | 96501 | 64 | 64 |
5 | Adelina | 96508 | 50 | 50 |
# Secondary data in the same whitespace-delimited layout as the master data.
# It fills in the age for id 2 and gives a different age for id 3.
data_other = b"""id name value age
2 Phil . 53
3 Darius 5000 76
"""
# Same loading steps as for the master table, minus the extra 'master_age' field.
# Read the text as a table with a single 'lines' field.
tbl_other = etl.fromtext(etl.MemorySource(data_other))
# Break each line into separate values on runs of whitespace.
tbl_other = etl.split(tbl_other, 'lines', r'\s+')
# Skip one row so that the first text line ("id name value age") is the header.
tbl_other = etl.skip(tbl_other, 1)
# Turn the "." placeholders into real missing values.
tbl_other = etl.replaceall(tbl_other, '.', None)
tbl_other
id | name | value | age |
---|---|---|---|
2 | Phil | None | 53 |
3 | Darius | 5000 | 76 |
# Merge the two tables on 'id'. Where one side is missing a value the other
# side's value is used (e.g. age for id 2); where both sides have different
# values the cell becomes an etl.Conflict holding both (e.g. age for id 3).
tbl_merge = tbl_master.merge(tbl_other, key='id')
tbl_merge
id | name | value | age | master_age |
---|---|---|---|---|
1 | Tressa | 1203 | 42 | 42 |
2 | Phil | 23997 | 53 | None |
3 | Darius | 5000 | Conflict({'76', '78'}) | 78 |
4 | Delinda | 96501 | 64 | 64 |
5 | Adelina | 96508 | 50 | 50 |
def _prefer_master_age(value, row):
    """Return the master table's age when the merged age is a Conflict.

    Any value that is not a Conflict is passed through unchanged.
    """
    if isinstance(value, etl.Conflict):
        return row.master_age
    return value


# Resolve conflicting ages in favour of the master table. pass_row=True gives
# the converter access to the whole row, and so to 'master_age'.
tbl_merge_resolved = etl.convert(tbl_merge, 'age', _prefer_master_age,
                                 pass_row=True)
# The helper column has done its job; drop it from the final table.
tbl_merge_resolved = etl.cutout(tbl_merge_resolved, 'master_age')
tbl_merge_resolved
id | name | value | age |
---|---|---|---|
1 | Tressa | 1203 | 42 |
2 | Phil | 23997 | 53 |
3 | Darius | 5000 | 78 |
4 | Delinda | 96501 | 64 |
5 | Adelina | 96508 | 50 |