#the tools we need
import requests
import pandas as pd
import numpy as np
import pysal
pd.set_option('display.max_columns', None)
#fetch new SU data from kobotoolbox via the old API. Probably this will change when they finish the new API
url = "https://kc.kobotoolbox.org/api/v1/data/300075"
headers = {
'content-type': "multipart/form-data",
'Authorization': "Token 0239eed59b53ae940ca8835302b6ad360c8fc540",
}
#querystring = {"group_mf1hs18/Stratigraphic_Reliability":"poor"}
kobo = requests.get(url, headers=headers)
#response_json = response.json()
#print(response.text)
#print first 4000 characters of response
print(kobo.text[:1000])
#print(response_json)
[{"group_ak4kk32/Colour":"brown","_notes":[],"group_mo76v47/Total_bulk_finds":"5","_bamboo_dataset_id":"","_tags":[],"group_ak4kk32/Soil_Composition_Percentage_Clay":"20","group_bu1ca84_row_1/group_bu1ca84_row_1_column":"Few","group_ye2st39/Filled_out_by":"edward_stewart","group_bu1ca84_row_2/group_bu1ca84_row_2_column_1":"No Tufo was found","group_bu1ca84_row_2/group_bu1ca84_row_2_column":"None","group_mf1hs18/group_qy7um66/Photo_models":"no","group_mf1hs18/Position":"all around the trench","_xform_id_string":"aFQDtwfYLpWjyYb6Uc5nMp","group_ye2st39/SU_Closed":"no","group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU2":"99999","group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU1":"9999","group_mf1hs18/Formation_Process":"accumulation","group_ak4kk32/Soil_Composition_Percentage_Silt":"40","group_ak4kk32/Matrix":"cohesive","group_mf1hs18/Approximate_date_of_SU":"21st C AD","end":"2019-06-27T04:06:58.479-04:00","group_qu1tl04/Eastern_limit":"excavation_lim","group_mf1hs18/g
#optional step: back up new data to a local file.
new_file=open("C:\\temp\\kobo.json",mode="w",encoding="utf-8")
new_file.write(kobo.text)
new_file.close()
#convert kobo json to a pandas dataframe
kobo_su=pd.read_json(kobo.text)
kobo_su.head()
Area | Definition | Excavation_Year | Formation_Process | Publishing_Status | SU_001 | SU_Type | Stratigraphic_Reliability | __version__ | _attachments | _bamboo_dataset_id | _geolocation | _id | _notes | _status | _submission_time | _submitted_by | _tags | _uuid | _validation_status | _xform_id_string | end | formhub/uuid | group_aa6pz93/Alignment | group_aa6pz93/Binding_Agent | group_aa6pz93/Building_Technique | group_aa6pz93/Dimensions | group_aa6pz93/Floor_Type | group_aa6pz93/Foundations | group_aa6pz93/Structure_Description | group_aa6pz93/Wall_Facing | group_aa6pz93/Wall_Finishing | group_ak4kk32/Colour | group_ak4kk32/Compaction | group_ak4kk32/Matrix | group_ak4kk32/Soil_Composition_Percentage_Clay | group_ak4kk32/Soil_Composition_Percentage_Sand | group_ak4kk32/Soil_Composition_Percentage_Silt | group_bu1ca84_row/group_bu1ca84_row_column | group_bu1ca84_row/group_bu1ca84_row_column_1 | group_bu1ca84_row_1/group_bu1ca84_row_1_column | group_bu1ca84_row_1/group_bu1ca84_row_1_column_1 | group_bu1ca84_row_2/group_bu1ca84_row_2_column | group_bu1ca84_row_2/group_bu1ca84_row_2_column_1 | group_cp91a70/Inclusion_Observations | group_cp91a70/Interface | group_cp91a70/Surface | group_cp91a70/Thickness_Observations | group_dn7bg68 | group_ey32a12/Observations | group_ey32a12/Position_001 | group_ey32a12/Shape | group_inclusions | group_iz3xs59 | group_lg3yr63/Non_Soil_Sampling | group_lg3yr63/Sieving | group_lg3yr63/Soil_Sampling | group_matrix | group_mf1hs18/Approximate_date_of_SU | group_mf1hs18/Area | group_mf1hs18/Chronological_Period | group_mf1hs18/Date_of_Layer_observations | group_mf1hs18/Definition | group_mf1hs18/End_of_Use_Phase | group_mf1hs18/Excavation_Year | group_mf1hs18/Formation_Phase | group_mf1hs18/Formation_Process | group_mf1hs18/Layer_distinguished_by | group_mf1hs18/Loss_phase | group_mf1hs18/Position | group_mf1hs18/SU_Type | group_mf1hs18/Stratigraphic_Reliability | group_mf1hs18/group_qy7um66/Photo_models | group_mf1hs18/group_qy7um66/Photos | group_mo76v47/Finds_Observations | group_mo76v47/Finds_Storage_Notes | group_mo76v47/Total_bulk_finds | group_mo76v47/Total_bulk_finds_weight | group_mo76v47/group_ht7hf81 | group_mo76v47/group_jr3of51 | group_mo76v47/group_lq6ss10 | group_qu1tl04/Depth_Limit | group_qu1tl04/Eastern_limit | group_qu1tl04/Northern_Limit | group_qu1tl04/Southern_Limit | group_qu1tl04/Unit_Limit_Notes | group_qu1tl04/Western_Limit | group_sb6os40_currentrelate/group_sb6os40_currentrelate_Harris_relationship | group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU1 | group_sb6os40_currentrelate/group_sb6os40_currentrelate_SU2 | group_xg9ph94 | group_ye2st39/Date_of_Finds_Study | group_ye2st39/Edited_by | group_ye2st39/Edited_on | group_ye2st39/Filled_out_by | group_ye2st39/Filled_out_on | group_ye2st39/Finds_Studied_by | group_ye2st39/SU_Closed | group_yp0fv41/Cut_bottom | group_yp0fv41/Cut_bottom_edge | group_yp0fv41/Cut_edges | group_yp0fv41/Cut_observations | group_yp0fv41/Cut_sides | group_yp0fv41/Cut_top_edge | meta/deprecatedID | meta/instanceID | start | today | username | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | area_i | A test SU for the purpose of this form's creat... | 2019.0 | intentional_de | beta | 9999 | anthropogenic | good | vASoWNKpGrMiWMYtnspLZ9 | [] | [None, None] | 30823853 | [] | submitted_via_web | 2019-06-23 19:29:45 | NaN | [] | 4f0fd312-ecc5-49a5-93a2-fe3a668d3c97 | {'by_whom': 'eddiecharlesstewart', 'timestamp'... | aFQDtwfYLpWjyYb6Uc5nMp | 2019-06-27T04:06:58.479-04:00 | 43a407dc130741098c714bd8b8620040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | brown | friable | cohesive | 20.0 | 40.0 | 40.0 | Many | many fragments of terra Sigilata were uncovere... | Few | A few tile fragments were uncovered | None | No Tufo was found | NaN | commingled | grassy | NaN | NaN | it is grassy, excavated with steel toe capped ... | NaN | NaN | [{'group_inclusions/inclusionsfreqency': 'medi... | NaN | no | no | no | [{'group_matrix/harrisrelationship': 'covers',... | 21st C AD | area_i | modern | The grass is probably not that old | Grass | NaN | 2019.0 | NaN | accumulation | colour composition | NaN | all around the trench | natural | poor | no | no | Lots of change was found here | It is now in my pocket | 5.0 | 5.0 | [{'group_mo76v47/group_ht7hf81/Details': '5x 1... | [{'group_mo76v47/group_jr3of51/Spot_Dates': '2... | [{'group_mo76v47/group_lq6ss10/Bases': '1', 'g... | original | excavation_lim | excavation_lim | excavation_lim | NaN | excavation_lim | covers | 9999.0 | 99999.0 | [{'group_xg9ph94/Interpretations': 'This SU is... | NaN | NaN | NaN | edward_stewart | 2019-06-27 | NaN | no | NaN | NaN | NaN | NaN | NaN | NaN | uuid:3214a397-fc61-4eb7-b9c0-e33931ed6995 | uuid:4f0fd312-ecc5-49a5-93a2-fe3a668d3c97 | 2019-06-23T20:24:52.369+01:00 | NaN | NaN | |
1 | NaN | NaN | NaN | NaN | NaN | 9999 | NaN | NaN | vT5AVpWUbRN4hXkW6KpKek | [] | [None, None] | 31149181 | [] | submitted_via_web | 2019-06-27 08:23:34 | NaN | [] | 37aae4fb-30e8-4adb-baff-c4b732fd8d5f | {'by_whom': 'eddiecharlesstewart', 'timestamp'... | aFQDtwfYLpWjyYb6Uc5nMp | 2019-06-27T03:53:46.686-04:00 | 43a407dc130741098c714bd8b8620040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | [{'group_mo76v47/group_jr3of51/Spot_Dates': '2... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | uuid:37aae4fb-30e8-4adb-baff-c4b732fd8d5f | 2019-06-27T03:52:51.969-04:00 | NaN | NaN | |
2 | NaN | NaN | NaN | NaN | beta | 9998 | NaN | NaN | vjuQCNvQx2tVSXENUzPoHx | [] | [None, None] | 31152598 | [] | submitted_via_web | 2019-06-27 09:13:16 | NaN | [] | c77bcd48-d876-4b17-b33e-7f1f9947192b | {'by_whom': 'eddiecharlesstewart', 'timestamp'... | aFQDtwfYLpWjyYb6Uc5nMp | 2019-06-27T05:32:53.854-04:00 | 43a407dc130741098c714bd8b8620040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | grey | friable | cohesive | 50.0 | 30.0 | 20.0 | NaN | NaN | NaN | NaN | NaN | NaN | lots of broken house things | sharp | lumpy | quite hefty | [{'group_dn7bg68/Insert_link_to_connected_form... | Excavated with a toothpick | centre of house structure | round in plan | [{'group_inclusions/inclusionsfreqency': 'freq... | NaN | yes | yes | yes | [{'group_matrix/harrisrelationship': 'abuts', ... | 2019 | area_i | julio_claudian | 02/12/1997 | A large deposit of rubble | phase_a_3 | 2019.0 | phase_a_1 | collapse | compaction composition | phase_a_4a | in centre of house structure | anthropogenic | fair | yes | yes | lots of broken house stuff | stored in a garden shed in michigan | 27.0 | 348.0 | [{'group_mo76v47/group_ht7hf81/Details': 'arch... | NaN | [{'group_mo76v47/group_lq6ss10/Maximum_Size_in... | original | original | original | original | SU is cut to the west by a quarrying cut | not_original | NaN | NaN | NaN | [{'group_xg9ph94/Interpretations': 'A layer fo... | 2019-06-19 | abe_thompson | 2019-06-26 | abigail_trowbr | 2019-06-03 | aida_ali | yes | NaN | NaN | NaN | NaN | NaN | NaN | uuid:5bbece0f-a5f5-4998-8ef2-31ec22e99117 | uuid:c77bcd48-d876-4b17-b33e-7f1f9947192b | 2019-06-27T04:57:19.848-04:00 | NaN | NaN | |
3 | NaN | NaN | NaN | NaN | beta | 1314 | NaN | NaN | vjuQCNvQx2tVSXENUzPoHx | [] | [None, None] | 31154436 | [] | submitted_via_web | 2019-06-27 09:52:48 | NaN | [] | 56908e9d-bc9f-47f0-989a-d98de51813d0 | {'by_whom': 'eddiecharlesstewart', 'timestamp'... | aFQDtwfYLpWjyYb6Uc5nMp | 2019-06-27T05:50:50.334-04:00 | 43a407dc130741098c714bd8b8620040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | sharp | NaN | NaN | NaN | There was a strong gale and heavy rain during ... | Near the large well shaft | elephant shaped cut | NaN | NaN | no | no | no | [{'group_matrix/harrisrelationship': 'cuts', '... | 1066AD | area_i | medieval_perio | This date is based off of the finding of a spo... | A spoliation cut | phase_a_3 | 2018.0 | phase_a_1 | cutting | compaction composition | phase_a_4c | Next to a wall in the west of area I | anthropogenic | poor | no | yes | NaN | NaN | NaN | NaN | NaN | [{'group_mo76v47/group_jr3of51/Spot_Dates': 'c... | NaN | original | original | original | original | NaN | original | NaN | NaN | NaN | [{'group_xg9ph94/Interpretations': 'It is a la... | 2019-06-17 | abigail_trowbr | 2019-06-17 | david_yelsey | 2019-06-25 | evelyn_adkins | no | concave | sharp | rounded | NaN | concave | sharp | NaN | uuid:56908e9d-bc9f-47f0-989a-d98de51813d0 | 2019-06-27T05:33:50.763-04:00 | NaN | NaN | |
4 | NaN | NaN | NaN | NaN | NaN | 9999 | NaN | NaN | vPzHY9gqVnLowgERod6kw8 | [{'mimetype': 'image/jpeg', 'download_url': 'h... | [None, None] | 31482851 | [] | submitted_via_web | 2019-07-01 14:09:57 | NaN | [] | b67f09c8-ecaa-42a9-a06b-3ebfb780f760 | {'by_whom': 'eddiecharlesstewart', 'timestamp'... | aFQDtwfYLpWjyYb6Uc5nMp | 2019-07-01T15:09:48.963+01:00 | 43a407dc130741098c714bd8b8620040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | [{'group_iz3xs59/Insert_a_photo': 'DSC_1141-15... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | uuid:b67f09c8-ecaa-42a9-a06b-3ebfb780f760 | 2019-07-01T15:02:30.493+01:00 | 2019-07-01 | username not found |
#read in legacy data from the ARK
legacy_su = pd.read_csv('https://raw.githubusercontent.com/ropitz/gabii_experiments/master/data/su_report.csv',sep=';')
legacy_su.head()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-171-55adee1c50ae> in <module>() 2 legacy_su = pd.read_csv('https://raw.githubusercontent.com/ropitz/gabii_experiments/master/data/su_report.csv',sep=';') 3 legacy_su.head() ----> 4 legacy_su.set_option('display.max_columns', None) ~\Anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name) 4370 if self._info_axis._can_hold_identifiers_and_holds_name(name): 4371 return self[name] -> 4372 return object.__getattribute__(self, name) 4373 4374 def __setattr__(self, name, value): AttributeError: 'DataFrame' object has no attribute 'set_option'
#optional: select the parts of the kobo SU sheet you want by their column names
kobo_su_sub = kobo_su[["SU_001","Area","Definition","Publishing_Status"]]
kobo_su_sub
SU_001 | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 9999 | area_i | A test SU for the purpose of this form's creat... | beta |
1 | 9999 | NaN | NaN | NaN |
2 | 9998 | NaN | NaN | beta |
3 | 1314 | NaN | NaN | beta |
4 | 9999 | NaN | NaN | NaN |
5 | 7800 | NaN | NaN | NaN |
6 | 9987 | NaN | NaN | beta |
7 | 7234 | NaN | NaN | NaN |
8 | 9999 | NaN | NaN | published |
#select the parts of the legacy SU sheet you want by their column names. This should be the same set as the kobo selection
legacy_su_sub = legacy_su[["ID","EXCAV_AREA","DEFINITION","PUB_STATUS"]]
legacy_su_sub.head()
ID | EXCAV_AREA | DEFINITION | PUB_STATUS | |
---|---|---|---|---|
0 | 0 | A | NaN | Published |
1 | 1 | A | Bedrock identified in 2009 | Beta |
2 | 3 | A | Fill of cut SU4 | Beta |
3 | 4 | A | Rectangular cut in bedrock | Beta |
4 | 5 | A | Layer of brown, silty clay | Beta |
#rename any silly column names for each dataframe
kobo_su_sub.rename(columns={'SU_001':'SU'},
inplace=True)
kobo_su_sub.head()
C:\Users\gabii4\Anaconda3\lib\site-packages\pandas\core\frame.py:3778: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy return super(DataFrame, self).rename(**kwargs)
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 9999 | area_i | A test SU for the purpose of this form's creat... | beta |
1 | 9999 | NaN | NaN | NaN |
2 | 9998 | NaN | NaN | beta |
3 | 1314 | NaN | NaN | beta |
4 | 9999 | NaN | NaN | NaN |
#rename any silly column names for each dataframe
legacy_su_sub.rename(columns={'ID':'SU','EXCAV_AREA':'Area','DEFINITION':'Definition','PUB_STATUS':'Publishing_Status'},
inplace=True)
legacy_su_sub.head()
C:\Users\gabii4\Anaconda3\lib\site-packages\pandas\core\frame.py:3778: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy return super(DataFrame, self).rename(**kwargs)
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 0 | A | NaN | Published |
1 | 1 | A | Bedrock identified in 2009 | Beta |
2 | 3 | A | Fill of cut SU4 | Beta |
3 | 4 | A | Rectangular cut in bedrock | Beta |
4 | 5 | A | Layer of brown, silty clay | Beta |
#stack the two together to integrate, preview the first twenty entries
su_stack = pd.concat([kobo_su_sub, legacy_su_sub], axis=0)
su_stack.head(20)
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 9999 | area_i | A test SU for the purpose of this form's creat... | beta |
1 | 9999 | NaN | NaN | NaN |
2 | 9998 | NaN | NaN | beta |
3 | 1314 | NaN | NaN | beta |
4 | 9999 | NaN | NaN | NaN |
5 | 7800 | NaN | NaN | NaN |
6 | 9987 | NaN | NaN | beta |
7 | 7234 | NaN | NaN | NaN |
8 | 9999 | NaN | NaN | published |
0 | 0 | A | NaN | Published |
1 | 1 | A | Bedrock identified in 2009 | Beta |
2 | 3 | A | Fill of cut SU4 | Beta |
3 | 4 | A | Rectangular cut in bedrock | Beta |
4 | 5 | A | Layer of brown, silty clay | Beta |
5 | 6 | A | Cut in bedrock | Beta |
6 | 7 | A | Layer of rocks mixed with brown clay | Beta |
7 | 8 | A | Cluster of mortar on Northwest side of cut | Beta |
8 | 9 | A | Cluster of mortar in SE side of SU4 | Beta |
9 | 10 | A | Cluster of roof tiles in SW corner of SU4 | Beta |
10 | 11 | A | Layer of brown sandy clay | Published |
#filter on field values from both lists
published = ['Published','published']
published_su = su_stack.loc[su_stack['Publishing_Status'].isin(published)]
published_su.head(20)
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
8 | 9999 | NaN | NaN | published |
0 | 0 | A | NaN | Published |
10 | 11 | A | Layer of brown sandy clay | Published |
360 | 363 | A | N-S wall | Published |
457 | 462 | A | Oval cut filled by SU451 | Published |
617 | 999 | D | Beater SU. This SU is a test | Published |
634 | 1016 | B | Post-abandonment colluvial layer | Published |
676 | 1058 | B | Wall running N-S in western part of area B - s... | Published |
742 | 1124 | B | upper fill of cappucina tomb Lordy Gaga | Published |
753 | 1135 | B | rubble wall - perpendicular to 1058 - west to ... | Published |
774 | 1156 | B | rubble layer | Published |
776 | 1158 | B | surface inside 'room' at S edge of excavation ... | Published |
780 | 1162 | B | perpendicular continuation of rubble wall 1135 | Published |
781 | 1163 | B | rubble wall continuation of 1135 | Published |
783 | 1165 | B | yellowish, silty soil south | Published |
786 | 1168 | B | layer containing soil and gravel | Published |
787 | 1169 | B | yellowish layer north of wall 1135 | Published |
788 | 1170 | B | ditch for rubble wall 1135 | Published |
789 | 1171 | B | cut for trench built wall 1163 | Published |
791 | 1173 | B | Crushed tufo floor | Published |
#go back a step and clean up values in the columns to match between tables
su_stack.replace({'area_i':'I','published':'Published'},inplace=True)
su_stack.head(20)
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 9999 | I | A test SU for the purpose of this form's creat... | beta |
1 | 9999 | NaN | NaN | NaN |
2 | 9998 | NaN | NaN | beta |
3 | 1314 | NaN | NaN | beta |
4 | 9999 | NaN | NaN | NaN |
5 | 7800 | NaN | NaN | NaN |
6 | 9987 | NaN | NaN | beta |
7 | 7234 | NaN | NaN | NaN |
8 | 9999 | NaN | NaN | Published |
0 | 0 | A | NaN | Published |
1 | 1 | A | Bedrock identified in 2009 | Beta |
2 | 3 | A | Fill of cut SU4 | Beta |
3 | 4 | A | Rectangular cut in bedrock | Beta |
4 | 5 | A | Layer of brown, silty clay | Beta |
5 | 6 | A | Cut in bedrock | Beta |
6 | 7 | A | Layer of rocks mixed with brown clay | Beta |
7 | 8 | A | Cluster of mortar on Northwest side of cut | Beta |
8 | 9 | A | Cluster of mortar in SE side of SU4 | Beta |
9 | 10 | A | Cluster of roof tiles in SW corner of SU4 | Beta |
10 | 11 | A | Layer of brown sandy clay | Published |
#merge multiple rows with the same SU id to merge changes. Come up with a better way to do this.
su_stack_clean = su_stack.groupby(['SU']).agg(lambda x: ';;'.join(x.astype(str))).reset_index()
su_stack_clean.head()
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
0 | 0 | A | nan | Published |
1 | 1 | A | Bedrock identified in 2009 | Beta |
2 | 3 | A | Fill of cut SU4 | Beta |
3 | 4 | A | Rectangular cut in bedrock | Beta |
4 | 5 | A | Layer of brown, silty clay | Beta |
#choose a row with a merged info set to see what it looks like
su_stack_clean.loc[su_stack_clean['SU'] == 9999]
SU | Area | Definition | Publishing_Status | |
---|---|---|---|---|
4581 | 9999 | I;;nan;;nan;;nan;;H | A test SU for the purpose of this form's creat... | beta;;nan;;nan;;Published;;Beta |
legacy_su.count()
ID 4594 PUB_STATUS 4177 PUB_YEAR 4594 DEFINITION 4578 FORMATION_PROCESS 4526 LAYER_DATE_START 4594 LAYER_DATE_START_SUFFIX 1502 LAYER_DATE_END 4594 LAYER_DATE_END_SUFFIX 1497 LAYER_DATE_OBSERVATIONS 1041 STRAT_RELIABILITY 4316 EXCAV_YEAR 4594 EXCAV_AREA 4581 EXCAV_SECTOR 2 SU_TYPE 4480 ELEV_MAX 4594 ELEV_MIN 4594 IN_CROSS_SECTION 4594 IN_ELEVATION_DRAWING 4594 HAS_PHOTOS 4594 HAS_PHOTOMODELS 4594 FILLED_BY 4594 FILLED_ON 2723 REVISED_BY 4594 REVISED_ON 2259 CLAY 4594 SILT 4594 SAND 4594 SOIL_MATRIX 2204 SOIL_COMPACTION 2318 ... ORIG_PDF_DATE 1191 ORIG_SIGNED_DATE 678 ORIG_FINDS_SIGNED_DATE 137 RECORD_FILLED 4594 RECORD_REVISED 4594 RECORD_FILLED_DATE 1264 RECORD_REVISED_DATE 4594 FINDS_BY 528 FINDS_ON 494 GEOLOGIC_INCLUSIONS 1663 ORGANIC_INCLUSIONS 1717 ANTHROPIC_INCLUSIONS 2122 LAYER_DISTINGUISHED_BY 3358 CHRONOLOGICAL_PERIOD 95 LIMITS_MULTI 4465 CUTS_MULTI 870 STRUCTURAL_REMAINS_MULTI 959 MATRIX1 4080 MATRIX2 4422 ABUTS 696 IS_ABUTTED_BY 539 COVERS 2296 IS_COVERED_BY 3528 CUTS 792 IS_CUT_BY 907 FILLS 1236 IS_FILLED_BY 867 IS_BOUND_TO 87 BINDS_TO 87 EQUALS 275 Length: 105, dtype: int64
token is: 0239eed59b53ae940ca8835302b6ad360c8fc540 project is: aFQDtwfYLpWjyYb6Uc5nMp
form is: 300227 gabii dev form is: 300075 gabii SU