import pandas as pd
from soc_module import *
import re
import datetime
from datetime import date, datetime
Load the Alameda GeoJSON file.
# Load the Alameda GeoJSON data. The context manager closes the file handle,
# which a bare open() passed straight to geojson.load() never does.
# NOTE(review): `geojson` is not imported explicitly in this file; presumably
# it arrives via `from soc_module import *` -- confirm.
with open("data/alameda-2010.geojson") as geojson_file:
    alameda = geojson.load(geojson_file)
Define a function that filters out any stale index columns (`Unnamed: 0…`) when loading the data.
def filter_old_index(name: str) -> bool:
    """Return whether a CSV column should be kept when loading.

    Used as the ``usecols`` callable for ``pd.read_csv`` so that stale index
    columns (named ``Unnamed: 0``, ``Unnamed: 0.1``, ...) are not read.

    Args:
        name: Column name to test.

    Returns:
        False if ``name`` starts with ``"Unnamed: 0"``; True otherwise.
    """
    # re.match anchors at the start of the string, so only a leading
    # "Unnamed: 0" disqualifies a column.
    return re.match(r"Unnamed: 0.*", name) is None
Load in survey data
# Read the raw survey export. usecols=filter_old_index keeps only the columns
# for which filter_old_index returns True, i.e. everything except columns
# named "Unnamed: 0...".
data = pd.read_csv('data/ORIGINAL_RESPONSES.csv', usecols=filter_old_index)
# Preview the first rows as the cell output.
data.head()
Timestamp | Census Tract | On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many empty beer or liquor bottles are visible in streets, yards, or alleys? | On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many cigarette or cigar butts or discarded cigarette packages are on the sidewalk or in the gutters? | On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many condoms are present on the sidewalk, in the gutters, or street of block face? | On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how much garbage, litter, or broken glass in the street or on the sidewalks? | Are there abandoned cars in the neighborhood? How many do you see? | On a scale of 1-5 where 1 is "Friendly Responses / Greetings / Helpful" and 5 is "Treated with Suspicion", How were you regarded by the people in the block face? | On a scale of 1 - 4, where 1 is "Very well kept / good condition" and 4 is "Poor / badly deteriorated condition", in general, how would you rate the condition of buildings on the block face? (includes residential buildings, recreational facilities, manufacturing plants, business / industrial headquarters, etc) | Is there graffiti or evidence of graffiti that has been painted over on buildings, signs, or walls? (Questions 22-23) | ... | Other thoughts or comments for Image #2 | Image #3 | Full Address of Block Face in Image #3 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704. | Other thoughts or comments for Image #3 | Image #4 | Other thoughts or comments for Image #4 | Full Address of Block Face in Image #4 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704. | Image #5 | Full Address of Block Face in Image #5 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704. | Other thoughts or comments for Image #5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10/25/2019 12:52:40 | 4030.0 | 1 | 2 | 1 | 2 | 0 | 4 | 4 | Yes | ... | The Salvation Army was closed on this particul... | https://drive.google.com/open?id=1KY1Mvgc9-eLq... | 601 Webster Street, Oakland, CA, 94609 | I should have gone in here to try the food, i... | https://drive.google.com/open?id=1keCgzxOTujwn... | UGS crew --- possibly a gang or street dance t... | 601 Webster St, Oakland, CA, 94609 | NaN | NaN | NaN |
1 | 10/25/2019 12:54:25 | 4205.0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | No | ... | NaN | https://drive.google.com/open?id=1-eg5yVHgMiRt... | 1391 Solano Ave, Albany, CA, 94706 | NaN | https://drive.google.com/open?id=1mouPFYSJ16OW... | NaN | 1391 Solano Ave, Albany, CA, 94706 | https://drive.google.com/open?id=1ymFvN6smtsGK... | 708 Solano Ave, Albany, CA, 94706 | NaN |
2 | 10/25/2019 14:32:16 | 4214.0 | 1 | 1 | 1 | 1 | 0 | 2 | 2 | No | ... | Another example of a well-maintained house on ... | https://drive.google.com/open?id=12Fqi0YrnKfV9... | 1136 Spruce Street, Berkeley, CA, 94704 | One of many houses that’s currently being reno... | https://drive.google.com/open?id=189j-3sZAefhs... | PG&E is currently working on Spruce Street, wi... | 1128 Spruce Street, Berkeley, CA, 94704 | https://drive.google.com/open?id=1Hn70fF7xfZh1... | 1133 Spruce Street, Berkeley, CA, 94704 | This house is located at the east side of Spru... |
3 | 10/25/2019 15:27:57 | 4030.0 | 1 | 2 | 1 | 3 | 0 | 3 | 2 | Yes | ... | NaN | https://drive.google.com/open?id=1ykCymHyvIELa... | 376 8th Street, Oakland, CA, 94607 | NaN | https://drive.google.com/open?id=1rfVrj-7QZkbf... | NaN | 800 Franklin Street, Oakland, CA, 94607 | NaN | NaN | NaN |
4 | 10/25/2019 21:24:53 | 4213.0 | 1 | 1 | 1 | 1 | 0 | 2 | 4 | No | ... | For the stretch from the intersection of Carlo... | https://drive.google.com/open?id=1S_BJ03A64lP9... | 1647 Hopkins Street, Berkeley, CA, 94707 | NaN | https://drive.google.com/open?id=1a4rgJSw2qIMk... | I felt that this residence was fairly represen... | 1288 Carlotta Avenue, Berkeley, CA, 94707 | https://drive.google.com/open?id=1clCPXj-QBzMp... | 1611 Hopkins Street, Berkeley, CA, 94707 | This building had a mattress leaning on one of... |
5 rows × 32 columns
Separate new responses from previously processed ones (because `get_coords` takes a long time).
# Load the responses saved by an earlier run (same stale-index column filter
# as the raw export).
old = pd.read_csv("data/all-responses.csv", usecols=filter_old_index)
# Rows whose Timestamp does not appear in the saved file are the ones that
# still need processing. .copy() makes `new` an independent frame, so code
# that later adds or overwrites columns on it does not trigger pandas'
# SettingWithCopyWarning or write into a temporary slice of `data`.
new = data[~data["Timestamp"].isin(old["Timestamp"])].copy()
Get image coordinates - this takes a while, 38 mins when doing it on all ~150 rows
%%time
# Run get_coords on the not-yet-processed rows only (the slow step).
# get_coords is not defined in this file -- presumably it comes from
# soc_module via the star import; confirm. The meaning of the third argument
# ("sociology-130ac") is not visible here -- TODO document once confirmed.
new_coords = get_coords(new, alameda, "sociology-130ac")
/Users/xt/Documents/Academia/Fall 2019/dsmodule/SOC-130AC/soc_module.py:71: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy data['Census Tract'] = data['Census Tract'].apply(fix_tract) /Users/xt/Documents/Academia/Fall 2019/dsmodule/SOC-130AC/soc_module.py:98: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy data['Image #' + str(j)+ ' coordinates'] = image_coords
Combine the new coordinates with the previously processed data and write the result to CSV.
# Stack the previously saved rows and the newly processed rows. No
# ignore_index is passed, so each frame keeps its own index labels.
data = pd.concat([old, new_coords])#.iloc[1:,]
# Overwrite the saved file with the combined table; index=False keeps the
# index out of the CSV.
data.to_csv("data/all-responses.csv", index=False)
Define mapping to rename cols
# Mapping from the long survey-question column headers to short column names,
# passed to DataFrame.rename below. Keys must match the CSV headers character
# for character -- several carry leading or trailing spaces.
# NOTE(review): some target names look like typos ("Y?N", "Fenching",
# "Transporation", "Linked the Block Fence"). They are left unchanged because
# they become column names in the written CSVs -- confirm before renaming.
new_col_names = {
'On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many empty beer or liquor bottles are visible in streets, yards, or alleys? ': "Amount of Beer/Liquor Bottles (1 to 5 scale)",
'On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many cigarette or cigar butts or discarded cigarette packages are on the sidewalk or in the gutters? ': "Amount of Cigarette/Cigar Butts (1 to 5 scale)",
'On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how many condoms are present on the sidewalk, in the gutters, or street of block face? ': "Amount of Condoms (1 to 5 scale)",
'On a scale of 1 - 5, where 1 is "None" and 5 is "A Lot", how much garbage, litter, or broken glass in the street or on the sidewalks? ': "Amount of Garbage (1 to 5 scale)",
' Are there abandoned cars in the neighborhood? How many do you see? ': "Abandoned Cars (Y?N)",
'On a scale of 1-5 where 1 is "Friendly Responses / Greetings / Helpful" and 5 is "Treated with Suspicion", How were you regarded by the people in the block face?': "Treatment (1 (Friendly) to 5 (Suspicion) scale)",
'On a scale of 1 - 4, where 1 is "Very well kept / good condition" and 4 is "Poor / badly deteriorated condition", in general, how would you rate the condition of buildings on the block face? (includes residential buildings, recreational facilities, manufacturing plants, business / industrial headquarters, etc)': "Condition of Buildings (1 (Good) to 4 (Poor) scale)",
'Is there graffiti or evidence of graffiti that has been painted over on buildings, signs, or walls? (Questions 22-23)': "Graffiti (Y/N)",
'On a scale of 1 - 4, where 1 is "No fencing" and 4 is "High mesh fencing with barbed wire or spiked tops", is there fencing and what kind? (includes all property)': "Fenching (1 (None) to 4 (High mesh) scale)",
'Are any commercial/residential buildings being renovated?': "Renovations (Y/N)",
'What kinds of establishments are there on the block face? Select all that apply.': "Types of Establishments",
'On a scale of 1-3, where 1 is "Few or none" and 3 is a "Most/all of it", how many trees are linking the street of the block face? ': "Amount of Trees Linked the Block Fence (1 (Few) to 3 (Most) scale)",
'Is there public transportation available in the block face? ': "Public Transporation Available (Y/N)",
'Are private security guards visible?': "Private Security Visible (Y/N)",
'Is there a police officer visible? ': "Police Visible (Y/N)",
'Full Address of Block Face in Image #1 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704.': "Image #1 Address",
'Full Address of Block Face in Image #2 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704.': "Image #2 Address",
'Full Address of Block Face in Image #3 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704.': "Image #3 Address",
'Full Address of Block Face in Image #4 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704.': "Image #4 Address",
'Full Address of Block Face in Image #5 (Street Number, Street Name, City, State, Zip Code). E.g.: 2128 Oxford Street, Berkeley, CA, 94704.': "Image #5 Address",
}
Extract class_data and image_data
# Build class_data (all responses, with short column names) and image_data
# (the columns from position 17 onward plus the census tract).

# rename() returns a new frame, so `data` itself is left untouched.
class_data = data.rename(columns=new_col_names)
class_data['Timestamp'] = class_data['Timestamp'].astype('str')

# NOTE(review): the first row is dropped here with no recorded reason (the
# original line was annotated "why??"). Behavior kept as-is -- confirm whether
# that row is a test submission. .copy() makes the slice an independent frame
# so the column assignments below are safe.
class_data = class_data.iloc[1:, :].copy()

# Re-key Yes/No answers to 1/0 in every column that contains a "Yes".
# NOTE(review): any other value in such a column becomes NaN via map().
for c in class_data.columns:
    try:
        has_yes = "Yes" in set(class_data[c])
    except TypeError:
        # set() cannot hash list-valued cells (e.g. freshly computed
        # coordinates); such columns hold no Yes/No answers, so skip them.
        print("skipped")
        continue
    if has_yes:
        class_data[c] = class_data[c].map({"Yes": 1, "No": 0})

# Limit to submissions from Fall 2019.
class_data['Timestamp'] = pd.to_datetime(
    class_data['Timestamp'], format='%m/%d/%Y %H:%M:%S'
)
# Use a plain boolean array so the filter is positional, independent of the
# (possibly repeated) index labels left over from the earlier concat.
in_2019 = (class_data['Timestamp'].dt.year == 2019).to_numpy()
class_data = class_data[in_2019].reset_index(drop=True)

# Columns from position 17 onward are the per-image ones; copy them so that
# adding 'Census Tract' does not write into a slice of class_data.
image_data = class_data.iloc[:, 17:].copy()
image_data['Census Tract'] = class_data['Census Tract']
image_data.head()
skipped skipped skipped skipped skipped
Image #1 | Image #1 Address | Other thoughts or comments for Image #1 | Image #2 | Image #2 Address | Other thoughts or comments for Image #2 | Image #3 | Image #3 Address | Other thoughts or comments for Image #3 | Image #4 | ... | Image #4 Address | Image #5 | Image #5 Address | Other thoughts or comments for Image #5 | Image #1 coordinates | Image #2 coordinates | Image #3 coordinates | Image #4 coordinates | Image #5 coordinates | Census Tract | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://drive.google.com/open?id=1RmWTmeeYjbIg... | 601 Webster St, Oakland, CA, 94609 | This was one of a few stores that were gated u... | https://drive.google.com/open?id=1PXD3bu_7j2vV... | 601 Webster Street, Oakland, CA, 94609 | The Salvation Army was closed on this particul... | https://drive.google.com/open?id=1KY1Mvgc9-eLq... | 601 Webster Street, Oakland, CA, 94609 | I should have gone in here to try the food, i... | https://drive.google.com/open?id=1keCgzxOTujwn... | ... | 601 Webster St, Oakland, CA, 94609 | NaN | NaN | NaN | [37.798025, -122.272283] | [37.798025, -122.272283] | [37.798025, -122.272283] | [37.798025, -122.272283] | NaN | 4030 |
1 | https://drive.google.com/open?id=1stJ2WAm5XvqD... | 931 Carmel Ave, Albany, CA, 94706 | NaN | https://drive.google.com/open?id=1UlT_fZkGjDLH... | 1362 Marin Ave, Albany, CA , 94706 | NaN | https://drive.google.com/open?id=1-eg5yVHgMiRt... | 1391 Solano Ave, Albany, CA, 94706 | NaN | https://drive.google.com/open?id=1mouPFYSJ16OW... | ... | 1391 Solano Ave, Albany, CA, 94706 | https://drive.google.com/open?id=1ymFvN6smtsGK... | 708 Solano Ave, Albany, CA, 94706 | NaN | [37.8899946030529, -122.289521691888] | [37.8883455055134, -122.288877697661] | [37.8908002222222, -122.290461888889] | [37.8908002222222, -122.290461888889] | [37.888633027305, -122.307014522052] | 4205 |
2 | https://drive.google.com/open?id=1kpIEXVR335w3... | 2200 Los Angeles St, Berkeley, CA, 94704 | The only house on Los Angeles Street with a si... | https://drive.google.com/open?id=1zRsqDdsZBeYD... | 2210 Los Angeles Street, Berkeley, CA, 94704 | Another example of a well-maintained house on ... | https://drive.google.com/open?id=12Fqi0YrnKfV9... | 1136 Spruce Street, Berkeley, CA, 94704 | One of many houses that’s currently being reno... | https://drive.google.com/open?id=189j-3sZAefhs... | ... | 1128 Spruce Street, Berkeley, CA, 94704 | https://drive.google.com/open?id=1Hn70fF7xfZh1... | 1133 Spruce Street, Berkeley, CA, 94704 | This house is located at the east side of Spru... | (37.8883409, -122.270922) | (37.8883409, -122.270922) | [37.8876024042097, -122.266919689902] | [37.8877886656757, -122.266948315416] | [37.8879082848895, -122.26673971951] | 4214 |
3 | https://drive.google.com/open?id=15rdCtKIgX-Fb... | 378 8th Street, Oakland, CA, 94607 | NaN | https://drive.google.com/open?id=1hMNbBE8pc9Q8... | 360 8th Street, Oakland, CA, 94607 | NaN | https://drive.google.com/open?id=1ykCymHyvIELa... | 376 8th Street, Oakland, CA, 94607 | NaN | https://drive.google.com/open?id=1rfVrj-7QZkbf... | ... | 800 Franklin Street, Oakland, CA, 94607 | NaN | NaN | NaN | [37.7995886695836, -122.272012934734] | [37.7994604111548, -122.27167993785] | [37.8020326, -122.2784424] | [37.799789, -122.272409] | NaN | 4030 |
4 | https://drive.google.com/open?id=1F1B-1oHlh9y6... | 1645 Hopkins Street, Berkeley, CA, 94707 | Unfortunately, this building was at the end of... | https://drive.google.com/open?id=1ycZ91fitTHwh... | 1226 Carlotta Avenue, Berkeley, CA, 94707 | For the stretch from the intersection of Carlo... | https://drive.google.com/open?id=1S_BJ03A64lP9... | 1647 Hopkins Street, Berkeley, CA, 94707 | NaN | https://drive.google.com/open?id=1a4rgJSw2qIMk... | ... | 1288 Carlotta Avenue, Berkeley, CA, 94707 | https://drive.google.com/open?id=1clCPXj-QBzMp... | 1611 Hopkins Street, Berkeley, CA, 94707 | This building had a mattress leaning on one of... | [37.88256465, -122.28065260328] | [37.8839914809146, -122.28048481675] | [37.8824167547207, -122.280525537024] | [37.8826557841063, -122.280673827285] | [37.8822196, -122.281236836644] | 4213 |
5 rows × 21 columns
Select columns for class_data and change some entries of establishments col
# Keep only the first 17 columns (those before the per-image ones). .copy()
# makes the result independent so the column assignment below is safe.
class_data = class_data.iloc[:, :17].copy()

# Strip the commas that appear *inside* individual establishment labels.
# (Presumably so the multi-select answers can later be split on commas --
# confirm against the consumer of class_data.csv.)
# The replacements are literal (regex=False): the original third pattern used
# regex escapes for the parentheses, which only match while str.replace
# defaults to regex mode and silently stop matching on pandas >= 2.0.
establishment_relabels = {
    'Bodega, deli, corner-store, convenience store':
        'Bodega deli corner-store convenience store',
    'Payday lenders, check cashers, or pawn shops':
        'Payday lenders check cashers or pawn shops',
    'Professional offices (doctor, dentist, lawyer, accountant, real estate)':
        'Professional offices (doctor dentist lawyer accountant real estate)',
}
for old_label, new_label in establishment_relabels.items():
    class_data["Types of Establishments"] = (
        class_data["Types of Establishments"]
        .str.replace(old_label, new_label, regex=False)
    )

# Write both tables without the index column.
image_data.to_csv("data/image_data.csv", index=False)
class_data.to_csv("data/class_data.csv", index=False)