#!/usr/bin/env python # coding: utf-8 # Open In Colab # # Finding Where to Thrive - Part V # ## Comparative Analysis # In this notebook, we will combine our FMR, Wage, CPI and Population data and perform comparative analysis. # # We will look to answer the following questions: # # 1. Have the percentage increases in median wages kept up with the percentage increases in rent and CPI? # # 2. Is there a correlation between wages and other variables like rent, CPI, and population? # # 3. Can we conclude that an increase in an area's population directly relates to an increase in living expenses based on our collected data? # In[ ]: import warnings warnings.simplefilter(action='ignore', category=FutureWarning) import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt import plotly.express as px import plotly.graph_objects as go import statsmodels.api as sm import statsmodels.formula.api as smf from statsmodels.formula.api import ols import fuzzywuzzy from fuzzywuzzy import fuzz from fuzzywuzzy import process plt.style.use('bmh') pd.set_option("display.max.columns", None) #change jupyter settings to see all columns pd.set_option("display.precision", 2) #change jupyter settings to only show two decimal places pd.set_option('display.float_format', str) # disable scientific notation # ### Data Wrangling # # In this section we'll merge our four cleaned data sets for the Fair Market Rents, occupational wage statistics, population, and the Consumer Price Index into one dataframe. # # First we'll read in our data. # In[ ]: # load cleaned data sets fmr = pd.read_csv('FWT_FMR_Wide.csv') wage = pd.read_csv('BLS Wage Data/FWT_Metro_OEWS_Total.csv') cpi = pd.read_csv('BLS CPI/FWT_CPI_Year.csv') pop = pd.read_csv('Population Data/FWT_Metro_Pop.csv') # Next we will convert the FMR values from monthly to yearly values, since our other data points all represent a yearly value. # In[ ]: # multiply monthly rent values to yearly values by multiplying by 12 fmr[['studio', 'one_bed', 'two_bed', 'three_bed', 'four_bed']] = fmr[['studio', 'one_bed', 'two_bed', 'three_bed', 'four_bed']].mul(12) # add column for average FMR for all residence types fmr['mean_fmr'] = fmr[['studio', 'one_bed', 'two_bed', 'three_bed', 'four_bed']].mean(axis=1) fmr.head() # Since there is a slight difference in some of the metro names between the fmr and wage dataframes (ie Twin Falls, ID' vs 'Twin Falls County, ID) we will use fuzzywuzzy to match and replace the metro_names from the wage dataframe with the metro_names from the fmr dataframe. We already mapped the fmr metro_names to our populaion dataframe in Part IV or our analysis. # In[ ]: # casting the metro_name column of both dataframes into lists fmr_names = list(fmr['metro_name'].unique()) wage_names = list(wage['metro_name'].unique()) # defining a function to return the match and similarity score of the fuzz.ratio() scorer. The function will take in a term(name), list of terms(list_names), # and a minimum similarity score(min_score) to return the match. def match_names(name, list_names, min_score=0): max_score = -1 max_name = '' for x in list_names: score = fuzz.ratio(name, x) if (score > min_score) & (score > max_score): max_name = x max_score = score return (max_name, max_score) # create a for loop to create a list of tuples with the first value being the name from the wage dataframe (name to replace) and the second value from # the fmr dataframe, casting the list of tuples as a dictionary. names = [] for x in wage_names: match = match_names(x, fmr_names, 50) if match[1] >= 50: name = (str(x), str(match[0])) names.append(name) name_dict = dict(names) name_dict # ##### # After adjusting the minimum similarity score, we were able to get the majority of the metro names mapped correctly, although we made manual corrections to a few. # In[ ]: name_dict = {'Anchorage, AK': 'Anchorage, AK', 'Fairbanks, AK': 'Fairbanks, AK', 'Anniston-Oxford-Jacksonville, AL': 'Anniston-Oxford-Jacksonville, AL', 'Auburn-Opelika, AL': 'Auburn-Opelika, AL', 'Birmingham-Hoover, AL': 'Birmingham-Hoover, AL', 'Daphne-Fairhope-Foley, AL': 'Daphne-Fairhope-Foley, AL', 'Decatur, AL': 'Decatur, AL', 'Dothan, AL': 'Dothan, AL', 'Florence-Muscle Shoals, AL': 'Florence-Muscle Shoals, AL', 'Gadsden, AL': 'Gadsden, AL', 'Huntsville, AL': 'Huntsville, AL', 'Mobile, AL': 'Mobile, AL', 'Montgomery, AL': 'Montgomery, AL', 'Tuscaloosa, AL': 'Tuscaloosa, AL', 'Fayetteville-Springdale-Rogers, AR-MO': 'Fayetteville-Springdale-Rogers, AR', 'Fort Smith, AR-OK': 'Fort Smith, AR-OK', 'Hot Springs, AR': 'Hot Springs, AR', 'Jonesboro, AR': 'Jonesboro, AR', 'Little Rock-North Little Rock-Conway, AR': 'Little Rock-North Little Rock-Conway, AR', 'Pine Bluff, AR': 'Pine Bluff, AR', 'Flagstaff, AZ': 'Flagstaff, AZ', 'Lake Havasu City-Kingman, AZ': 'Lake Havasu City-Kingman, AZ', 'Phoenix-Mesa-Scottsdale, AZ': 'Phoenix-Mesa-Scottsdale, AZ', 'Prescott, AZ': 'Prescott, AZ', 'Sierra Vista-Douglas, AZ': 'Sierra Vista-Douglas, AZ', 'Tucson, AZ': 'Tucson, AZ', 'Yuma, AZ': 'Yuma, AZ', 'Anaheim-Santa Ana-Irvine, CA': 'Santa Ana-Anaheim-Irvine, CA', 'Bakersfield, CA': 'Bakersfield, CA', 'Chico, CA': 'Chico, CA', 'El Centro, CA': 'El Centro, CA', 'Fresno, CA': 'Fresno, CA', 'Hanford-Corcoran, CA': 'Hanford-Corcoran, CA', 'Los Angeles-Long Beach-Glendale, CA': 'Los Angeles-Long Beach-Glendale, CA', 'Madera, CA': 'Madera, CA', 'Merced, CA': 'Merced, CA', 'Modesto, CA': 'Modesto, CA', 'Napa, CA': 'Napa, CA', 'Oakland-Hayward-Berkeley, CA': 'Oakland-Fremont, CA', 'Oxnard-Thousand Oaks-Ventura, CA': 'Oxnard-Thousand Oaks-Ventura, CA', 'Redding, CA': 'Redding, CA', 'Riverside-San Bernardino-Ontario, CA': 'Riverside-San Bernardino-Ontario, CA', 'Sacramento--Roseville--Arden-Arcade, CA': 'Sacramento-Roseville-Arden-Arcade, CA', 'Salinas, CA': 'Salinas, CA', 'San Diego-Carlsbad, CA': 'San Diego-Carlsbad, CA', 'San Francisco-Redwood City-South San Francisco, CA': 'San Francisco, CA', 'San Jose-Sunnyvale-Santa Clara, CA': 'San Jose-Sunnyvale-Santa Clara, CA', 'San Luis Obispo-Paso Robles-Arroyo Grande, CA': 'San Luis Obispo-Paso Robles-Arroyo Grande, CA', #'San Rafael, CA': 'Santa Rosa, CA', 'San Rafael, CA': 'San Rafael, CA', 'Santa Cruz-Watsonville, CA': 'Santa Cruz-Watsonville, CA', 'Santa Maria-Santa Barbara, CA': 'Santa Maria-Santa Barbara, CA', 'Santa Rosa, CA': 'Santa Rosa, CA', 'Stockton-Lodi, CA': 'Stockton-Lodi, CA', 'Vallejo-Fairfield, CA': 'Vallejo-Fairfield, CA', 'Visalia-Porterville, CA': 'Visalia-Porterville, CA', 'Yuba City, CA': 'Yuba City, CA', 'Boulder, CO': 'Boulder, CO', 'Colorado Springs, CO': 'Colorado Springs, CO', 'Denver-Aurora-Lakewood, CO': 'Denver-Aurora-Lakewood, CO', 'Fort Collins, CO': 'Fort Collins, CO', 'Grand Junction, CO': 'Grand Junction, CO', 'Greeley, CO': 'Greeley, CO', 'Pueblo, CO': 'Pueblo, CO', 'Bridgeport-Stamford-Norwalk, CT': 'Stamford-Norwalk, CT', 'Danbury, CT': 'Danbury, CT', 'Hartford-West Hartford-East Hartford, CT': 'Hartford-West Hartford-East Hartford, CT', 'New Haven, CT': 'New Haven-Meriden, CT', 'Norwich-New London-Westerly, CT-RI': 'Norwich-New London, CT', 'Waterbury, CT': 'Waterbury, CT', 'Washington-Arlington-Alexandria, DC-VA-MD-WV': 'Washington-Arlington-Alexandria, DC-VA-MD', 'Dover, DE': 'Dover, DE', #'Wilmington, DE-MD-NJ': 'Wilmington, NC', 'Wilmington, DE-MD-NJ': 'Wilmington, DE-MD-NJ', 'Cape Coral-Fort Myers, FL': 'Cape Coral-Fort Myers, FL', 'Crestview-Fort Walton Beach-Destin, FL': 'Crestview-Fort Walton Beach-Destin, FL', 'Deltona-Daytona Beach-Ormond Beach, FL': 'Deltona-Daytona Beach-Ormond Beach, FL', 'Fort Lauderdale-Pompano Beach-Deerfield Beach, FL': 'Fort Lauderdale, FL', 'Gainesville, FL': 'Gainesville, FL', 'Homosassa Springs, FL': 'Homosassa Springs, FL', 'Jacksonville, FL': 'Jacksonville, FL', 'Lakeland-Winter Haven, FL': 'Lakeland-Winter Haven, FL', 'Miami-Miami Beach-Kendall, FL': 'Miami-Miami Beach-Kendall, FL', 'Naples-Immokalee-Marco Island, FL': 'Naples-Immokalee-Marco Island, FL', 'North Port-Sarasota-Bradenton, FL': 'North Port-Sarasota-Bradenton, FL', 'Ocala, FL': 'Ocala, FL', 'Orlando-Kissimmee-Sanford, FL': 'Orlando-Kissimmee-Sanford, FL', 'Palm Bay-Melbourne-Titusville, FL': 'Palm Bay-Melbourne-Titusville, FL', #'Panama City, FL': 'Palm Coast, FL', 'Panama City, FL': 'Panama City, FL', 'Pensacola-Ferry Pass-Brent, FL': 'Pensacola-Ferry Pass-Brent, FL', 'Port St. Lucie, FL': 'Port St. Lucie, FL', 'Punta Gorda, FL': 'Punta Gorda, FL', 'Sebastian-Vero Beach, FL': 'Sebastian-Vero Beach, FL', 'Sebring, FL': 'Sebring, FL', 'Tallahassee, FL': 'Tallahassee, FL', 'Tampa-St. Petersburg-Clearwater, FL': 'Tampa-St. Petersburg-Clearwater, FL', 'The Villages, FL': 'The Villages, FL', 'West Palm Beach-Boca Raton-Delray Beach, FL': 'West Palm Beach-Boca Raton, FL', 'Albany, GA': 'Albany, GA', 'Athens-Clarke County, GA': 'Athens-Clarke County, GA', 'Atlanta-Sandy Springs-Roswell, GA': 'Atlanta-Sandy Springs-Roswell, GA', 'Augusta-Richmond County, GA-SC': 'Augusta-Richmond County, GA-SC', 'Brunswick, GA': 'Brunswick, GA', 'Columbus, GA-AL': 'Columbus, GA-AL', 'Dalton, GA': 'Dalton, GA', 'Gainesville, GA': 'Gainesville, GA', 'Hinesville, GA': 'Hinesville, GA', 'Macon, GA': 'Macon, GA', 'Rome, GA': 'Rome, GA', 'Savannah, GA': 'Savannah, GA', 'Valdosta, GA': 'Valdosta, GA', 'Warner Robins, GA': 'Warner Robins, GA', 'Kahului-Wailuku-Lahaina, HI': 'Kahului-Wailuku-Lahaina, HI', 'Urban Honolulu, HI': 'Urban Honolulu, HI', 'Ames, IA': 'Ames, IA', 'Cedar Rapids, IA': 'Cedar Rapids, IA', 'Davenport-Moline-Rock Island, IA-IL': 'Davenport-Moline-Rock Island, IA-IL', 'Des Moines-West Des Moines, IA': 'Des Moines-West Des Moines, IA', 'Dubuque, IA': 'Dubuque, IA', 'Iowa City, IA': 'Iowa City, IA', 'Sioux City, IA-NE-SD': 'Sioux City, IA-NE-SD', 'Waterloo-Cedar Falls, IA': 'Waterloo-Cedar Falls, IA', 'Boise City, ID': 'Boise City, ID', "Coeur d'Alene, ID": "Coeur d'Alene, ID", 'Idaho Falls, ID': 'Idaho Falls, ID', 'Lewiston, ID-WA': 'Lewiston, ID-WA', 'Pocatello, ID': 'Pocatello, ID', 'Bloomington, IL': 'Bloomington, IL', #'Carbondale-Marion, IL': 'Canton-Massillon, OH', 'Carbondale-Marion, IL': 'Carbondale-Marion, IL', 'Champaign-Urbana, IL': 'Champaign-Urbana, IL', 'Chicago-Naperville-Arlington Heights, IL': 'Chicago-Joliet-Naperville, IL', 'Danville, IL': 'Danville, IL', 'Decatur, IL': 'Decatur, IL', #'Elgin, IL': 'Flint, MI', 'Elgin, IL': 'Elgin, IL', 'Kankakee, IL': 'Kankakee, IL', 'Lake County-Kenosha County, IL-WI': 'Kenosha County, WI', 'Peoria, IL': 'Peoria, IL', 'Rockford, IL': 'Rockford, IL', 'Springfield, IL': 'Springfield, IL', 'Bloomington, IN': 'Bloomington, IN', 'Columbus, IN': 'Columbus, IN', 'Elkhart-Goshen, IN': 'Elkhart-Goshen, IN', 'Evansville, IN-KY': 'Evansville, IN-KY', 'Fort Wayne, IN': 'Fort Wayne, IN', 'Gary, IN': 'Gary, IN', 'Indianapolis-Carmel-Anderson, IN': 'Indianapolis-Carmel, IN', 'Kokomo, IN': 'Kokomo, IN', 'Lafayette-West Lafayette, IN': 'Lafayette-West Lafayette, IN', 'Michigan City-La Porte, IN': 'Michigan City-La Porte, IN', 'Muncie, IN': 'Muncie, IN', 'South Bend-Mishawaka, IN-MI': 'South Bend-Mishawaka, IN', 'Terre Haute, IN': 'Terre Haute, IN', 'Lawrence, KS': 'Lawrence, KS', 'Manhattan, KS': 'Manhattan, KS', 'Topeka, KS': 'Topeka, KS', 'Wichita, KS': 'Wichita, KS', 'Bowling Green, KY': 'Bowling Green, KY', 'Elizabethtown-Fort Knox, KY': 'Elizabethtown, KY', 'Lexington-Fayette, KY': 'Lexington-Fayette, KY', 'Louisville/Jefferson County, KY-IN': 'Louisville, KY-IN', 'Owensboro, KY': 'Owensboro, KY', 'Alexandria, LA': 'Alexandria, LA', 'Baton Rouge, LA': 'Baton Rouge, LA', 'Hammond, LA': 'Hammond, LA', 'Houma-Thibodaux, LA': 'Houma-Thibodaux, LA', 'Lafayette, LA': 'Lafayette, LA', 'Lake Charles, LA': 'Lake Charles, LA', 'Monroe, LA': 'Monroe, LA', 'New Orleans-Metairie, LA': 'New Orleans-Metairie, LA', 'Shreveport-Bossier City, LA': 'Shreveport-Bossier City, LA', 'Barnstable Town, MA': 'Barnstable Town, MA', 'Boston-Cambridge-Newton, MA': 'Boston-Cambridge-Quincy, MA-NH', #'Brockton-Bridgewater-Easton, MA': 'Allentown-Bethlehem-Easton, PA', 'Brockton-Bridgewater-Easton, MA': 'Brockton-Bridgewater-Easton, MA', #'Framingham, MA': 'Farmington, NM', 'Framingham, MA': 'Framingham, MA', 'Lawrence-Methuen Town-Salem, MA-NH': 'Lawrence, MA-NH', 'Leominster-Gardner, MA': 'Fitchburg-Leominster, MA', 'New Bedford, MA': 'New Bedford, MA', #'Peabody-Salem-Beverly, MA': 'Seattle-Bellevue, WA', 'Peabody-Salem-Beverly, MA': 'Peabody-Salem-Beverly, MA', 'Pittsfield, MA': 'Pittsfield, MA', 'Springfield, MA-CT': 'Springfield, MA', 'Taunton-Middleborough-Norton, MA': 'Taunton-Mansfield-Norton, MA', 'Worcester, MA-CT': 'Worcester, MA', 'Baltimore-Columbia-Towson, MD': 'Baltimore-Columbia-Towson, MD', 'California-Lexington Park, MD': 'California-Lexington Park, MD', 'Cumberland, MD-WV': 'Cumberland, MD-WV', 'Hagerstown-Martinsburg, MD-WV': 'Martinsburg, WV', 'Salisbury, MD-DE': 'Salisbury, MD', 'Bangor, ME': 'Bangor, ME', 'Lewiston-Auburn, ME': 'Lewiston-Auburn, ME', 'Portland-South Portland, ME': 'Portland, ME', 'Ann Arbor, MI': 'Ann Arbor, MI', 'Battle Creek, MI': 'Battle Creek, MI', 'Bay City, MI': 'Bay City, MI', 'Detroit-Dearborn-Livonia, MI': 'Detroit-Warren-Livonia, MI', 'Flint, MI': 'Flint, MI', 'Grand Rapids-Wyoming, MI': 'Grand Rapids-Wyoming, MI', 'Jackson, MI': 'Jackson, MI', 'Kalamazoo-Portage, MI': 'Kalamazoo-Portage, MI', 'Lansing-East Lansing, MI': 'Lansing-East Lansing, MI', 'Midland, MI': 'Midland, MI', 'Monroe, MI': 'Monroe, MI', 'Muskegon, MI': 'Muskegon, MI', 'Niles-Benton Harbor, MI': 'Niles-Benton Harbor, MI', 'Saginaw, MI': 'Saginaw, MI', #'Warren-Troy-Farmington Hills, MI': 'Farmington, NM', 'Warren-Troy-Farmington Hills, MI': 'Warren-Troy-Farmington Hills, MI', 'Duluth, MN-WI': 'Duluth, MN-WI', 'Mankato-North Mankato, MN': 'Mankato-North Mankato, MN', 'Minneapolis-St. Paul-Bloomington, MN-WI': 'Minneapolis-St. Paul-Bloomington, MN-WI', 'Rochester, MN': 'Rochester, MN', 'St. Cloud, MN': 'St. Cloud, MN', 'Cape Girardeau, MO-IL': 'Cape Girardeau, MO-IL', 'Columbia, MO': 'Columbia, MO', 'Jefferson City, MO': 'Jefferson City, MO', 'Joplin, MO': 'Joplin, MO', 'Kansas City, MO-KS': 'Kansas City, MO-KS', 'St. Joseph, MO-KS': 'St. Joseph, MO-KS', 'St. Louis, MO-IL': 'St. Louis, MO-IL', 'Springfield, MO': 'Springfield, MO', 'Gulfport-Biloxi-Pascagoula, MS': 'Gulfport-Biloxi, MS', 'Hattiesburg, MS': 'Hattiesburg, MS', 'Jackson, MS': 'Jackson, MS', 'Billings, MT': 'Billings, MT', 'Great Falls, MT': 'Great Falls, MT', 'Missoula, MT': 'Missoula, MT', 'Asheville, NC': 'Asheville, NC', 'Burlington, NC': 'Burlington, NC', 'Charlotte-Concord-Gastonia, NC-SC': 'Charlotte-Concord-Gastonia, NC-SC', 'Durham-Chapel Hill, NC': 'Durham-Chapel Hill, NC', 'Fayetteville, NC': 'Fayetteville, NC', 'Goldsboro, NC': 'Goldsboro, NC', 'Greensboro-High Point, NC': 'Greensboro-High Point, NC', 'Greenville, NC': 'Greenville, NC', 'Hickory-Lenoir-Morganton, NC': 'Hickory-Lenoir-Morganton, NC', 'Jacksonville, NC': 'Jacksonville, NC', #'New Bern, NC': 'New Bedford, MA', 'New Bern, NC': 'New Bern, NC', 'Raleigh, NC': 'Raleigh, NC', 'Rocky Mount, NC': 'Rocky Mount, NC', 'Wilmington, NC': 'Wilmington, NC', 'Winston-Salem, NC': 'Winston-Salem, NC', 'Bismarck, ND': 'Bismarck, ND', 'Fargo, ND-MN': 'Fargo, ND-MN', 'Grand Forks, ND-MN': 'Grand Forks, ND-MN', #'Grand Island, NE': 'Grand Forks, ND-MN', 'Grand Island, NE': 'Grand Island, NE', 'Lincoln, NE': 'Lincoln, NE', 'Omaha-Council Bluffs, NE-IA': 'Omaha-Council Bluffs, NE-IA', 'Dover-Durham, NH-ME': 'Dover, DE', 'Manchester, NH': 'Manchester, NH', 'Nashua, NH-MA': 'Nashua, NH', 'Portsmouth, NH-ME': 'Portsmouth-Rochester, NH', 'Atlantic City-Hammonton, NJ': 'Atlantic City-Hammonton, NJ', #'Camden, NJ': 'Gadsden, AL', 'Camden, NJ': 'Camden, NJ', 'Newark, NJ-PA': 'Newark, NJ', 'Ocean City, NJ': 'Ocean City, NJ', 'Trenton, NJ': 'Trenton, NJ', 'Vineland-Bridgeton, NJ': 'Vineland-Bridgeton, NJ', 'Albuquerque, NM': 'Albuquerque, NM', 'Farmington, NM': 'Farmington, NM', 'Las Cruces, NM': 'Las Cruces, NM', 'Santa Fe, NM': 'Santa Fe, NM', 'Carson City, NV': 'Carson City, NV', 'Las Vegas-Henderson-Paradise, NV': 'Las Vegas-Henderson-Paradise, NV', 'Reno, NV': 'Reno, NV', 'Albany-Schenectady-Troy, NY': 'Albany-Schenectady-Troy, NY', 'Binghamton, NY': 'Binghamton, NY', 'Buffalo-Cheektowaga-Niagara Falls, NY': 'Buffalo-Cheektowaga-Niagara Falls, NY', #'Dutchess County-Putnam County, NY': 'Putnam County, IN', 'Dutchess County-Putnam County, NY': 'Dutchess County-Putnam County, NY', 'Elmira, NY': 'Elmira, NY', 'Glens Falls, NY': 'Glens Falls, NY', 'Ithaca, NY': 'Ithaca, NY', 'Kingston, NY': 'Kingston, NY', 'Nassau County-Suffolk County, NY': 'Nassau-Suffolk, NY', 'New York-Jersey City-White Plains, NY-NJ': 'Jersey City, NJ', 'Rochester, NY': 'Rochester, NY', 'Syracuse, NY': 'Syracuse, NY', 'Utica-Rome, NY': 'Utica-Rome, NY', 'Watertown-Fort Drum, NY': 'Watertown-Fort Drum, NY', 'Akron, OH': 'Akron, OH', 'Canton-Massillon, OH': 'Canton-Massillon, OH', 'Cincinnati, OH-KY-IN': 'Cincinnati, OH-KY-IN', 'Cleveland-Elyria, OH': 'Cleveland-Elyria, OH', 'Columbus, OH': 'Columbus, OH', 'Dayton, OH': 'Dayton, OH', 'Lima, OH': 'Lima, OH', 'Mansfield, OH': 'Mansfield, OH', 'Springfield, OH': 'Springfield, OH', 'Toledo, OH': 'Toledo, OH', 'Youngstown-Warren-Boardman, OH-PA': 'Youngstown-Warren-Boardman, OH', 'Enid, OK': 'Enid, OK', 'Lawton, OK': 'Lawton, OK', 'Oklahoma City, OK': 'Oklahoma City, OK', 'Tulsa, OK': 'Tulsa, OK', 'Albany, OR': 'Albany, OR', 'Bend-Redmond, OR': 'Bend-Redmond, OR', 'Corvallis, OR': 'Corvallis, OR', 'Eugene, OR': 'Eugene-Springfield, OR', 'Grants Pass, OR': 'Grants Pass, OR', 'Medford, OR': 'Medford, OR', 'Portland-Vancouver-Hillsboro, OR-WA': 'Portland-Vancouver-Hillsboro, OR-WA', 'Salem, OR': 'Salem, OR', 'Allentown-Bethlehem-Easton, PA-NJ': 'Allentown-Bethlehem-Easton, PA', 'Altoona, PA': 'Altoona, PA', 'Bloomsburg-Berwick, PA': 'Harrisburg-Carlisle, PA', 'Chambersburg-Waynesboro, PA': 'Chambersburg-Waynesboro, PA', 'East Stroudsburg, PA': 'East Stroudsburg, PA', 'Erie, PA': 'Erie, PA', 'Gettysburg, PA': 'Gettysburg, PA', 'Harrisburg-Carlisle, PA': 'Harrisburg-Carlisle, PA', 'Johnstown, PA': 'Johnstown, PA', 'Lancaster, PA': 'Lancaster, PA', 'Lebanon, PA': 'Lebanon, PA', #'Montgomery County-Bucks County-Chester County, PA': 'Western Worcester County, MA', 'Montgomery County-Bucks County-Chester County, PA': 'Montgomery County-Bucks County-Chester County, PA', 'Philadelphia, PA': 'Williamsport, PA', 'Pittsburgh, PA': 'Pittsburgh, PA', 'Reading, PA': 'Reading, PA', 'Scranton--Wilkes-Barre--Hazleton, PA': 'Scranton-Wilkes-Barre, PA', 'State College, PA': 'State College, PA', 'Williamsport, PA': 'Williamsport, PA', 'York-Hanover, PA': 'York-Hanover, PA', 'Providence-Warwick, RI-MA': 'Providence-Fall River, RI-MA', 'Charleston-North Charleston, SC': 'Charleston-North Charleston, SC', 'Columbia, SC': 'Columbia, SC', 'Florence, SC': 'Florence, SC', 'Greenville-Anderson-Mauldin, SC': 'Greenville-Mauldin-Easley, SC', 'Myrtle Beach-Conway-North Myrtle Beach, SC-NC': 'Myrtle Beach-North Myrtle Beach-Conway, SC', 'Spartanburg, SC': 'Spartanburg, SC', 'Sumter, SC': 'Sumter, SC', 'Rapid City, SD': 'Rapid City, SD', 'Sioux Falls, SD': 'Sioux Falls, SD', 'Chattanooga, TN-GA': 'Chattanooga, TN-GA', 'Clarksville, TN-KY': 'Clarksville, TN-KY', 'Cleveland, TN': 'Cleveland, TN', 'Jackson, TN': 'Jackson, TN', 'Johnson City, TN': 'Johnson City, TN', 'Kingsport-Bristol-Bristol, TN-VA': 'Kingsport-Bristol-Bristol, TN-VA', 'Knoxville, TN': 'Knoxville, TN', 'Memphis, TN-MS-AR': 'Memphis, TN-MS-AR', 'Morristown, TN': 'Morristown, TN', 'Nashville-Davidson--Murfreesboro--Franklin, TN': 'Nashville-Davidson-Murfreesboro-Franklin, TN', 'Abilene, TX': 'Abilene, TX', 'Amarillo, TX': 'Amarillo, TX', 'Austin-Round Rock, TX': 'Austin-Round Rock, TX', 'Beaumont-Port Arthur, TX': 'Beaumont-Port Arthur, TX', 'Brownsville-Harlingen, TX': 'Brownsville-Harlingen, TX', 'College Station-Bryan, TX': 'College Station-Bryan, TX', 'Corpus Christi, TX': 'Corpus Christi, TX', 'Dallas-Plano-Irving, TX': 'Dallas, TX', 'El Paso, TX': 'El Paso, TX', 'Fort Worth-Arlington, TX': 'Fort Worth-Arlington, TX', 'Houston-The Woodlands-Sugar Land, TX': 'Houston-The Woodlands-Sugar Land, TX', 'Killeen-Temple, TX': 'Killeen-Temple, TX', 'Laredo, TX': 'Laredo, TX', 'Longview, TX': 'Longview, TX', 'Lubbock, TX': 'Lubbock, TX', 'McAllen-Edinburg-Mission, TX': 'McAllen-Edinburg-Mission, TX', 'Midland, TX': 'Midland, TX', 'Odessa, TX': 'Odessa, TX', 'San Angelo, TX': 'San Angelo, TX', 'San Antonio-New Braunfels, TX': 'San Antonio-New Braunfels, TX', 'Sherman-Denison, TX': 'Sherman-Denison, TX', 'Texarkana, TX-AR': 'Texarkana, TX-Texarkana, AR', 'Tyler, TX': 'Tyler, TX', 'Victoria, TX': 'Victoria, TX', 'Waco, TX': 'Waco, TX', 'Wichita Falls, TX': 'Wichita Falls, TX', 'Logan, UT-ID': 'Logan, UT-ID', 'Ogden-Clearfield, UT': 'Ogden-Clearfield, UT', 'Provo-Orem, UT': 'Provo-Orem, UT', 'St. George, UT': 'St. George, UT', 'Salt Lake City, UT': 'Salt Lake City, UT', 'Blacksburg-Christiansburg-Radford, VA': 'Blacksburg-Christiansburg-Radford, VA', 'Charlottesville, VA': 'Charlottesville, VA', 'Harrisonburg, VA': 'Harrisonburg, VA', 'Lynchburg, VA': 'Lynchburg, VA', 'Richmond, VA': 'Richmond, VA', 'Roanoke, VA': 'Roanoke, VA', 'Staunton-Waynesboro, VA': 'Staunton-Waynesboro, VA', 'Virginia Beach-Norfolk-Newport News, VA-NC': 'Virginia Beach-Norfolk-Newport News, VA-NC', 'Winchester, VA-WV': 'Winchester, VA-WV', 'Burlington-South Burlington, VT': 'Burlington-South Burlington, VT', 'Bellingham, WA': 'Bellingham, WA', 'Bremerton-Silverdale, WA': 'Bremerton-Silverdale, WA', 'Kennewick-Richland, WA': 'Kennewick-Richland, WA', 'Longview, WA': 'Longview, WA', 'Mount Vernon-Anacortes, WA': 'Mount Vernon-Anacortes, WA', 'Olympia-Tumwater, WA': 'Olympia-Tumwater, WA', 'Seattle-Bellevue-Everett, WA': 'Seattle-Bellevue, WA', 'Spokane-Spokane Valley, WA': 'Spokane, WA', 'Tacoma-Lakewood, WA': 'Tacoma, WA', 'Walla Walla, WA': 'Walla Walla County, WA', 'Wenatchee, WA': 'Wenatchee, WA', 'Yakima, WA': 'Yakima, WA', 'Appleton, WI': 'Appleton, WI', 'Eau Claire, WI': 'Eau Claire, WI', 'Fond du Lac, WI': 'Fond du Lac, WI', 'Green Bay, WI': 'Green Bay, WI', 'Janesville-Beloit, WI': 'Janesville-Beloit, WI', 'La Crosse-Onalaska, WI-MN': 'La Crosse-Onalaska, WI-MN', 'Madison, WI': 'Madison, WI', 'Milwaukee-Waukesha-West Allis, WI': 'Milwaukee-Waukesha-West Allis, WI', 'Oshkosh-Neenah, WI': 'Oshkosh-Neenah, WI', 'Racine, WI': 'Racine, WI', 'Sheboygan, WI': 'Sheboygan, WI', 'Wausau, WI': 'Wausau, WI', #'Beckley, WV': 'Greeley, CO', 'Beckley, WV': 'Beckley, WV', 'Charleston, WV': 'Charleston, WV', 'Huntington-Ashland, WV-KY-OH': 'Huntington-Ashland, WV-KY-OH', 'Morgantown, WV': 'Morgantown, WV', 'Parkersburg-Vienna, WV': 'Parkersburg-Vienna, WV', 'Weirton-Steubenville, WV-OH': 'Weirton-Steubenville, WV-OH', 'Wheeling, WV-OH': 'Wheeling, WV-OH', 'Casper, WY': 'Casper, WY', 'Cheyenne, WY': 'Cheyenne, WY', 'Los Angeles-Long Beach-Anaheim, CA': 'Los Angeles-Long Beach-Glendale, CA', 'San Francisco-Oakland-Hayward, CA': 'San Francisco, CA', 'Miami-Fort Lauderdale-West Palm Beach, FL': 'Fort Lauderdale, FL', 'Chicago-Naperville-Elgin, IL-IN-WI': 'Chicago-Joliet-Naperville, IL', 'Boston-Cambridge-Nashua, MA-NH': 'Boston-Cambridge-Quincy, MA-NH', 'Detroit-Warren-Dearborn, MI': 'Detroit-Warren-Livonia, MI', 'New York-Newark-Jersey City, NY-NJ-PA': 'Jersey City, NJ', 'Philadelphia-Camden-Wilmington, PA-NJ-DE-MD': 'Philadelphia-Camden-Wilmington, PA-NJ-DE-MD', 'Dallas-Fort Worth-Arlington, TX': 'Fort Worth-Arlington, TX', 'Seattle-Tacoma-Bellevue, WA': 'Seattle-Bellevue, WA', 'Twin Falls, ID': 'Twin Falls County, ID'} # ##### # In[ ]: # map fmr metro_names to wage dataframe wage.replace({'metro_name': name_dict},inplace=True) # Now we'll merge the fmr, wage, and population data sets. We will only use the median wage for our analysis. # In[ ]: # merge fmr and wage data, drop duplicate values and unused columns df = fmr.merge(wage.query("wage_type == 'a_median'"), on=['year','metro_name'])\ .drop(columns=['county_name', 'st_county', 'occ_title', 'tot_emp', 'wage_type'])\ .drop_duplicates(keep='first') # In[ ]: df.sample(10) # In[ ]: # sum population data by year and metro area pop = pop.groupby(['year', 'metro_name']).agg('sum').reset_index() # In[ ]: # merge in population data with our dataframe containing fmr and wage data df = df.merge(pop, on=['year', 'metro_name']) df # Next, we'll map each state to its respective CPI region using a mapping document created in Excel and the same abbreviation to state dictionary that we used in Part IV of our analysis. # In[ ]: # read in our CPI region mapping document cpi_map = pd.read_excel('BLS CPI/BLS Region Mapping.xlsx') cpi_map.head() # In[ ]: # create state abbreviation to state name dictionary abbrev_state = { 'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia', } # create inverse, state name to state abbreviation dictionary state_abbrev = {v: k for k, v in abbrev_state.items()} # In[ ]: # map state abbreviations from dictionary to create state_alpha column in the CPI mapping cpi_map['state_alpha'] = cpi_map['states'].map(state_abbrev) # In[ ]: cpi_map.head(5) # In[ ]: # create dictionary from cpi_map dataframe region_map = dict(zip(cpi_map.state_alpha, cpi_map.region)) # In[ ]: # create a region column based on state alpha df['region'] = df['state_alpha'].map(region_map) # In[ ]: df # Now we'll pivot the CPI data, update the column names, # and merge it to the dataframe. # In[ ]: # pivot cpi dataframe to change to long format cpi = cpi.pivot(index =['region','year'], columns ='category', values ='value') # In[ ]: cpi # In[ ]: # update cpi column names cpi = cpi.rename(columns={'All items':'all_items','All items less food and energy':'all_items_less_food_energy', 'Energy':'energy', 'Food':'food', 'Shelter': 'shelter'}) cpi # In[ ]: # merge cpi data with our df df = df.merge(cpi, on=['year', 'region']) df # In[ ]: # check for null values df.isna().sum() # In[ ]: # check for duplicate rows df.duplicated().sum() # ### Exploratory Data Analysis # Now that we have all of our data in one dataframe, we'll complete some multivariate analysis. First we will add a column to the dataframe that gives what percentage of the median wage the median FMR is (the median FMR is represented by the 2-bedroom rent), and another that labels if the precentage is greater than 30%. Remember, the definition of being cost burdened is paying more than 30% of income on housing. # In[ ]: # create copy of dataframe df_cb = df.copy() # add column for median FMR/wage percentage df_cb['housing_pct'] = df_cb['two_bed']/df_cb['wage'] # add column to identify if housing percentage is greater than 30% df_cb['cost_burdened'] = df_cb['housing_pct'] > .30 # check head of data df_cb.head() # In[ ]: fig = px.histogram(df_cb, x='cost_burdened', facet_col='year', color='cost_burdened', color_discrete_sequence=['red', 'gray'], opacity=0.6, title='Count of "Cost Burdened" Metro Areas: 2017 - 2021', labels={'count':'', 'cost_burdened':''}) fig.show() # We can see that for each year, over a third of the total metro area have a median FMR that exceeds 30% of the median wage. Now we will plot the number of "cost-burdened" metro areas belonging to each state in 2021. # In[ ]: fig = px.bar(df_cb.query("cost_burdened == True & year == 2021") .sort_values(by='state_alpha'), x='state_alpha', hover_data=['metro_name'], title='Count of "Cost Burdened" Metro Areas by State: 2021', labels={'count':'', 'state_alpha':''}) fig.show() # We can see that California, Florida, and Texas have the most "cost_burdened" metro areas. Let's look at the list of metro areas for each of the three states. # In[ ]: df_cb.query("cost_burdened == True & year == 2021 & state_alpha == 'CA'")[['state_alpha', 'metro_name']].reset_index() # In[ ]: df_cb.query("cost_burdened == True & year == 2021 & state_alpha == 'FL'")[['state_alpha', 'metro_name']].reset_index() # In[ ]: df_cb.query("cost_burdened == True & year == 2021 & state_alpha == 'TX'")[['state_alpha', 'metro_name']].reset_index() # Next, we will calculate the 2017 to 2021 percentage changes for each of our values. # In[ ]: # create copy of df, set index as our non-numerical values df_diff = df.set_index(['year','metro_name', 'state_alpha', 'region']).copy() # sort values by metro name and year df_diff = df_diff.sort_values(by=['metro_name', 'year']) # calcualte percent difference from 2017 to 2021 df_diff = df_diff.groupby('metro_name')\ .pct_change(periods=4).mul(100) # drop null rows and year column df_diff = df_diff[~df_diff.isna().any(axis=1)]\ .reset_index()\ .drop(columns='year') # convert to long format df_diff = df_diff\ .melt(id_vars=['state_alpha','metro_name', 'region'], var_name='type', value_name='pct_change') df_diff.head(10) # Now well plot our percentage values for select areas. # In[ ]: fig = px.bar(df_diff.query("metro_name == 'San Francisco, CA'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: San Francisco', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Riverside-San Bernardino-Ontario, CA'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Riverside-San Bernardino-Ontario, CA', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Orlando-Kissimmee-Sanford, FL'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Orlando-Kissimmee-Sanford, FL', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Fort Lauderdale, FL'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Fort Lauderdale, FL', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Houston-The Woodlands-Sugar Land, TX'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Houston-The Woodlands-Sugar Land, TX', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Waco, TX'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Waco, TX', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Montgomery, AL'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Montgomery, AL', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Idaho Falls, ID'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Idaho Falls, ID', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: fig = px.bar(df_diff.query("metro_name == 'Decatur, IL'"),\ x='type',\ y='pct_change',\ text_auto='.3s',\ color='type',\ height=500, title='2017 to 2021 Percentage Changes: Decatur, IL', labels={'pct_change':'Percent', 'type':''}) fig.update_yaxes(ticksuffix="%") fig.show() # In[ ]: df_cb.query("cost_burdened == False & year == 2021 & state_alpha == 'IL'")[['state_alpha', 'metro_name']].reset_index() # For each of the metro areas that we plotted, we can see that the fair market rents for a studio or one bedroom residence have the largest increase among the FMRs, and Energy consistently had the largest increase among the consumer price index increases. # Next we will begin to look at the relationships between variables using scatterplots, correlation heatmaps, and linear regression using Statsmodels. # In[ ]: fig = px.scatter(df, x='wage', y='two_bed', color='year', trendline='ols', hover_data=['wage', 'mean_fmr', 'year', 'metro_name'], labels={'two_bed':'Median FMR', 'wage':'Median Wage'}, title='Medan Annual FMR vs Median Annual Wage') fig.show() # In[ ]: fig = px.scatter(df, x='wage', y='shelter', color='year', trendline='ols', hover_data=['wage', 'shelter', 'year', 'metro_name'], title='Shelter CPI vs Median Annual Wage', labels={'shelter':'Shelter', 'wage':'Median Wage'}) fig.show() # In[ ]: fig = px.scatter(df, x='wage', y='food', color='year', trendline='ols', hover_data=['wage', 'food', 'year', 'metro_name'], title='Food CPI vs Median Annual Wage', labels={'food':'Food', 'wage':'Median Wage'}) fig.show() # In[ ]: fig = px.scatter(df, x='wage', y='energy', color='year', trendline='ols', hover_data=['wage', 'energy', 'year', 'metro_name'], title='Energy CPI vs Median Annual Wage', labels={'energy':'Energy CPI', 'wage':'Median Wage'}) fig.show() # In[ ]: fig = px.scatter(df, x='wage', y='all_items', color='year', trendline='ols', hover_data=['wage', 'all_items', 'year', 'metro_name'], title='All Items CPI vs Median Annual Wage', labels={'all_itsems':'All Items CPI', 'wage':'Median Wage'}) fig.show() # Now we will create correlation heatmaps for each of our four regions. # In[ ]: corr = df.query("region == 'Northeast'").corr() ax = sns.heatmap(corr,cmap='crest', annot=True, fmt='.1g', linewidth=1, linecolor='w', square=True) ax.set(title = "Correlation Heatmap: Northeast Region") plt.show() # In[ ]: corr = df.query("region == 'South'").corr() ax = sns.heatmap(corr,cmap='flare', annot=True, fmt='.1g', linewidth=1, linecolor='w', square=True) ax.set(title = "Correlation Heatmap: South Region") plt.show() # In[ ]: corr = df.query("region == 'West'").corr() ax = sns.heatmap(corr,cmap='viridis_r', annot=True, fmt='.1g', linewidth=1, linecolor='w', square=True) ax.set(title = "Correlation Heatmap: Western Region") plt.show() # In[ ]: corr = df.query("region == 'Midwest'").corr() ax = sns.heatmap(corr,cmap='mako_r', annot=True, fmt='.1g', linewidth=1, linecolor='w', square=True) ax.set(title = "Correlation Heatmap: Midwest Region") plt.show() # Now we will use Statsmodels to perform simple linear regressions. For these models, we will assign wage as our dependent variable. Our scatterplots and correlation heatmaps showed wage had a higher correlation to the FMRs than it did to population or the CPIs. We can test this again by reviewing the OLS regression results. # In[ ]: formula1 = 'mean_fmr ~ population' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula7 = 'wage ~ mean_fmr' model7 = smf.ols(formula = formula7, data = df) fitted7 = model7.fit() fitted7.summary() # In[ ]: formula1 = 'wage ~ all_items' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ food' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ energy' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # Based on the above regression summaries, we can see that the mean FMR explains a larger percentage of the variance in the wage than can be explained by the other independent variables. Additionally, the P-values for the intercepts of the Food and All Items CPIs are greater than 0.05, indicating that they may not have a statistically significant relationship to the median wage. # # Now we will perform multiple regressions. # ### Multiple Regression # In[ ]: formula1 = 'wage ~ shelter + food' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter + food + energy' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter + food + energy + all_items_less_food_energy' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter + food + energy + all_items_less_food_energy + population' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter + food + energy + all_items_less_food_energy + population + two_bed + year' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # In[ ]: formula1 = 'wage ~ shelter + food + energy + all_items_less_food_energy + population + two_bed + year' model1 = smf.ols(formula = formula1, data = df) fitted1 = model1.fit() fitted1.summary() # ### Insights # **1. Have the percentage increases in median wages kept up with the percentage increases in rent and CPI?** # # In short, it depends on the area. For some areas that we plotted, like Fort Lauderdale, FL or San Francisco, CA, we can see that the median wage increase is well below the increases of the FMRs and consumer price indexes. Other areas like Decatur, IL and Rochester, NY have seen greater increases in the median wage. A point to consider however, is that the Fair Market Rent data provided by HUD may differ from actual market rent data that can be found directly on property listings in some cities. # # **2. Is there a correlation between wages and other variables like rent, CPI, and population?** # # There appears to be a correlation between wages and the other variables, but the strongest correlation is between wages and fair market rents. # # **3. Can we conclude that an increase in an area's population directly relates to an increase in living expenses based on our collected data?** # # Again, it depends largely on the area. In some areas, like those in the southern region, we see a higher correlation between the median FMR and population. In other areas population seems to have little to no effect on living expenses.