"""Summary statistics and a text histogram for a list of house prices.

The script mounts Google Drive (Colab), reads one price per line from
DATA_PATH, prints the mean, median and several percentiles, reports where
one particular house falls in the distribution, and draws a bar-chart
histogram using block characters.
"""
from math import floor

# Location of the input file inside the mounted Drive.
DATA_PATH = "/content/gdrive/MyDrive/datasets/house_prices.txt"

# Histogram layout: BIN_COUNT consecutive bins, each BIN_WIDTH dollars wide,
# starting at $0 (so the last bin ends at BIN_WIDTH * BIN_COUNT = $550k).
BIN_WIDTH = 50000
BIN_COUNT = 11

# Number of houses represented by a single "█" in the histogram.
BAR_UNIT = 20


def load_prices(path):
    """Read one float per line from *path* and return them as a list.

    Blank lines are skipped. The file is closed before returning.

    Raises:
        ValueError: if a non-blank line is not a number, or if the file
            contains no prices at all.
        OSError: if the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as price_file:
        # float() tolerates surrounding whitespace, including the newline.
        prices = [float(line) for line in price_file if line.strip()]
    if not prices:
        raise ValueError(f"no prices found in {path}")
    return prices


def get_mean(prices):
    """Return the arithmetic mean of *prices*.

    Raises:
        ZeroDivisionError: if *prices* is empty.
    """
    return sum(prices) / len(prices)


def get_median(prices):
    """Return the median of *prices*.

    For an even number of values the two middle values are averaged; for an
    odd number the single middle value is returned.

    Raises:
        IndexError: if *prices* is empty.
    """
    sorted_prices = sorted(prices)
    count = len(sorted_prices)
    mid = count // 2
    if count % 2 == 0:
        # Even count: average the two values either side of the midpoint.
        return (sorted_prices[mid] + sorted_prices[mid - 1]) / 2
    return sorted_prices[mid]


def get_dumb_median(prices):
    """Return the element at index ``count // 2`` of the sorted prices.

    This is the simplified median: for an even count it returns the upper
    of the two middle values instead of averaging them.

    Raises:
        IndexError: if *prices* is empty.
    """
    sorted_prices = sorted(prices)
    return sorted_prices[len(sorted_prices) // 2]


def get_dumb_median_short_code(prices):
    """One-line equivalent of :func:`get_dumb_median`."""
    return sorted(prices)[len(prices) // 2]


def get_quartile_q1(prices):
    """Return the element at index ``count // 4`` of the sorted prices.

    Same approach as the simplified median, but a quarter of the way in.

    Raises:
        IndexError: if *prices* is empty.
    """
    sorted_prices = sorted(prices)
    return sorted_prices[len(sorted_prices) // 4]


def get_percentile(prices, percent):
    """Return the value at the given percentile of *prices*.

    The index is ``floor(percent / 100 * count)`` into the sorted prices,
    clamped to the last element so that ``percent == 100`` returns the
    maximum instead of indexing one past the end.

    Args:
        prices: sequence of numbers.
        percent: percentile to look up, from 0 to 100 inclusive.

    Raises:
        ValueError: if *percent* is outside 0..100 (a negative value would
            otherwise silently index from the end of the list).
        IndexError: if *prices* is empty.
    """
    if not 0 <= percent <= 100:
        raise ValueError(f"percent must be between 0 and 100, got {percent}")
    sorted_prices = sorted(prices)
    count = len(sorted_prices)
    idx = min(floor((percent / 100) * count), count - 1)
    return sorted_prices[idx]


def find_percentile(my_house, prices):
    """Return the percentage of *prices* that are less than or equal to *my_house*.

    Raises:
        ZeroDivisionError: if *prices* is empty.
    """
    count_at_or_below = sum(1 for price in prices if price <= my_house)
    return (count_at_or_below / len(prices)) * 100


def make_bar(size, unit=BAR_UNIT):
    """Return a bar of "█" characters, one per *unit* items (rounded down)."""
    return "█" * (size // unit)


def count_histogram_bins(prices, bin_width=BIN_WIDTH, bin_count=BIN_COUNT):
    """Count how many prices fall into each fixed-width bin.

    Bin ``i`` covers ``[i * bin_width, (i + 1) * bin_width)``; every bin,
    including the first, includes its lower bound. Prices below zero or at
    or above ``bin_width * bin_count`` (and NaN) are not counted.

    Returns:
        A list of *bin_count* integers.
    """
    upper_limit = bin_width * bin_count
    counts = [0] * bin_count
    for price in prices:
        # The chained comparison is False for NaN, so NaN is skipped too.
        if 0 <= price < upper_limit:
            counts[int(price // bin_width)] += 1
    return counts


def format_histogram_row(index, count, bin_width=BIN_WIDTH):
    """Return one histogram line such as ``"$50-100k: ███"`` for bin *index*.

    The label is expressed in thousands, so *bin_width* is expected to be a
    multiple of 1000.
    """
    low_k = index * bin_width // 1000
    high_k = (index + 1) * bin_width // 1000
    return f"${low_k}-{high_k}k: {make_bar(count)}"


# The statements below stay directly under the guard (not inside a main()
# function) so that the results remain module-level names, available to later
# notebook cells exactly as before.
if __name__ == "__main__":
    # Connect to Google Drive. Imported here so the functions above can be
    # imported and tested outside of Colab.
    from google.colab import drive

    drive.mount("/content/gdrive")

    # Open and read the file, converting its lines to numbers.
    prices = load_prices(DATA_PATH)
    print(f"Loaded {len(prices)} house prices")
    print(f"First 5 prices: {prices[:5]}")

    mean = get_mean(prices)
    print(f"Mean is: {mean}")

    median = get_median(prices)
    print(f"Median is: {median}")

    # Reuses the name `median`, as the original script did.
    median = get_dumb_median(prices)
    print(f"Dumb Median is: {median}")

    q1 = get_quartile_q1(prices)
    print(f"Q1 is: {q1}")

    p25 = get_percentile(prices, 25)
    print(f"25th percentile is: {p25}")

    p50 = get_percentile(prices, 50)
    print(f"50th percentile is: {p50}")

    p95 = get_percentile(prices, 95)
    print(f"95th percentile is: {p95}")

    p5 = get_percentile(prices, 5)
    print(f"5th percentile is: {p5}")

    p0 = get_percentile(prices, 0)
    print(f"0th percentile is: {p0}")

    p99 = get_percentile(prices, 99)
    print(f"99th percentile is: {p99}")

    my_house = 450000
    my_house_percentile = find_percentile(my_house, prices)
    print(f"My house is in the {my_house_percentile:.2f}th percentile")

    # Print the histogram.
    # NOTE: prices of $550k and above fall outside the last bin and are not
    # shown; raise BIN_COUNT to include them.
    bin_counts = count_histogram_bins(prices)
    print("California House Price Distribution:")
    for bin_index, bin_total in enumerate(bin_counts):
        print(format_histogram_row(bin_index, bin_total))