NOTE: to run this notebook, make sure you have installed `rtree` (it is needed for the spatial join further down).
%matplotlib inline
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely.geometry as geoms
# Report the GeoPandas version in use. The parenthesised call is valid on
# both Python 2 and Python 3 (the bare print statement is Python 2 only).
print(gpd.__version__)
Paths:
# Shapefile with the London MSOA polygons
msoas_link = '../../../../data/London Output Area Classification/Shapefiles/london_msoas.shp'
# Gzipped CSV with the AirBnb listings
gz_link = '../../../../data/airbnb/listings.csv.gz'
Subset of variables of interest:
# Listing attributes retained for the analysis (order is preserved in `db`)
x = [
    'id', 'longitude', 'latitude',
    'property_type', 'room_type', 'accommodates',
    'bathrooms', 'bedrooms', 'beds',
    'price', 'security_deposit',
    'number_of_reviews', 'reviews_per_month',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value',
]
# Read the listings, keep only the selected columns and discard every row
# that has a missing value in any of them
db = pd.read_csv(gz_link)[x].dropna()
db.info()
Convert lat/lon to proper geoms:
# One shapely Point per listing, built from its (longitude, latitude) pair
lonlat = db[['longitude', 'latitude']]
pts = lonlat.apply(lambda row: geoms.Point((row.longitude, row.latitude)), axis=1)
# Declare the points as EPSG:4326 and reproject them to EPSG:27700
geos = gpd.GeoSeries(pts, crs={'init': 'epsg:4326'}).to_crs(epsg=27700)
# Attach the projected points to the table, then turn the table into a
# GeoDataFrame that carries the same CRS as the points
db['geometry'] = geos
db = gpd.GeoDataFrame.from_records(db)
db.crs = geos.crs
We use the London boroughs geometries (source) and the Wikipedia definition of Inner London to extract its shape:
# Read the borough polygons, index them by the 'name' column and reproject
# them to EPSG:27700
lnd_bor = (
    gpd.read_file('../../../../data/airbnb/london_boroughs.geojson', driver='GeoJSON')
    .set_index('name')
    .to_crs(epsg=27700)
)
lnd_bor.plot()
# Boroughs treated as Inner London
inner_bor_names = [
    'Camden', 'Greenwich', 'Hackney', 'Hammersmith and Fulham',
    'Islington', 'Kensington and Chelsea', 'Lambeth', 'Lewisham',
    'Southwark', 'Tower Hamlets', 'Wandsworth', 'Westminster',
    'City of London',
]
# Select those boroughs by name and merge them into a single geometry
inner_bor = lnd_bor.reindex(inner_bor_names).unary_union

# Draw every borough in grey and overlay the merged Inner London shape
f, ax = plt.subplots(1, figsize=(4, 4))
lnd_bor.plot(ax=ax, color='grey', linewidth=0)
gpd.plotting.plot_multipolygon(ax, inner_bor)
Load up London MSOAs and keep only those whose centroid is inside Inner London:
# Load the MSOA polygons as stored on disk
msoas = gpd.read_file(msoas_link)
# Reproject BEFORE the containment test: `inner_bor` is expressed in
# EPSG:27700 and the `within` predicate compares coordinates as they are,
# so both sides have to share that CRS for the result to be meaningful.
msoas_27700 = msoas.to_crs(epsg=27700)
# Flag the MSOAs whose centroid falls inside the Inner London shape
inner_lnd_flag = msoas_27700.centroid.within(inner_bor)
# Keep only the flagged MSOAs (already in EPSG:27700)
inner_msoas = msoas_27700.loc[inner_lnd_flag, :]
inner_msoas.plot()
Join AirBnb locations to MSOAs:
# Inspect the CRS of the listings and of the Inner London MSOAs ahead of
# the spatial join
db.crs
inner_msoas.crs
%%time
# You will need the rtree extension for this
from geopandas.tools import sjoin
abb_msoa = sjoin(db, inner_msoas, how='left')
# Keep complete rows
abb_msoa = abb_msoa.drop('OA11_LSO_6', axis=1)\
.dropna()\
[x + ['OA11_LSO_2', 'geometry']]\
.rename(columns={'OA11_LSO_2': 'MSOA_id'})
Get a table for all the Inner London MSOAs with average values of AirBnb attributes:
# Group the joined listings by MSOA
g = abb_msoa.groupby('MSOA_id')
# Per-MSOA mean of the listing attributes, with the number of listings in
# each MSOA appended as 'property_count'
ilm_abb = g.mean().assign(property_count=g.size())
# Only MSOAs with at least one listing are kept: the table is deliberately
# not reindexed against inner_msoas['OA11_LSO_2']
ilm_abb.info()
# Look up the polygon of every MSOA present in the aggregated table
msoa_polys = inner_msoas.set_index('OA11_LSO_2')['geometry']
ilm_abb['geometry'] = msoa_polys.reindex(ilm_abb.index)
# Promote the table to a GeoDataFrame in the same CRS as the MSOA layer
ilm_abb = gpd.GeoDataFrame(ilm_abb, crs=inner_msoas.crs)
# Map the number of listings per MSOA
ilm_abb.plot(column='property_count', cmap='YlGn', linewidth=0)
# Write the result out as GeoJSON
ilm_abb.to_file('../../../../data/airbnb/ilm_abb.geojson', driver='GeoJSON')