import requests
import bs4
import csv
# Download the OYO search-results page for Kochi; the stay dates, guest count
# and page number are all encoded in the query string below.
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(
    'https://www.oyorooms.com/oyos-in-kochi?adults=1&checkin=30%2F03%2F2018&checkout=06%2F04%2F2018&children=0&city=kochi&country=India&employee_id=null&guests=1&latitude=null&location=Kochi&longitude=null&page=1&rooms=1&searchType=city&src=null',
    timeout=30,
)
# Fail loudly on a 4xx/5xx response instead of scraping an error page.
r.raise_for_status()
from bs4 import BeautifulSoup
# Parse the response body into a navigable tree. The parser is named
# explicitly so the result does not depend on which parsers happen to be
# installed (and so bs4 does not emit its "No parser was explicitly specified"
# warning); "html5lib" is the parser that warning reported as being used.
mypage = BeautifulSoup(r.text, "html5lib")
/Users/p17429374/Desktop/Eudora/venv/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html5lib"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently. The code that caused this warning is on line 193 of the file /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this: BeautifulSoup(YOUR_MARKUP}) to this: BeautifulSoup(YOUR_MARKUP, "html5lib") markup_type=markup_type))
# Every <span> tagged with the hotel-name CSS class, in document order.
myspan = mypage.find_all('span', class_='newHotelCard__hotelName')
# Echo the first match (displays the tag when run interactively).
myspan[0]
<span class="newHotelCard__hotelName">OYO Flagship 240 South Railway Station East Gate</span>
# Echo the first hotel name with surrounding whitespace removed.
myspan[0].get_text().strip()
'OYO Flagship 240 South Railway Station East Gate'
# Plain-text hotel names, one per matched span, whitespace-trimmed.
hotels = [name_span.text.strip() for name_span in myspan]
# Echo the list (displays it when run interactively).
hotels
['OYO Flagship 240 South Railway Station East Gate', 'OYO 9902 Nedumparambil Residency', 'OYO 9954 Emarald Hotel', 'OYO 10461 Hotel White Residency', 'OYO 12329 Prime Palace Hotel', 'OYO 9560 Hotel Bellwether', 'OYO 6725 Palm Inn', 'OYO 11328 Hotel Chandrika Residency', 'OYO 4802 Cochin City residency', 'OYO 10719 Mermaid Hotel', 'OYO 11404 Hotel Tri Star Regency', 'OYO Rooms 138 South Railway Station Extention', 'OYO 7181 VR Inn', 'OYO 11909 Star Plaza', 'OYO 4824 Hotel Star', 'OYO 11309 Hotel Green Land Residency', 'OYO 10220 near Infopark', 'OYO 6665 Thomson Regency', 'OYO 10149 Noor Residency', 'OYO 8434 The Qasr']
# Every <span> tagged with the hotel-address CSS class, in document order.
mylocations = mypage.find_all('span', class_='newHotelCard__hotelAddress')
# Echo the first match (displays the tag when run interactively).
mylocations[0]
<span class="newHotelCard__hotelAddress">18.6 Cents , Opp. GCDA Flats, Karshaka Road, Kochi</span>
# Plain-text hotel addresses, one per matched span, whitespace-trimmed.
locations = [address_span.text.strip() for address_span in mylocations]
# Echo the list (displays it when run interactively).
locations
['18.6 Cents , Opp. GCDA Flats, Karshaka Road, Kochi', 'Pettah, Poonithura, Kochi', 'Major Road , Vytilla, Kochi', 'Palarivattom- Kakkanad Road, Vazhakkala, Kakkanad, Kochi', 'M.G Road Near Maharajas Metro Station, Shenoy Junction, Fashion Street Sheshadri Lane Kochi, Kochi', 'P T Jacob Rd, Opposite Kannamaly Bus Stop, Thopumpady, Kochi', 'Near Aluva Pumb Junction, Aluva, Kochi', 'Diwans Road, Near TDM Hall, Ernakulam, Kochi - 682016 Kerala, India , Diwans Road, Kochi', 'Panampilly, Kochi', 'Kaniyampuzha Road, Kochi', 'Near GCDA Head Office, SA Road, Kadavanthra, Kochi', 'Chittoor Road, Kochi', 'K R Pankajakshan Road, Kochi', 'Kariyad - Airport - Mattoor Kerala 683589, Kochi', 'Near Kaloor Metro Station, Banerji Road, Kochi', 'Monastery Road, Karikkamuri, Shenoys, Kochi', 'Karimughal Junction, Kochi', 'Near CIAL Convention Centre, Akaparambu Road, Kochi', 'Cheranaloor, Ernakulam, Kochi', 'Near Masjid Noor, Kochi']
# Rating spans, selected by the full class string
# 'hotelRating__value hotelRating__value--verygood'.
# NOTE(review): the recorded session shows 13 ratings against 20 hotel names,
# so this list is not guaranteed to line up index-for-index with `hotels` and
# `locations` -- presumably cards with a different rating modifier (or no
# rating) are skipped by this selector; confirm against the page markup.
myratings = mypage.find_all('span', attrs = {'class':'hotelRating__value hotelRating__value--verygood'})
# Plain-text rating values, whitespace-trimmed (kept as strings).
ratings = [rating_span.text.strip() for rating_span in myratings]
# Echo the list (displays it when run interactively).
ratings
['4.2', '4.4', '4.2', '4.3', '4.1', '4.3', '4.3', '4.2', '4.2', '4.1', '4.3', '4.4', '4.4']
# Echo how many ratings were scraped.
len(ratings)
13
# Build one [name, address, rating] row per hotel by pairing the three lists
# positionally. zip() stops at the shortest list, so the row count follows
# however many ratings were actually scraped instead of a hard-coded 13
# (which raised IndexError on a shorter list and dropped rows on a longer one).
# NOTE(review): pairing is purely by position; if some hotels have no entry in
# `ratings`, later rows may carry another hotel's rating -- verify alignment.
content = [
    [hotel_name, address, rating]
    for hotel_name, address, rating in zip(hotels, locations, ratings)
]
# Echo the rows (displays them when run interactively).
content
[['OYO Flagship 240 South Railway Station East Gate', '18.6 Cents , Opp. GCDA Flats, Karshaka Road, Kochi', '4.2'], ['OYO 9902 Nedumparambil Residency', 'Pettah, Poonithura, Kochi', '4.4'], ['OYO 9954 Emarald Hotel', 'Major Road , Vytilla, Kochi', '4.2'], ['OYO 10461 Hotel White Residency', 'Palarivattom- Kakkanad Road, Vazhakkala, Kakkanad, Kochi', '4.3'], ['OYO 12329 Prime Palace Hotel', 'M.G Road Near Maharajas Metro Station, Shenoy Junction, Fashion Street Sheshadri Lane Kochi, Kochi', '4.1'], ['OYO 9560 Hotel Bellwether', 'P T Jacob Rd, Opposite Kannamaly Bus Stop, Thopumpady, Kochi', '4.3'], ['OYO 6725 Palm Inn', 'Near Aluva Pumb Junction, Aluva, Kochi', '4.3'], ['OYO 11328 Hotel Chandrika Residency', 'Diwans Road, Near TDM Hall, Ernakulam, Kochi - 682016 Kerala, India , Diwans Road, Kochi', '4.2'], ['OYO 4802 Cochin City residency', 'Panampilly, Kochi', '4.2'], ['OYO 10719 Mermaid Hotel', 'Kaniyampuzha Road, Kochi', '4.1'], ['OYO 11404 Hotel Tri Star Regency', 'Near GCDA Head Office, SA Road, Kadavanthra, Kochi', '4.3'], ['OYO Rooms 138 South Railway Station Extention', 'Chittoor Road, Kochi', '4.4'], ['OYO 7181 VR Inn', 'K R Pankajakshan Road, Kochi', '4.4']]
import csv
# Write the scraped rows to Kochi_hotels.csv (no header row).
# newline='' lets the csv module control line endings itself; the explicit
# UTF-8 encoding keeps the output independent of the platform's locale default.
with open('Kochi_hotels.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(
        csvfile,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,  # quote only fields containing , " or newlines
    )
    writer.writerows(content)