import pandas as pd import requests from bs4 import BeautifulSoup url='http://www.myanmars.net/enews/2010/myanmar20101113.html' page=requests.get(url) soup=BeautifulSoup(page.content) def dictify(txt): attrs={} for item in txt.split(';'): itembits=item.split(':') attrs[itembits[0]]=itembits[1] return attrs def approxify(x,y): if abs(x-y)<3: return True return False txt='Pyithu Hluttaw representatives in the constituencies shown against them.' leftCounters={} for line in soup.find(text=txt).parent.parent.find_next_siblings(): attrs=dictify(line['style']) currLeft=int(attrs['left']) if currLeft not in leftCounters: leftCounters[currLeft]=1 else: leftCounters[currLeft]=leftCounters[currLeft]+1 for c in leftCounters: if leftCounters[c]>3: print(c,leftCounters[c]) colSep=600 lefts=[100,628] #--- import re prevLeft=0 prevTop=0 cols1={} cols2={} topCounters1=[] topCounters2=[] newState=False currstate='' stateReps={} col=1 for line in soup.find(text=txt).parent.parent.find_next_siblings(): attrs=dictify(line['style']) currLeft=int(attrs['left']) if leftCounters[currLeft]<4: continue currTop=int(attrs['top']) if currLeft