In [1]:

("hue" * 3).upper()

Out[1]:

'HUEHUEHUE'

In [12]:

# Named `text` rather than `str`: rebinding `str` shadows the built-in for the
# rest of the kernel session and makes any later str(...) call fail with
# "'str' object is not callable".
text = "is is is"
print(text.replace("is", "was"))      # no count: replace every occurrence
print(text.replace("is", "was", 2))   # count=2: replace only the first two occurrences

was was was
was was is

In [132]:

" \tworld\n  ".strip()    # strip, lstrip, rstrip

Out[132]:

'world'

In [7]:

# str() on a bytes object returns its repr (b'...'), not the text it encodes;
# .decode() is what turns the bytes into the actual string.
print(str(b'946809'), end = " ### " )
print(b'946809'.decode('utf8') )

b'946809' ### 946809

In [1]:

"%.2f" % float("3.5555")

Out[1]:

'3.56'

In [3]:

# str.encode() returns a bytes object.
type('foo'.encode('utf-8'))

Out[3]:

bytes

In [9]:

from urllib.parse import urlparse

def get_filename_from_url(url):
    """Return the last '/'-separated segment of the URL's path component.

    Only the ``path`` part produced by ``urlparse`` is inspected, so a query
    string or fragment is not included. Everything after the final '/' is
    returned untouched; a path that is empty or ends in '/' yields ''.
    """
    url_path = urlparse(url).path
    _, _, last_segment = url_path.rpartition('/')
    return last_segment

url = "http://imgqn.xxx.com/upload_files/2015/05/29/yyy.jpg!730x0.jpg"
# Display the parsed components; note the "!730x0.jpg" suffix stays part of `path`.
urlparse(url)

Out[9]:

ParseResult(scheme='http', netloc='imgqn.xxx.com', path='/upload_files/2015/05/29/yyy.jpg!730x0.jpg', params='', query='', fragment='')

In [240]:

from bs4 import BeautifulSoup  # analyze html
#http://www.crummy.com/software/BeautifulSoup/bs4/doc/
# NOTE(review): `html_doc` is not defined in this cell — presumably an HTML string
# (e.g. the sample document from the Beautiful Soup docs); confirm it is defined earlier.
# The parser is named explicitly so the result does not depend on which parsers
# happen to be installed (and bs4 does not warn about guessing one).
soup = BeautifulSoup(html_doc, "html.parser")
soup.p['class']                           # `class` attribute of the first <p>
soup.find_all('a')                        # every <a> tag
soup.find_all('img', src=True)            # every <img> that has a src attribute
soup.find_all("div", {"class": "xxx"})    # every <div> whose class is "xxx"
soup.find(id="link3")                     # first tag whose id is "link3"
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>

Regex

https://www.regex101.com/
http://regexr.com/
https://docs.python.org/3/library/re.html

In [3]:

import re
# (?i) makes the pattern case-insensitive; \b...\b restricts it to whole words;
# `you+` accepts "you" with any number of extra trailing "u"s.
re.sub(r'(?i)\b(u|you+)\b', "your name", 'u YOU')

Out[3]:

'your name your name'

In [61]:

re.match("c", "abcdef") # re.match only tries to match at the beginning of the string, so this returns None (no output)

In [59]:

# Unlike re.match, re.search scans the whole string and returns the first match it finds.
re.search("c", "abcdef")

Out[59]:

<_sre.SRE_Match object; span=(2, 3), match='c'>

In [7]:

# \w matches a single "word" character: a letter, a digit, or the underscore.
# The pattern is a raw string so the backslash reaches the regex engine as-is;
# a plain "\w" is an invalid string escape and triggers a warning on newer Pythons.
m = re.search(r"\w", '1dfsde2')
if m:
    print(m.group(0))  # the first word character found: '1'
m

Out[7]:

<_sre.SRE_Match object; span=(0, 1), match='1'>

In [ ]:

import re
# The non-greedy `.*?` stops at the first END:VCARD, so each card is matched on
# its own; DOTALL lets `.` cross the newlines inside a card.
# NOTE(review): `content` is not defined in this cell — presumably the text of a
# vCard (.vcf) file loaded elsewhere; confirm.
pattern = "BEGIN:VCARD.*?END:VCARD"
result = re.compile(pattern, flags=re.DOTALL).findall(content)

In [2]:

import re
# Each lookaround asserts what must (or must not) surround the "b" without
# consuming it, so only the "b" itself is replaced by "*".
subject = 'abc a b c'
lookaround_demos = [
    ('Positive Lookbehind:\t', r'(?<=a)b'),   # "b" directly preceded by "a"
    ('Negative Lookbehind:\t', r'(?<!a)b'),   # "b" NOT directly preceded by "a"
    ('Positive Lookahead:\t', r'b(?=c)'),     # "b" directly followed by "c"
    ('Negative Lookahead:\t', r'b(?!c)'),     # "b" NOT directly followed by "c"
]
for label, regex in lookaround_demos:
    print(label + re.sub(regex, "*", subject))

Positive Lookbehind:	a*c a b c
Negative Lookbehind:	abc a * c
Positive Lookahead:	a*c a b c
Negative Lookahead:	abc a * c