def edDistRecursive(a,b):
""" Calculate edit distance between sequences x and y using
recursion. Return distance. """
# This implementation is very slow
if len(a) == 0:
return len(b)
if len(b) == 0:
return len(a)
delta = 1 if a[-1] != b[-1] else 0
return min(edDistRecursive(a[:-1], b[:-1]) + delta,
edDistRecursive(a, b[:-1]) + 1,
edDistRecursive(a[:-1], b) + 1)
import datetime as d
st = d.datetime.now()
print edDistRecursive("Shakespeare", "shake spear")
print(d.datetime.now() -st).total_seconds()
3 34.848434
from numpy import zeros
def edDistDP(x,y):
# Create distance matrix
D = zeros((len(x) + 1, len(y) + 1), dtype = int)
# Initialize the first column of matrix
D[1:,0] = range(1, len(x) + 1)
# Initialize the first row of matrix
D[0, 1:] = range(1, len(y) + 1)
# Fill in the rest of matrix
for i in range(1, len(x) + 1):
for j in range(1, len(y) + 1):
distHor = D[i, j-1] + 1
distVer = D[i-1, j] + 1
if x[i-1] == y[j-1]:
distDiag = D[i-1, j-1]
else:
distDiag = D[i-1, j-1] + 1
D[i][j] = min(distHor, distVer, distDiag)
return D[-1][-1]
%%time
x = 'shake spea'
y = 'Shakespear'
print edDistRecursive(x, y)
3 CPU times: user 6.02 s, sys: 84.8 ms, total: 6.1 s Wall time: 6.04 s
%%time
x = 'shake spea'
y = 'Shakespear'
print edDistDP(x, y)
3 CPU times: user 748 µs, sys: 545 µs, total: 1.29 ms Wall time: 50.2 ms
edDistDP('PLEASANTLY', 'MEANLY')
5
x = 'HMARCIMTIFQQVQCECHALVTTMPPYRCCEMIAGDEGFFLTQSDTWIHQGMESILGCTWHTLEHVFNKSVIPTCHVMDCILVCPIDYKLYHRIENWARFMILGEVDRERLIMQWQGYSNCNKWQIWQDESIKYFKINHDSLYWVPQYVGTMVDYTCIAWATLILMECCQYPHSKGPTWSVYNDAHFSLMKHHAMCMFFSMEHSNEFTFLHKCMATFLLPVKEQPSFGMCCWLQKVGMGPPWFMYMNQCQRKSMGSKKQGNKYSCRHDAEQRSVATKFYDDMFWVSQCLSNYHTYNLSAPSVHWKRRPFYQLHIWANEILIHDTMNLCFLGELTWSHESIDYSRFHEPWFYQSGVSDLTIPFWWQRRGYTLSESLWAMNYCKGMLCDKMEELRTRHRDPHAIGMFRKALKEGPMYALSPWMCQLMARVCGIHQAWAFLQGMCFIWRHIHYKYMCFEYYWWYPIAECMPWPLSMKYTIQPDVSHGSTAYKEYQKKYPPPGQMCIYKQCEMAIMFKGRHKHTIFVSYRGDIAKMQLWQLVNKDISWPRSTVAGMFNFEHAFGEHVWSQVYQNRPQPVRPIMQQQKLVAPDIPNWILAVTYWRGMICLDHKENRYSKLYIMSINWHQFSRRSDPGKSKQNGQASWGHSMTQWGWPACHAEFTKYRGIEYCDIYCKQEQNRSPNCDEPCHQYSWHRCECGNCQHAAYFTLEGWDLEHFGEPAEKDKPEPILRKTDKPQAKMPRCSLSFPHNRHHQDKNYVHYYFAAGVGPGGGWQVKSREFIPMHNTPHLIWMGGSKWVQWGQKEIWAWQERPFHYYCHCFCAHILYRMYVVFRMKWTCGNPQFEPHCWMLFDRIIPQNMSVCDFCHSPKKQSAECMGIMFTELSKEPHIWVWKMQYYKWGLWLPSCLTWNIPTDLRFHTSMWSFMCWGTFRFCTIYAQPWLEWHHCANDEYFTGGMGNGSWCMIEMCFCITVKAYYTGAVKMRMFHVLQWTDDWYIPWRAMTYKLHCMFGYCYWSIGGCGMGWMSCTNGVQGTHIGPVCYRTIAYREIQVFIMSVEIFRLAIRTFGRVCTDHWAKDANLASCCWFRIHIASDQAKYCVSVQAYNRNDASLVNDHWPIKYMMITPHDDEKHMMLEFTNLVTCMTPPARKCIKLSGYGTDGNMEKYHSMEPSCHVMQANAEASRGDWRQHWAWTIGHLGRKLEDHGFSEWRKHDTFNSEGFFAQRASYNMPDFIQNKWEEAHYCVTSDYRTWCHLKEAGSNLLIDDAVTMDCNTWYGHAEAPHFTNKYVTYYEFMHVNHIIFVYANHYCCTLYYVDWGFNKAHTGFSIMYVKVTNASNDYWEWDGQTRIIIECWTQGKTKTKRHIRKADQKYVVVCGAKTDDTAINDNNVEHQSQTWPWTCGCHMQWWVTQEEVSHFRTWEFRNKYNCQRRMTRLENEYHLIMLKSCDCTDWDLPSGPVRTCWAHNRYTVLLQFILFIDWNMCKKKDLSFSESFCFCDMLLPARYDIWVDVERWAAVELCYRHGPYRSWAIKGMAYVTLSMSWNDQSHMEEDFNATCMNHSASPDEACMIKHEPVAFCYAKLNRQVSWCCTPYWQQMSENRPDMARGWPGHAMLLMPCYCYLFIAYMCMLPIYGEQNQKTLLYGGDRYYHNHMDQNLINMCYQAEDVCESFRTDDNGIFSYAEQDAQCKTYRASWKHPKVDECSRHKQDGRYNDMRWFSIEIASLQPQLGYNDVYQTYGIRYFVTRSEKFGTYHTLNEMVSFRADMVTPLSYNDDEADVYVAKSWKMKPCPQARERYGPEKCCQQVGQGYQQANREHHFCMIARSNSAEFQRWTVCRSTKGIWLMRFNCYGCFKHCYVKWCCRHEFQCKGGGQWNHFEGRKQKNLRMPTGQDAFAVKFETPCFRHDLKLEIWHDRKRHPIPTPECFCNMRLESDLQALGNWSEFTFRNEARMYTQEYDWQQCGWCPQSALACWKEPMHLMPKMGHKMRMRTMGMIKVMSQNSIRCGEFSHPRWLACKRPKETCSKMAPMTDHYSSLGAWQRHQQPYNFYIYMKAWLNSYGRSAGGQQMRGLMAKMIQYEPNPWNYTTNRKWFISDNHTHMGMYKRIFDIYTFSSFHGGEPSFEDSDTWAFRLQRPEKNQSMLNHYGARTYQIEAKAADLCNYPVGYNPSKGRFDMGVNEAQQPQWLEFVNFRLVCVQFTRHNPNTMLYKRHHGWMNQWEAISAHMCSYMHNWPAQNCIHLSDVVYEATWERQFPTLDGRSNYLTWKDNMISDSPKGWSKHSAGHTCCQTENFALWCFECNCECHEHYMKGDQTTARRNMRRYQAHTKQRAVMRAEVILTGQDGLAKIVRCLVANPMIIDSVVPWHHRGDEFPTGNPIVANPPTDYCPMISREAWDMFSDHERCDPDYCTEHCKTVIKFMPCCSWQWMKETSRTKNPKYAQFFGDHDKSWMWFEQLSKIWMWCSMEHTSMRECEKFAAAFPHMGHFIQVANPMDWCCPHMWYHRTSQTAYNRINRWNWQTTHTIGKIRNEMEWRWPYQPHTSRQSCEQSEIHRLDQTNACKAQFEMKWPFRCQTTSIFYYYFTGCTYGLRITQHPGWSSMRWVSRTRMIGSAAMASRQKLYLNNSQYDMGTYVWNNKKQLRSTEHNHSHPRVHKFEWNTITPTSEDPESRCYPQIKVARNAIFKYKMQHRLDGFHIVYIMCRKVYVFCPIEVTQSLVMQLRNCTRWSVSREATYFTSNHYMKREFTYRYENDEWWPAIIGRIFGYCKYVVDAPVITNQMAATERMTYLPMLDKVWFDNILTQFFRKERWMCNLIHLREGFYQSTYRICGRIDSENMPYRSSMWGWIRMYIFAEFAEIMDALHKVLVGHEKHYASIVSMTTSDKNHMFGDRYNCKDKVVIHGESFEYAWQQFYLALKTMSFIQQGTNQQMCQWYGWPANEVYWIQYYYKEHPTLESEYIIAPSKEGKCYNQTQYRKKKKNKFSTCDPVRGEWKWDELKPSKRDGAIVTANAESCWFDQLHNDCDSRVGIPSFHNRYMLMLMMIRYPYVVNGFINVDSRGWPKYVQENFTYIRDEIIWSDFWVTTANYMSIYFHTAFKTEWWVIFCARITVSQCDQMAKEKQKPIYDFRDRHVSWQLTQFPCGPWSFRMQCENPCVLAAKCRSWRSRLMDMNAYMHKAPVPRSMKSGHCMTQITVFMNQLQGMLSWVGYLPLNDGDEDYTVPCHIPELKGCGNDWMWDQEFRIYLVTWTRLHGVRNPQFQAYEGPMPEADVGDHTPGNMLNYFTRSWNGFDRRNSHTGMYLGAEVWAACLHSAYKHELRTVVNISLFSPNWYPQEKNLGAKPQYGVIRMHEDIGTIFRIRWVFSAMFYAKPKYKDTAQYANMFACYHFHLNAKTSLMEYVMITATTWMHSANQDTWLNKEAMFENETRNLNPPVHICWKEKDYFIIYEQREPEDMGDAWHKGKVKWLVNWMKFGHFPLNQKWVMQDQEHIYHDTTRCMLDPLNYNKYDQLDPQEFLTHTKGASWQETRNDLTIISTWRSAVPGVLRMSIGYDLRYGCDVPYTAKAELNEFHTQQASENCQKHSGAKQYMCIIAKICRTCVHMPINDVSMSYKPVCNDWMIVKTCGWEMFYHWIFNPGHFCWYYICDWKAGARRFFYRIHIDPNRYYVKEGQVGCNVWDIVSFLQTHIWHPHAMAPDRWHWRFCFTNKWKRYNCLRCYMAVPPWCQENVINWPRYPCLFTMIGFYDLEAHAKVFYTPNYWNHIWKMFNHTIDKCVDWQQMTIDFECSPYKWPLWGVIMQQLFKIKEYFMHIHPRQYMNWICTVYVPTCYESFMDVCNECMANCEGCQNPPYRAFTFVWQVRDWITGQCQQHSFYMAHSFEEYVWAWWEHFGQEAKAMMHRSQGCNFYRQHYEPYFWDEASWQMQNECYRQGFGCKHIFELIFTMEREMVSSDPIKTSRHCYYKEYYLWCYYKNMTHWYCCNFARRALCWYEFVNIQNTLGCTERNKCNSHYVLKKVIMELVNIRHVRKCSDFAGTRIMSQFHSAAGHQVYENYPLDHIKQPRQYISQRCEDGAWYQSRKNMSGYKATLNHMNTNPCFDEPYRVINQDRMKRNIWTLCNEEKSCYTFEYYNTFESDATYNMTCNWTTFFKVDQMWQGGSPNFTCKGKAMIAEFGIDPALWSEPNFVARESYTMFGKGDHHVNGALFLPNTRASRHTLKHLMEAANGFEYEQVVLWAFTGLEHCAGRCGAEQGLFDHELNSFQRNEVMKGCSAHEKKLGNLQVQELTWWFFYECMHNVFVIPPFARTSGAGENDIPDVRKWTADCRWNIHQRVNNVAQTEFNAKQHTNSSLWNALTLEFEYFWTQFFYDLGCNDCPWIMVIGLKLYGGYTTQWMMRDFMYPCRCQFSICFRSMCQPERPCCRQIFYHWCTNSNCYWWPC'
y = 'HTAIQGTYARCIMTIFQQVQCECHCKKLVWTMPIYKCCEMILGDEGFFLTQSYVVEKTWIHQGMEDILGCTWHTLEKVFNKSVIPTMQSIASGMFVMDMMFCLFLTSALYKLYHRIEWHKKMSRPWARFMILFAEQFPLRWVDRERLILVQIWQDRSIKYFKINHDPTMFNGPMVATLILMYPYSKGPTWSFYNDACQLFSLMKVHAMCMFFEMEHSNEFTFLHICMATFLLPVKEQPSCCWLQKVQNQSMGSKKAGNKYQRSVATKFYDDMFWVSQCLSNYHTYNNRALCGQQGSAPSQLHSCPLGELTWSHESIDWGLSDTTIDAELSDKIFYLERLIEIFWPVTHMEVTRAGYTVSESLWAMNYCKGMWCDKMEELKGSHPGTQSVKADPIVKTWKEGPMYALSPHMCQRPNAYTMARVCGIHLWDNGMCFIWSHMHYKYMCIAECMPWPLSMKYTVSHGSTAYKEAMMPSDQKMYPPPGNMCIYKQCKHAKDWSWPRSHAFGEHVWSQYQFWTEPTPVRPIMQQQKLVAPDIPYWRGCLDHKENRYSKLYIIHTHPLEGMFRSDPGGVKQNGQLCIEENPGHSHDVHTQWYWPACVEDMIPEFVKYKGIEQCDIYSKYDSMSKTWCIPNCDEPYMPPHQKSWHRNECGNCQHAECYFTLEGWDLEHIGEPAANTPRPILRKTDKPQMPRCSQSFRHHQDKNYMPHRYFAWGVGPGGGWSREFINTFHVICQVIWDGGSKWVQCGQKFKRKFECPIMMNLAWQERPFHPKLYCYRMMVVTRMKWTCTCMQYFSPCDSIWMLHCWMLFDRISVCDFCHSPKKQSHVYVWKMQPSCLTWNIVRDLQIADNDQFHTSMWSFMDTWGTFRFCTIYAQPWLEWHHIFKCWYFANDVGPWGNGSGCGILHIENCDCITVKASYQMYTGMVKMRMFVLQWTDDWYIPWTKLHMFGWCFWSIGGCGMGWMSCHNGGTHCGPMCYRTIAYREIQVFIMDVEIFRRTWGRYIEDCTDHWAKDANLASCCEFRIHIASCQAVQYNRNDASLHWPPNHLILIKYMMITPHYGTMMMMGLNMLEFENRADSHVTCMTPPARKWGWCIILSGWDQTDGGEEKYHPMEPSCWCMQANAEAYRGDWRQHWAWFIGFRGRDHGFSEWRKHDTFNSEGFFAQRASYNMPDFIQNKTKEAHYCGGPQKDHCHFHLKVRNALDVFGSELLIVTMAGCRPVHGFQLWHHAFDAQNHPWITYYEFMHVNMIIFVYNHESTMLVGGFKIMYATLKPTNCSNDYWEWDGQTRIIIECWEQGKTKTKRHIRKADQKGQDVVVCGCKDGAKTDDTGQNDNNVLPHWPWTCLWWCHMVLADERIFYGYKEDWYLTWVNKNNCQDETERMTKFTCYLENEYHLKSCDDTDWDLPSGPVRTGWAHVTYTVLLQFILKYYKINPEEEYKPYIWNMKWQTNHDLRKKKDLSFSEPFCFCDMLLVARYDGSSFGPFGASWQTTMRWVDVERVAAVEYRHGPYRSWAIKGMAYVTLSKSWNDQDVLFETNNATCINHSASCDEDCMIKHEPVAHCYAKLNNLGEWMQYSKCCTPYSFYVQMSENRPDMARDWPGHANAMLIMPCYWMLFPIYGEQNQKTLGGDRNHMDQNLFASPTMNMCFRTDNNGIFSYAEQIAQCKTYRAVWKQDGRLNLMRWFPVDLIESIASLPQLGSQQWAPVMENDVYQTYGIRYFVTRSECQYFRQCNYITYHTLNEMGQTCDSFTPQCTICHADMITPSCTLWNCSYNDTEADVYMAKSWKMKPCPQARERYGPEKCNQQVGQANREHDFCMIARSTSSNGKFWWTVILPSWLMRFNCYGCFKSVSPVHLRDPGHNKEDVKWCCRPEFQCKGGQWNHLDEGRKQKNLRMPTNQDACAVKFEATTHQKLEIRHGTPECDCNMRLGNWSYFTFRYRGAHRIYPKKRMNMCYDWQSCGWCPQSALACWKEVFSGTRRQPPKMGHKMRMKQQRINQHTMSMIKVMSNCIRKGACKRPKEMCSKMAPMTDHVSVCGAWQRCNQPYNFYIYMKAWLNSYGRVAGGQQLRGCAMAKMIQYEPYVWCYTLAYMGHMKNRKWFISDNHTHMGYSFSSFHGGEPSFEDSDFRLQRPPPAGCKNQSMLNHYVNQCARCARACLCNYPVGYNVNMSRSEGRFDMGVYTQGMCNMNAAQQPQWLAFVLHVPCFRLVCVQFTRHNPNTMLYKRHHWAAWMNDNMHQHCRWCAISAHMPSYMHAQNLANLHDVYEAAWERQFPTLIGRSNYLTWKDNMISDSPKGSAQVPKHSAGHTCCQTEQQALWCFCYMKGDQTTARRNNRRYQASVTEQRNAVVMRAVVYMQWTYVQLTGQDVRCLVANPKIIKLSRMYMTGGTRQHHLMPDVANPPTDYCPMISREAWDMFSDHERCDPDYCTAHCKTVIFPYRFMPVCSWQWMKETSRTRNPKMGWAQFFGDQFKQYFEQLSKCKGWCDCWDWMTSMRAAFPGILDVDDPWNPMQWCCPHMWYHRVSQCAYNHCINTFYNWQTTHCFWHTIGDVHGDNQQSCEQSEIHHVWRVIGNNCKAFYVATIEIKFCLKWPFRCQTTSIFYYYFTGCTYFQIGLRITQQISTRMKGSKLWRVQSSREGTDVWNNKKQLRSTEHVHKFEWNTIIPTSRSDHVFDPESRCYPQIKVARNAIFKYKMQHRLDGFVYIMCRKVVDRFHSTMVFCPIEVTQRRTFMLVAQARNCTRWSVSREATYFTCPRFCEWNVYMENDEDWLWVAIIGRICGPVITNQHAATERMTYLPPLQHSDLKVWFDNILPENWTFCQFFRKERWLNEGKITNKCGYQSTYRIVGRIDLENMPVRSNMWGYIKMYIAEIMDAHEKHYASIVSMTTSDPNHEDIRQNTFQCGACKDKVVIHGEAFVFYLALKTMSFIQQGTNQQMHAPNWPANMYWIQYMYKEHPTLESWDDYENRWYIIAPSKEQKFSTCDPVRGEWKWDELWPSKRDGAIAATANAESCWFDQLHNDCDSRFGIPSFTNRYMLMLMAIRYPYVVNGFINVPKYVLRGLEAKRDEIITANYRSDYFMTAFKSRETHHYFEWYTNEPIFCATCISWYLDYITEKLEEIPHFPSRSVSWQLPWSFCEYMQCENPCNCDMNAYMHKAPPKPRSMKSGHCMGQFTVFMNQLQGMLSWVGYFPLNDGDEDLTVPCHINDGMWWTCLHGVTNAYEEPLPGADVGLDHTPGNMLNYFTRSWNNFAPFDRRNHHTVWAHTQQMMSYSKHVVNISLFSESCVFSRPMNWYPQELGAKPQYGVDRILVDIFRISWHFSAMFYAKPTSHIMVLINMFICYHITITATTWMLNKGQINRNSANQDTWLEAMFENETRNLCWFCAPKDYTIPYEQREPEDMGSHEDAWNIKKAKGKVKWLVNWMKFGHFPCNQKHIYHDNYNKGVDDQLDYWKGASWLMDEGESRQTTRNDLTIISTWRSAVPGVFFMSIGYDLRYGCDVAWAWSSEANYTAGVFKLLRNLCEFNEFWTQQASENCQKHSEAKVYMCWIAKICETRSDSYKPVCNDWMIVLILITTRHKYIICGWEMFYHWHVFLFKMWFFNPGNFCSWTWDYYYIKKAGARRDPYRYYGQVGCNTHIWHPHAMAPDRWHWRNKSWAIKEEYKRYNCLRAYYGIFHYMAVPMWCQVLVQVSPVIWGAEHHQNWPRYYHLEAHAKVFYTSNYWNHIWKMGNHTIDKCVDWQQMVIAFRCMCQSFQYKWPLWLIDMWVIVGMECNTQQCFKIKEYFMHIHPRQYMVWMCTVYVPTCYGSFMDTCNECMANCEGCQNRAFTFVWQVRDWMKYRTGQGQQHSFYMAHSFEEYKCPIRMKVWAWWEHFGQSAKAMMHREYHFVWCYLQFCHMYYYETAFYRFWEEASMQMQNECVRGATRQGFGCKHITMVSSDPIKTTFMERMDKRHCYYKEYADFMRVFDLINMTCNSADCPIQRASASVEHMTWYEFHNCTERIKCNSHYVEIAMILVLIRQIMPVVRKCSDFAGGKRIMSQFHSAAPHKVYENYPLDWIKQISQRCNLGAWTRSRKGYYNTNPCFDEPYRVINQDRYKRNIWWLCNEEKSQCEYTTKSNSDIIFEYYVFTEIKTFNMTCNWTTFFKVDQHWQTHKGHFGIDPSLLRLWSEPYDMGKGDRDKHVNGHLFTRASRHTLKHLMEAANGYEIEIVQFKYMGIKWAFRFQEAHAYAYYLAEDGLDDHELNSFGLGNLQHQELTNWLHVWIFFYAATQGGWTQLESHNVFYIPPFARTSCFGENDIPDVRKWTADCWWAQIGFNAWHALTLEIFFRDLGCNWIMVIGLKLYGGYTTQWMMATEFYYPCRCSICFRSMCQPDQVPVDRPCCRQGFYHWCTNSNCYWWEC'
edDistDP(x, y)
1998
x, y = [i.strip() for i in open('input/dataset_248_3.txt', 'r')]
edDistDP(x, y)
332