import pandas as PD
import numpy as N
# Read the French dictionary (e.g. https://github.com/dlevel/sandbox_profilmatch/blob/master/public/liste_francais.txt)
# NOTE(review): presumably one word per line, hence a single column -- confirm against the file
df = PD.read_csv("liste_francais.txt.gz", header=None, index_col=False).squeeze("columns") # column axis squeezed to get a Series
print(f"{len(df)} mots")
22740 mots
# Keep only the 7-letter words (step switched off by the constant-false guard)
if False:
    has_seven_letters = df.str.len() == 7
    df = df.loc[has_seven_letters]
    print(f"{len(df)} mots de 7 lettres")
# Drop proper nouns, detected as words starting with an unaccented capital letter A-Z
df = df.loc[~df.str.match('^[A-Z]')]
print(f"{len(df)} noms communs")
21315 noms communs
# Build the normalized (accent-free) form of each word, then drop duplicates:
# NFKD splits accented letters into base letter + combining mark, and the
# ASCII encode with errors='ignore' discards the marks.
df = df.str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8')
# Count only the redundant copies (first occurrence kept), i.e. exactly the
# entries removed by drop_duplicates() below. The previous duplicated(0)
# passed 0 as `keep`, which behaves like keep=False and counts every member
# of each duplicate group, overstating the number of removed words.
print("Duplicats:", df.duplicated().sum())
df.drop_duplicates(inplace=True)
print(f"{len(df)} noms uniques")
Duplicats: 1242 20691 noms uniques
current = 's..p..e' # Number of letters + letters already in the right position
lin = 'ui' # Letters present in the word
lout = 'aor' # Letters absent from the word
# fullmatch() so that the template also fixes the word length: match() only
# anchors the start of the word and lets longer words through.
candidats = df[df.str.fullmatch(current)]
candidats
20161 somptueusement 20162 somptueux 20291 soupape 20292 soupapes 20302 soupeser 20305 soupire 20306 soupirer 20514 stipule 20553 stupide 20723 suppleer 20731 supplier 20742 suppose 20744 supposee 20745 supposer 20746 supposes 20757 suppurer 20798 surpayer Name: 0, dtype: object
# Words containing the "present" letters: one boolean mask per letter of lin,
# multiplied together along axis 0 (i.e. AND-ed) and cast back to bool
selin = N.prod([ candidats.str.contains(letter) for letter in lin ], axis=0).astype(bool)
candidats[selin]
20305 soupire 20306 soupirer 20514 stipule 20553 stupide 20731 supplier Name: 0, dtype: object
# Words containing none of the "absent" letters: one negated mask per letter
# of lout, multiplied together along axis 0 (i.e. AND-ed) and cast back to bool
selout = N.prod([ ~candidats.str.contains(letter) for letter in lout ], axis=0).astype(bool)
candidats[selout]
20514 stipule 20553 stupide Name: 0, dtype: object
# Candidates satisfying both the "present" and the "absent" letter constraints
candidats[selin & selout]
20514 stipule 20553 stupide Name: 0, dtype: object
# Template: 7 characters, first letter 'g', the rest unknown
current = 'g......'
# fullmatch() requires the whole word to match the template
candidats = df[df.str.fullmatch(current)]
candidats
10054 gadgets 10061 gageure 10062 gagnage 10063 gagnait 10064 gagnant ... 10621 guerres 10628 guetter 10634 guichet 10649 guitare 10658 gypaete Name: 0, Length: 90, dtype: object
Le meilleur candidat est choisi comme étant celui offrant la plus grande diversité de lettres.
def count_unique_letters(s):
    """Return the number of distinct characters in the string s."""
    distinct = {letter for letter in s}
    return len(distinct)
# Positional index of the candidate with the most distinct letters
imax = candidats.apply(count_unique_letters).argmax()
candidats.iloc[imax]
'galopin'
current = 'gr.n.e.' # Number of letters + letters already in the right position
lin = 'e' # Letters present in the word
lout = 'lasod' # Letters absent from the word
# Words matching the template over their whole length
candidats = df[df.str.fullmatch(current)]
# Per-letter boolean masks, multiplied together along axis 0 (i.e. AND-ed)
selin = N.prod([ candidats.str.contains(letter) for letter in lin ], axis=0).astype(bool)
selout = N.prod([ ~candidats.str.contains(letter) for letter in lout ], axis=0).astype(bool)
candidats = candidats[selin & selout]
print("Possible candidates:", ', '.join(candidats))
# Suggest the candidate with the most distinct letters
imax = candidats.apply(count_unique_letters).argmax()
print("Best candidate:", candidats.iloc[imax])
Possible candidates: grenier, grincer Best candidate: grincer
# Same kind of query written as one regex made of three lookaheads:
# the template, no forbidden letter anywhere, and at least two 'e'.
# NOTE(review): the template lookahead has no '$' and str.match() only anchors
# the start, so the word length is not enforced by this expression.
regex = r'(?=gr.n.e.)(?!.*[lsasod])(?=.*e.*e.*)'
df[df.str.match(regex)]
10541 grenier Name: 0, dtype: object
def complete_regex(pattern, lin='', lout=''):
    """
    Build the regular expression selecting the words compatible with a puzzle.

    pattern: word template where '.' stands for an unknown letter
        (e.g. 'g..n.e.'); it fixes the word length and the placed letters.
    lin: letters known to be in the word, in addition to their occurrences
        already placed in pattern.
    lout: letters that must not appear anywhere in the word.

    Returns the regex as a string. The template is always anchored with
    '^' and '$', so the expression enforces the word length whether it is
    used with match() or fullmatch().
    """
    if '.' not in pattern:  # Trivial pattern: the word is fully known
        # Anchor it as well: an unanchored 'soupape' used with match()
        # would also accept 'soupapes'.
        return f"^{pattern}$"

    subs = []  # Regex sub-elements (lookaheads)
    if lout:
        # Negative lookahead: should not contain any forbidden letter
        subs.append(f"(?!.*[{lout}])")
    for letter in lin:
        # One more occurrence than the ones already placed in the template
        needed = pattern.count(letter) + 1
        # Positive lookahead: should contain `needed` x letter
        subs.append(f"(?=.*{'.*'.join(letter * needed)}.*)")

    if not subs:
        return f"^{pattern}$"
    return f"(?=^{pattern}$){''.join(subs)}"
# Build the regex for: template 'g..n.e.', one more 'e' to place, none of 'lasod'
regex = complete_regex('g..n.e.', lin='e', lout='lasod')
print(regex)
(?=^g..n.e.$)(?!.*[lasod])(?=.*e.*e.*)
# Dictionary words accepted by the generated regex
df[df.str.match(regex)]
10541 grenier Name: 0, dtype: object
def iteration(df, current, lin='', lout=''):
    """
    Run one solving step and report the outcome on standard output.

    df: pandas Series of dictionary words.
    current: word template ('.' for an unknown letter).
    lin: letters present in the word.
    lout: letters absent from the word.

    Returns the Series of words of df matching the regex built by
    complete_regex(current, lin, lout).
    """
    regex = complete_regex(current, lin, lout)
    candidats = df[df.str.match(regex)]
    n_found = len(candidats)

    if n_found == 0:
        print("No solution")
        return candidats
    if n_found == 1:
        print("Unique solution:", candidats.iloc[0])
        return candidats

    # Several candidates: list them when there are few, otherwise just count
    if n_found < 10:
        print("Possible solutions:", ', '.join(candidats))
    else:
        print(f"{n_found} possible solutions")
    # Suggest the candidate with the most distinct letters
    diversity = candidats.apply(count_unique_letters)
    print("Best candidate:", candidats.iloc[diversity.argmax()])
    return candidats
# First step: 7-character word starting with 'g', no letter constraint yet
current, lin, lout = 'g......', '', ''
iteration(df, current, lin, lout);
90 possible solutions Best candidate: galopin
current = 'g..n.e.' # Number of letters + letters already in the right position
lin = 'e' # Letters present in the word
lout = 'las' # Letters absent from the word
iteration(df, current, lin, lout);
Unique solution: grenier
current = 'f......' # Number of letters + letters already in the right position
lin = '' # Letters present in the word
lout = '' # Letters absent from the word
iteration(df, current, lin, lout);
150 possible solutions Best candidate: facheux
current = 'fa...u.' # Number of letters + letters already in the right position
lin = 'e' # Letters present in the word
lout = 'chx' # Letters absent from the word
iteration(df, current, lin, lout);
Unique solution: fatigue
def read_dict(dicname, nletter=0):
    """
    Read and prepare the dictionary.

    dicname: path of the word list, read with pandas.read_csv as a
        header-less, single-column file.
    nletter: if > 0, only the words of nletter letters (counted on the
        normalized form) are kept.

    Returns a pandas Series of normalized (ASCII-only) words, without
    proper nouns and without duplicates. Prints the final word count.
    """
    # dtype=str and na_filter=False keep every entry as text: with the default
    # NA parsing, words such as "null" or "nan" become NaN and break the
    # .str-based boolean masks below.
    adic = PD.read_csv(
        dicname, header=None, index_col=False, dtype=str,
        keep_default_na=False, na_filter=False,
    ).squeeze("columns")  # Series

    # Ligatures have no NFKD decomposition, so the ASCII encoding below would
    # silently drop them ("cœur" -> "cur"); expand them explicitly first.
    ligatures = str.maketrans({"œ": "oe", "Œ": "OE", "æ": "ae", "Æ": "AE"})
    adic = adic.str.translate(ligatures)
    # NFKD splits accented letters into base letter + combining mark; the
    # marks are then discarded by the ASCII encoding.
    adic = adic.str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('ascii')

    # Proper nouns are dropped after normalization so that accented capitals
    # ("Élise" -> "Elise") are caught by the A-Z test too.
    adic = adic.loc[~adic.str.match('^[A-Z]')]
    if nletter:
        # Length is measured on the normalized word (e.g. "coeur" has 5 letters)
        adic = adic.loc[adic.str.len() == nletter]
    adic = adic.drop_duplicates()
    print(f"{dicname}: {len(adic)} words")
    return adic
# Load the full dictionary (no length filter)
adic = read_dict("liste_francais.txt.gz")
liste_francais.txt.gz: 20691 words
import re
# Puzzle syntax: '<current>' optionally followed by '+<lin>' and then '-<lout>'
pattern = re.compile(r"(?P<current>[a-zA-Z.]+)(\+(?P<lin>\w+))?(\-(?P<lout>\w+))?")
match = pattern.fullmatch("fa...u.+e-chx")
# Second sample without the '+lin' part; it replaces the previous match
match = pattern.fullmatch("fa...u.-chx")
if match:
    # A missing optional group is reported as None
    current, lin, lout = match.group("current", "lin", "lout")
current, lin, lout
('fa...u.', None, 'chx')
def play(adic):
    """
    Interactive helper for word games, based on the dictionary adic
    (a pandas.Series of words).

    At each iteration, enter the puzzle as 'b......+inr-age':
    * 7-letter word starting with 'b'
    * containing the letters 'inr'
    * not containing the letters 'age'

    The input is lower-cased and asked again until it matches that syntax.
    The game stops as soon as an iteration yields exactly one candidate.
    """
    pattern = re.compile(r"(?P<current>[a-zA-Z.]+)(\+(?P<lin>[a-zA-Z]+))?(\-(?P<lout>[a-zA-Z]+))?")
    turn = 0
    solved = False
    while not solved:  # Game loop
        turn += 1
        match = None
        while match is None:  # Ask again until the input matches the syntax
            puzzle = input(f"#{turn} iteration: ").lower()
            match = pattern.fullmatch(puzzle)
        current = match.group("current")
        # Optional groups are None when omitted: fall back to "no letters"
        lin = match.group("lin") or ''
        lout = match.group("lout") or ''
        c = iteration(adic, current, lin, lout)
        solved = len(c) == 1
# Start an interactive session on the loaded dictionary
play(adic)
#1 iteration: b...... 145 possible solutions Best candidate: baigner #2 iteration: b......+inr-age Possible solutions: bonsoir, bourrin Best candidate: bonsoir #3 iteration: bo...i.+nr-ages Unique solution: bourrin