Module generation

This module allows you to build the matrix from the processed dictionary and generate words.

Expand source code
"""
## This module allows you to **build** the matrix from the processed dictionary and **generate** words.
"""

from random import choices
from itertools import product

###########################################
# N dimensions matrix, N-1 letters before #
###########################################

separator_list = [' ','\t','-','_',',',';',':','|']

def find_separator(alphabet):
    """
    `find_separator()` gets the first char in the list above that is not in the alphabet, if no such character exists, an exception is raised.

    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **return** (*char*): the first separator that is not in the alphabet
    """
    for s in separator_list:
        if s not in alphabet:
            return s
    raise Exception(f"no separator available: all characters in {separator_list} are in the alphabet, maybe try to add one manually in the code")

def build_ND_matrix(dictionary, alphabet, N):
    """
    `build_ND_matrix()` initiate and fill a N dimension matrix (dict of dict object) by browsing the dictionary.

    * **dictionary** (*list*): the input dictionary (after processing)
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **N** (*int*): the dimension of the matrix
    * **return** (*dict*): the matrix representing the probability of letter chaining each other
    """
    separator = alphabet[-1]

    # initiate the matrix
    matrix = dict()
    for i in product(alphabet, repeat=N-1):
        index = ''.join(i)
        matrix[index] = dict()
        for l in alphabet:
            matrix[index][l] = 0

    # fill matrix with dictionary
    for word in dictionary:
        previous_letters = (N-1)*separator
        for current_letter in word:
            matrix[previous_letters][current_letter]+=1
            previous_letters = previous_letters[1:] + current_letter
        for i in range (1,N):
            matrix[previous_letters][separator]+=1
            previous_letters = previous_letters[1:] + separator
    return matrix

def generate_word_ND(matrix, alphabet, prefix, N):
    """
    `generate_word_ND()` generates a word used the `random.choices()` method uppon the ND matrix in the last letter column.

    * **matrix** (*dict*): the matrix representing the probability of letter chaining each other
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **prefix** (*str*): the prefix requested for the generated words
    * **N** (*int*): the dimension of the matrix
    * **return** (*str*): the generated word (length variable)
    """
    separator = alphabet[-1]

    previous_letters = (N-1)*separator
    if len(prefix) < N:
        previous_letters = previous_letters[len(prefix):] + prefix
    else:
        previous_letters = prefix[len(prefix)-N+1:]

    word = prefix
    new_letter = None
    while new_letter != separator:
        new_letter = choices(population=alphabet, weights=matrix[previous_letters].values(), k=1)[0]
        if new_letter != separator:
            word = word+new_letter
            previous_letters = previous_letters[1:] + new_letter
    return (word)

Functions

def build_ND_matrix(dictionary, alphabet, N)

build_ND_matrix() initiate and fill a N dimension matrix (dict of dict object) by browsing the dictionary.

  • dictionary (list): the input dictionary (after processing)
  • alphabet (list): the used alphabet (from input file or from dictionary)
  • N (int): the dimension of the matrix
  • return (dict): the matrix representing the probability of letter chaining each other
Expand source code
def build_ND_matrix(dictionary, alphabet, N):
    """
    `build_ND_matrix()` initiate and fill a N dimension matrix (dict of dict object) by browsing the dictionary.

    * **dictionary** (*list*): the input dictionary (after processing)
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **N** (*int*): the dimension of the matrix
    * **return** (*dict*): the matrix representing the probability of letter chaining each other
    """
    separator = alphabet[-1]

    # initiate the matrix
    matrix = dict()
    for i in product(alphabet, repeat=N-1):
        index = ''.join(i)
        matrix[index] = dict()
        for l in alphabet:
            matrix[index][l] = 0

    # fill matrix with dictionary
    for word in dictionary:
        previous_letters = (N-1)*separator
        for current_letter in word:
            matrix[previous_letters][current_letter]+=1
            previous_letters = previous_letters[1:] + current_letter
        for i in range (1,N):
            matrix[previous_letters][separator]+=1
            previous_letters = previous_letters[1:] + separator
    return matrix
def find_separator(alphabet)

find_separator() gets the first char in the list above that is not in the alphabet, if no such character exists, an exception is raised.

  • alphabet (list): the used alphabet (from input file or from dictionary)
  • return (char): the first separator that is not in the alphabet
Expand source code
def find_separator(alphabet):
    """
    `find_separator()` gets the first char in the list above that is not in the alphabet, if no such character exists, an exception is raised.

    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **return** (*char*): the first separator that is not in the alphabet
    """
    for s in separator_list:
        if s not in alphabet:
            return s
    raise Exception(f"no separator available: all characters in {separator_list} are in the alphabet, maybe try to add one manually in the code")
def generate_word_ND(matrix, alphabet, prefix, N)

generate_word_ND() generates a word used the random.choices() method uppon the ND matrix in the last letter column.

  • matrix (dict): the matrix representing the probability of letter chaining each other
  • alphabet (list): the used alphabet (from input file or from dictionary)
  • prefix (str): the prefix requested for the generated words
  • N (int): the dimension of the matrix
  • return (str): the generated word (length variable)
Expand source code
def generate_word_ND(matrix, alphabet, prefix, N):
    """
    `generate_word_ND()` generates a word used the `random.choices()` method uppon the ND matrix in the last letter column.

    * **matrix** (*dict*): the matrix representing the probability of letter chaining each other
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **prefix** (*str*): the prefix requested for the generated words
    * **N** (*int*): the dimension of the matrix
    * **return** (*str*): the generated word (length variable)
    """
    separator = alphabet[-1]

    previous_letters = (N-1)*separator
    if len(prefix) < N:
        previous_letters = previous_letters[len(prefix):] + prefix
    else:
        previous_letters = prefix[len(prefix)-N+1:]

    word = prefix
    new_letter = None
    while new_letter != separator:
        new_letter = choices(population=alphabet, weights=matrix[previous_letters].values(), k=1)[0]
        if new_letter != separator:
            word = word+new_letter
            previous_letters = previous_letters[1:] + new_letter
    return (word)