# master-thesis/python/richard_hops/combLib.py

from math import factorial as fac
import numpy as np

# Integer dtype shared by all occupation arrays ("k-vectors") in this module.
# init_occupation allocates with it, and occupation_to_bin / binkey_to_nparray
# use it to define the byte layout of the binary keys, so those keys are only
# comparable between arrays created with the same int_type.
int_type = np.int16

import functools

def init_occupation(n, two_dim=False, oc_iter=None):
    """
        build an occupation array (the "k-vector" of the hierarchy)

        the result has n entries of dtype int_type, all zero
        unless oc_iter is given

        oc_iter: optional iterable; its items are written to the
                 leading entries of the array in iteration order,
                 entries not reached by oc_iter stay zero

        two_dim: if True the result has shape (1, n) instead of (n,),
                 which allows to vstack several occupation arrays
    """
    occupation = np.zeros(shape=n, dtype=int_type)

    # an absent oc_iter behaves like an empty one: nothing is written
    for pos, value in enumerate(() if oc_iter is None else oc_iter):
        occupation[pos] = value

    return occupation.reshape((1, n)) if two_dim else occupation

def dist_k_elements_in_n_slots(k, n, k_max_restrictions=None):
    """
        choose k elements from an n element set including repetition,
        not caring about the order.

        it is therefore sufficient to state how often each element occurs.
        the number notation (n1, n2, n3, ...) is used, meaning the first
        element occurs n1 times, the second n2 times and so on.

        if 'k_max_restrictions' is given (one entry per slot), only
        combinations with n_i <= k_max_restrictions[i] are generated

        returns a 2d array with one combination per row (n columns),
        or None if no combination fulfills the restrictions

        picture: k marbles have to be distributed on n slots.

        the recursive procedure is as follows:

            first place all marbles in the first slot (if allowed),
            which gives one possibility.

            then decrease the number of marbles in the first slot one by
            one and distribute the remaining marbles on the remaining
            n-1 slots. the last step has zero marbles in the first slot
            and all marbles distributed on the remaining n-1 slots.
    """
    if k_max_restrictions is None:
        first_cap = k
        tail_caps = None
    else:
        first_cap = k_max_restrictions[0]
        tail_caps = k_max_restrictions[1:]

    # the pieces of the result are collected here and stacked once at the
    # end; stacking inside the loop would copy the growing result each time
    chunks = []

    if k <= first_cap:
        # all marbles in the first slot, none left for the rest
        all_in_first = init_occupation(n, two_dim=True)
        all_in_first[0, 0] = k
        chunks.append(all_in_first)

    if n > 1:
        # marbles in the first slot go down from k-1 to 0, or from the
        # restriction of the first slot if that is smaller than k-1
        for in_first in range(min(k - 1, first_cap), -1, -1):
            left_over = k - in_first

            # the left over marbles only grow from here on, so once they
            # exceed what the remaining slots may hold we are done
            if tail_caps is not None and left_over > sum(tail_caps):
                break

            tail_rows = dist_k_elements_in_n_slots(left_over, n - 1, tail_caps)

            # prepend a column holding in_first to every row of the
            # distributions found for the remaining n-1 slots
            first_col = in_first * np.ones(shape=(tail_rows.shape[0], 1),
                                           dtype=int_type)
            chunks.append(np.hstack((first_col, tail_rows)))

    if not chunks:
        return None
    return np.vstack(chunks)

def all_comb(n, k_max, sum_k_max='simplex'):
    """
        calls dist_k_elements_in_n_slots for fixed n and all
        integer k from 1 up to sum_k_max, and stacks the results below
        the all-zero k-vector

        k_max is either a single value used for all n slots or a
        sequence of length n with one restriction per slot
        (RuntimeError if the length differs from n)

        sum_k_max may be a specific integer value or one of the following
            'simplex': all_comb will cover the corner of an n-cube with the
                       restrictions given by k_max
                       yields: sum_k_max = max(k_max)
            'cuboid' : all_comb covers the full n-dim hyper cuboid defined by
                       the restrictions imposed by k_max
                       yields: sum_k_max = sum(k_max)

        this generates all possible k-vectors needed by the hierarchy
        such that k[i] <= k_max[i] and sum(k) <= sum_k_max

        returns a 2d array with one k-vector per row
    """
    try:
        l = len(k_max)
    except TypeError:
        # scalar restriction: same limit for every slot
        k_max = [k_max]*n
    else:
        if l != n:
            raise RuntimeError("len(k_max)={} must be n={}".format(l, n))

    if sum_k_max == 'simplex':
        sum_k_max = max(k_max)
    elif sum_k_max == 'cuboid':
        sum_k_max = sum(k_max)
    else:
        sum_k_max = int(sum_k_max)

    # first row is the all-zero k-vector (k = 0)
    blocks = [init_occupation(n, two_dim=True)]

    for k in range(1, sum_k_max + 1):
        rows = dist_k_elements_in_n_slots(k, n, k_max)
        # None means the restrictions admit no k-vector with sum k
        # (happens if an explicit sum_k_max exceeds sum(k_max));
        # there is nothing to add for this k
        if rows is None:
            continue
        blocks.append(rows)

    return np.vstack(blocks)

def get_k_of_occupation(oc):
    """
        hierarchy depth k of an occupation number (k-vector)

        the depth is the sum over all entries of oc
    """
    depth = sum(oc)
    return depth

def number_of_all_combinations_old(n, k_max):
    """
        closed form for the number of k-vectors of dimension n
        with hierarchy depth up to k_max:

            (k_max + n)! / (n! * k_max!)
    """
    numerator = fac(k_max + n)
    denominator = fac(n) * fac(k_max)
    return numerator // denominator

def number_of_all_combinations(n, k_max, sum_k_max='simplex'):
    """
        number of n-dimensional k-vectors with
        0 <= k[i] <= k_max[i] and sum(k) <= sum_k_max

        k_max is either a single value used for all n slots or a
        sequence of length n with one restriction per slot
        (RuntimeError if the length differs from n)

        sum_k_max may be a specific integer value (anything int()
        accepts) or one of the following
            'simplex': sum_k_max = max(k_max)
            'cuboid' : sum_k_max = sum(k_max)
    """
    if hasattr(k_max, '__len__'):
        if len(k_max) != n:
            raise RuntimeError("len(k_max)={} must be n={}".format(len(k_max), n))
        # lru_cache on _comb_with_trunc needs a hashable argument
        k_max = tuple(k_max)
    else:
        k_max = (k_max,)*n

    if sum_k_max == 'simplex':
        sum_k_max = max(k_max)
    elif sum_k_max == 'cuboid':
        sum_k_max = sum(k_max)
    else:
        sum_k_max = int(sum_k_max)

    # add up the counts for every total k = 0 ... sum_k_max
    return sum(_comb_with_trunc(n, k, k_max) for k in range(sum_k_max + 1))


@functools.lru_cache(maxsize=1024, typed=False)
def _comb_with_trunc(n, k, m):
    """
        calculates the number of possibilities of how to
        distribute k marbles on n slots where each slot i
        may have a maximum of m[i] marbles.

        m must be hashable (a tuple) because the results are cached
    """
    if n == 1:
        # a single slot has to take all k marbles
        return 0 if k > m[0] else 1

    # put 0 ... min(k, m[0]) marbles in the first slot and count the
    # ways to distribute what is left on the remaining n-1 slots
    first_max = min(k, m[0])
    rest = m[1:]
    return sum(_comb_with_trunc(n - 1, k - in_first, rest)
               for in_first in range(first_max + 1))

def occupation_to_bin(oc):
    """
        binary representation of occupation arrays

        oc is converted to an array of dtype int_type first

        a one dimensional oc yields the bytes of its buffer,
        otherwise a list with the bytes of oc[i] for each
        index i along the first axis is returned
    """
    arr = np.asarray(oc, dtype=int_type)
    if arr.ndim != 1:
        return [arr[row].data.tobytes() for row in range(arr.shape[0])]
    return arr.data.tobytes()

def binkey_to_nparray(binkey):
    """
        convert a binary key (bytes as produced by occupation_to_bin
        for a single occupation array) back to a one dimensional
        array of dtype int_type
    """
    # np.fromstring in binary mode is deprecated in favour of np.frombuffer.
    # frombuffer returns a read-only view on binkey, so copy to hand out
    # an independent, writable array as before.
    return np.frombuffer(binkey, dtype=int_type).copy()

def idx_dict(n, k_max, sum_k_max='simplex'):
    """
        hashtable look up from k-vector to index

        the keys are the binary data of the rows of
        all_comb(n, k_max, sum_k_max) as returned by occupation_to_bin,
        the values are the corresponding row indices

        see 'all_comb' for details on n, k_max and sum_k_max
    """
    k_vectors = all_comb(n, k_max, sum_k_max)
    bin_keys = occupation_to_bin(k_vectors)
    return {key: idx for key, idx in zip(bin_keys, range(len(k_vectors)))}

def occupation_dec(oc, l):
    """
        copy of the occupation oc with entry l decreased by one

        returns None if oc[l] is zero; oc itself is not modified
    """
    lowered = init_occupation(n=len(oc), oc_iter=oc)
    if oc[l] != 0:
        lowered[l] -= 1
        return lowered
    return None

def occupation_inc(oc, l):
    """
        copy of the occupation oc with entry l increased by one

        oc itself is not modified
    """
    raised = init_occupation(n=len(oc), oc_iter=oc)
    raised[l] += 1
    return raised

def occupation_to_set(oc):
    """
        convert an occupation to the tuple of its element labels

        labels count from 1; label i+1 is repeated oc[i] times and
        the labels appear in ascending order,
        e.g. (2, 0, 1) -> (1, 1, 3)
    """
    labels = []
    for label, count in enumerate(oc, start=1):
        labels.extend((label,)*count)

    return tuple(labels)

def set_to_occupation(s, n):
    """
        convert element labels to an occupation with n entries

        s is an iterable of labels counting from 1; each occurrence
        of label si increases entry si-1 of the occupation by one
    """
    occupation = init_occupation(n)
    for label in s:
        slot = label - 1
        occupation[slot] += 1

    return occupation