Source code for ffc.uflacs.analysis.graph_ssa

# -*- coding: utf-8 -*-
# Copyright (C) 2011-2017 Martin Sandve Alnæs
#
# This file is part of UFLACS.
#
# UFLACS is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# UFLACS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with UFLACS. If not, see <http://www.gnu.org/licenses/>.

"""Algorithms for working with computational graphs."""

import numpy

from ufl.classes import (GeometricQuantity, ConstantValue,
                         Argument, Coefficient,
                         Grad, Restricted, Indexed,
                         MathFunction)
from ufl.checks import is_cellwise_constant
from ffc.log import error

from ffc.uflacs.analysis.crsarray import CRSArray


[docs]def default_partition_seed(expr, rank):
    """
    Partition 0: Piecewise constant on each cell (including Real and DG0 coefficients)
    Partition 1: Depends on x
    Partition 2: Depends on x and coefficients
    Partitions [3,3+rank): depend on argument with count partition-3
    """
    # TODO: Use named constants for the partition numbers here

    modifiers = (Grad, Restricted, Indexed)  # FIXME: Add CellAvg, FacetAvg types here, others?
    if isinstance(expr, modifiers):
        return default_partition_seed(expr.ufl_operands[0], rank)

    elif isinstance(expr, Argument):
        ac = expr.number()
        assert 0 <= ac < rank
        poffset = 3
        p = poffset + ac
        return p

    elif isinstance(expr, Coefficient):
        if is_cellwise_constant(expr):  # This is crap, doesn't include grad modifier
            return 0
        else:
            return 2

    elif isinstance(expr, GeometricQuantity):
        if is_cellwise_constant(expr):  # This is crap, doesn't include grad modifier
            return 0
        else:
            return 1

    elif isinstance(expr, ConstantValue):
        return 0

    else:
        error("Don't know how to handle %s" % expr)


[docs]def mark_partitions(V, active, dependencies, rank,
                    partition_seed=default_partition_seed,
                    partition_combiner=max):
    """FIXME: Cover this with tests.

    Input:
    - V            - Array of expressions.
    - active       - Boolish array.
    - dependencies - CRSArray with V dependencies.
    - partition_seed - Policy for determining the partition of a terminalish.
    - partition_combiner - Policy for determinging the partition of an operator.

    Output:
    - partitions   - Array of partition int ids.
    """
    n = len(V)
    assert len(active) == n
    assert len(dependencies) == n
    partitions = numpy.zeros(n, dtype=int)
    for i, v in enumerate(V):
        deps = dependencies[i]
        if active[i]:
            if len(deps):
                p = partition_combiner([partitions[d] for d in deps])
            else:
                p = partition_seed(v, rank)
        else:
            p = -1
        partitions[i] = p
    return partitions


"""
def build_factorized_partitions():
    num_points = [3]

    # dofrange = (begin, end)
    # dofblock = ()  |  (dofrange0,)  |  (dofrange0, dofrange1)

    partitions = {}

    # partitions["piecewise"] = partition of expressions independent of quadrature and argument loops
    partitions["piecewise"] = []

    # partitions["varying"][np] = partition of expressions dependent on np quadrature but independent of argument loops
    partitions["varying"] = dict((np, []) for np in num_points)

    # partitions["argument"][np][iarg][dofrange] = partition depending on this dofrange of argument iarg
    partitions["argument"] = dict((np, [dict() for i in range(rank)]) for np in num_points)

    # partitions["integrand"][np][dofrange] = partition depending on this dofrange of argument iarg
    partitions["integrand"] = dict((np, dict()) for np in num_points)
"""

[docs]def compute_dependency_count(dependencies):
    """FIXME: Test"""
    n = len(dependencies)
    depcount = numpy.zeros(n, dtype=int)
    for i in range(n):
        for d in dependencies[i]:
            depcount[d] += 1
    return depcount


[docs]def invert_dependencies(dependencies, depcount):
    """FIXME: Test"""
    n = len(dependencies)
    m = sum(depcount)
    invdeps = [()] * n
    for i in range(n):
        for d in dependencies[i]:
            invdeps[d] = invdeps[d] + (i,)
    return CRSArray.from_rows(invdeps, n, m, int)


[docs]def default_cache_score_policy(vtype, ndeps, ninvdeps, partition):
    # Start at 1 and then multiply with various heuristic factors
    s = 1

    # Is the type particularly expensive to compute?
    expensive = (MathFunction,)
    if vtype in expensive:  # Could make a type-to-cost mapping, but this should do.
        s *= 20

    # More deps roughly correlates to more operations
    s *= ndeps

    # If it is reused several times let that count significantly
    s *= ninvdeps ** 3  # 1->1, 2->8, 3->27

    # Finally let partition count for something?
    # Or perhaps we need some more information, such as
    # when x from outer loop is used by y within inner loop.

    return s


[docs]def compute_cache_scores(V, active, dependencies, inverse_dependencies, partitions,
                         cache_score_policy=default_cache_score_policy):
    """FIXME: Cover with tests.

    TODO: Experiment with heuristics later when we have functional code generation.
    """
    n = len(V)
    score = numpy.zeros(n, dtype=int)
    for i, v in enumerate(V):
        if active[i]:
            deps = dependencies[i]
            ndeps = len(deps)
            invdeps = inverse_dependencies[i]
            ninvdeps = len(invdeps)
            p = partitions[i]
            s = cache_score_policy(type(v), ndeps, ninvdeps, p)
        else:
            s = -1
        score[i] = s
    return score


import heapq


[docs]def allocate_registers(active, partitions, targets,
                       scores, max_registers, score_threshold):
    """FIXME: Cover with tests.

    TODO: Allow reuse of registers, reducing memory usage.

    TODO: Probably want to sort within partitions.
    """
    # Check for consistent number of variables
    n = len(scores)
    assert n == len(active)
    assert n == len(partitions)
    num_targets = len(targets)

    # Analyse scores
    #min_score = min(scores)
    #max_score = max(scores)
    #mean_score = sum(scores) // n

    # Can allocate a number of registers up to given threshold
    num_to_allocate = max(num_targets,
                          min(max_registers, n) - num_targets)
    to_allocate = set()

    # For now, just using an arbitrary heuristic algorithm to select m largest scores
    queue = [(-scores[i], i) for i in range(n) if active[i]]
    heapq.heapify(queue)

    # Always allocate registers for all targets, for simplicity
    # in the rest of the code generation pipeline
    to_allocate.update(targets)

    # Allocate one register each for max_registers largest symbols
    for r in range(num_to_allocate):
        s, i = heapq.heappop(queue)
        if -s <= score_threshold:
            break
        if i in targets:
            continue
        to_allocate.add(i)

    registers_used = len(to_allocate)

    # Some consistency checks
    assert num_to_allocate <= max_registers
    assert registers_used <= num_to_allocate + len(targets)
    assert registers_used <= max(max_registers, len(targets))

    # Mark allocations
    allocations = numpy.zeros(n, dtype=int)
    allocations[:] = -1
    for r, i in enumerate(sorted(to_allocate)):
        allocations[i] = r

    # Possible data structures for improved register allocations
    # register_status = numpy.zeros(max_registers, dtype=int)

    # Stack/set of free registers (should wrap in stack abstraction):
    # free_registers = numpy.zeros(max_registers, dtype=int)
    # num_free_registers = max_registers
    # free_registers[:] = reversed(xrange(max_registers))

    return allocations