/*
 * pocket-tensor (c) 2018 Gustavo Valiente gustavo.valiente.m@gmail.com
 * Kerasify (c) 2016 Robert W. Rose
 *
 * MIT License, see LICENSE file.
 */

#include "pt_locally_connected_1d_layer.h"

#include <array>
#include "pt_dispatcher.h"
#include "pt_layer_data.h"
#include "pt_multiply_add.h"
#include "pt_logger.h"

namespace pt
{

namespace
{
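    // Runs the locally connected multiply-add in parallel: each Task computes
    // a contiguous range of output positions on one dispatcher thread.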
    template<class MultiplyAddType>
    void multiplyAddImpl(const Tensor& weights, const Tensor& biases, LayerData& layerData)
    {
        struct Task
        {
            const Tensor* weights;
            const Tensor* biases;
            LayerData* layerData;
            int threads;
            int taskId;

            void operator()() noexcept
            {
                const Tensor& in = layerData->in;
                Tensor& out = layerData->out;

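                // Strides derived from the tensor dims:
                //   inInc   - input values per spatial position
                //   bOutInc - output (and bias) values per output position
                //   wInc    - weight values per output position (all filters)
                //   wInc2   - weight values per single filter (one flattened receptive field)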
                const auto& ww = weights->getDims();
                const auto& iw = in.getDims();
                auto inInc = int(iw[1]);
                auto bOutInc = int(ww[1]);
                auto wInc = int(ww[2] * ww[1]);
                auto wInc2 = int(ww[2]);

                auto inIt = in.begin();
                auto outIt = out.begin();
                auto bIt = biases->begin();
                MultiplyAddType multiplyAdd;

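                // its == number of output positions; split them evenly across
                // the tasks, with the last task taking any remainder.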
                auto weightsBegin = weights->begin();
                int its = int(weights->end() - weightsBegin) / wInc;
                int taskIts = its / threads;
                int taskBegin = taskIts * taskId;
                int taskEnd;

                if(taskId == threads - 1)
                {
                    taskEnd = its;
                }
                else
                {
                    taskEnd = taskBegin + taskIts;
                }

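                // Advance the input, output and bias iterators to this task's first output position.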
                inIt += taskIts * taskId * inInc;
                outIt += taskIts * taskId * bOutInc;
                bIt += taskIts * taskId * bOutInc;

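                // For every output position owned by this task, compute
                // bias + dot(input window, filter weights) for each filter.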
                for(auto wIt = weightsBegin + (taskBegin * wInc), wEnd = weightsBegin + (taskEnd * wInc);
                    wIt != wEnd; wIt += wInc)
                {
                    auto outIt2 = outIt;
                    auto bIt2 = bIt;

                    for(auto wIt2 = wIt; wIt2 != wIt + wInc; wIt2 += wInc2)
                    {
                        *outIt2 = *bIt2 + multiplyAdd(&*inIt, &*wIt2, wInc2);
                        ++outIt2;
                        ++bIt2;
                    }

                    inIt += inInc;
                    outIt += bOutInc;
                    bIt += bOutInc;
                }
            }
        };

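        // Queue one Task per dispatcher thread and wait until all of them have finished.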
        std::array<Task, PT_MAX_CPU_THREADS> tasks;
        Dispatcher& dispatcher = layerData.dispatcher;
        auto threads = int(dispatcher.threads());

        for(int taskId = 0; taskId != threads; ++taskId)
        {
            Task& task = tasks[std::size_t(taskId)];
            task = Task{ &weights, &biases, &layerData, threads, taskId };
            dispatcher.add([&task]{ task(); });
        }

        dispatcher.join();
    }
}

std::unique_ptr<LocallyConnected1DLayer> LocallyConnected1DLayer::create(std::istream& stream)
{
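    // The serialized layer stores the weights tensor (3 dims), the biases
    // tensor (2 dims) and a nested activation layer, in that order.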
    auto weights = Tensor::create(3, stream);

    if(! weights)
    {
        PT_LOG_ERROR << "Weights tensor parse failed" << std::endl;
        return std::unique_ptr<LocallyConnected1DLayer>();
    }

    auto biases = Tensor::create(2, stream);

    if(! biases)
    {
        PT_LOG_ERROR << "Biases tensor parse failed" << std::endl;
        return std::unique_ptr<LocallyConnected1DLayer>();
    }

    auto activation = ActivationLayer::create(stream);

    if(! activation)
    {
        PT_LOG_ERROR << "Activation layer parse failed" << std::endl;
        return std::unique_ptr<LocallyConnected1DLayer>();
    }

    return std::unique_ptr<LocallyConnected1DLayer>(
                new LocallyConnected1DLayer(std::move(*weights), std::move(*biases),
                                            std::move(activation)));
}

bool LocallyConnected1DLayer::apply(LayerData& layerData) const
{
    Tensor& in = layerData.in;
    Tensor& out = layerData.out;
    const auto& iw = in.getDims();

    if(iw.size() != 2)
    {
        PT_LOG_ERROR << "Input tensor dims count must be 2" <<
                            " (input dims: " << VectorPrinter<std::size_t>{iw} << ")" << std::endl;
        return false;
    }

    const auto& ww = _weights.getDims();
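    // ww[2] packs one receptive field (kernel size * input channels), so
    // offset is the kernel size minus one: with stride 1, the input has to be
    // that many positions longer than the output.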
    auto offset = (ww[2] / iw[1]) - 1;

    if(iw[0] != ww[0] + offset)
    {
        PT_LOG_ERROR << "Input tensor dims[0] must be the same as weights dims[0] + offset" <<
                            " (input dims: " << VectorPrinter<std::size_t>{iw} << ")" <<
                            " (weights dims: " << VectorPrinter<std::size_t>{ww} << ")" <<
                            " (offset: " << offset << ")" << std::endl;
        return false;
    }

    out.resize(ww[0], ww[1]);

    auto threads = int(layerData.dispatcher.threads());
    auto threadSize = int(ww[2]) / threads;

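    // Select the multiply-add kernel from how the per-filter weight length
    // divides across the threads: unrolled SIMD, plain SIMD, or the scalar fallback.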
    if(PT_LOOP_UNROLLING_ENABLE && threadSize && threadSize % (Tensor::VectorSize * 2) == 0)
    {
        multiplyAddImpl<Vector2MultiplyAdd>(_weights, _biases, layerData);
    }
    else if(threadSize && threadSize % Tensor::VectorSize == 0)
    {
        multiplyAddImpl<VectorMultiplyAdd>(_weights, _biases, layerData);
    }
    else
    {
        multiplyAddImpl<ScalarMultiplyAdd>(_weights, _biases, layerData);
    }

    _activation->apply(out);
    return true;
}

LocallyConnected1DLayer::LocallyConnected1DLayer(Tensor&& weights, Tensor&& biases,
                                                 std::unique_ptr<ActivationLayer>&& activation) noexcept :
    _weights(std::move(weights)),
    _biases(std::move(biases)),
    _activation(std::move(activation))
{
}

}