/*
 * PocketTensor (c) 2018 Gustavo Valiente gustavo.valiente.m@gmail.com
 * Kerasify (c) 2016 Robert W. Rose
 *
 * MIT License, see LICENSE file.
 */

#include "pt_tensor.h"

#include <array>
#include <functional>
#include <numeric>

#include "pt_add.h"
#include "pt_multiply.h"
#include "pt_multiply_add.h"
#include "pt_parser.h"
#include "pt_dispatcher.h"

namespace pt
{

namespace
{
    template<class AddType>
Gustavo Valiente's avatar
Gustavo Valiente committed
24
    void addImpl(const Tensor& in, Tensor& out, Dispatcher& dispatcher)
Gustavo Valiente's avatar
Gustavo Valiente committed
25
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
26 27 28 29 30 31 32 33 34
        struct Task
        {
            const Tensor* in;
            Tensor* out;
            int threads;
            int taskId;

            void operator()() noexcept
            {
Gustavo Valiente's avatar
Gustavo Valiente committed
35 36 37
                auto its = int(in->getSize());
                auto taskIts = its / threads;
                auto taskBegin = taskIts * taskId;
Gustavo Valiente's avatar
Gustavo Valiente committed
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
                int taskEnd;

                if(taskId == threads - 1)
                {
                    taskEnd = its;
                }
                else
                {
                    taskEnd = taskBegin + taskIts;
                }

                auto inBegin = in->begin() + taskBegin;
                auto outBegin = out->begin() + taskBegin;
                AddType()(&*inBegin, &*outBegin, taskEnd - taskBegin);
            }
        };

Gustavo Valiente's avatar
Gustavo Valiente committed
55
        std::array<Task, PT_MAX_CPU_THREADS> tasks;
Gustavo Valiente's avatar
Gustavo Valiente committed
56
        auto threads = int(dispatcher.threads());
Gustavo Valiente's avatar
Gustavo Valiente committed
57 58 59 60 61 62 63 64 65

        for(int taskId = 0; taskId != threads; ++taskId)
        {
            Task& task = tasks[std::size_t(taskId)];
            task = Task{ &in, &out, threads, taskId };
            dispatcher.add([&task]{ task(); });
        }

        dispatcher.join();
Gustavo Valiente's avatar
Gustavo Valiente committed
66 67 68
    }

    template<class MultiplyType>
Gustavo Valiente's avatar
Gustavo Valiente committed
69
    void multiplyImpl(const Tensor& in, Tensor& out, Dispatcher& dispatcher)
Gustavo Valiente's avatar
Gustavo Valiente committed
70
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
71 72 73 74 75 76
        struct Task
        {
            const Tensor* in;
            Tensor* out;
            int threads;
            int taskId;
Gustavo Valiente's avatar
Gustavo Valiente committed
77

Gustavo Valiente's avatar
Gustavo Valiente committed
78 79
            void operator()() noexcept
            {
Gustavo Valiente's avatar
Gustavo Valiente committed
80 81 82
                auto its = int(in->getSize());
                auto taskIts = its / threads;
                auto taskBegin = taskIts * taskId;
Gustavo Valiente's avatar
Gustavo Valiente committed
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
                int taskEnd;

                if(taskId == threads - 1)
                {
                    taskEnd = its;
                }
                else
                {
                    taskEnd = taskBegin + taskIts;
                }

                auto inBegin = in->begin() + taskBegin;
                auto outBegin = out->begin() + taskBegin;
                MultiplyType()(&*inBegin, &*outBegin, taskEnd - taskBegin);
            }
        };
Gustavo Valiente's avatar
Gustavo Valiente committed
99

Gustavo Valiente's avatar
Gustavo Valiente committed
100
        std::array<Task, PT_MAX_CPU_THREADS> tasks;
Gustavo Valiente's avatar
Gustavo Valiente committed
101
        auto threads = int(dispatcher.threads());
Gustavo Valiente's avatar
Gustavo Valiente committed
102

Gustavo Valiente's avatar
Gustavo Valiente committed
103
        for(int taskId = 0; taskId != threads; ++taskId)
Gustavo Valiente's avatar
Gustavo Valiente committed
104
        {
Gustavo Valiente's avatar
Gustavo Valiente committed
105 106 107
            Task& task = tasks[std::size_t(taskId)];
            task = Task{ &in, &out, threads, taskId };
            dispatcher.add([&task]{ task(); });
Gustavo Valiente's avatar
Gustavo Valiente committed
108 109
        }

Gustavo Valiente's avatar
Gustavo Valiente committed
110 111 112 113 114 115 116
        dispatcher.join();
    }

    // Row-parallel matrix product with the second operand transposed:
    // out(i, j) = multiplyAdd(row i of a, row j of b), where each task
    // processes a contiguous band of output rows.
    template<class MultiplyAddType>
    void dotImpl(const Tensor& a, const Tensor& b, Tensor& out, Dispatcher& dispatcher)
    {
        struct Task
        {
            const Tensor* a;
            const Tensor* b;
            Tensor* out;
            int threads;
            int taskId;

            void operator()() noexcept
            {
                // outInc = output row width; its = number of output rows.
                auto outInc = int(out->getDims()[1]);
                int its = int(out->end() - out->begin()) / outInc;
                int taskIts = its / threads;
                int taskBegin = taskIts * taskId;
                int taskEnd;

                // The last task also absorbs the division remainder.
                if(taskId == threads - 1)
                {
                    taskEnd = its;
                }
                else
                {
                    taskEnd = taskBegin + taskIts;
                }

                // iInc = length of one row of `a` (and of `b`): the dot length.
                auto aIt = a->begin();
                auto iInc = int(a->getDims()[1]);
                auto bBegin = b->begin();
                auto oBegin = out->begin();
                MultiplyAddType multiplyAdd;
                // Skip the rows of `a` handled by lower-numbered tasks.
                aIt += taskIts * taskId * iInc;

                // Outer loop: one output row per iteration of `a`'s rows.
                for(auto outIt = oBegin + (taskBegin * outInc), outEnd = oBegin + (taskEnd * outInc);
                    outIt != outEnd; outIt += outInc)
                {
                    auto bIt = bBegin;

                    // Inner loop: dot the current `a` row against every `b` row.
                    for(auto outIt2 = outIt; outIt2 != outIt + outInc; ++outIt2)
                    {
                        *outIt2 = multiplyAdd(&*aIt, &*bIt, iInc);
                        bIt += iInc;
                    }

                    aIt += iInc;
                }
            }
        };

        // Tasks live on the stack for the whole dispatch; join() below
        // ensures the reference-capturing lambdas never outlive them.
        std::array<Task, PT_MAX_CPU_THREADS> tasks;
        auto threads = int(dispatcher.threads());

        for(int taskId = 0; taskId != threads; ++taskId)
        {
            Task& task = tasks[std::size_t(taskId)];
            task = Task{ &a, &b, &out, threads, taskId };
            dispatcher.add([&task]{ task(); });
        }

        dispatcher.join();
    }

    template<class MultiplyAddType>
Gustavo Valiente's avatar
Gustavo Valiente committed
178
    void multiplyAddImpl(const Tensor& scale, const Tensor& in, Tensor& out, Dispatcher& dispatcher)
Gustavo Valiente's avatar
Gustavo Valiente committed
179
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
180 181 182 183 184 185 186 187 188 189
        struct Task
        {
            const Tensor* scale;
            const Tensor* in;
            Tensor* out;
            int threads;
            int taskId;

            void operator()() noexcept
            {
Gustavo Valiente's avatar
Gustavo Valiente committed
190 191 192
                auto its = int(in->getSize());
                auto taskIts = its / threads;
                auto taskBegin = taskIts * taskId;
Gustavo Valiente's avatar
Gustavo Valiente committed
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
                int taskEnd;

                if(taskId == threads - 1)
                {
                    taskEnd = its;
                }
                else
                {
                    taskEnd = taskBegin + taskIts;
                }

                auto inBegin = in->begin() + taskBegin;
                auto scaleBegin = scale->begin() + taskBegin;
                auto outBegin = out->begin() + taskBegin;
                MultiplyAddType()(&*inBegin, &*scaleBegin, &*outBegin, taskEnd - taskBegin);
            }
        };

Gustavo Valiente's avatar
Gustavo Valiente committed
211
        std::array<Task, PT_MAX_CPU_THREADS> tasks;
Gustavo Valiente's avatar
Gustavo Valiente committed
212
        auto threads = int(dispatcher.threads());
Gustavo Valiente's avatar
Gustavo Valiente committed
213 214 215 216 217 218 219 220 221

        for(int taskId = 0; taskId != threads; ++taskId)
        {
            Task& task = tasks[std::size_t(taskId)];
            task = Task{ &scale, &in, &out, threads, taskId };
            dispatcher.add([&task]{ task(); });
        }

        dispatcher.join();
Gustavo Valiente's avatar
Gustavo Valiente committed
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
    }
}

// Deserializes a tensor from a binary stream: `dims` dimension sizes first
// (each parsed via Parser as an unsigned int), then the flat element data.
// Returns an empty unique_ptr on any parse or validation failure.
std::unique_ptr<Tensor> Tensor::create(std::size_t dims, std::istream& stream)
{
    if(dims == 0)
    {
        PT_LOG_ERROR << "Invalid dims value: " << dims << std::endl;
        return std::unique_ptr<Tensor>();
    }

    std::unique_ptr<Tensor> tensor(new Tensor());
    tensor->_dims.reserve(dims);

    for(std::size_t i = 0; i != dims; ++i)
    {
        unsigned int stride = 0;

        if(! Parser::parse(stream, stride))
        {
            PT_LOG_ERROR << "Stride parse failed" << std::endl;
            return std::unique_ptr<Tensor>();
        }

        // A zero-sized dimension would make the tensor empty/invalid.
        if(stride == 0)
        {
            PT_LOG_ERROR << "Invalid stride value: " << stride << std::endl;
            return std::unique_ptr<Tensor>();
        }

        tensor->_dims.push_back(stride);
    }

    std::size_t size = tensor->getSize();

    #if PT_DOUBLE_ENABLE
        // Serialized data is single-precision; in double mode, parse into a
        // float staging buffer and widen each element to FloatType.
        std::vector<float> data(size);
        tensor->_data.resize(size);

        if(! Parser::parse(stream, data.data(), size))
        {
            PT_LOG_ERROR << "Data parse failed" << std::endl;
            return std::unique_ptr<Tensor>();
        }

        for(std::size_t index = 0; index != size; ++index)
        {
            tensor->_data[index] = FloatType(data[index]);
        }
    #else
        // Single-precision build: parse directly into the tensor storage.
        tensor->_data.resize(size);

        if(! Parser::parse(stream, tensor->_data.data(), size))
        {
            PT_LOG_ERROR << "Data parse failed" << std::endl;
            return std::unique_ptr<Tensor>();
        }
    #endif

    return tensor;
}

// Copies this tensor's shape and data into `other`.
// Fixed: the previous clear-then-reserve-then-insert sequence destroyed the
// source when `other` aliased *this (t.copyTo(t) first cleared _dims/_data
// and then inserted from the now-empty containers). Plain vector
// copy-assignment is self-assignment-safe, reuses existing capacity, and
// produces the same contents for the non-aliasing case.
void Tensor::copyTo(Tensor& other) const
{
    other._dims = _dims;
    other._data = _data;
}

// Reshapes this tensor to a rank-1 tensor of i elements.
// Existing data is kept where sizes overlap; new elements are value-initialized.
void Tensor::resize(std::size_t i)
{
    PT_ASSERT(i > 0);

    _dims.assign(1, i);
    _data.resize(i);
}

// Reshapes this tensor to a rank-2 tensor of i x j elements.
void Tensor::resize(std::size_t i, std::size_t j)
{
    PT_ASSERT(i > 0);
    PT_ASSERT(j > 0);

    const std::size_t newDims[] = { i, j };
    _dims.assign(newDims, newDims + 2);
    _data.resize(i * j);
}

// Reshapes this tensor to a rank-3 tensor of i x j x k elements.
void Tensor::resize(std::size_t i, std::size_t j, std::size_t k)
{
    PT_ASSERT(i > 0);
    PT_ASSERT(j > 0);
    PT_ASSERT(k > 0);

    const std::size_t newDims[] = { i, j, k };
    _dims.assign(newDims, newDims + 3);
    _data.resize(i * j * k);
}

// Reshapes this tensor to a rank-4 tensor of i x j x k x l elements.
void Tensor::resize(std::size_t i, std::size_t j, std::size_t k, std::size_t l)
{
    PT_ASSERT(i > 0);
    PT_ASSERT(j > 0);
    PT_ASSERT(k > 0);
    PT_ASSERT(l > 0);

    const std::size_t newDims[] = { i, j, k, l };
    _dims.assign(newDims, newDims + 4);
    _data.resize(i * j * k * l);
}

// Overwrites every element of the tensor with the given value.
void Tensor::fill(Type value) noexcept
{
    for(auto it = begin(), last = end(); it != last; ++it)
    {
        *it = value;
    }
}

// Collapses all dimensions into a single one; the element data is untouched.
void Tensor::flatten()
{
    PT_ASSERT(isValid());

    // Capture the total size before replacing the dimension list.
    const auto flatSize = getSize();
    _dims.assign(1, flatSize);
}

// Extracts sub-tensor `row` along the first dimension into `out`.
// For a tensor of dims {d0, d1, ..., dn}, out gets dims {d1, ..., dn} and
// the corresponding slice of the data.
// Fixed: the pack size (elements per row) is the PRODUCT of the trailing
// dimensions, not their SUM. The previous accumulate with std::size_t(0)
// and implicit operator+ only worked by coincidence for rank-2 tensors
// (a single trailing dimension); for rank >= 3 it sliced with a wrong,
// too-small row size.
void Tensor::unpack(std::size_t row, Tensor& out) const
{
    PT_ASSERT(isValid());
    PT_ASSERT(_dims.size() >= 2);
    PT_ASSERT(row < _dims[0]);

    auto packSize = std::accumulate(_dims.begin() + 1, _dims.end(), std::size_t(1),
                                    std::multiplies<std::size_t>());
    auto base = row * packSize;
    auto first = begin() + long(base);
    auto last = first + long(packSize);

    out._dims.clear();
    out._dims.reserve(_dims.size() - 1);
    out._dims.insert(out._dims.end(), _dims.begin() + 1, _dims.end());

    out._data.clear();
    out._data.reserve(std::size_t(last - first));
    out._data.insert(out._data.end(), first, last);
}

// Like unpack(), but the result keeps a leading dimension of size 1,
// i.e. dims {d0, d1, ..., dn} yield {1, d1, ..., dn}.
void Tensor::select(std::size_t row, Tensor& out) const
{
    unpack(row, out);
    out._dims.insert(out._dims.begin(), std::size_t(1));
}

Gustavo Valiente's avatar
Gustavo Valiente committed
386
void Tensor::add(const Tensor& other, Tensor& out, Dispatcher& dispatcher) const
Gustavo Valiente's avatar
Gustavo Valiente committed
387 388 389
{
    PT_ASSERT(_dims == other._dims);

Gustavo Valiente's avatar
Gustavo Valiente committed
390 391
    auto threads = int(dispatcher.threads());
    auto threadSize = int(getSize()) / threads;
Gustavo Valiente's avatar
Gustavo Valiente committed
392 393
    copyTo(out);

Gustavo Valiente's avatar
Gustavo Valiente committed
394
    if(PT_LOOP_UNROLLING_ENABLE && threadSize && threadSize % (Tensor::VectorSize * 2) == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
395
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
396
        addImpl<Vector2Add>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
397
    }
Gustavo Valiente's avatar
Gustavo Valiente committed
398
    else if(threadSize && threadSize % Tensor::VectorSize == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
399
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
400
        addImpl<VectorAdd>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
401 402 403
    }
    else
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
404
        addImpl<ScalarAdd>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
405 406 407
    }
}

Gustavo Valiente's avatar
Gustavo Valiente committed
408
void Tensor::multiply(const Tensor& other, Tensor& out, Dispatcher& dispatcher) const
Gustavo Valiente's avatar
Gustavo Valiente committed
409 410 411 412
{
    PT_ASSERT(isValid());
    PT_ASSERT(_dims == other._dims);

Gustavo Valiente's avatar
Gustavo Valiente committed
413 414
    auto threads = int(dispatcher.threads());
    auto threadSize = int(getSize()) / threads;
Gustavo Valiente's avatar
Gustavo Valiente committed
415 416
    copyTo(out);

Gustavo Valiente's avatar
Gustavo Valiente committed
417
    if(PT_LOOP_UNROLLING_ENABLE && threadSize && threadSize % (Tensor::VectorSize * 2) == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
418
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
419
        multiplyImpl<Vector2Multiply>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
420
    }
Gustavo Valiente's avatar
Gustavo Valiente committed
421
    else if(threadSize && threadSize % Tensor::VectorSize == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
422
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
423
        multiplyImpl<VectorMultiply>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
424 425 426
    }
    else
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
427
        multiplyImpl<ScalarMultiply>(other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
428 429 430
    }
}

Gustavo Valiente's avatar
Gustavo Valiente committed
431
void Tensor::dot(const Tensor& other, Tensor& out, Dispatcher& dispatcher) const
Gustavo Valiente's avatar
Gustavo Valiente committed
432 433 434 435 436
{
    PT_ASSERT(_dims.size() == 2);
    PT_ASSERT(other._dims.size() == 2);
    PT_ASSERT(_dims[1] == other._dims[1]);

Gustavo Valiente's avatar
Gustavo Valiente committed
437
    out.resize(_dims[0], other._dims[0]);
Gustavo Valiente's avatar
Gustavo Valiente committed
438

Gustavo Valiente's avatar
Gustavo Valiente committed
439 440
    auto threads = int(dispatcher.threads());
    auto threadSize = int(_dims[1]) / threads;
Gustavo Valiente's avatar
Gustavo Valiente committed
441

Gustavo Valiente's avatar
Gustavo Valiente committed
442
    if(PT_LOOP_UNROLLING_ENABLE && threadSize && threadSize % (Tensor::VectorSize * 2) == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
443
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
444
        dotImpl<Vector2MultiplyAdd>(*this, other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
445
    }
Gustavo Valiente's avatar
Gustavo Valiente committed
446
    else if(threadSize && threadSize % Tensor::VectorSize == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
447
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
448
        dotImpl<VectorMultiplyAdd>(*this, other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
449 450 451
    }
    else
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
452
        dotImpl<ScalarMultiplyAdd>(*this, other, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
453 454 455
    }
}

Gustavo Valiente's avatar
Gustavo Valiente committed
456
void Tensor::fma(const Tensor& scale, const Tensor& bias, Tensor& out, Dispatcher& dispatcher) const
Gustavo Valiente's avatar
Gustavo Valiente committed
457 458 459 460
{
    PT_ASSERT(_dims == scale._dims);
    PT_ASSERT(_dims == bias._dims);

Gustavo Valiente's avatar
Gustavo Valiente committed
461 462
    auto threads = int(dispatcher.threads());
    auto threadSize = int(getSize()) / threads;
Gustavo Valiente's avatar
Gustavo Valiente committed
463 464
    bias.copyTo(out);

Gustavo Valiente's avatar
Gustavo Valiente committed
465
    if(PT_LOOP_UNROLLING_ENABLE && threadSize && threadSize % (Tensor::VectorSize * 2) == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
466
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
467
        multiplyAddImpl<Vector2MultiplyAdd>(scale, *this, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
468
    }
Gustavo Valiente's avatar
Gustavo Valiente committed
469
    else if(threadSize && threadSize % Tensor::VectorSize == 0)
Gustavo Valiente's avatar
Gustavo Valiente committed
470
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
471
        multiplyAddImpl<VectorMultiplyAdd>(scale, *this, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
472 473 474
    }
    else
    {
Gustavo Valiente's avatar
Gustavo Valiente committed
475
        multiplyAddImpl<ScalarMultiplyAdd>(scale, *this, out, dispatcher);
Gustavo Valiente's avatar
Gustavo Valiente committed
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
    }
}

// Drops every dimension equal to 1 except the innermost (last) one, which
// is always preserved so the tensor keeps at least one dimension.
// e.g. {1, 2, 1, 3} -> {2, 3}, and {1, 1} -> {1}.
void Tensor::eraseDummyDims() noexcept
{
    if(_dims.size() > 1)
    {
        // Iterate up to (but excluding) the current last dimension;
        // erase() returns the iterator to the element that slid into place.
        for(auto it = _dims.begin(); it != _dims.end() - 1;)
        {
            if(*it == 1)
            {
                it = _dims.erase(it);
            }
            else
            {
                ++it;
            }
        }
    }
}

// Empties the tensor (no dimensions, no data). As with std::vector::clear,
// the underlying storage capacity is left untouched for reuse.
void Tensor::clear() noexcept
{
    _data.clear();
    _dims.clear();
}

// Pretty-prints the tensor as nested bracketed lists,
// e.g. a 2x2 tensor prints as [[a, b], [c, d]].
std::ostream& operator<<(std::ostream& stream, const Tensor& tensor)
{
    const auto& dims = tensor.getDims();
    std::vector<std::size_t> steps(dims.size());
    // steps[i] = product of dims[i..N): the flat-index period at which a
    // bracket of nesting depth i opens/closes. steps[0] is the total size.
    std::partial_sum(dims.rbegin(), dims.rend(), steps.rbegin(), std::multiplies<std::size_t>());

    size_t count = 0;

    for(auto value : tensor.getData())
    {
        // Open a bracket for every nesting level that starts at this index.
        for(std::size_t step : steps)
        {
            if(count % step == 0)
            {
                stream << '[';
            }
        }

        stream << value;
        ++count;

        // Close a bracket for every nesting level that ends at this index.
        for(std::size_t step : steps)
        {
            if(count % step == 0)
            {
                stream << ']';
            }
        }

        // Separate siblings; skipped after the final element.
        if(count != steps[0])
        {
            stream << ", ";
        }
    }

    return stream;
}

}