/*
 * tensor.h - core tensor type and API; include this in every translation
 * unit that works with tensors.
 */
#ifndef TENSOR_H
#define TENSOR_H

#define GPU_EN /* enable GPU / parallel-computing-device support */

/* tensor kinds */
#define STATIC_TENSOR   0 /* tensor owns its data only */
#define VARIABLE_TENSOR 1 /* tensor owns its data and grad */
#define COMPUTE_TENSOR  2 /* tensor's data is derived from parent tensors */

typedef struct Tensor {
    char name[20];
    char type;    /* STATIC_TENSOR / VARIABLE_TENSOR / COMPUTE_TENSOR */
    int dim;      /* number of axes */
    int *shape;   /* always in cpu ram */
    int len;      /* total number of float elements */
    int device;   /* -1: cpu ram, 0..n: gpu index where data lives */
    float *data;  /* in cpu or gpu ram, see device */
    /* static tensors have the attributes above */
    float *grad;  /* same shape and placement as data */
    /* variable tensors have the attributes above */
    int pn;            /* number of parents */
    struct Tensor **p; /* parent pointers (struct tag: typedef name not yet visible here) */
    int para[12];      /* compute parameters */
    int (*gendata)(struct Tensor *my); /* compute my data from parents' data */
    int (*gengrad)(struct Tensor *my); /* accumulate each parent's grad from their data and my grad */
    /* compute tensors have the attributes and methods above */
} Tensor;

void    initptr_t(Tensor *T); /* usage: T = malloc(...); initptr_t(T); */
Tensor* malloc_t(int device, const char *name, int type, int dim, ...);
void    free_t(Tensor *T);
int     reshape_t(Tensor *T, int dim, ...);
int     _reshape_t(Tensor *T, int *shape, int dim, int len);
int     memcpy_t(Tensor *dst, Tensor *src); /* static: data; variable/compute: data and grad */
void    print_t(Tensor *T);
void    printshape(Tensor *T);
void    printinfo(Tensor *T);
int     reset_gpu(int device); /* must be called after all processing has finished */
int     malloc_tp(Tensor *T, int pn, ...); /* allocate parent slots when T is a compute tensor */
Tensor* copy_t(int device, const char *name, Tensor *src); /* copy src onto the given device */
int     memcpy_tdata(Tensor *T, const float *src, int srclen);
int     memcpy_tgrad(Tensor *T, const float *src, int srclen);
void    analyze_t(Tensor *T);  /* print all relationships */
int     forward_t(Tensor *T);  /* generate data */
void    zerograd_t(Tensor *T); /* zero all grads; not fast */
int     backward_t(Tensor *T); /* generate grads */
void    deepfree_t(Tensor *T); /* free the whole relationship graph */

/*
 * Returns a malloc'd array of tensor pointers which the caller must free:
 *     Tensor **LT = all_tensors(T, FALSE, TRUE, FALSE, &len); ... free(LT);
 * Every pointer in LT is copied from the original pointer (no ownership).
 */
Tensor** all_tensors(Tensor *T, int _static, int _variable, int _compute, int *len);

/* rounding helpers; arguments fully parenthesized (FLOOR/CEIL use an epsilon trick
   that is only valid for values comfortably away from .99999 boundaries) */
#define ROUND(X) ((int)((X) + 0.5))
#define CEIL(X)  ((int)((X) + 0.99999))
#define FLOOR(X) ((int)((X) - 0.99999) + 1)

void _shuffle(int *L, int len); /* in-place uniform shuffle of L[0..len-1] */

#endif
tensor.c:
#include "stdafx.h"
#include "tensor.h"
#include "tensor_sq.h"

#ifdef GPU_EN
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
/* int GPU_SETDEVICE(int device) */
#define GPU_SETDEVICE cudaSetDevice
/* int GPU_MALLOC(void **devPtr, size_t size); on success *devPtr = allocated pointer */
#define GPU_MALLOC cudaMalloc
/* void GPU_FREE(void *devPtr) */
#define GPU_FREE cudaFree
/* int GPU_MEMCPY(void *dst, void *src, size_t size, int kind) */
#define GPU_MEMCPY cudaMemcpy
#define GPU_MEMCPY_KIND_D2H cudaMemcpyDeviceToHost
#define GPU_MEMCPY_KIND_H2D cudaMemcpyHostToDevice
#define GPU_MEMCPY_KIND_D2D cudaMemcpyDeviceToDevice
/* int GPU_RESET(void): destroys all allocations on the current device */
#define GPU_RESET cudaDeviceReset
#endif

/* Reset every field of T to its empty/default value; performs no allocation. */
void initptr_t(Tensor *T)
{
    int i;
    for (i = 0; i < 20; i++) T->name[i] = 0;
    T->type = -1;   /* no type assigned yet */
    T->dim = 0;
    T->shape = NULL;
    T->len = 0;
    T->device = -1; /* cpu by default */
    T->data = NULL;
    T->grad = NULL;
    T->pn = 0;
    T->p = NULL;
    for (i = 0; i < 12; i++) T->para[i] = 0;
    T->gendata = NULL;
    T->gengrad = NULL;
}
Tensor* malloc_t(int device, constchar *name, int type, int dim, ...)
{
int i, d, len = 1, ret;
Tensor *T;
va_list vl;
//malloc tensor and shape
T = (Tensor*)malloc(sizeof(Tensor)); initptr_t(T);
T->shape = (int*)malloc(dim * sizeof(int));
//init tensor except data and grad
strcpy_s(T->name, name);
T->type = type;
va_start(vl, dim);
for (i = 0; i < dim; i++)
{
d = va_arg(vl, int);
T->shape[i] = d;
len *= d;
}
va_end(vl);
T->dim = dim;
T->len = len;
T->device = device;
//malloc data and gradif (device < 0) { //cpu
T->data = (float*)malloc(len * sizeof(float));
if (type != STATIC_TENSOR) T->grad = (float*)malloc(len * sizeof(float));
}
else { //gpu#ifdef GPU_EN//choose gpu
ret = GPU_SETDEVICE(device);
if (ret) {
fprintf(stderr, "GPU_SETDEVICE failed!");
free(T->shape);
free(T);
return NULL;
}
//malloc
ret = GPU_MALLOC((void**)&T->data, len * sizeof(float));
if (ret) {
fprintf(stderr, "GPU_MALLOC failed!");
GPU_FREE(T->data);
free(T->shape);
free(T);
return NULL;
}
if (type != STATIC_TENSOR) {
ret = GPU_MALLOC((void**)&T->grad, len * sizeof(float));
if (ret) {
fprintf(stderr, "GPU_MALLOC failed!");
GPU_FREE(T->grad);
GPU_FREE(T->data);
free(T->shape);
free(T);
return NULL;
}
}
#elsefree(T->shape);
free(T);
return NULL;
#endif
}
return T;
}
/*
 * Release a tensor allocated with malloc_t (host fields plus device memory).
 * Safe to call with NULL. Note: the caller's pointer is left dangling —
 * the former `T = NULL;` only cleared the local copy and was removed.
 */
void free_t(Tensor *T)
{
    if (T == NULL) return;
    if (T->device < 0) { /* cpu: everything lives on the host heap */
        free(T->shape);
        free(T->data);
        free(T->grad); /* NULL for static tensors; free(NULL) is a no-op */
        free(T->p);
        free(T);
    }
    else { /* gpu */
#ifdef GPU_EN
        int ret = GPU_SETDEVICE(T->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return; }
        free(T->shape);
        GPU_FREE(T->data);
        if (T->type != STATIC_TENSOR)
            GPU_FREE(T->grad);
        free(T->p);
        free(T);
#endif
    }
}
/*
 * Change T's shape to the `dim` vararg extents; the total element count
 * must be unchanged. Returns 0 on success, -1 on failure.
 * FIX: the new shape array is allocated (and checked) BEFORE the old one
 * is freed, so T is never left with a dangling shape pointer on OOM.
 */
int reshape_t(Tensor *T, int dim, ...)
{
    int i, len = 1;
    int *newshape;
    va_list vl;
    if (T == NULL || T->shape == NULL) return -1;
    /* first pass: total length of the requested shape */
    va_start(vl, dim);
    for (i = 0; i < dim; i++) len *= va_arg(vl, int);
    va_end(vl);
    if (T->len != len) return -1; /* reshape must preserve element count */
    newshape = (int*)malloc(dim * sizeof(int));
    if (newshape == NULL) return -1;
    /* second pass: record the extents */
    va_start(vl, dim);
    for (i = 0; i < dim; i++) newshape[i] = va_arg(vl, int);
    va_end(vl);
    free(T->shape);
    T->shape = newshape;
    T->dim = dim;
    T->len = len;
    return 0;
}
/*
 * Array-based reshape: install `shape[0..dim-1]` and set len directly.
 * The caller is trusted to pass a consistent len (no product check here).
 * FIX: allocate the replacement shape before freeing the old one, and
 * check the allocation, so T stays valid on OOM. Returns 0 or -1.
 */
int _reshape_t(Tensor *T, int *shape, int dim, int len)
{
    int i;
    int *newshape;
    if (T == NULL || T->shape == NULL || shape == NULL) return -1;
    newshape = (int*)malloc(dim * sizeof(int));
    if (newshape == NULL) return -1;
    for (i = 0; i < dim; i++)
        newshape[i] = shape[i];
    free(T->shape);
    T->shape = newshape;
    T->dim = dim;
    T->len = len;
    return 0;
}
/*
 * Copy src into dst (data, plus grad for non-static tensors). Both tensors
 * must have the same len and type; any cpu/gpu combination is handled.
 * Returns 0 on success, non-zero on failure.
 * FIXES: the original's error paths in the cross-device case executed
 * `return ret; free(buffer);` — the free was dead code, leaking the host
 * staging buffers on every failure. Buffer allocations are now checked and
 * released through a single cleanup label.
 */
int memcpy_t(Tensor *dst, Tensor *src)
{
    int i, ret = 0;
    float *data_buffer = NULL;
    float *grad_buffer = NULL;

    if (dst == NULL || src == NULL || dst->data == NULL || src->data == NULL) return -1;
    if (dst->len != src->len) return -1;   /* lengths must match */
    if (dst->type != src->type) return -1; /* types must match */

    if (dst->device < 0 && src->device < 0) { /* cpu <- cpu */
        for (i = 0; i < dst->len; i++) dst->data[i] = src->data[i];
        if (src->type != STATIC_TENSOR)
            for (i = 0; i < dst->len; i++) dst->grad[i] = src->grad[i];
        return 0;
    }
#ifdef GPU_EN
    if (dst->device < 0 && src->device >= 0) { /* cpu <- gpu */
        ret = GPU_SETDEVICE(src->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
        ret = GPU_MEMCPY(dst->data, src->data, src->len * sizeof(float), GPU_MEMCPY_KIND_D2H);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        if (src->type != STATIC_TENSOR) {
            ret = GPU_MEMCPY(dst->grad, src->grad, src->len * sizeof(float), GPU_MEMCPY_KIND_D2H);
            if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        }
    }
    else if (dst->device >= 0 && src->device < 0) { /* gpu <- cpu */
        ret = GPU_SETDEVICE(dst->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
        ret = GPU_MEMCPY(dst->data, src->data, src->len * sizeof(float), GPU_MEMCPY_KIND_H2D);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        if (src->type != STATIC_TENSOR) {
            ret = GPU_MEMCPY(dst->grad, src->grad, src->len * sizeof(float), GPU_MEMCPY_KIND_H2D);
            if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        }
    }
    else if (dst->device == src->device) { /* gpu <- gpu, same device */
        ret = GPU_SETDEVICE(src->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
        ret = GPU_MEMCPY(dst->data, src->data, src->len * sizeof(float), GPU_MEMCPY_KIND_D2D);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        if (src->type != STATIC_TENSOR) {
            ret = GPU_MEMCPY(dst->grad, src->grad, src->len * sizeof(float), GPU_MEMCPY_KIND_D2D);
            if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
        }
    }
    else { /* gpu <- gpu across devices: stage through host memory */
        data_buffer = (float*)malloc(src->len * sizeof(float));
        if (data_buffer == NULL) return -1;
        if (src->type != STATIC_TENSOR) {
            grad_buffer = (float*)malloc(src->len * sizeof(float));
            if (grad_buffer == NULL) { free(data_buffer); return -1; }
        }
        /* device -> host */
        ret = GPU_SETDEVICE(src->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); goto staged_done; }
        ret = GPU_MEMCPY(data_buffer, src->data, src->len * sizeof(float), GPU_MEMCPY_KIND_D2H);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); goto staged_done; }
        if (grad_buffer != NULL) {
            ret = GPU_MEMCPY(grad_buffer, src->grad, src->len * sizeof(float), GPU_MEMCPY_KIND_D2H);
            if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); goto staged_done; }
        }
        /* host -> destination device */
        ret = GPU_SETDEVICE(dst->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); goto staged_done; }
        ret = GPU_MEMCPY(dst->data, data_buffer, src->len * sizeof(float), GPU_MEMCPY_KIND_H2D);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); goto staged_done; }
        if (grad_buffer != NULL) {
            ret = GPU_MEMCPY(dst->grad, grad_buffer, src->len * sizeof(float), GPU_MEMCPY_KIND_H2D);
            if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); }
        }
staged_done:
        free(data_buffer);
        free(grad_buffer);
    }
    return ret;
#else
    return -1; /* any non-cpu copy is impossible without GPU support */
#endif
}
/* Print one row (the last axis) of a host-resident float buffer per line. */
static void print_buf(const float *buf, int len, int rowlen)
{
    int i, ct = 0;
    while (ct < len) {
        for (i = 0; i < rowlen; i++)
            printf("%-11.6f", buf[ct++]);
        printf("\n");
    }
}

/*
 * Print a tensor's info line, its data, and (for non-static tensors) its
 * grad, one row of the last axis per line. GPU tensors are staged through
 * a temporary cpu copy. The four near-identical print loops of the
 * original were factored into print_buf; a guard against dim == 0 /
 * missing shape was added (the original indexed shape[dim - 1] blindly).
 */
void print_t(Tensor *T)
{
    int rowlen;
    Tensor *Tp;
    if (T == NULL || T->data == NULL || T->shape == NULL || T->dim <= 0) return;
    rowlen = T->shape[T->dim - 1]; /* last axis = one printed row */
    printinfo(T);
    printf("\n");
    if (T->device < 0) { /* already in cpu ram */
        printf("data:\n");
        print_buf(T->data, T->len, rowlen);
        if (T->type != STATIC_TENSOR) {
            printf("grad:\n");
            print_buf(T->grad, T->len, rowlen);
        }
    }
    else { /* stage a cpu copy first */
        Tp = copy_t(-1, "", T);
        if (Tp == NULL || memcpy_t(Tp, T)) {
            free_t(Tp); /* free_t(NULL) is a no-op */
            return;
        }
        printf("data:\n");
        print_buf(Tp->data, T->len, rowlen);
        if (T->type != STATIC_TENSOR) {
            printf("grad:\n");
            print_buf(Tp->grad, T->len, rowlen);
        }
        free_t(Tp);
    }
    printf("\n");
}
/* Print T's shape as "(d0,d1,...,dn)" with no trailing newline. */
void printshape(Tensor *T)
{
    int axis;
    if (T == NULL) return;
    printf("(");
    for (axis = 0; axis < T->dim; axis++) {
        /* comma after every extent except the last */
        printf(axis + 1 < T->dim ? "%d," : "%d", T->shape[axis]);
    }
    printf(")");
}
/*
 * Print a one-line summary: name(deviceKIND)(shape), where KIND is
 * S/V/C for static/variable/compute. No trailing newline.
 * FIX: the mangled `elseif` tokens did not compile; rewritten as a switch.
 */
void printinfo(Tensor *T)
{
    if (T == NULL) return;
    printf("%s(%d", T->name, T->device);
    switch (T->type) {
    case STATIC_TENSOR:   printf("S"); break;
    case VARIABLE_TENSOR: printf("V"); break;
    case COMPUTE_TENSOR:  printf("C"); break;
    default: break; /* uninitialized type: print no letter, as before */
    }
    printf(")");
    printshape(T);
}
/*
 * Destroy all allocations on the given GPU. Must be called once after all
 * processing on that device has finished. Returns 0 on success, non-zero
 * on failure, -1 when GPU support is compiled out.
 */
int reset_gpu(int device)
{
#ifdef GPU_EN
    int ret = GPU_SETDEVICE(device);
    if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
    ret = GPU_RESET();
    if (ret) { fprintf(stderr, "GPU_RESET failed!"); return ret; }
    return ret;
#else
    return -1;
#endif
}
/*
 * Attach `pn` parent tensors (varargs of Tensor*) to compute tensor T.
 * Returns 0 on success, -1 on failure.
 * FIXES: sizeof(Tensor**) -> sizeof(Tensor*) (element, not pointer-to-
 * pointer — same size on common ABIs but wrong idiom); malloc checked,
 * and pn is only recorded once the array exists.
 */
int malloc_tp(Tensor *T, int pn, ...)
{
    int i;
    va_list vl;
    if (T == NULL) return -1;
    if (T->type != COMPUTE_TENSOR) return -1; /* only compute tensors have parents */
    if (pn <= 0) return -1;
    T->p = (Tensor**)malloc(pn * sizeof(Tensor*));
    if (T->p == NULL) return -1;
    T->pn = pn;
    va_start(vl, pn);
    for (i = 0; i < pn; i++)
        T->p[i] = va_arg(vl, Tensor*);
    va_end(vl);
    return 0;
}
/*
 * Deep-copy src onto the given device under a new name. Returns the new
 * tensor, or NULL on failure.
 * FIXES: the original never updated T->dim after replacing the shape, so
 * every copy claimed to be 1-D while carrying a src->dim-entry shape
 * array; the memcpy_t result and the shape malloc were unchecked.
 */
Tensor* copy_t(int device, const char *name, Tensor *src)
{
    int i, dim;
    int *newshape;
    Tensor *T;
    if (src == NULL || src->data == NULL) return NULL;
    /* allocate flat (1-D) first, then restore the source shape */
    T = malloc_t(device, name, src->type, 1, src->len);
    if (T == NULL) return NULL;
    if (memcpy_t(T, src)) { free_t(T); return NULL; }
    dim = src->dim;
    newshape = (int*)malloc(dim * sizeof(int));
    if (newshape == NULL) { free_t(T); return NULL; }
    for (i = 0; i < dim; i++)
        newshape[i] = src->shape[i];
    free(T->shape);
    T->shape = newshape;
    T->dim = dim; /* FIX: was left at 1 */
    return T;
}
/*
 * Copy srclen floats from host buffer src into T->data (cpu or gpu).
 * srclen must equal T->len. Returns 0 on success, non-zero on failure.
 * memcpy_s was replaced with standard memcpy: the destination size is
 * already validated by the srclen == T->len check.
 */
int memcpy_tdata(Tensor *T, const float *src, int srclen)
{
    int ret = 0;
    if (T == NULL || T->data == NULL || src == NULL) return -1;
    if (srclen != T->len) return -1;
    if (T->device < 0) { /* cpu */
        memcpy(T->data, src, (size_t)srclen * sizeof(float));
    }
    else { /* gpu */
#ifdef GPU_EN
        ret = GPU_SETDEVICE(T->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
        ret = GPU_MEMCPY(T->data, src, srclen * sizeof(float), GPU_MEMCPY_KIND_H2D);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
#else
        return -1;
#endif
    }
    return ret;
}
/*
 * Copy srclen floats from host buffer src into T->grad (cpu or gpu).
 * srclen must equal T->len. Returns 0 on success, non-zero on failure.
 * memcpy_s was replaced with standard memcpy: the destination size is
 * already validated by the srclen == T->len check.
 */
int memcpy_tgrad(Tensor *T, const float *src, int srclen)
{
    int ret = 0;
    if (T == NULL || T->grad == NULL || src == NULL) return -1;
    if (srclen != T->len) return -1;
    if (T->device < 0) { /* cpu */
        memcpy(T->grad, src, (size_t)srclen * sizeof(float));
    }
    else { /* gpu */
#ifdef GPU_EN
        ret = GPU_SETDEVICE(T->device);
        if (ret) { fprintf(stderr, "GPU_SETDEVICE failed!"); return ret; }
        ret = GPU_MEMCPY(T->grad, src, srclen * sizeof(float), GPU_MEMCPY_KIND_H2D);
        if (ret) { fprintf(stderr, "GPU_MEMCPY failed!"); return ret; }
#else
        return -1;
#endif
    }
    return ret;
}
/*
 * Print every parent relationship reachable from T (breadth-first), then
 * print the visited tensors in reverse visit order as a "topological
 * sequence". Purely informational; the graph is not modified.
 */
void analyze_t(Tensor *T)
{
    int k;
    Tensor *node;
    tensor_sq bfs_q;
    tensor_sq order_s;
    if (T == NULL) return;
    tsq_malloc(&bfs_q);
    tsq_malloc(&order_s);
    tq_init(&bfs_q);
    ts_init(&order_s);
    tq_enqueue(&bfs_q, T);
    while (!tq_empty(&bfs_q)) {
        node = tq_dequeue(&bfs_q);
        ts_push(&order_s, node);
        if (node->p == NULL) continue; /* leaf: nothing to report */
        printinfo(node);
        printf(":\n");
        for (k = 0; k < node->pn; k++) {
            printinfo(node->p[k]);
            if (k != node->pn - 1) printf(", ");
            tq_enqueue(&bfs_q, node->p[k]);
        }
        printf("\n\n");
    }
    printf("topological sequence:\n");
    while (!ts_empty(&order_s)) {
        node = ts_pop(&order_s);
        printinfo(node);
        printf(" -> ");
    }
    printf("\n\n");
    tsq_free(&bfs_q);
    tsq_free(&order_s);
}
/*
 * Evaluate the compute graph rooted at T: collect all ancestors breadth-
 * first, then run each tensor's gendata in reverse visit order so parents
 * are computed before children. Returns 0 on success, -1 on failure.
 * FIX: the guard used a bare `return;` in a non-void function (invalid in
 * C99+, garbage return value before that) — now returns -1.
 */
int forward_t(Tensor *T)
{
    int i, ret = 0;
    Tensor *Tp;
    tensor_sq queue_t;
    tensor_sq stack_t;
    if (T == NULL || T->type != COMPUTE_TENSOR) return -1;
    tsq_malloc(&queue_t);
    tsq_malloc(&stack_t);
    tq_init(&queue_t);
    ts_init(&stack_t);
    tq_enqueue(&queue_t, T);
    /* BFS from T toward the leaves; record visit order on the stack */
    while (!tq_empty(&queue_t)) {
        Tp = tq_dequeue(&queue_t);
        ts_push(&stack_t, Tp);
        if (Tp->p != NULL)
            for (i = 0; i < Tp->pn; i++)
                tq_enqueue(&queue_t, Tp->p[i]);
    }
    /* unwind: leaves first, so every gendata sees up-to-date parents */
    while (!ts_empty(&stack_t)) {
        Tp = ts_pop(&stack_t);
        if (Tp->gendata != NULL) {
            ret = Tp->gendata(Tp);
            if (ret) { tsq_free(&queue_t); tsq_free(&stack_t); return -1; }
        }
    }
    tsq_free(&queue_t);
    tsq_free(&stack_t);
    return ret;
}
/*
 * Zero the grad of T and of every tensor reachable through parent
 * pointers. Grads are cleared by staging a zero buffer through
 * memcpy_tgrad so cpu and gpu tensors are handled uniformly (hence the
 * "not fast" note in the header).
 * FIXES: uses calloc (zero-fills in one call) instead of malloc plus a
 * manual loop, and skips a node instead of dereferencing NULL when the
 * allocation fails.
 */
void zerograd_t(Tensor *T)
{
    int i;
    Tensor *Tp;
    tensor_sq queue_t;
    float *zeros;
    if (T == NULL) return;
    tsq_malloc(&queue_t);
    tq_init(&queue_t);
    tq_enqueue(&queue_t, T);
    while (!tq_empty(&queue_t)) {
        Tp = tq_dequeue(&queue_t);
        if (Tp->p != NULL)
            for (i = 0; i < Tp->pn; i++)
                tq_enqueue(&queue_t, Tp->p[i]);
        if (Tp->grad != NULL) {
            zeros = (float*)calloc(Tp->len, sizeof(float));
            if (zeros != NULL) {
                memcpy_tgrad(Tp, zeros, Tp->len);
                free(zeros);
            }
        }
    }
    tsq_free(&queue_t);
}
/*
 * Backpropagate from T: seed T's grad with ones (dT/dT = 1), then walk
 * the graph breadth-first calling each node's gengrad, which accumulates
 * into its parents' grads. Returns 0 on success, -1 on failure.
 * FIXES: the guard used a bare `return;` in a non-void function; the seed
 * buffer malloc and the memcpy_tgrad result were unchecked.
 */
int backward_t(Tensor *T)
{
    int i, ret = 0;
    float *ones;
    Tensor *Tp;
    tensor_sq queue_t;
    if (T == NULL || T->grad == NULL) return -1;
    /* seed the root gradient with 1.0 */
    ones = (float*)malloc(T->len * sizeof(float));
    if (ones == NULL) return -1;
    for (i = 0; i < T->len; i++) ones[i] = 1.0f;
    ret = memcpy_tgrad(T, ones, T->len);
    free(ones);
    if (ret) return -1;
    /* BFS: children run gengrad before (or interleaved with) ancestors */
    tsq_malloc(&queue_t);
    tq_init(&queue_t);
    tq_enqueue(&queue_t, T);
    while (!tq_empty(&queue_t)) {
        Tp = tq_dequeue(&queue_t);
        if (Tp->p != NULL)
            for (i = 0; i < Tp->pn; i++)
                tq_enqueue(&queue_t, Tp->p[i]);
        if (Tp->gengrad != NULL) {
            ret = Tp->gengrad(Tp);
            if (ret) { tsq_free(&queue_t); return -1; }
        }
    }
    tsq_free(&queue_t);
    return ret;
}
/*
 * Free T and every tensor reachable through parent pointers, walking the
 * graph breadth-first and calling free_t on each dequeued node.
 * NOTE(review): there is no visited set — if two compute tensors share a
 * parent (a diamond in the graph), that parent is enqueued twice and
 * free_t is called on it twice (double free). This looks safe only when
 * the relationship graph is a tree; confirm against how graphs are built.
 */
void deepfree_t(Tensor *T)
{
    int i;
    Tensor *Tp;
    tensor_sq queue_t;
    if (T == NULL) return;
    tsq_malloc(&queue_t);
    tq_init(&queue_t);
    tq_enqueue(&queue_t, T);
    while (!tq_empty(&queue_t)) {
        Tp = tq_dequeue(&queue_t);
        /* enqueue parents before freeing Tp, while Tp->p is still readable */
        if (Tp->p != NULL)
            for (i = 0; i < Tp->pn; i++)
                tq_enqueue(&queue_t, Tp->p[i]);
        free_t(Tp);
    }
    tsq_free(&queue_t);
}
/*
 * Collect pointers to all tensors reachable from T whose type matches the
 * enabled _static/_variable/_compute flags, in reverse breadth-first
 * (topological-ish) order. *len receives the count. The returned array is
 * malloc'd and must be freed by the caller; the pointers themselves are
 * borrowed, not owned.
 * FIXES: mangled `elseif` tokens did not compile; the result malloc and
 * the len out-parameter are now checked.
 */
Tensor** all_tensors(Tensor *T, int _static, int _variable, int _compute, int *len)
{
    int i, ct = 0, keep;
    Tensor *Tp;
    Tensor **LT;
    tensor_sq queue_t;
    tensor_sq stack_t;
    if (T == NULL || len == NULL) return NULL;
    tsq_malloc(&queue_t);
    tsq_malloc(&stack_t);
    tq_init(&queue_t);
    ts_init(&stack_t);
    tq_enqueue(&queue_t, T);
    /* BFS over parents; every visited tensor goes on the stack */
    while (!tq_empty(&queue_t)) {
        Tp = tq_dequeue(&queue_t);
        ts_push(&stack_t, Tp);
        if (Tp->p != NULL)
            for (i = 0; i < Tp->pn; i++)
                tq_enqueue(&queue_t, Tp->p[i]);
    }
    LT = (Tensor**)malloc(ts_getcount(&stack_t) * sizeof(Tensor*));
    if (LT == NULL) {
        tsq_free(&queue_t);
        tsq_free(&stack_t);
        return NULL;
    }
    while (!ts_empty(&stack_t)) {
        Tp = ts_pop(&stack_t);
        keep = ((Tp->type == STATIC_TENSOR) && _static)
            || ((Tp->type == VARIABLE_TENSOR) && _variable)
            || ((Tp->type == COMPUTE_TENSOR) && _compute);
        if (keep) LT[ct++] = Tp;
    }
    *len = ct;
    tsq_free(&queue_t);
    tsq_free(&stack_t);
    return LT;
}
/*
 * Uniformly shuffle L[0..len-1] in place with the Fisher-Yates algorithm.
 * FIX: the original drew `rand() % i`, which excludes i itself — that is
 * Sattolo's algorithm: element i can never remain in place and only
 * cyclic permutations are produced. An unbiased shuffle draws from
 * [0, i] inclusive. (A small modulo bias from rand() remains; acceptable
 * for data shuffling.)
 */
void _shuffle(int *L, int len)
{
    int i, p, t;
    if (L == NULL) return;
    for (i = len - 1; i > 0; i--) {
        p = rand() % (i + 1); /* pick from [0, i], allowing a self-swap */
        t = L[i];
        L[i] = L[p];
        L[p] = t;
    }
}
tensor_sq.h:
#ifndef TENSOR_SQ_H#define TENSOR_SQ_H#include "../tensor.h"#define SQ_BUFFER_BYTES 4096typedefstruct tensor_sq {
unsignedchar *buffer;
int tq_datasize;
int tq_front;
int tq_rear;
int tq_count;
int tq_fastfg;
int ts_datasize;
int ts_top;
int ts_fastfg;
}tensor_sq;
void tsq_malloc(tensor_sq *SQ);
void tsq_free(tensor_sq *SQ);
//queuevoid tq_init(tensor_sq *Q);
int tq_enqueue(tensor_sq *Q, Tensor *T);
Tensor* tq_dequeue(tensor_sq *Q);
int tq_empty(tensor_sq *Q);
int tq_getcount(tensor_sq *Q);
//stackvoid ts_init(tensor_sq *S);
int ts_push(tensor_sq *S, Tensor *T);
Tensor* ts_pop(tensor_sq *S);
Tensor* ts_gettop(tensor_sq *S);
int ts_empty(tensor_sq *S);
int ts_getcount(tensor_sq *S);
#endif
tensor.h:/*This file is important that must be included*/#ifndef TENSOR_H#define TENSOR_H#define GPU_EN //Enable GPU or parallel-computing-device#define STATIC_TENSOR 0 //tensors have the...