【codeBase_C++】C++ 读取npy文件

C++ 读取npy文件

npy_array.h

#ifndef __NPY_ARRAY_H__
#define __NPY_ARRAY_H__

#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Capacity of npy_array_t::shape, i.e. the number of extents the struct can hold. */
#define NPY_ARRAY_MAX_DIMENSIONS 8

/* In-memory representation of one array read from / written to a .npy file. */
typedef struct _npy_array_t {
    /* Raw element bytes; allocated by the loader and released by npy_array_free(). */
    char             *data;
    /* Extent of each dimension; the loader fills entries [0, ndim) from the header's 'shape' tuple. */
    size_t            shape[ NPY_ARRAY_MAX_DIMENSIONS ];
    /* Number of extents stored in shape. */
    int32_t           ndim;
    /* First character of the header's 'descr' string: '<', '>' or '|'. */
    char              endianness;
    /* Second character of the header's 'descr' string (e.g. 'f' or 'i'). */
    char              typechar;
    /* Size of one element in bytes, parsed from the number that follows typechar in 'descr'. */
    size_t            elem_size;
    /* Value of the header's 'fortran_order' entry. */
    bool              fortran_order;
} npy_array_t;


/* Open `filename` in binary mode and read one array from it.
   Returns NULL (after printing a message on stderr) when the file cannot be
   opened or parsed. Release the result with npy_array_free(). */
npy_array_t*      npy_array_load       ( const char *filename );
/* Print dimensions, shape, type character, element size and fortran order of
   `m` to stdout. Prints a warning on stderr when `m` is NULL. */
void              npy_array_dump       ( const npy_array_t *m );
/* Write the header and then the element data of `m` to `filename`.
   Failures are reported on stderr only; there is no return value. */
void              npy_array_save       ( const char *filename, const npy_array_t *m );
/* Free m->data and then `m` itself. Prints a warning on stderr when `m` is NULL. */
void              npy_array_free       ( npy_array_t *m );

/* Convenient functions - I'll make them public for now, but use these with care
   as I might remove these from the exported list of public functions. */
/* Size in bytes of the element data described by `m`
   (element count derived from m->shape, multiplied by m->elem_size). */
size_t            npy_array_calculate_datasize ( const npy_array_t *m );
/* Serialise the file header (magic, version, header length, dictionary) for `m`
   into the caller-provided `buf` and return the number of bytes written.
   `buf` is not bounds-checked; npy_array_save() passes a buffer of
   NPY_ARRAY_DICT_BUFSIZE + NPY_ARRAY_PREHEADER_LENGTH bytes. */
size_t            npy_array_get_header         ( const npy_array_t *m,  char *buf );

/* reader_func implementation backed by stdio: `fp` is treated as a FILE *,
   up to `nbytes` bytes are read into `buffer`, and the number of bytes that
   fread() actually delivered is returned. */
static inline int64_t read_file( void *fp, void *buffer, uint64_t nbytes )
{
    FILE *stream = (FILE *) fp;
    size_t delivered = fread( buffer, 1, nbytes, stream );

    return (int64_t) delivered;
}

/* _read_matrix() might be public in the future as a macro or something.
   Don't use it now as I will change name of it in case I make it public. */
/* Read callback used by _read_matrix(): fill `buffer` with up to `nbytes` bytes
   taken from the opaque source `fp`. _read_matrix() compares the return value
   against the number of bytes it asked for and fails when they differ. */
typedef int64_t (*reader_func)( void *fp, void *buffer, uint64_t nbytes );
/* Parse one array from `fp` through `read_func`; returns NULL on failure. */
npy_array_t *     _read_matrix( void *fp, reader_func read_func );
#endif  /* __NPY_ARRAY_H__ */

npy_array.c

#include "npy_array.h"

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

/* The six magic bytes every file starts with. */
#define NPY_ARRAY_MAGIC_STRING {0x93,'N','U','M','P','Y'}
/* Byte offsets of the major / minor format version inside the fixed pre-header. */
#define NPY_ARRAY_MAJOR_VERSION_IDX 6
#define NPY_ARRAY_MINOR_VERSION_IDX 7

/* NOTE: despite its name, npy_array_get_header() uses this as the size of the
   two-byte version field it writes after the magic bytes. */
#define NPY_ARRAY_HEADER_LENGTH 2
/* Byte offsets of the low / high byte of the 16-bit header length inside the pre-header. */
#define NPY_ARRAY_HEADER_LENGTH_LOW_IDX 8
#define NPY_ARRAY_HEADER_LENGTH_HIGH_IDX 9

/* Size of the scratch buffer that holds the textual shape tuple while writing a header. */
#define NPY_ARRAY_SHAPE_BUFSIZE 512

/* Size of the scratch buffer that holds the header dictionary text while writing. */
#define NPY_ARRAY_DICT_BUFSIZE 1024
/* Number of bytes in NPY_ARRAY_MAGIC_STRING. */
#define NPY_ARRAY_MAGIC_LENGTH 6
/* Presumably the 2 version bytes plus the 2 header-length bytes - TODO confirm naming. */
#define NPY_ARRAY_VERSION_HEADER_LENGTH 4
/* Number of bytes _read_matrix() reads before the header dictionary (10). */
#define NPY_ARRAY_PREHEADER_LENGTH (NPY_ARRAY_MAGIC_LENGTH + NPY_ARRAY_VERSION_HEADER_LENGTH)

/**
 * Serialise the .npy (format version 1.0) header describing `m` into `buf`.
 *
 * Layout written to `buf`: 6 magic bytes, 2 version bytes, a 16-bit
 * little-endian header length, then the header dictionary padded with spaces
 * and terminated by '\n'. The padding makes the total length 128 bytes, or the
 * next multiple of 64 above that when the dictionary is too long for 128.
 *
 * @param m    array whose descr / fortran_order / shape are written; m->ndim
 *             must not exceed the capacity of m->shape.
 * @param buf  caller-provided output buffer. It is not bounds-checked here; a
 *             buffer of NPY_ARRAY_DICT_BUFSIZE + NPY_ARRAY_PREHEADER_LENGTH
 *             bytes is always sufficient.
 * @return     number of bytes written to `buf`, or 0 when `m` is inconsistent
 *             or the header text does not fit the internal buffers.
 */
size_t npy_array_get_header( const npy_array_t *m,  char *buf )
{
    enum { TOTAL_ALIGNMENT = 64, MIN_TOTAL_LENGTH = 128 };
    static const unsigned char magic[NPY_ARRAY_MAGIC_LENGTH] = NPY_ARRAY_MAGIC_STRING;
    static const unsigned char version[NPY_ARRAY_HEADER_LENGTH] = { 1, 0 };

    /* Never index past the end of m->shape, whatever ndim claims. */
    const size_t max_dims = sizeof m->shape / sizeof m->shape[0];
    if( m->ndim > 0 && (size_t) m->ndim > max_dims )
        return 0;

    /* Build the shape tuple body, e.g. "3,4," (the trailing comma is valid
       Python and required for one-dimensional arrays). */
    char shape[NPY_ARRAY_SHAPE_BUFSIZE] = { '\0' };
    size_t shape_len = 0;
    for( int i = 0; i < m->ndim; i++ ){
        const size_t room = sizeof shape - shape_len;
        const int written = snprintf( shape + shape_len, room, "%zu,", m->shape[i] );
        if( written < 0 || (size_t) written >= room )
            return 0;
        shape_len += (size_t) written;
    }

    char dict[NPY_ARRAY_DICT_BUFSIZE] = { '\0' };
    const int dict_written = snprintf( dict, sizeof dict,
            "{'descr': '%c%c%zu', 'fortran_order': %s, 'shape': (%s), }",
            m->endianness,
            m->typechar,
            m->elem_size,
            m->fortran_order ? "True": "False",
            shape );
    if( dict_written < 0 || (size_t) dict_written >= sizeof dict )
        return 0;
    const size_t dict_len = (size_t) dict_written;

    /* Pick the padded total length: pre-header + dictionary + '\n' must fit. */
    size_t total = MIN_TOTAL_LENGTH;
    while( total < NPY_ARRAY_PREHEADER_LENGTH + dict_len + 1 )
        total += TOTAL_ALIGNMENT;

    const size_t header_len = total - NPY_ARRAY_PREHEADER_LENGTH;
    if( header_len > sizeof dict || header_len > UINT16_MAX )
        return 0;

    memset( dict + dict_len, ' ', header_len - dict_len - 1 );
    dict[header_len - 1] = '\n';

    unsigned char *p = (unsigned char *) buf;
    memcpy( p, magic, sizeof magic );
    p += sizeof magic;
    memcpy( p, version, sizeof version );
    p += sizeof version;

    /* The format stores the length little-endian regardless of the host. */
    *p++ = (unsigned char) ( header_len & 0xFF );
    *p++ = (unsigned char) ( ( header_len >> 8 ) & 0xFF );
    memcpy( p, dict, header_len );

    return total;
}

/**
 * Size in bytes of the element data described by `m`.
 *
 * Multiplies the first m->ndim extents of m->shape and scales the result by
 * m->elem_size. An array with ndim == 0 counts as one element; any zero extent
 * yields a size of 0, which matches the shape tuple written to the file header
 * (the header lists all ndim extents).
 *
 * The index is checked before m->shape is read, and it is additionally capped
 * at the capacity of m->shape, so an inconsistent ndim cannot cause an
 * out-of-bounds read.
 *
 * @param m  array description; must not be NULL.
 * @return   number of data bytes (no overflow detection is performed).
 */
size_t npy_array_calculate_datasize( const npy_array_t *m )
{
    const int capacity = (int) ( sizeof m->shape / sizeof m->shape[0] );
    const int dims = ( m->ndim < capacity ) ? m->ndim : capacity;

    size_t n_elements = 1;
    for( int idx = 0; idx < dims; idx++ )
        n_elements *= m->shape[ idx ];

    return n_elements * m->elem_size;
}


/* Locate the first occurrence of `item` inside `header` and return a pointer
   to the character right after it, or NULL when `item` does not occur. */
static char *find_header_item( const char *item, const char *header)
{
    char *match = strstr(header, item);

    if( match == NULL )
        return NULL;

    return match + strlen(item);
}

/* Byte order of the running machine in .npy notation: '<' when the first byte
   of the integer 1 is 1 (little endian), '>' otherwise. */
static inline char endianness(){
    const int probe = 1;
    const char *first_byte = (const char *) &probe;

    if( *first_byte == 1 )
        return '<';
    return '>';
}

/* Parse the 'descr' entry of `header` into m->endianness, m->typechar and
   m->elem_size. Returns false (after reporting on stderr) when it is malformed. */
static bool npy_parse_descr( const char *header, npy_array_t *m )
{
    const char *descr = find_header_item("'descr': '", header);
    if( !descr || descr[0] == '\0' || descr[1] == '\0' ){
        fprintf(stderr, "Cannot find a valid 'descr' entry in header.\n");
        return false;
    }

    if ( strchr("<>|", descr[0] ) ){
        m->endianness = descr[0];
        if( descr[0] != '|' && ( descr[0] != endianness())){
            fprintf(stderr, "Warning: Endianess of system and file does not match.");
        }
    } else {
        fprintf(stderr,"Warning: Endianness not found.");
    }

    /* FIXME Potential bug: Is the typechar always one byte? */
    m->typechar = descr[1];

    char *end = NULL;
    const long long elem_size = strtoll( &descr[2], &end, 10 );
    if( end == &descr[2] || elem_size <= 0 ){
        fprintf(stderr, "Cannot parse element size from 'descr' entry.\n");
        return false;
    }
    m->elem_size = (size_t) elem_size;
    return true;
}

/* Parse the 'fortran_order' entry of `header` into m->fortran_order.
   An unrecognised value only produces a warning and leaves the field false. */
static bool npy_parse_fortran_order( const char *header, npy_array_t *m )
{
    /* FIXME: This only works if there is one and only one leading spaces. */
    const char *fortran = find_header_item("'fortran_order': ", header);
    if( !fortran ){
        fprintf(stderr, "Cannot find 'fortran_order' entry in header.\n");
        return false;
    }

    if(strncmp(fortran, "True", 4) == 0 )
        m->fortran_order = true;
    else if(strncmp(fortran, "False", 5) == 0 )
        m->fortran_order = false;
    else
        fprintf(stderr, "Warning: No matrix order found, assuming fortran_order=False");
    return true;
}

/* Parse the 'shape' tuple of `header` into m->shape / m->ndim.
   At most NPY_ARRAY_MAX_DIMENSIONS - 1 extents are accepted (the same limit the
   previous assert enforced), so m->shape always ends with a zero entry. */
static bool npy_parse_shape( const char *header, npy_array_t *m )
{
    /* FIXME: This only works if there is one and only one leading spaces. */
    const char *cursor = find_header_item("'shape': ", header);
    if( !cursor ){
        fprintf(stderr, "Cannot find 'shape' entry in header.\n");
        return false;
    }

    while( *cursor != ')' ){
        if( *cursor == '\0' ){
            /* Never walk past the end of the header text. */
            fprintf(stderr, "Unterminated 'shape' entry in header.\n");
            return false;
        }
        if( !isdigit( (unsigned char) *cursor ) ){
            cursor++;
            continue;
        }
        if( m->ndim >= NPY_ARRAY_MAX_DIMENSIONS - 1 ){
            fprintf(stderr, "Too many dimensions in 'shape' entry.\n");
            return false;
        }

        /* Accumulate the decimal extent with an explicit overflow check. */
        size_t extent = 0;
        while( isdigit( (unsigned char) *cursor ) ){
            const size_t digit = (size_t) ( *cursor - '0' );
            if( extent > ( SIZE_MAX - digit ) / 10 ){
                fprintf(stderr, "Dimension in 'shape' entry is too large.\n");
                return false;
            }
            extent = extent * 10 + digit;
            cursor++;
        }
        m->shape[m->ndim++] = extent;
    }
    return true;
}

/* consider if this function should be exported to the end user */
/**
 * Read one array in .npy format version 1.x from an arbitrary byte source.
 *
 * Reads the 10-byte pre-header, validates magic and major version, reads the
 * header dictionary, parses 'descr', 'fortran_order' and 'shape', then reads
 * the element data. All content coming from the source is validated with
 * run-time checks (not assert), and every failure path releases what was
 * allocated.
 *
 * @param fp         opaque handle forwarded to `read_func`.
 * @param read_func  callback that fills a buffer with the requested number of
 *                   bytes and returns how many it delivered.
 * @return           newly allocated array (release with npy_array_free()), or
 *                   NULL after printing a message on stderr.
 */
npy_array_t * _read_matrix( void *fp, reader_func read_func )
{
    static const unsigned char magic[NPY_ARRAY_MAGIC_LENGTH] = NPY_ARRAY_MAGIC_STRING;
    unsigned char fixed_header[NPY_ARRAY_PREHEADER_LENGTH];
    char         *header         = NULL;
    npy_array_t  *m              = NULL;
    size_t        n_elements     = 1;   /* true element count                */
    size_t        alloc_elements = 1;   /* same, with zero extents read as 1 */
    size_t        n_bytes        = 0;

    if( read_func( fp, fixed_header, NPY_ARRAY_PREHEADER_LENGTH ) != (int64_t) NPY_ARRAY_PREHEADER_LENGTH ){
        fprintf(stderr, "Cannot read pre header bytes.\n");
        return NULL;
    }
    if( memcmp( fixed_header, magic, sizeof magic ) != 0 ){
        fprintf(stderr,"File format not recognised as numpy array.\n");
        return NULL;
    }

    const int major_version = fixed_header[NPY_ARRAY_MAJOR_VERSION_IDX];
    const int minor_version = fixed_header[NPY_ARRAY_MINOR_VERSION_IDX];
    if(major_version != 1){
        fprintf(stderr,"Wrong numpy save version. Expected version 1.x This is version %d.%d\n", major_version, minor_version);
        return NULL;
    }

    /* Assemble the little-endian 16-bit length from unsigned bytes so that a
       byte >= 0x80 is not sign-extended into the high half. */
    const uint16_t header_length = (uint16_t) ( fixed_header[NPY_ARRAY_HEADER_LENGTH_LOW_IDX]
                                   | ( fixed_header[NPY_ARRAY_HEADER_LENGTH_HIGH_IDX] << 8 ) );

    header = malloc( (size_t) header_length + 1 );
    if( !header ){
        fprintf(stderr, "Cannot allocate memory for header.\n");
        return NULL;
    }
    if( read_func( fp, header, header_length ) != (int64_t) header_length ){
        fprintf(stderr, "Cannot read header. %d bytes.\n", header_length);
        goto fail;
    }
    header[header_length] = '\0';
#if VERBOSE
    printf("Header length: %d\nHeader dictionary: \"%s\"\n", header_length, header);
#endif

    m = calloc( 1, sizeof *m );
    if ( !m ){
        fprintf(stderr, "Cannot allocate memory for matrix structure.\n");
        goto fail;
    }

    if( !npy_parse_descr( header, m )
     || !npy_parse_fortran_order( header, m )
     || !npy_parse_shape( header, m ) )
        goto fail;

    /* Element count is the product of all extents. alloc_elements is never
       smaller than n_elements, so checking it for overflow covers both. */
    for( int idx = 0; idx < m->ndim; idx++ ){
        const size_t extent       = m->shape[ idx ];
        const size_t alloc_extent = extent ? extent : 1;
        if( alloc_elements > SIZE_MAX / alloc_extent ){
            fprintf(stderr, "Array is too large for this system.\n");
            goto fail;
        }
        alloc_elements *= alloc_extent;
        n_elements     *= extent;
    }
    if( alloc_elements > SIZE_MAX / m->elem_size ){
        fprintf(stderr, "Array is too large for this system.\n");
        goto fail;
    }
    n_bytes = n_elements * m->elem_size;
    if( (uint64_t) n_bytes > (uint64_t) INT64_MAX ){
        fprintf(stderr, "Array is too large for this system.\n");
        goto fail;
    }

#if VERBOSE
    printf("Number of elements: %llu\n", (unsigned long long) n_elements );
#endif

    /* Zero-sized arrays still get a small zeroed buffer: m->data is then never
       NULL, and a consumer that treats a zero extent as the end of the shape
       list cannot run past the allocation. */
    m->data = calloc( alloc_elements, m->elem_size );
    if ( !m->data ){
        fprintf(stderr, "Cannot allocate memory for matrix data.\n");
        goto fail;
    }

    if( read_func( fp, m->data, n_bytes ) != (int64_t) n_bytes ){
        fprintf(stderr, "Could not read all data.\n");
        goto fail;
    }

    free( header );
    return m;

fail:
    free( header );
    if( m ){
        free( m->data );
        free( m );
    }
    return NULL;
}

/* Load one array from the file `filename`.
   The file is opened in binary mode and parsed with _read_matrix() using the
   stdio-based read_file() callback. Returns NULL when the file cannot be
   opened or parsed; a message is printed on stderr in both cases. */
npy_array_t * npy_array_load( const char *filename )
{
    FILE *stream = fopen(filename, "rb");
    if( stream == NULL ){
        fprintf(stderr,"Cannot open '%s' for reading.\n", filename );
        perror("Error");
        return NULL;
    }

    npy_array_t *loaded = _read_matrix( stream, &read_file);
    if( loaded == NULL ){
        fprintf(stderr, "Cannot read matrix.\n");
    }

    /* The stream is closed whether or not parsing succeeded. */
    fclose(stream);
    return loaded;
}

/**
 * Print a human-readable summary of `m` to stdout: number of dimensions,
 * shape, type character, element size and fortran order.
 *
 * A NULL `m` only produces a warning on stderr. An array with ndim == 0 is
 * printed with an empty shape "( )" instead of reading m->shape[-1], and the
 * number of extents printed is capped at the capacity of m->shape.
 *
 * @param m  array to describe; may be NULL.
 */
void npy_array_dump( const npy_array_t *m )
{
    if(!m){
        fprintf(stderr, "Warning: No matrix found. (%s)\n", __func__);
        return;
    }

    const int capacity = (int) ( sizeof m->shape / sizeof m->shape[0] );
    const int dims = ( m->ndim < capacity ) ? m->ndim : capacity;

    printf("Dimensions   : %d\n", (int) m->ndim);
    printf("Shape        : (");
    for( int i = 0; i < dims; i++ ){
        /* %zu keeps extents above INT_MAX intact. */
        printf("%s%zu", i ? ", " : " ", m->shape[i]);
    }
    printf(" )\n");
    printf("Type         : '%c' ", m->typechar);
    printf("(%zu bytes each element)\n", m->elem_size);
    printf("Fortran order: %s\n", m->fortran_order ? "True" : "False" );
}

/**
 * Write `m` to `filename` in .npy format: the header produced by
 * npy_array_get_header() followed by the raw element data.
 *
 * Problems are reported on stderr; the function has no return value. The data
 * is not written when the header could not be written, a NULL data pointer is
 * never handed to fwrite(), and the result of fclose() is checked because
 * buffered bytes may only fail to reach the file at that point.
 *
 * @param filename  path of the file to create or overwrite.
 * @param m         array to write; a NULL pointer only produces a warning.
 */
void npy_array_save( const char *filename, const npy_array_t *m )
{
    if( !m ){
        fprintf(stderr, "Warning: No matrix found. (%s)\n", __func__);
        return;
    }

    FILE *fp = fopen( filename, "wb");
    if( !fp ){
        fprintf(stderr,"Cannot open '%s' for writing.\n", filename );
        perror("Error");
        return;
    }

    char header[NPY_ARRAY_DICT_BUFSIZE + NPY_ARRAY_PREHEADER_LENGTH] = {'\0'};
    const size_t hlen = npy_array_get_header( m, header );
    const size_t datasize = npy_array_calculate_datasize( m );

    if( fwrite( header, 1, hlen, fp ) != hlen ){
        fprintf(stderr, "Could not write header data.\n");
    } else if( datasize > 0 ){
        if( !m->data || fwrite( m->data, 1, datasize, fp ) != datasize ){
            fprintf(stderr, "Could not write all data.\n");
        }
    }

    if( fclose(fp) != 0 ){
        fprintf(stderr, "Could not write all data.\n");
        perror("Error");
    }
}

/* Release an array: first its data buffer, then the structure itself.
   A NULL `m` is not freed; it only produces a warning on stderr. */
void npy_array_free( npy_array_t *m )
{
    if( m != NULL ){
        free( m->data );
        free( m );
        return;
    }

    fprintf(stderr, "Warning: No matrix found. (%s)\n", __func__);
}

主函数main.cpp

#ifdef __cplusplus
extern "C" 
{
#endif
    #include "npy_array.h"
#ifdef __cplusplus
}
#endif
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <string>
// Example driver: load a 2-D float32 .npy file and copy its contents into a
// float buffer owned by this program.
//
// The path of the file is taken from the first command-line argument (the
// original snippet referred to dataDir / fileName, which were never defined).
// Returns 0 on success and -1 on any failure.
int main(int argc, char *argv[])
{
    if( argc < 2 ) {
        printf("usage: %s <file.npy>\n", argc > 0 ? argv[0] : "main");
        return -1;
    }

    std::string filePath = argv[1];
    npy_array_t *arr = npy_array_load( filePath.c_str() );
    if( !arr )  {printf("nope! No array there!\n"); return -1;}
    //npy_array_dump( arr );
    printf("%s\n",filePath.c_str());

    // The copy below reinterprets the bytes as float and reads shape[0] and
    // shape[1], so refuse anything that is not a 2-D array of 4-byte floats.
    // For other dtypes, change the element type used here.
    if( arr->ndim != 2 || arr->typechar != 'f' || arr->elem_size != sizeof(float) ) {
        printf("expected a 2-D float32 array\n");
        npy_array_free( arr );
        return -1;
    }

    const size_t rows = arr->shape[0];
    const size_t cols = arr->shape[1];
    const size_t totalNumbers = rows * cols;
    float* points = new float[totalNumbers];

    char* dpoints = reinterpret_cast<char*>(points);
    std::copy(arr->data, arr->data + totalNumbers * sizeof(float), dpoints);

    //something points

    // Memory obtained with new[] must be released with delete[], not free().
    delete[] points;
    npy_array_free( arr );
    return 0;
}

在 Windows 和 Linux 下均可正常运行。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

新文观不止

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值