#ifndef HALIDE____data___entropy___deps___qcv_halide_plugin___codegen___conv_____________src___op_gen___aarch64___conv3x3_h
#define HALIDE____data___entropy___deps___qcv_halide_plugin___codegen___conv_____________src___op_gen___aarch64___conv3x3_h
#include <stdint.h>
// Forward declarations of the types used in the interface
// to the Halide pipeline.
//
// Definitions for these structs are below.
// Halide's representation of a multi-dimensional array.
// Halide::Runtime::Buffer is a more user-friendly wrapper
// around this. Its declaration is in HalideBuffer.h
struct halide_buffer_t;
// Metadata describing the arguments to the generated function.
// Used to construct calls to the _argv version of the function.
struct halide_filter_metadata_t;
// HALIDE_MUST_USE_RESULT marks a function whose return value should not be
// ignored. It is left untouched if the includer has already defined it.
#ifndef HALIDE_MUST_USE_RESULT
#ifdef __has_attribute
#if __has_attribute(nodiscard)
// Standard attribute spelling.
#define HALIDE_MUST_USE_RESULT [[nodiscard]]
#elif __has_attribute(warn_unused_result)
// GNU-style attribute spelling.
#define HALIDE_MUST_USE_RESULT __attribute__((warn_unused_result))
#else
// Neither attribute is reported as available: expand to nothing.
#define HALIDE_MUST_USE_RESULT
#endif
#else
// The compiler has no __has_attribute at all: expand to nothing.
#define HALIDE_MUST_USE_RESULT
#endif
#endif
// HALIDE_FUNCTION_ATTRS lets the includer attach extra attributes to the
// generated entry points; it expands to nothing unless defined beforehand.
#ifndef HALIDE_FUNCTION_ATTRS
#define HALIDE_FUNCTION_ATTRS
#endif
#ifdef __cplusplus
extern "C" {
#endif
/** Entry point of the generated conv3x3 pipeline. Takes the input, mask
 * and output buffers and returns an int status (presumably 0 on success,
 * as is the Halide convention -- confirm against the generator docs). */
HALIDE_FUNCTION_ATTRS
int conv3x3(struct halide_buffer_t *_input_buffer, struct halide_buffer_t *_mask_buffer, struct halide_buffer_t *_output_buffer);
/** Variant of conv3x3 that receives its arguments packed into an array of
 * pointers. The metadata returned by conv3x3_metadata is what is used to
 * construct such calls. */
HALIDE_FUNCTION_ATTRS
int conv3x3_argv(void **args);
/** Returns the metadata describing the arguments of the generated function.
 * Declared with (void) so that C compilers see a real prototype; an empty
 * parameter list means "unspecified arguments" in C before C23. */
HALIDE_FUNCTION_ATTRS
const struct halide_filter_metadata_t *conv3x3_metadata(void);
#ifdef __cplusplus
} // extern "C"
#endif
// The generated object file that goes with this header
// includes a full copy of the Halide runtime so that it
// can be used standalone. Declarations for the functions
// in the Halide runtime are below.
//
// The runtime is defined using weak linkage, so it is legal
// to link multiple Halide-generated object files together,
// or to clobber any of these functions with your own
// definition.
//
// To generate an object file without a full copy of the
// runtime, use the -no_runtime target flag. To generate a
// standalone Halide runtime to use with such object files
// use the -r flag with any Halide generator binary, e.g.:
// $ ./my_generator -r halide_runtime -o . target=host
#ifndef HALIDE_HALIDERUNTIME_H
#define HALIDE_HALIDERUNTIME_H
#ifndef COMPILING_HALIDE_RUNTIME
#ifdef __cplusplus
#include <cstddef>
#include <cstdint>
#include <cstring>
#else
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#endif
#else
#include "runtime_internal.h"
#endif
#ifdef __cplusplus
// Forward declare type to allow naming typed handles.
// See Type.h for documentation.
template<typename T>
struct halide_handle_traits;
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Compiler-specific spellings for forcing or forbidding inlining.
// NOTE(review): the non-MSVC HALIDE_ALWAYS_INLINE expansion below already
// contains the `inline` keyword itself.
#ifdef _MSC_VER
// Note that (for MSVC) you should not use "inline" along with HALIDE_ALWAYS_INLINE;
// it is not necessary, and may produce warnings for some build configurations.
#define HALIDE_ALWAYS_INLINE __forceinline
#define HALIDE_NEVER_INLINE __declspec(noinline)
#else
// Note that (for Posixy compilers) you should always use "inline" along with HALIDE_ALWAYS_INLINE;
// otherwise some corner-case scenarios may erroneously report link errors.
#define HALIDE_ALWAYS_INLINE inline __attribute__((always_inline))
#define HALIDE_NEVER_INLINE __attribute__((noinline))
#endif
// Runtime-header copy of the HALIDE_MUST_USE_RESULT definition. In this
// combined header the macro has already been defined by the identical block
// near the top of the file (every branch there defines it), so this copy
// only takes effect when the runtime declarations are used on their own.
#ifndef HALIDE_MUST_USE_RESULT
#ifdef __has_attribute
#if __has_attribute(nodiscard)
// C++17 or later
#define HALIDE_MUST_USE_RESULT [[nodiscard]]
#elif __has_attribute(warn_unused_result)
// Clang/GCC
#define HALIDE_MUST_USE_RESULT __attribute__((warn_unused_result))
#else
// Neither attribute is reported as available: expand to nothing.
#define HALIDE_MUST_USE_RESULT
#endif
#else
// The compiler has no __has_attribute at all: expand to nothing.
#define HALIDE_MUST_USE_RESULT
#endif
#endif
/** \file
*
* This file declares the routines used by Halide internally in its
* runtime. On platforms that support weak linking, these can be
* replaced with user-defined versions by defining an extern "C"
* function with the same name and signature.
*
* When doing Just In Time (JIT) compilation, methods on the Func being
* compiled must be called instead. The corresponding methods are
* documented below.
*
* All of these functions take a "void *user_context" parameter as their
* first argument; if the Halide kernel that calls back to any of these
* functions has been compiled with the UserContext feature set on its Target,
* then the value of that pointer passed from the code that calls the
* Halide kernel is piped through to the function.
*
* Some of these are also useful to call when using the default
* implementation. E.g. halide_shutdown_thread_pool.
*
* Note that even on platforms with weak linking, some linker setups
* may not respect the override you provide. E.g. if the override is
* in a shared library and the halide object files are linked directly
* into the output, the builtin versions of the runtime functions will
* be called. See your linker documentation for more details. On
* Linux, LD_DYNAMIC_WEAK=1 may help.
*
*/
// Forward-declare to suppress warnings if compiling as C.
struct halide_buffer_t;
/** Emit a message on stderr. Tracing, print, and print_when calls are
 * routed through here, and the default halide_error calls it too. In
 * JITed code it can be replaced via halide_custom_print (see
 * Func::set_custom_print); in AOT code, by providing your own
 * implementation of halide_print.
 */
// @{
extern void halide_print(void *user_context, const char *msg);
extern void halide_vprint(void *user_context, const char *msg);
extern void halide_default_print(void *user_context, const char *msg);
typedef void (*halide_print_t)(void *user_context, const char *msg);
extern halide_print_t halide_set_custom_print(halide_print_t print);
// @}
/** Invoked by Halide whenever a runtime error occurs, e.g. a failed
 * bounds check. To substitute your own handler, use
 * Func::set_error_handler in JITed code, or call
 * halide_set_error_handler in AOT code. On platforms that support weak
 * linking (i.e. not Windows), AOT code may alternatively just define
 * its own halide_error.
 */
// @{
extern void halide_error(void *user_context, const char *msg);
extern void halide_default_error(void *user_context, const char *msg);
typedef void (*halide_error_handler_t)(void *user_context, const char *msg);
extern halide_error_handler_t halide_set_error_handler(halide_error_handler_t handler);
// @}
/** Portable mutex. Callers must zero-initialize it, and every
 * implementation has to interpret an all-zero value as an unlocked
 * mutex with no waiters, etc.
 */
struct halide_mutex {
    uintptr_t _private[1];
};
/** Portable condition variable. It has to be initialized to 0. */
struct halide_cond {
    uintptr_t _private[1];
};
/** A basic set of mutex and condition variable functions, which call
* platform-specific code for mutual exclusion. Equivalent to the POSIX
* calls (presumably pthread_mutex_lock/unlock and
* pthread_cond_signal/broadcast/wait -- confirm against the runtime
* implementation). All of them return void, so no failure status is
* reported to the caller. */
//@{
extern void halide_mutex_lock(struct halide_mutex *mutex);
extern void halide_mutex_unlock(struct halide_mutex *mutex);
extern void halide_cond_signal(struct halide_cond *cond);
extern void halide_cond_broadcast(struct halide_cond *cond);
extern void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex);
//@}
/** Functions for constructing/destroying/locking/unlocking arrays of mutexes.
* struct halide_mutex_array is only forward-declared here, so it is opaque
* to users of this header.
*
* NOTE(review): sz is presumably the number of mutexes in the array and
* entry the index of the mutex to operate on; the meaning of the int
* results of lock/unlock is not stated here -- confirm against the runtime
* implementation.
* NOTE(review): unlike its siblings, halide_mutex_array_destroy takes a
* user_context and receives the array as an untyped void pointer. */
struct halide_mutex_array;
//@{
extern struct halide_mutex_array *halide_mutex_array_create(int sz);
extern void halide_mutex_array_destroy(void *user_context, void *array);
extern int halide_mutex_array_lock(struct halide_mutex_array *array, int entry);
extern int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry);
//@}
/** Define halide_do_par_for to replace the default thread pool
 * implementation. halide_shutdown_thread_pool can also be called to
 * release resources used by the default thread pool on platforms
 * where it makes sense. See Func::set_custom_do_task and
 * Func::set_custom_do_par_for. halide_do_par_for should return zero if
 * all the jobs return zero, or an arbitrarily chosen return value from
 * one of the jobs otherwise.
 *
 * halide_shutdown_thread_pool is declared with (void) so that C
 * compilers see a real prototype; an empty parameter list means
 * "unspecified arguments" in C before C23.
 */
//@{
typedef int (*halide_task_t)(void *user_context, int task_number, uint8_t *closure);
extern int halide_do_par_for(void *user_context,
                             halide_task_t task,
                             int min, int size, uint8_t *closure);
extern void halide_shutdown_thread_pool(void);
//@}
/** Install a custom implementation of the parallel for loop. The
 * do_par_for handler that was previously in place is returned. */
typedef int (*halide_do_par_for_t)(void *user_context, halide_task_t task,
                                   int min, int size, uint8_t *closure);
extern halide_do_par_for_t halide_set_custom_do_par_for(halide_do_par_for_t do_par_for);
/** Opaque storage for a semaphore. The task system uses it for async
 * tasks. */
struct halide_semaphore_t {
    uint64_t _private[2];
};
/** Pairs a semaphore with the number of items that have to be acquired
 * from it. halide_parallel_task_t refers to arrays of these. */
struct halide_semaphore_acquire_t {
    struct halide_semaphore_t *semaphore;
    int count;
};
/** Semaphore operations and the matching function-pointer types.
 * NOTE(review): n is presumably the number of items to initialize with,
 * release, or try to acquire -- confirm against the runtime
 * implementation. */
extern int halide_semaphore_init(struct halide_semaphore_t *sema, int n);
extern int halide_semaphore_release(struct halide_semaphore_t *sema, int n);
extern bool halide_semaphore_try_acquire(struct halide_semaphore_t *sema, int n);
typedef int (*halide_semaphore_init_t)(struct halide_semaphore_t *sema, int n);
typedef int (*halide_semaphore_release_t)(struct halide_semaphore_t *sema, int n);
typedef bool (*halide_semaphore_try_acquire_t)(struct halide_semaphore_t *sema, int n);
/** A task representing a serial for loop evaluated over some range.
* Note that task_parent is a pass-through argument that should be
* passed to any dependent tasks that are invoked using halide_do_parallel_tasks
* underneath this call. */
typedef int (*halide_loop_task_t)(void *user_context, int min, int extent,
uint8_t *closure, void *task_parent);
/** Describes one parallel task handed to halide_do_parallel_tasks.
 * A task may itself call halide_do_parallel_tasks recursively, and
 * semaphores can create complex dependencies between tasks that look
 * unrelated. Anyone writing a custom task system must take care to
 * avoid potential deadlock; carefully respecting the static metadata
 * stored at the end of this struct is how to do that. */
struct halide_parallel_task_t {
    // Function to invoke. It receives a user context, a min and an
    // extent, a closure, and a pass-through argument for the task system.
    halide_loop_task_t fn;

    // Closure handed to fn.
    uint8_t *closure;

    // Name of the function being called; used for debugging only.
    const char *name;

    // Semaphores that have to be acquired before fn is called. They
    // must be reacquired for every call that is made.
    struct halide_semaphore_acquire_t *semaphores;
    int num_semaphores;

    // Full range that fn should be called over. The range may be cut
    // into slices, with fn called multiple times.
    int min;
    int extent;

    // Static metadata that guards against unbounded resource usage
    // and deadlock.
    //
    // min_threads is the minimum number of execution contexts (call
    // stacks or threads) fn needs in order to run to completion. It
    // can exceed one when nested parallelism contains internal
    // producer-consumer relationships (calling fn recursively spawns
    // and blocks on parallel sub-tasks that talk to each other via
    // semaphores). A parallel runtime that calls fn while fewer than
    // this many threads are idle may have to create more threads to
    // finish the task; otherwise it risks deadlock by committing all
    // threads to tasks that cannot complete without more.
    //
    // FIXME: extern stages are assumed to need only a single thread
    // to complete. When the extern stage is itself a Halide pipeline,
    // that may be an underestimate.
    int min_threads;

    // When true, calls to fn must happen in serial order from min to
    // min+extent-1, with only one executing at a time. When false,
    // any order is fine, and so is concurrency.
    bool serial;
};
/** Enqueue some number of the tasks described above and wait for them
* to complete. While waiting, the calling thread assists with either
* the tasks enqueued, or other non-blocking tasks in the task
* system. Note that task_parent should be NULL for top-level calls
* and the pass-through argument if this call is being made from
* another task. */
extern int halide_do_parallel_tasks(void *user_context, int num_tasks,
struct halide_parallel_task_t *tasks,
void *task_parent);
/** Even when the default do_par_for is kept, a custom handler can be
 * installed to run each individual task. The setter returns the
 * handler that was previously in place. */
//@{
typedef int (*halide_do_task_t)(void *user_context, halide_task_t f, int idx,
                                uint8_t *closure);
extern halide_do_task_t halide_set_custom_do_task(halide_do_task_t do_task);
extern int halide_do_task(void *user_context, halide_task_t f, int idx,
                          uint8_t *closure);
//@}
/** Counterpart of do_task that is called for loop tasks. The default
 * behavior is to call the loop task with the same arguments. */
// @{
typedef int (*halide_do_loop_task_t)(void *user_context, halide_loop_task_t f,
                                     int min, int extent,
                                     uint8_t *closure, void *task_parent);
extern halide_do_loop_task_t halide_set_custom_do_loop_task(halide_do_loop_task_t do_task);
extern int halide_do_loop_task(void *user_context, halide_loop_task_t f,
                               int min, int extent,
                               uint8_t *closure, void *task_parent);
//@}
/** Provide an entire custom tasking runtime via function
* pointers. Note that do_task and semaphore_try_acquire are only ever
* called by halide_default_do_par_for and
* halide_default_do_parallel_tasks, so it's only necessary to provide
* those if you are mixing in the default implementations of
* do_par_for and do_parallel_tasks. */
// @{
typedef int (*halide_do_parallel_tasks_t)(void *, int, struct halide_parallel_task_t *,
void *task_parent);
extern void halide_set_custom_parallel_runtime(
halide_do_par_for_t,
halide_do_task_t,
halide_do_loop_task_t,
halide_do_parallel_tasks_t,
halide_semaphore_init_t,
halide_semaphore_try_acquire_t,
halide_semaphore_release_t);
// @}
/** Default implementations of the parallel runtime functions. */
// @{
extern int halide_default_do_par_for(void *user_context,
                                     halide_task_t task,
                                     int min, int size, uint8_t *closure);
extern int halide_default_do_parallel_tasks(void *user_context,
                                            int num_tasks,
                                            struct halide_parallel_task_t *tasks,
                                            void *task_parent);
extern int halide_default_do_task(void *user_context, halide_task_t f, int idx,
                                  uint8_t *closure);
extern int halide_default_do_loop_task(void *user_context, halide_loop_task_t f,
                                       int min, int extent,
                                       uint8_t *closure, void *task_parent);
extern int halide_default_semaphore_init(struct halide_semaphore_t *sema, int n);
extern int halide_default_semaphore_release(struct halide_semaphore_t *sema, int n);
extern bool halide_default_semaphore_try_acquire(struct halide_semaphore_t *sema, int n);
// @}
/** Opaque thread handle; only forward-declared in this header. */
struct halide_thread;
/** Start a new thread that runs f(closure). The returned handle exists
 * so the thread can be joined, and joining is required in order to
 * clean up any resources associated with the thread. */
extern struct halide_thread *halide_spawn_thread(void (*f)(void *), void *closure);
/** Join a thread previously returned by halide_spawn_thread. */
extern void halide_join_thread(struct halide_thread *thread);
/** Set the number of threads used by Halide's thread pool. Returns
* the old number.
*
* n < 0 : error condition
* n == 0 : use a reasonable system default (typically, number of cpus online).
* n == 1 : use exactly one thread; this will always enforce serial execution
* n > 1 : use a pool of exactly n threads.
*
* (Note that this is only guaranteed when using the default implementations
* of halide_do_par_for(); custom implementations may completely ignore values
* passed to halide_set_num_threads().)
*
* NOTE(review): what is returned in the n < 0 error case is not stated
* here -- confirm against the runtime implementation.
*/
extern int halide_set_num_threads(int n);
/** Memory allocation hooks called by Halide. In AOT code they can be
 * replaced with halide_set_custom_malloc and halide_set_custom_free,
 * or, on platforms that support weak linking, by simply defining
 * these functions yourself. JIT-compiled code should use
 * Func::set_custom_allocator instead.
 *
 * An override that wants to fall back on the stock behavior can call
 * halide_default_malloc/free.
 *
 * halide_malloc has to return a pointer aligned to the maximum
 * meaningful alignment of the platform for the purpose of vector
 * loads and stores. The default implementation uses 32-byte
 * alignment, which is safe for arm and x86. In addition, reading at
 * least 8 bytes before the start and beyond the end must be safe.
 */
//@{
extern void *halide_malloc(void *user_context, size_t x);
extern void halide_free(void *user_context, void *ptr);
extern void *halide_default_malloc(void *user_context, size_t x);
extern void halide_default_free(void *user_context, void *ptr);
typedef void *(*halide_malloc_t)(void *user_context, size_t x);
typedef void (*halide_free_t)(void *user_context, void *ptr);
extern halide_malloc_t halide_set_custom_malloc(halide_malloc_t user_malloc);
extern halide_free_t halide_set_custom_free(halide_free_t user_free);
//@}
/** Halide calls these functions to interact with the underlying
* system runtime functions. To replace in AOT code on platforms that
* support weak linking, define these functions yourself, or use
* the halide_set_custom_get_symbol(), halide_set_custom_load_library()
* and halide_set_custom_get_library_symbol() functions. In JIT-compiled
* code, use JITSharedRuntime::set_default_handlers().
*
* halide_load_library and halide_get_library_symbol are equivalent to
* dlopen and dlsym. halide_get_symbol(sym) is equivalent to
* dlsym(RTLD_DEFAULT, sym).
*
* NOTE(review): each halide_set_custom_* setter presumably returns the
* previously installed handler, as documented for
* halide_set_custom_do_par_for -- confirm against the runtime
* implementation.
*/
//@{
extern void *halide_get_symbol(const char *name);
extern void *halide_load_library(const char *name);
extern void *halide_get_library_symbol(void *lib, const char *name);
extern void *halide_default_get_symbol(const char *name);
extern void *halide_default_load_library(const char *name);
extern void *halide_default_get_library_symbol(void *lib, const char *name);
typedef void *(*halide_get_symbol_t)(const char *name);
typedef void *(*halide_load_library_t)(const char *name);
typedef void *(*halide_get_library_symbol_t)(void *lib, const char *name);
extern halide_get_symbol_t halide_set_custom_get_symbol(halide_get_symbol_t user_get_symbol);
extern halide_load_library_t halide_set_custom_load_library(halide_load_library_t user_load_library);
extern halide_get_library_symbol_t halide_set_custom_get_library_symbol(halide_get_library_symbol_t user_get_library_symbol);
//@}
/** Called when debug_to_file is used inside %Halide code. See
* Func::debug_to_file for how this is called.
*
* Cannot be replaced in JITted code at present.
*
* NOTE(review): the meaning of type_code and of the int32_t result is
* not documented here -- see the runtime implementation.
*/
extern int32_t halide_debug_to_file(void *user_context, const char *filename,
int32_t type_code,
struct halide_buffer_t *buf);
/** Type codes of the halide type system: ints, unsigned ints, floats
 * (each of various bit-widths), or a handle (which is always 64-bits).
 * The int/uint/float codes do not imply a specific bit width; the
 * width is expected to be encoded in a separate value.
 *
 * When compiled as C++11 or later the enum is given uint8_t as its
 * underlying type.
 */
typedef enum halide_type_code_t
#if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
    : uint8_t
#endif
{
    halide_type_int = 0,     ///< signed integers
    halide_type_uint = 1,    ///< unsigned integers
    halide_type_float = 2,   ///< IEEE floating point numbers
    halide_type_handle = 3,  ///< opaque pointer type (void *)
    halide_type_bfloat = 4,  ///< floating point numbers in the bfloat format
} halide_type_code_t;
// HALIDE_ATTRIBUTE_ALIGN(x) requests x-byte alignment for a declaration.
// __attribute__ may be written before or after a declaration, whereas
// __declspec is apparently accepted only before it, so place the macro
// in front of the declaration it applies to.
#if !defined(HALIDE_ATTRIBUTE_ALIGN)
#if defined(_MSC_VER)
#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))
#else
#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
/** A runtime tag for a type in the halide type system. Can be ints,
* unsigned ints, or floats of various bit-widths (the 'bits'
* field). Can also be vectors of the same (by setting the 'lanes'
* field to something larger than one). This struct should be
* exactly 32-bits in size. */
struct halide_type_t {
/** The basic type code: signed integer, unsigned integer, or floating point. */
#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
HALIDE_ATTRIBUTE_ALIGN(1)
halide_type_code_t code; // halide_type_code_t
#else
HALIDE_ATTRIBUTE_ALIGN(1)
uint8_t code; // halide_type_code_t
#endif
/** The number of bits of precis
halide hexagon
最新推荐文章于 2023-10-05 17:15:02 发布
本文深入探讨了Halide编译框架与Hexagon DSP的集成,讲解如何使用C++和Python进行低级图像处理优化,以及如何利用Hexagon的硬件加速能力提升性能。同时,文章还涵盖了C、C++和Python在Halide库中的应用和交互。
摘要由CSDN通过智能技术生成