TinyChatEngine
Loading...
Searching...
No Matches
utils.h
1#ifndef UTILS_H
2#define UTILS_H
3
#include <math.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <stdexcept>
#include <string>
#include <typeinfo>

#include "half.hpp"  // Third-party header
#include "profiler.h"
12
// Always-on profiling hooks: each macro forwards its argument(s) unchanged to
// the Profiler singleton obtained through Profiler::getInstance().
//   STATS_START(what)        -> start(what)
//   STATS_FLOPS(what, extra) -> start(what, extra)   (two-argument start overload)
//   STATS_END(what)          -> stop(what)
// What the profiler records for each call is defined in profiler.h, not here.
#define STATS_START(what) Profiler::getInstance().start(what)
#define STATS_FLOPS(what, extra) Profiler::getInstance().start(what, extra)
#define STATS_END(what) Profiler::getInstance().stop(what)
16
// Opt-in profiling hooks. When PROFILER is defined they forward to the
// Profiler singleton exactly like the STATS_* macros; otherwise they expand to
// nothing, so their arguments are never evaluated.
#if defined(PROFILER)
#define PROFILE_START(what) Profiler::getInstance().start(what)
#define PROFILE_START_FLOPS(what, extra) Profiler::getInstance().start(what, extra)
#define PROFILE_END(what) Profiler::getInstance().stop(what)
#else
#define PROFILE_START(what)
#define PROFILE_START_FLOPS(what, extra)
#define PROFILE_END(what)
#endif
26
// Numeric tolerance constants; each expands to a double literal.
// NOTE(review): the names suggest error thresholds for the comparison helpers
// declared below, but their use sites are not visible in this header - confirm.
#define MAX_SQ_ERROR_MAX 5e-6
#define ERROR_MAX 1e-9
#define INT_ERROR_MAX 1e-5
30
// Declaration only; the definition lives outside this header.
// Presumably fills `array` with `size` elements of type T read from the file
// at `path` - confirm against the implementation (units of `size` and the
// failure behaviour are not visible here).
template <typename T>
void read_to_array(const char* path, T* array, int size);
33
// check_two_equal overload set (declarations only; definitions are elsewhere).
// Every overload takes two buffers plus an element count `size` and returns
// bool. Presumably the result is true when the buffers match element-wise -
// confirm against the implementation.

// Generic form for any element type T.
template <typename T>
bool check_two_equal(T* array, T* array2, int size);

// Explicit specialization of the generic form for int8_t buffers.
template <>
bool check_two_equal<int8_t>(int8_t* array, int8_t* array2, int size);

// Non-template overloads taking an extra `error` argument.
// NOTE(review): `error` looks like an allowed tolerance; how it is applied
// (per element, mean, squared, ...) is not visible here.
bool check_two_equal(int8_t* array, int8_t* array2, int size, float error);

bool check_two_equal(float* array, float* array2, int size, float error);
// Declaration only. Takes two int8_t buffers and an element count and returns
// bool; presumably true only when all `size` elements are identical - confirm
// against the implementation.
bool check_two_exact_equal(int8_t* array, int8_t* array2, int size);
// Declaration only. Takes two float buffers of `size` elements and returns
// nothing; going by the name it presumably prints their mean squared error and
// largest difference - confirm against the implementation.
void print_MSE_max_diff(float* a, float* a2, int size);
45
// Debug printers, overloaded for int8_t, int32_t and float buffers
// (declarations only). `start_idx` defaults to 0.
// Presumably prints `k` elements of `arr` beginning at `start_idx`, labelled
// with `name` - confirm against the implementation.
// The spelling "elelment" is part of the public name and is kept so existing
// callers keep compiling.
void print_first_k_elelment(std::string name, const int8_t* arr, int k, int start_idx = 0);
void print_first_k_elelment(std::string name, const int32_t* arr, int k, int start_idx = 0);
void print_first_k_elelment(std::string name, const float* arr, int k, int start_idx = 0);
49
// Declaration only. `ptr` is taken by reference, so the callee can set it.
// This signature is the same whether or not QM_METAL is defined, so it is
// declared unconditionally.
// NOTE(review): presumably `ptr` receives a freshly allocated aligned buffer;
// the alignment and whether `size` counts bytes or elements are not visible
// here - confirm against the implementation.
template <typename T>
void allocate_aligned_memory(T*& ptr, size_t size);
57
// Declaration only. Presumably releases memory obtained from
// allocate_aligned_memory - confirm against the implementation.
void deallocate_memory(void* ptr);
59
60#ifdef QM_CUDA
61#include <cuda.h>
62#include <cuda_fp16.h>
63#include <cuda_runtime.h>
64
// Integer sizing helpers (declarations only; definitions are elsewhere).
// NOTE(review): going by the name, make_divisible_c presumably rounds `c` to a
// multiple of `divisor` - the rounding direction is not visible here.
int make_divisible_c(int c, int divisor);

// `group_size` defaults to 128 and `pack_num` to 8.
// NOTE(review): presumably returns the width of a packed zero-point tensor for
// `in_features` inputs - confirm against the implementation.
int calculate_zeros_width(int in_features, int group_size = 128, int pack_num = 8);
67
// Evaluates `call` (an expression yielding cudaError_t) exactly once. If the
// result is not cudaSuccess, prints the source location, the numeric error
// code and the cudaGetErrorString() text to stdout, then throws
// std::runtime_error naming the failing expression and its code.
//
// The status is held in a macro-private name so that an argument which itself
// mentions `err` (e.g. CHECK_CUDA(err)) is not captured by the local; a local
// called `err` would expand to the self-initialization `cudaError_t err = err;`.
// `call` is parenthesized so any expression is accepted as one initializer, and
// the enum is cast to int before being handed to "%d" / std::to_string.
//
// Needs <cstdio>, <stdexcept> and <string>. Wrapped in do { } while (0) so it
// behaves as a single statement after `if`/`else`.
#define CHECK_CUDA(call) \
    do { \
        const cudaError_t check_cuda_status_ = (call); \
        if (check_cuda_status_ != cudaSuccess) { \
            printf("Error: %s:%d, ", __FILE__, __LINE__); \
            printf("code: %d, reason: %s\n", static_cast<int>(check_cuda_status_), \
                   cudaGetErrorString(check_cuda_status_)); \
            throw std::runtime_error(std::string("CUDA error calling \"") + #call + "\", code is " + \
                                     std::to_string(static_cast<int>(check_cuda_status_))); \
        } \
    } while (0)
78
79void read_to_array_half(const char* path, half* array, int size);
80
81bool check_two_equal_cpu_gpu(half_float::half* array, half* array2, int size, float error);
82bool check_two_equal_float_half(float* array, half* array2, int size);
83bool check_two_equal_half_half(half* array, half* array2, int size);
84
// GPU-side allocation helpers (declarations only). Both take the pointer by
// reference, so the callee can modify it.
// NOTE(review): presumably the first sets `ptr` to a new allocation of `size`
// (bytes or elements - not visible here) and the second releases it; which
// CUDA allocation API is used must be confirmed against the implementation.
template <typename T>
void allocate_aligned_memory_gpu(T*& ptr, size_t size);

template <typename T>
void free_aligned_memory_gpu(T*& ptr);
90
91__global__ void float2half(float* floatArray, half* halfArray, int N);
92__global__ void half2float(half* halfArray, float* floatArray, int N);
93__global__ void half2float_merge_k_iters(half* halfArray, float* floatArray, int N, int split_k_iters);
94__global__ void merge_k_iters(half* input, half* output, int N, int split_k_iters);
95__global__ void merge_k_iters_qkv(half *input, half *output, int N, int split_k_iters);
96#endif
97
98#endif