Here is a list of all documented files with brief descriptions:
[detail level 1 2 3 4]
▼ kernels | |
▼ metal | |
▼ include | |
MetalMatmulInt4.hpp | |
opParams.h | |
matmul_metal_int4_imp.h | |
matmul.h | |
pthread_pool.h | |
▼ llm | |
▼ include | |
▼ nn_modules | |
Fp32CLIPAttention.h | |
Fp32CLIPEncoder.h | |
Fp32CLIPEncoderLayer.h | |
Fp32CLIPVisionTransformer.h | |
Fp32GPTBigCodeAttention.h | |
Fp32GPTBigCodeDecoder.h | |
Fp32GPTBigCodeDecoderLayer.h | |
Fp32GPTBigCodeForCausalLM.h | |
Fp32llamaAttention.h | |
Fp32llamaDecoder.h | |
Fp32llamaDecoderLayer.h | |
Fp32llamaForCausalLM.h | |
Fp32OPTAttention.h | |
Fp32OPTDecoder.h | |
Fp32OPTDecoderLayer.h | |
Fp32OPTForCausalLM.h | |
Int4GPTBigCodeAttention.h | |
Int4GPTBigCodeDecoder.h | |
Int4GPTBigCodeDecoderLayer.h | |
Int4GPTBigCodeForCausalLM.h | |
Int4llamaAttention.h | |
Int4llamaDecoder.h | |
Int4llamaDecoderLayer.h | |
Int4llamaForCausalLM.h | |
Int4OPTAttention.h | |
Int4OPTDecoder.h | |
Int4OPTDecoderLayer.h | |
Int4OPTForCausalLM.h | |
Int8OPTAttention.h | |
Int8OPTDecoder.h | |
Int8OPTDecoderLayer.h | |
OPTForCausalLM.h | |
▼ ops | |
arg_max.h | |
BMM_F32T.h | |
BMM_S8T_S8N_F32T.h | |
BMM_S8T_S8N_S8T.h | |
Conv2D.h | |
Embedding.h | |
Gelu.h | |
LayerNorm.h | |
LayerNormQ.h | |
linear.h | |
LlamaRMSNorm.h | |
RotaryPosEmb.h | |
W8A8B8O8Linear.h | |
W8A8B8O8LinearReLU.h | |
W8A8BFP32OFP32Linear.h | |
common.h | |
Generate.h | |
GPTBigCodeTokenizer.h | |
interface.h | |
LLaMATokenizer.h | |
model.h | |
operators.h | |
OPTTokenizer.h | |
profiler.h | |
stb_image.h | |
utils.h | |