Here is a list of all documented files with brief descriptions:
[detail level 1 2 3 4]
| ▼ kernels | |
| ▼ metal | |
| ▼ include | |
| MetalMatmulInt4.hpp | |
| opParams.h | |
| matmul_metal_int4_imp.h | |
| matmul.h | |
| pthread_pool.h | |
| ▼ llm | |
| ▼ include | |
| ▼ nn_modules | |
| Fp32CLIPAttention.h | |
| Fp32CLIPEncoder.h | |
| Fp32CLIPEncoderLayer.h | |
| Fp32CLIPVisionTransformer.h | |
| Fp32GPTBigCodeAttention.h | |
| Fp32GPTBigCodeDecoder.h | |
| Fp32GPTBigCodeDecoderLayer.h | |
| Fp32GPTBigCodeForCausalLM.h | |
| Fp32llamaAttention.h | |
| Fp32llamaDecoder.h | |
| Fp32llamaDecoderLayer.h | |
| Fp32llamaForCausalLM.h | |
| Fp32OPTAttention.h | |
| Fp32OPTDecoder.h | |
| Fp32OPTDecoderLayer.h | |
| Fp32OPTForCausalLM.h | |
| Int4GPTBigCodeAttention.h | |
| Int4GPTBigCodeDecoder.h | |
| Int4GPTBigCodeDecoderLayer.h | |
| Int4GPTBigCodeForCausalLM.h | |
| Int4llamaAttention.h | |
| Int4llamaDecoder.h | |
| Int4llamaDecoderLayer.h | |
| Int4llamaForCausalLM.h | |
| Int4OPTAttention.h | |
| Int4OPTDecoder.h | |
| Int4OPTDecoderLayer.h | |
| Int4OPTForCausalLM.h | |
| Int8OPTAttention.h | |
| Int8OPTDecoder.h | |
| Int8OPTDecoderLayer.h | |
| OPTForCausalLM.h | |
| ▼ ops | |
| arg_max.h | |
| BMM_F32T.h | |
| BMM_S8T_S8N_F32T.h | |
| BMM_S8T_S8N_S8T.h | |
| Conv2D.h | |
| Embedding.h | |
| Gelu.h | |
| LayerNorm.h | |
| LayerNormQ.h | |
| linear.h | |
| LlamaRMSNorm.h | |
| RotaryPosEmb.h | |
| W8A8B8O8Linear.h | |
| W8A8B8O8LinearReLU.h | |
| W8A8BFP32OFP32Linear.h | |
| common.h | |
| Generate.h | |
| GPTBigCodeTokenizer.h | |
| interface.h | |
| LLaMATokenizer.h | |
| model.h | |
| operators.h | |
| OPTTokenizer.h | |
| profiler.h | |
| stb_image.h | |
| utils.h | |