1#include "models.h"
2
3llm_build_wavtokenizer_dec::llm_build_wavtokenizer_dec(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
4 ggml_tensor * cur;
5 ggml_tensor * inpL;
6
7 inpL = build_inp_embd(model.tok_embd);
8
9 cur = ggml_cont(ctx0, ggml_transpose(ctx0, inpL));
10
11 cur = ggml_conv_1d_ph(ctx0, model.conv1d, cur, 1, 1);
12 cur = ggml_add(ctx0, cur, model.conv1d_b);
13
14 // posnet
15 for (uint32_t il = 0; il < hparams.posnet.n_layer; ++il) {
16 const auto & layer = model.layers[il].posnet;
17
18 inpL = cur;
19
20 switch (il) {
21 case 0:
22 case 1:
23 case 3:
24 case 4:
25 {
26 cur = build_norm(cur,
27 layer.norm1,
28 layer.norm1_b,
29 LLM_NORM_GROUP, 0);
30
31 cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);
32
33 cur = ggml_conv_1d_ph(ctx0, layer.conv1, cur, 1, 1);
34 cur = ggml_add(ctx0, cur, layer.conv1_b);
35
36 cur = build_norm(cur,
37 layer.norm2,
38 layer.norm2_b,
39 LLM_NORM_GROUP, 0);
40
41 cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);
42
43 cur = ggml_conv_1d_ph(ctx0, layer.conv2, cur, 1, 1);
44 cur = ggml_add(ctx0, cur, layer.conv2_b);
45
46 cur = ggml_add(ctx0, cur, inpL);
47 } break;
48 case 2:
49 {
50 cur = build_norm(cur,
51 layer.attn_norm,
52 layer.attn_norm_b,
53 LLM_NORM_GROUP, 0);
54
55 ggml_tensor * q;
56 ggml_tensor * k;
57 ggml_tensor * v;
58
59 q = ggml_conv_1d_ph(ctx0, layer.attn_q, cur, 1, 1);
60 k = ggml_conv_1d_ph(ctx0, layer.attn_k, cur, 1, 1);
61 v = ggml_conv_1d_ph(ctx0, layer.attn_v, cur, 1, 1);
62
63 q = ggml_add(ctx0, q, layer.attn_q_b);
64 k = ggml_add(ctx0, k, layer.attn_k_b);
65 v = ggml_add(ctx0, v, layer.attn_v_b);
66
67 q = ggml_cont(ctx0, ggml_transpose(ctx0, q));
68 k = ggml_cont(ctx0, ggml_transpose(ctx0, k));
69
70 ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);
71
72 kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(hparams.posnet.n_embd)), 0.0f);
73
74 cur = ggml_mul_mat(ctx0, kq, v);
75
76 cur = ggml_conv_1d_ph(ctx0, layer.attn_o, cur, 1, 1);
77 cur = ggml_add(ctx0, cur, layer.attn_o_b);
78
79 cur = ggml_add(ctx0, cur, inpL);
80 } break;
81 case 5:
82 {
83 cur = build_norm(cur,
84 layer.norm,
85 layer.norm_b,
86 LLM_NORM_GROUP, 0);
87 } break;
88 default: GGML_ABORT("unknown posnet layer");
89 };
90 }
91 cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
92
93 cur = build_norm(cur,
94 model.tok_norm,
95 model.tok_norm_b,
96 LLM_NORM, -1);
97
98 cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
99
100 inpL = cur;
101
102 // convnext
103 for (uint32_t il = 0; il < hparams.convnext.n_layer; ++il) {
104 const auto & layer = model.layers[il].convnext;
105
106 cur = inpL;
107
108 cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1);
109 cur = ggml_add(ctx0, cur, layer.dw_b);
110
111 cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
112
113 cur = build_norm(cur,
114 layer.norm,
115 layer.norm_b,
116 LLM_NORM, -1);
117
118 cur = build_ffn(cur,
119 layer.pw1, layer.pw1_b, NULL,
120 NULL, NULL, NULL,
121 layer.pw2, layer.pw2_b, NULL,
122 NULL,
123 LLM_FFN_GELU, LLM_FFN_SEQ, il);
124
125 cur = ggml_mul(ctx0, cur, layer.gamma);
126
127 cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
128
129 inpL = ggml_add(ctx0, cur, inpL);
130 }
131 cur = inpL;
132
133 cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
134
135 cur = build_norm(cur,
136 model.output_norm,
137 model.output_norm_b,
138 LLM_NORM, -1);
139
140 // lm_head
141 cur = build_lora_mm(model.output, cur);
142
143 cur = ggml_add(ctx0, cur, model.output_b);
144
145 cb(cur, "result_embd", -1);
146 res->t_embd = cur;
147
148 ggml_build_forward_expand(gf, cur);
149}