---
# any-embedding configuration
# Define all models to deploy as embedding workers.
gateway:
port: 8080
models:
# Each entry becomes a separate Cloud Run worker service.
# "name" is the model ID clients use in the API request (model field).
# "model" is the HuggingFace / sentence-transformers model identifier.
# "type": "text" (default) or "image" (multimodal, e.g. CLIP).
# Clients are responsible for adding any model-specific prefixes to their input.
#
  # Resource overrides (optional, defaults in terraform variables):
  #   cpu, memory, gpu (bool), max_instances, min_instances, gated (bool)
  # Dependency pins (optional, per model):
  #   sentence_transformers_version, transformers_version
- name: "bge-large-en-v1.5"
model: "BAAI/bge-large-en-v1.5"
type: "text"
max_tokens: 512
dimensions: 1024
- name: "gte-large-en-v1.5"
model: "Alibaba-NLP/gte-large-en-v1.5"
type: "text"
max_tokens: 8192
dimensions: 1024
sentence_transformers_version: "2.7.0"
transformers_version: "4.39.1"
- name: "e5-large-v2"
model: "intfloat/e5-large-v2"
type: "text"
max_tokens: 512
dimensions: 1024
- name: "qwen3-embedding-0.6b"
model: "Qwen/Qwen3-Embedding-0.6B"
type: "text"
max_tokens: 32768
dimensions: 1024
gpu: true
cpu: "4"
memory: "16Gi"
max_instances: 1
- name: "gte-multilingual-base"
model: "Alibaba-NLP/gte-multilingual-base"
type: "text"
max_tokens: 8192
dimensions: 768
sentence_transformers_version: "2.7.0"
transformers_version: "4.39.1"
gpu: true
cpu: "4"
memory: "16Gi"
max_instances: 1
- name: "multilingual-e5-large-instruct"
model: "intfloat/multilingual-e5-large-instruct"
type: "text"
max_tokens: 512
dimensions: 1024
gpu: true
cpu: "4"
memory: "16Gi"
max_instances: 1
# Gated model – requires HF_TOKEN with accepted license
- name: "embeddinggemma-300m"
model: "google/embeddinggemma-300m"
type: "text"
gated: true
max_tokens: 2048
dimensions: 768
gpu: true
cpu: "4"
memory: "16Gi"
max_instances: 1
# Jina v2 bilingual: EN/ZH – gated model, requires HF_TOKEN
  # Jina v2 custom code is incompatible with transformers>=4.46; pin an older version.
- name: "jina-embeddings-v2-base-zh"
model: "jinaai/jina-embeddings-v2-base-zh"
type: "text"
gated: true
max_tokens: 8192
dimensions: 768
memory: "8Gi"
transformers_version: "4.44.2"
# Jina v2 bilingual: DE/EN
- name: "jina-embeddings-v2-base-de"
model: "jinaai/jina-embeddings-v2-base-de"
type: "text"
max_tokens: 8192
dimensions: 768
memory: "8Gi"
transformers_version: "4.44.2"
# Jina v2 bilingual: EN/ES
- name: "jina-embeddings-v2-base-es"
model: "jinaai/jina-embeddings-v2-base-es"
type: "text"
max_tokens: 8192
dimensions: 768
memory: "8Gi"
transformers_version: "4.44.2"
# Jina v2 code: EN + 30 programming languages
- name: "jina-embeddings-v2-base-code"
model: "jinaai/jina-embeddings-v2-base-code"
type: "text"
max_tokens: 8192
dimensions: 768
transformers_version: "4.44.2"
# Example: multimodal image+text model with GPU
# - name: "clip-vit-large-patch14"
# model: "openai/clip-vit-large-patch14"
# type: "image"
# dimensions: 768
# gpu: true
# cpu: "4"
# memory: "16Gi"