Skip to content

Commit cd8b59d

Browse files
author
Montana Low
committed
freeze all requirements and document the project requirements
1 parent c310e19 commit cd8b59d

File tree

9 files changed

+150
-57
lines changed

9 files changed

+150
-57
lines changed

packages/postgresml-python/build.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ rm "$deb_dir/release.sh"
2929
(cat ${SCRIPT_DIR}/DEBIAN/postrm | envsubst '${PGVERSION}') > "$deb_dir/DEBIAN/postrm"
3030

3131
cp ${SCRIPT_DIR}/../../pgml-extension/requirements.txt "$deb_dir/etc/postgresml-python/requirements.txt"
32-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-autogptq.txt "$deb_dir/etc/postgresml-python/requirements-autogptq.txt"
33-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-xformers.txt "$deb_dir/etc/postgresml-python/requirements-xformers.txt"
3432

3533
virtualenv --python="python$PYTHON_VERSION" "$deb_dir/var/lib/postgresml-python/pgml-venv"
3634
source "$deb_dir/var/lib/postgresml-python/pgml-venv/bin/activate"

pgml-cms/docs/resources/developer-docs/installation.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ To install the necessary Python packages into a virtual environment, use the `vi
6363
```bash
6464
virtualenv pgml-venv && \
6565
source pgml-venv/bin/activate && \
66-
pip install -r requirements.txt && \
67-
pip install -r requirements-xformers.txt --no-dependencies
66+
pip install -r requirements.txt
6867
```
6968
{% endtab %}
7069

pgml-extension/examples/multi_classification.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ LIMIT 10;
3131

3232
-- linear models
3333
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'ridge');
34-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
34+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
3737

3838
-- support vector machines
3939
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'svm');

pgml-extension/examples/transformers.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
88

99
SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}');
1010

11+
SELECT pgml.transform_stream(
12+
task => '{
13+
"task": "text-generation",
14+
"model": "TheBloke/zephyr-7B-beta-GPTQ",
15+
"model_type": "mistral",
16+
"revision": "main",
17+
"device_map": "auto"
18+
}'::JSONB,
19+
input => 'AI is going to',
20+
args => '{
21+
"max_new_tokens": 100
22+
}'::JSONB
23+
);
1124
-- BitsAndBytes support
1225
SELECT pgml.transform(
1326
task => '{

pgml-extension/requirements-autogptq.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements-xformers.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements.base.txt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# The immediate dependencies of PostgresML are maintained here.
2+
3+
# ML
4+
catboost
5+
lightgbm
6+
torch==2.0.1 # 2.1.1 breaks sentence-transformers==2.2.2
7+
torchaudio
8+
torchvision
9+
xgboost
10+
11+
# Transformers
12+
accelerate
13+
auto-gptq; sys_platform == 'linux'
14+
bitsandbytes
15+
ctransformers
16+
huggingface-hub
17+
deepspeed
18+
einops
19+
tokenizers
20+
transformers
21+
xformers; sys_platform == 'linux'
22+
23+
# Embeddings
24+
InstructorEmbedding
25+
sentence-transformers
26+
27+
# Ratings
28+
rouge
29+
sacrebleu
30+
sacremoses
31+
32+
# Utils
33+
datasets
34+
orjson
35+
langchain

pgml-extension/requirements.txt

Lines changed: 89 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,98 @@
1-
accelerate==0.22.0
2-
bitsandbytes==0.41.1
3-
catboost==1.2
1+
accelerate==0.25.0
2+
aiohttp==3.9.1
3+
aiosignal==1.3.1
4+
annotated-types==0.6.0
5+
anyio==4.1.0
6+
attrs==23.1.0
7+
bitsandbytes==0.41.3.post2
8+
catboost==1.2.2
9+
certifi==2023.11.17
10+
charset-normalizer==3.3.2
11+
click==8.1.7
12+
colorama==0.4.6
13+
contourpy==1.2.0
414
ctransformers==0.2.27
5-
datasets==2.14.5
6-
deepspeed==0.10.3
7-
huggingface-hub==0.17.1
15+
cycler==0.12.1
16+
dataclasses-json==0.6.3
17+
datasets==2.15.0
18+
deepspeed==0.12.4
19+
dill==0.3.7
20+
einops==0.7.0
21+
filelock==3.13.1
22+
fonttools==4.46.0
23+
frozenlist==1.4.0
24+
fsspec==2023.10.0
25+
graphviz==0.20.1
26+
hjson==3.1.0
27+
huggingface-hub==0.19.4
28+
idna==3.6
829
InstructorEmbedding==1.0.1
30+
Jinja2==3.1.2
31+
joblib==1.3.2
32+
jsonpatch==1.33
33+
jsonpointer==2.4
34+
kiwisolver==1.4.5
35+
langchain==0.0.349
36+
langchain-community==0.0.1
37+
langchain-core==0.0.13
38+
langsmith==0.0.69
939
lightgbm==4.1.0
10-
orjson==3.9.7
11-
pandas==2.1.0
12-
rich==13.5.2
40+
lxml==4.9.3
41+
MarkupSafe==2.1.3
42+
marshmallow==3.20.1
43+
matplotlib==3.8.2
44+
mpmath==1.3.0
45+
multidict==6.0.4
46+
multiprocess==0.70.15
47+
mypy-extensions==1.0.0
48+
networkx==3.2.1
49+
ninja==1.11.1.1
50+
nltk==3.8.1
51+
numpy==1.26.2
52+
orjson==3.9.10
53+
packaging==23.2
54+
pandas==2.1.4
55+
Pillow==10.1.0
56+
plotly==5.18.0
57+
portalocker==2.8.2
58+
psutil==5.9.6
59+
py-cpuinfo==9.0.0
60+
pyarrow==14.0.1
61+
pyarrow-hotfix==0.6
62+
pydantic==2.5.2
63+
pydantic_core==2.14.5
64+
pynvml==11.5.0
65+
pyparsing==3.1.1
66+
python-dateutil==2.8.2
67+
pytz==2023.3.post1
68+
PyYAML==6.0.1
69+
regex==2023.10.3
70+
requests==2.31.0
1371
rouge==1.0.1
14-
sacrebleu==2.3.1
15-
sacremoses==0.0.53
16-
scikit-learn==1.3.0
17-
sentencepiece==0.1.99
72+
sacrebleu==2.3.3
73+
sacremoses==0.1.1
74+
safetensors==0.4.1
75+
scikit-learn==1.3.2
76+
scipy==1.11.4
1877
sentence-transformers==2.2.2
19-
tokenizers==0.14.1
78+
sentencepiece==0.1.99
79+
six==1.16.0
80+
sniffio==1.3.0
81+
SQLAlchemy==2.0.23
82+
sympy==1.12
83+
tabulate==0.9.0
84+
tenacity==8.2.3
85+
threadpoolctl==3.2.0
86+
tokenizers==0.15.0
2087
torch==2.0.1
2188
torchaudio==2.0.2
2289
torchvision==0.15.2
2390
tqdm==4.66.1
24-
transformers==4.34.1
25-
xgboost==2.0.0
26-
langchain==0.0.287
27-
einops==0.6.1
28-
pynvml==11.5.0
29-
transformers-stream-generator==0.0.4
30-
optimum==1.13.2
31-
peft==0.6.2
32-
pyarrow==11.0.0
91+
transformers==4.36.0
92+
typing-inspect==0.9.0
93+
typing_extensions==4.9.0
94+
tzdata==2023.3
95+
urllib3==2.1.0
96+
xgboost==2.0.2
97+
xxhash==3.4.1
98+
yarl==1.9.4

pgml-extension/src/bindings/transformers/transformers.py

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import shutil
44
import time
55
import queue
6-
import sys
76

87
import datasets
98
from InstructorEmbedding import INSTRUCTOR
@@ -42,7 +41,6 @@
4241
Trainer,
4342
)
4443
from threading import Thread
45-
from typing import Optional
4644

4745
__cache_transformer_by_model_id = {}
4846
__cache_sentence_transformer_by_name = {}
@@ -393,42 +391,28 @@ def transform(task, args, inputs, stream=False):
393391
return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode()
394392

395393

396-
def create_embedding(transformer):
394+
def embed(transformer, inputs, kwargs):
395+
kwargs = orjson.loads(kwargs)
396+
ensure_device(kwargs)
397397
instructor = transformer.startswith("hkunlp/instructor")
398-
klass = INSTRUCTOR if instructor else SentenceTransformer
399-
return klass(transformer)
400398

399+
# Cache the model
400+
if transformer not in __cache_sentence_transformer_by_name:
401+
klass = INSTRUCTOR if instructor else SentenceTransformer
402+
__cache_sentence_transformer_by_name[transformer] = klass(transformer)
403+
model = __cache_sentence_transformer_by_name[transformer]
401404

402-
def embed_using(model, transformer, inputs, kwargs):
403-
if isinstance(kwargs, str):
404-
kwargs = orjson.loads(kwargs)
405-
406-
instructor = transformer.startswith("hkunlp/instructor")
405+
# Handle instruction encoding
407406
if instructor:
408407
texts_with_instructions = []
409408
instruction = kwargs.pop("instruction")
410409
for text in inputs:
411410
texts_with_instructions.append([instruction, text])
412-
413411
inputs = texts_with_instructions
414412

415413
return model.encode(inputs, **kwargs)
416414

417415

418-
def embed(transformer, inputs, kwargs):
419-
kwargs = orjson.loads(kwargs)
420-
421-
ensure_device(kwargs)
422-
423-
if transformer not in __cache_sentence_transformer_by_name:
424-
__cache_sentence_transformer_by_name[transformer] = create_embedding(
425-
transformer
426-
)
427-
model = __cache_sentence_transformer_by_name[transformer]
428-
429-
return embed_using(model, transformer, inputs, kwargs)
430-
431-
432416
def clear_gpu_cache(memory_usage: None):
433417
if not torch.cuda.is_available():
434418
raise PgMLException(f"No GPU available")

0 commit comments

Comments
 (0)