Skip to content

Commit 3a8716f

Browse files
authored
Rust extension - XGBoost (#286)
* Rust XGBoost POC * README * vendor * cleanup * remove submodule * readme * Vendor * readme * Cleaner * version * move files
1 parent b6de2af commit 3a8716f

File tree

1,451 files changed

+309176
-2
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,451 files changed

+309176
-2
lines changed

.editorconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ end_of_line = lf
55
insert_final_newline = true
66
charset = utf-8
77

8-
[*.py]
8+
[*.{py,rs}]
99
indent_style = space
1010
indent_size = 4
1111

pgml-extension/.dockerignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,5 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161+
162+
pgml_rust/target/

pgml-extension/Dockerfile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1+
# FROM rust:1-bullseye AS rust_builder
2+
# COPY pgml_rust /pgml_rust
3+
# WORKDIR /pgml_rust
4+
# RUN apt-get update && apt-get install -y postgresql-13 libpq-dev cmake libclang-dev
5+
# RUN cargo install cargo-pgx
6+
# RUN cargo pgx init
7+
# RUN cargo pgx package
8+
19
FROM debian:bullseye-slim
210
MAINTAINER docker@postgresml.com
311

412
RUN apt-get update
513
ARG DEBIAN_FRONTEND=noninteractive
614
ENV TZ=Etc/UTC
7-
RUN apt-get install -y postgresql-plpython3-13 python3 python3-pip postgresql-13 tzdata sudo cmake libpq-dev
15+
RUN apt-get install -y postgresql-plpython3-13 python3 python3-pip postgresql-13 tzdata sudo cmake libpq-dev libclang-dev
816

917
# Cache this, quicker
1018
RUN pip3 install xgboost sklearn diptest torch lightgbm transformers datasets sentencepiece sacremoses sacrebleu rouge
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[build]
2+
# Postgres symbols won't be available until runtime
3+
rustflags = ["-C", "link-args=-Wl,-undefined,dynamic_lookup"]

pgml-extension/pgml_rust/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
.DS_Store
2+
.idea/
3+
/target
4+
*.iml
5+
**/*.rs.bk
6+
Cargo.lock

pgml-extension/pgml_rust/Cargo.toml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[package]
2+
name = "pgml_rust"
3+
version = "0.0.0"
4+
edition = "2021"
5+
6+
[lib]
7+
crate-type = ["cdylib"]
8+
9+
[features]
10+
default = ["pg13"]
11+
pg10 = ["pgx/pg10", "pgx-tests/pg10" ]
12+
pg11 = ["pgx/pg11", "pgx-tests/pg11" ]
13+
pg12 = ["pgx/pg12", "pgx-tests/pg12" ]
14+
pg13 = ["pgx/pg13", "pgx-tests/pg13" ]
15+
pg14 = ["pgx/pg14", "pgx-tests/pg14" ]
16+
pg_test = []
17+
18+
[dependencies]
19+
pgx = "=0.4.5"
20+
xgboost = { path = "rust-xgboost" }
21+
rustlearn = "0.5"
22+
once_cell = "1"
23+
rand = "0.8"
24+
25+
[dev-dependencies]
26+
pgx-tests = "=0.4.5"
27+
28+
[profile.dev]
29+
panic = "unwind"
30+
lto = "thin"
31+
32+
[profile.release]
33+
panic = "unwind"
34+
opt-level = 3
35+
lto = "fat"
36+
codegen-units = 1

pgml-extension/pgml_rust/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Rust meet PostgresML
2+
3+
Here we have some POC code to use Rust for PostgresML.
4+
5+
## Dependencies
6+
7+
All dependencies are vendored. I downloaded XGBoost 1.6.2 and all its submodules. We're also using the `master` branch of the `xgboost` Rust crate.
8+
9+
If you haven't already, install:
10+
11+
- `cmake`
12+
- `libclang-dev`
13+
14+
## Local development
15+
16+
1. `cargo install cargo-pgx`
17+
2. `cargo pgx run`
18+
3. `DROP EXTENSION IF EXISTS pgml_rust;`
19+
4. `CREATE EXTENSION pgml_rust;`
20+
5. `SELECT pgml_train('pgml.diabetes', ARRAY['age', 'sex'], 'target');`
21+
6. `SELECT * FROM pgml_predict(ARRAY[1, 5.0]);`
22+
23+
Lots of todos, but still a decent PoC.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
comment = 'pgml_rust: Created by pgx'
2+
default_version = '@CARGO_VERSION@'
3+
module_pathname = '$libdir/pgml_rust'
4+
relocatable = false
5+
superuser = false
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Generated by Cargo
2+
# will have compiled files and executables
3+
/target/
4+
/examples/*/target/
5+
/xgboost-sys/target/
6+
7+
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
8+
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
9+
Cargo.lock
10+
11+
# These are backup files generated by rustfmt
12+
**/*.rs.bk
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[submodule "xgboost-sys/xgboost"]
2+
path = xgboost-sys/xgboost
3+
url = https://github.com/davechallis/xgboost
4+
branch = master
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
language: rust
2+
3+
os:
4+
- linux
5+
- osx
6+
7+
rust:
8+
- stable
9+
- nightly
10+
matrix:
11+
allow_failures:
12+
- rust: nightly
13+
fast_finish: true
14+
15+
cache: cargo
16+
17+
script:
18+
- cd xgboost-sys && cargo test --verbose --all
19+
- cd .. && cargo test --verbose --all
20+
- cd examples/basic && cargo run
21+
- cd ../custom_objective && cargo run
22+
- cd ../generalised_linear_model && cargo run
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# 0.1.4 (2019-03-05)
2+
3+
* `Booster::load_buffer` method added (thanks [jonathanstrong](https://github.com/jonathanstrong))
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "xgboost"
3+
version = "0.2.0"
4+
authors = ["Dave Challis <dave@suicas.net>"]
5+
license = "MIT"
6+
repository = "https://github.com/davechallis/rust-xgboost"
7+
homepage = "https://github.com/davechallis/rust-xgboost"
8+
description = "Machine learning using XGBoost"
9+
documentation = "https://docs.rs/xgboost"
10+
readme = "README.md"
11+
12+
[dependencies]
13+
xgboost-sys = { path = "xgboost-sys" }
14+
libc = "0.2"
15+
derive_builder = "0.5"
16+
log = "0.4"
17+
tempfile = "3.0"
18+
indexmap = "1.0"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2018 Dave Challis
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# rust-xgboost
2+
3+
[![Travis Build Status](https://travis-ci.com/davechallis/rust-xgboost.svg?branch=master)](https://travis-ci.com/davechallis/rust-xgboost)
4+
[![Documentation link](https://docs.rs/xgboost/badge.svg)](https://docs.rs/xgboost)
5+
6+
Rust bindings for the [XGBoost](https://xgboost.ai) gradient boosting library.
7+
8+
* [Documentation](https://docs.rs/xgboost)
9+
10+
Basic usage example:
11+
12+
```rust
13+
extern crate xgboost;
14+
15+
use xgboost::{parameters, DMatrix, Booster};
16+
17+
fn main() {
18+
// training matrix with 5 training examples and 3 features
19+
let x_train = &[1.0, 1.0, 1.0,
20+
1.0, 1.0, 0.0,
21+
1.0, 1.0, 1.0,
22+
0.0, 0.0, 0.0,
23+
1.0, 1.0, 1.0];
24+
let num_rows = 5;
25+
let y_train = &[1.0, 1.0, 1.0, 0.0, 1.0];
26+
27+
// convert training data into XGBoost's matrix format
28+
let mut dtrain = DMatrix::from_dense(x_train, num_rows).unwrap();
29+
30+
// set ground truth labels for the training matrix
31+
dtrain.set_labels(y_train).unwrap();
32+
33+
// test matrix with 1 row
34+
let x_test = &[0.7, 0.9, 0.6];
35+
let num_rows = 1;
36+
let y_test = &[1.0];
37+
let mut dtest = DMatrix::from_dense(x_test, num_rows).unwrap();
38+
dtest.set_labels(y_test).unwrap();
39+
40+
// configure objectives, metrics, etc.
41+
let learning_params = parameters::learning::LearningTaskParametersBuilder::default()
42+
.objective(parameters::learning::Objective::BinaryLogistic)
43+
.build().unwrap();
44+
45+
// configure the tree-based learning model's parameters
46+
let tree_params = parameters::tree::TreeBoosterParametersBuilder::default()
47+
.max_depth(2)
48+
.eta(1.0)
49+
.build().unwrap();
50+
51+
// overall configuration for Booster
52+
let booster_params = parameters::BoosterParametersBuilder::default()
53+
.booster_type(parameters::BoosterType::Tree(tree_params))
54+
.learning_params(learning_params)
55+
.verbose(true)
56+
.build().unwrap();
57+
58+
// specify datasets to evaluate against during training
59+
let evaluation_sets = &[(&dtrain, "train"), (&dtest, "test")];
60+
61+
// overall configuration for training/evaluation
62+
let params = parameters::TrainingParametersBuilder::default()
63+
.dtrain(&dtrain) // dataset to train with
64+
.boost_rounds(2) // number of training iterations
65+
.booster_params(booster_params) // model parameters
66+
.evaluation_sets(Some(evaluation_sets)) // optional datasets to evaluate against in each iteration
67+
.build().unwrap();
68+
69+
// train model, and print evaluation data
70+
let bst = Booster::train(&params).unwrap();
71+
72+
println!("{:?}", bst.predict(&dtest).unwrap());
73+
}
74+
```
75+
76+
See the [examples](https://github.com/davechallis/rust-xgboost/tree/master/examples) directory for
77+
more detailed examples of different features.
78+
79+
## Status
80+
81+
Currently in a very early stage of development, so the API is changing as usability issues occur,
82+
or new features are supported.
83+
84+
Builds against XGBoost 0.81.
85+
86+
### Platforms
87+
88+
Tested:
89+
90+
* Linux
91+
* Mac OS
92+
93+
Unsupported:
94+
95+
* Windows
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# xgboost feature examples
2+
3+
* [Basic usage](basic/src/main.rs)
4+
* [Custom objective and evaluation functions](custom_objective/src/main.rs)
5+
* [Generalised linear model](generalised_linear_model/src/main.rs)
6+
* [Multiclass classification](multiclass_classification/src/main.rs)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "xgboost-basic-example"
3+
version = "0.1.0"
4+
authors = ["Dave Challis <dave@suicas.net>"]
5+
publish = false
6+
7+
[dependencies]
8+
xgboost = { path = "../../" }
9+
sprs = "0.11"
10+
log = "0.4"
11+
env_logger = "0.5"

0 commit comments

Comments
 (0)