Skip to content

Commit da81ecf

Browse files
authored
Merge pull request #16 from postgresml/montana/housing
add the california housing example
2 parents f917003 + ff78f3e commit da81ecf

File tree

5 files changed

+76
-3
lines changed

5 files changed

+76
-3
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,4 +211,9 @@ Run the test:
211211
psql -f sql/test.sql
212212
```
213213

214+
One liner:
215+
```
216+
cd pgml; sudo python3 setup.py install; cd ../; sudo -u postgres psql -f sql/test.sql
217+
```
218+
214219
Make sure to run it exactly like this, from the root directory of the repo.

examples/california_housing/run.sql

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
-- This example trains models on the sklean california_housing dataset
2+
-- which is a copy of the test set from the StatLib repository
3+
-- https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html
4+
--
5+
-- This demonstrates using a table with individual columns as features
6+
-- for regression.
7+
SELECT pgml.load_dataset('california_housing');
8+
9+
-- view the dataset
10+
SELECT * from pgml.california_housing;
11+
12+
-- train a simple model to classify the data
13+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target');
14+
15+
-- check out the predictions
16+
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
17+
FROM pgml.california_housing
18+
LIMIT 10;
19+
20+
-- -- train some more models with different algorithms
21+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'svm');
22+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'random_forest');
23+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosting_trees');
24+
-- TODO SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'dense_neural_network');
25+
-- -- check out all that hard work
26+
SELECT * FROM pgml.trained_models;
27+
28+
-- deploy the random_forest model for prediction use
29+
SELECT pgml.deploy('California Housing Prediction', 'random_forest');
30+
-- check out that throughput
31+
SELECT * FROM pgml.deployed_models;
32+
33+
-- do some hyper param tuning
34+
-- TODO SELECT pgml.hypertune(100, 'California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosted_trees');
35+
-- deploy the "best" model for prediction use
36+
SELECT pgml.deploy('California Housing Prediction', 'best_fit');
37+
38+
-- check out the improved predictions
39+
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
40+
FROM pgml.california_housing
41+
LIMIT 10;

examples/digits/run.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
-- which is a copy of the test set of the UCI ML hand-written digits datasets
33
-- https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
44
--
5+
-- This demonstrates using a table with a single array feature column
6+
-- for classification.
7+
--
58
-- The final result after a few seconds of training is not terrible. Maybe not perfect
69
-- enough for mission critical applications, but it's telling how quickly "off the shelf"
710
-- solutions can solve problems these days.

pgml/pgml/datasets.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,45 @@
11
import plpy
2-
from sklearn.datasets import load_digits as d
2+
import sklearn.datasets
33

44
from pgml.sql import q
55
from pgml.exceptions import PgMLException
66

77
def load(source: str):
88
if source == "digits":
99
load_digits()
10+
elif source == "california_housing":
11+
load_california_housing()
1012
else:
1113
raise PgMLException(f"Invalid dataset name: {source}. Valid values are ['digits'].")
1214
return "OK"
1315

1416
def load_digits():
15-
dataset = d()
17+
dataset = sklearn.datasets.load_digits()
1618
a = plpy.execute("DROP TABLE IF EXISTS pgml.digits")
1719
a = plpy.execute("CREATE TABLE pgml.digits (image SMALLINT[], target INTEGER)")
1820
a = plpy.execute(f"""COMMENT ON TABLE pgml.digits IS {q(dataset["DESCR"])}""")
1921
for X, y in zip(dataset["data"], dataset["target"]):
2022
X = ",".join("%i" % x for x in list(X))
2123
plpy.execute(f"""INSERT INTO pgml.digits (image, target) VALUES ('{{{X}}}', {y})""")
24+
25+
def load_california_housing():
26+
dataset = sklearn.datasets.fetch_california_housing()
27+
a = plpy.execute("DROP TABLE IF EXISTS pgml.california_housing")
28+
a = plpy.execute("""
29+
CREATE TABLE pgml.california_housing (
30+
median_income FLOAT4, -- median income in block group
31+
house_age FLOAT4, -- median house age in block group
32+
avg_rooms FLOAT4, -- average number of rooms per household
33+
avg_bedrooms FLOAT4, -- average number of bedrooms per household
34+
population FLOAT4, -- block group population
35+
avg_occupants FLOAT4, -- average number of household members
36+
latitude FLOAT4, -- block group latitude
37+
longitude FLOAT4, -- block group longitudetarget INTEGER
38+
target FLOAT4
39+
)""")
40+
a = plpy.execute(f"""COMMENT ON TABLE pgml.california_housing IS {q(dataset["DESCR"])}""")
41+
for X, y in zip(dataset["data"], dataset["target"]):
42+
plpy.execute(f"""
43+
INSERT INTO pgml.california_housing (median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude, target)
44+
VALUES ({q(X[0])}, {q(X[1])}, {q(X[2])}, {q(X[3])}, {q(X[4])}, {q(X[5])}, {q(X[6])}, {q(X[7])}, {q(y)})""")
45+

sql/install.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ $$ LANGUAGE plpython3u;
136136
---
137137
--- Predict
138138
---
139-
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, features NUMERIC[])
139+
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, features DOUBLE PRECISION[])
140140
RETURNS DOUBLE PRECISION
141141
AS $$
142142
from pgml.model import Project

0 commit comments

Comments
 (0)