
Commit 6c47dbe

Metrics and inference in progress
1 parent cd25455 commit 6c47dbe

File tree

3 files changed: +50 −17 lines


pgml-extension/src/llm_fine_tuning/.gitignore

Lines changed: 4 additions & 1 deletion
@@ -3,4 +3,7 @@ results
 __pycache__/
 metrics.ipynb
 netflix_titles_medium.csv
-netflix_titles_400.csv
+netflix_titles_400.csv
+*sample.csv
+*.db
+*.ipynb

pgml-extension/src/llm_fine_tuning/generate.py

Lines changed: 23 additions & 8 deletions
@@ -29,7 +29,10 @@
     show_default=True,
 )
 @click.option(
-    "--max_length", default=50, help="Max length of the response", show_default=True
+    "--min_length", default=50, help="Min length of the response", show_default=True
+)
+@click.option(
+    "--max_length", default=50, help="Max length of the response", show_default=True
 )
 @click.option(
     "--num_return_sequences",
@@ -44,21 +47,33 @@
     show_default=True,
 )
 def generate(
-    prompt, model_name, tokenizer_name, max_length, num_return_sequences, temperature
+    prompt, model_name, tokenizer_name, min_length, max_length, num_return_sequences, temperature
 ):
+    cuda_available = torch.cuda.is_available()
     model = AutoModelForCausalLM.from_pretrained(model_name)
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+    if cuda_available:
+        device = "cuda:0"
+    else:
+        device = "cpu"
     generator = pipeline(
-        "text-generation", model=model, tokenizer=tokenizer, max_length=max_length
+        "text-generation", model=model, tokenizer=tokenizer, device=device, max_length=max_length
     )
+
+    min_length = min(min_length, max_length)
+
     log.info("Prompt: %s" % prompt)
-    log.info(
-        "Generated: %s"
-        % generator(
-            prompt, num_return_sequences=num_return_sequences, temperature=temperature
-        )
+    outputs = generator(
+        prompt,
+        do_sample=True,
+        min_length=min_length,
+        num_return_sequences=num_return_sequences,
+        temperature=temperature,
     )
 
+    for _id, output in enumerate(outputs):
+        log.info("Generated %d: %s" % (_id, output["generated_text"]))
+
 
 if __name__ == "__main__":
     generate()
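
Taken together, the patched generation path is easy to exercise end to end. Below is a minimal, self-contained sketch of what generate.py does after this commit; it is an illustration, not repository code: "gpt2", the prompt, and the sampling values stand in for the CLI's --model_name, --tokenizer_name, --prompt, --min_length, --max_length, --num_return_sequences, and --temperature options.

# Sketch only: mirrors the generate.py flow after this commit; all values
# below are placeholders for what the click options would supply.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
device = "cuda:0" if torch.cuda.is_available() else "cpu"  # GPU when available

generator = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, device=device, max_length=50
)

min_length = min(20, 50)  # the commit clamps min_length so it never exceeds max_length
outputs = generator(
    "Once upon a time",
    do_sample=True,          # sample instead of greedy decoding
    min_length=min_length,
    num_return_sequences=2,  # each element is a dict with a "generated_text" key
    temperature=0.9,
)
for _id, output in enumerate(outputs):
    print("Generated %d: %s" % (_id, output["generated_text"]))

Passing do_sample=True is the key behavioral change here: without it the pipeline decodes greedily, so the temperature option had no effect in the old code.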

pgml-extension/src/llm_fine_tuning/metrics.py

Lines changed: 23 additions & 8 deletions
@@ -44,7 +44,13 @@
     help="Stride length for computing perplexity",
     show_default=True,
 )
-def metrics(filename, column_name, model_name, tokenizer_name, stride):
+@click.option(
+    "--max_length_key",
+    default="n_positions",
+    help="Key in the model configuration that holds the model's maximum context length",
+    show_default=True,
+)
+def metrics(filename, column_name, model_name, tokenizer_name, stride, max_length_key):
     if os.path.exists(filename):
         test = load_dataset("csv", data_files=filename)
     else:
@@ -55,16 +61,25 @@ def metrics(filename, column_name, model_name, tokenizer_name, stride):
 
     device = "cpu"
     if cuda_available:
-        device = "cuda"
-        model = AutoModelForCausalLM.from_pretrained(model_name).cuda()
-    else:
-        model = AutoModelForCausalLM.from_pretrained(model_name)
+        device = "cuda:0"
+    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
 
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
 
-    encodings = tokenizer("\n\n".join(test["train"][column_name]), return_tensors="pt")
+    full_text = ""
+    for entry in test["train"][column_name]:
+        if entry:
+            full_text += "\n\n" + entry
+
+    encodings = tokenizer(full_text, return_tensors="pt")
+
+    config = model.config.to_dict()
+    if max_length_key in config.keys():
+        max_length = config[max_length_key]
+    else:
+        log.info("Configuration keys: " + ", ".join(config.keys()))
+        raise ValueError("%s does not exist in model configuration" % max_length_key)
 
-    max_length = model.config.n_positions
     stride = min(stride, max_length)
     seq_len = encodings.input_ids.size(1)
 
@@ -93,7 +108,7 @@ def metrics(filename, column_name, model_name, tokenizer_name, stride):
             break
 
     ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
-    log.info("Perplexity = %0.3f (lower is better)" % ppl)
+    log.info("Number of parameters = %d, Perplexity = %0.3f (lower is better)" % (model.num_parameters(), ppl))
 
 
 if __name__ == "__main__":
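
The hunks above change how the context length is resolved and what gets logged, but the sliding-window loop that fills nlls and end_loc sits outside this diff. For context, below is a minimal sketch of the standard strided-perplexity recipe (as described in the Hugging Face docs) that such a loop typically follows; it is an assumption, not code from this commit, and "gpt2" plus the sample strings are placeholders.

# Sketch only: strided perplexity over a long token sequence.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

encodings = tokenizer("\n\n".join(["some text", "more text"]), return_tensors="pt")
max_length = model.config.to_dict()["n_positions"]  # the lookup the commit makes configurable
stride = min(512, max_length)
seq_len = encodings.input_ids.size(1)

nlls = []
prev_end_loc = 0
for begin_loc in range(0, seq_len, stride):
    end_loc = min(begin_loc + max_length, seq_len)
    trg_len = end_loc - prev_end_loc  # tokens not yet scored by an earlier window
    input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
    target_ids = input_ids.clone()
    target_ids[:, :-trg_len] = -100  # -100 masks already-scored positions from the loss

    with torch.no_grad():
        # loss is the mean NLL over trg_len tokens; scale back up before summing
        nlls.append(model(input_ids, labels=target_ids).loss * trg_len)

    prev_end_loc = end_loc
    if end_loc == seq_len:
        break

ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
print("Perplexity = %0.3f (lower is better)" % ppl)

Dividing the summed NLL by end_loc, the number of tokens actually scored, before exponentiating is exactly what the ppl line in the final hunk computes.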
