Skip to content

Commit e97a8ab

Browse files
committed
Merge branch 'master' into windows5
2 parents 1776e4f + 0de57e5 commit e97a8ab

15 files changed

+154
-51
lines changed

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
## 0.3.3 (unreleased)
1+
## 0.4.0 (unreleased)
2+
3+
- Added support for hashes and Rover data frames to `predict` method
4+
- Changed `Dataset` to use column names for feature names with Rover and Daru
5+
- Changed `predict` method to match feature names with Daru
6+
- Dropped support for Ruby < 3.1
7+
8+
## 0.3.4 (2024-07-28)
9+
10+
- Updated LightGBM to 4.5.0
11+
12+
## 0.3.3 (2024-06-15)
213

314
- Updated LightGBM to 4.4.0
415

Gemfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ gemspec
55
gem "rake"
66
gem "minitest", ">= 5"
77
gem "daru"
8-
gem "matrix" # for daru
8+
gem "matrix"
99
gem "numo-narray", platform: [:ruby, :x64_mingw]
1010
gem "rover-df", platform: [:ruby, :x64_mingw]
1111
gem "csv"

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
The MIT License (MIT)
22

33
Copyright (c) Microsoft Corporation
4-
Copyright (c) 2019-2023 Andrew Kane
4+
Copyright (c) 2019-2024 Andrew Kane
55

66
Permission is hereby granted, free of charge, to any person obtaining a copy
77
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
44

5-
[![Build Status](https://github.com/ankane/lightgbm-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/lightgbm-ruby/actions)
5+
[![Build Status](https://github.com/ankane/lightgbm-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/lightgbm-ruby/actions)
66

77
## Installation
88

Rakefile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def download_file(file, sha256)
2323
require "open-uri"
2424

2525
# also update licenses in vendor/
26-
version = "4.4.0"
26+
version = "4.5.0"
2727

2828
url =
2929
if file == "lib_lightgbm.arm64.dylib"
@@ -45,24 +45,24 @@ end
4545
# https://github.com/microsoft/LightGBM/releases
4646
namespace :vendor do
4747
task :linux do
48-
download_file("lib_lightgbm.so", "fdbb5b5786d4a99f661d453a62cc07c6607b780a1e4e774443df67aded6bb8b3")
48+
download_file("lib_lightgbm.so", "4b2b68c4d0fa99bace6cc540224b457ff899ccee0fdc8875e4625a38b00fc5e5")
4949
end
5050

5151
task :mac do
52-
download_file("lib_lightgbm.dylib", "c5824d085fd342c58f92291f40f02554f13ca1504fa26f1b2aef3151e8a70fdc")
53-
download_file("lib_lightgbm.arm64.dylib", "58b7d2c1e04c8af20c9558582e07957e3e227ef6bb31a10644b92cc93610a1fc")
52+
download_file("lib_lightgbm.dylib", "b02d48071ba4ae1e13e336a902dc5f82a5732de4448d47a20d8e9d94d5d3db2a")
53+
download_file("lib_lightgbm.arm64.dylib", "840e16754db0d3e4852bdfdecc1ee08bc367b138e0bf18fabb4ce3d9b39c936a")
5454
end
5555

5656
task :windows do
57-
download_file("lib_lightgbm.dll", "922c627c23e065f85d8e5e975be4ec78c65a424bdf12253f3168110cc2391185")
57+
download_file("lib_lightgbm.dll", "1d281ec96684806d83468469fb6052880308f39bf03a34d85ee9aa38195d260c")
5858
end
5959

6060
task all: [:linux, :mac, :windows]
6161

6262
task :platform do
6363
if Gem.win_platform?
6464
Rake::Task["vendor:windows"].invoke
65-
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
65+
elsif RbConfig::CONFIG["host_os"].match?(/darwin/i)
6666
Rake::Task["vendor:mac"].invoke
6767
else
6868
Rake::Task["vendor:linux"].invoke

lib/lightgbm.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true
145145
boosters.each(&:update)
146146

147147
scores = {}
148-
boosters.map(&:eval_valid).map(&:reverse).flatten(1).each do |r|
148+
boosters.map(&:eval_valid).flat_map(&:reverse).each do |r|
149149
(scores[r[1]] ||= []) << r[2]
150150
end
151151

lib/lightgbm/booster.rb

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def dump_model(num_iteration: nil, start_iteration: 0)
4141
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
4242
feature_importance_type = 0 # TODO add option
4343
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
44-
actual_len = read_int64(out_len)
44+
actual_len = out_len.read_int64
4545
if actual_len > buffer_len
4646
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
4747
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
@@ -51,7 +51,7 @@ def dump_model(num_iteration: nil, start_iteration: 0)
5151
alias_method :to_json, :dump_model
5252

5353
def eval_valid
54-
@name_valid_sets.each_with_index.map { |n, i| inner_eval(n, i + 1) }.flatten(1)
54+
@name_valid_sets.each_with_index.flat_map { |n, i| inner_eval(n, i + 1) }
5555
end
5656

5757
def eval_train
@@ -99,6 +99,7 @@ def feature_name
9999
def model_from_string(model_str)
100100
out_num_iterations = ::FFI::MemoryPointer.new(:int)
101101
check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
102+
@cached_feature_name = nil
102103
self
103104
end
104105

@@ -109,7 +110,7 @@ def model_to_string(num_iteration: nil, start_iteration: 0)
109110
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
110111
feature_importance_type = 0 # TODO add option
111112
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
112-
actual_len = read_int64(out_len)
113+
actual_len = out_len.read_int64
113114
if actual_len > buffer_len
114115
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
115116
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
@@ -140,7 +141,14 @@ def num_trees
140141
def predict(input, start_iteration: nil, num_iteration: nil, **params)
141142
input =
142143
if daru?(input)
143-
input.map_rows(&:to_a)
144+
input[*cached_feature_name].map_rows(&:to_a)
145+
elsif input.is_a?(Hash) # order the hash's values to match the booster's feature names
146+
sorted_feature_values(input)
147+
elsif input.is_a?(Array) && input.first.is_a?(Hash) # for an array, only the first element is checked; if it is a hash, all elements are assumed to be hashes
148+
input.map(&method(:sorted_feature_values))
149+
elsif rover?(input)
150+
# TODO improve performance
151+
input[cached_feature_name].to_numo.to_a
144152
else
145153
input.to_a
146154
end
@@ -160,7 +168,7 @@ def predict(input, start_iteration: nil, num_iteration: nil, **params)
160168
out_len = ::FFI::MemoryPointer.new(:int64)
161169
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
162170
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, start_iteration, num_iteration, params_str(params), out_len, out_result)
163-
out = out_result.read_array_of_double(read_int64(out_len))
171+
out = out_result.read_array_of_double(out_len.read_int64)
164172
out = out.each_slice(num_class).to_a if num_class > 1
165173

166174
singular ? out.first : out
@@ -236,9 +244,12 @@ def num_class
236244
out.read_int
237245
end
238246

239-
# read_int64 not available on JRuby
240-
def read_int64(ptr)
241-
ptr.read_array_of_int64(1).first
247+
def sorted_feature_values(input_hash)
248+
input_hash.transform_keys(&:to_s).fetch_values(*cached_feature_name)
249+
end
250+
251+
def cached_feature_name
252+
@cached_feature_name ||= feature_name
242253
end
243254

244255
include Utils

lib/lightgbm/dataset.rb

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module LightGBM
22
class Dataset
33
attr_reader :data, :params
44

5-
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
5+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
66
@data = data
77
@label = label
88
@weight = weight
@@ -11,7 +11,7 @@ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference
1111
@reference = reference
1212
@used_indices = used_indices
1313
@categorical_feature = categorical_feature
14-
@feature_names = feature_names
14+
@feature_name = feature_name || feature_names || "auto"
1515

1616
construct
1717
end
@@ -24,7 +24,7 @@ def weight
2424
field("weight")
2525
end
2626

27-
def feature_names
27+
def feature_name
2828
# must preallocate space
2929
num_feature_names = ::FFI::MemoryPointer.new(:int)
3030
out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
@@ -48,6 +48,7 @@ def feature_names
4848
# from most recent call (instead of num_features)
4949
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
5050
end
51+
alias_method :feature_names, :feature_name
5152

5253
def label=(label)
5354
@label = label
@@ -64,12 +65,15 @@ def group=(group)
6465
set_field("group", group, type: :int32)
6566
end
6667

67-
def feature_names=(feature_names)
68+
def feature_name=(feature_names)
6869
@feature_names = feature_names
6970
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
70-
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
71+
# keep reference to string pointers
72+
str_ptrs = feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) }
73+
c_feature_names.write_array_of_pointer(str_ptrs)
7174
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
7275
end
76+
alias_method :feature_names=, :feature_name=
7377

7478
# TODO only update reference if not in chain
7579
def reference=(reference)
@@ -106,12 +110,7 @@ def subset(used_indices, params: nil)
106110
end
107111

108112
def handle_pointer
109-
@handle.read_pointer
110-
end
111-
112-
def self.finalize(addr)
113-
# must use proc instead of stabby lambda
114-
proc { FFI.LGBM_DatasetFree(::FFI::Pointer.new(:pointer, addr)) }
113+
@handle
115114
end
116115

117116
private
@@ -127,25 +126,33 @@ def construct
127126
end
128127
set_verbosity(params)
129128

130-
@handle = ::FFI::MemoryPointer.new(:pointer)
129+
handle = ::FFI::MemoryPointer.new(:pointer)
131130
parameters = params_str(params)
132131
reference = @reference.handle_pointer if @reference
133132
if used_indices
134133
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
135134
used_row_indices.write_array_of_int32(used_indices)
136-
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
135+
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, handle)
137136
elsif data.is_a?(String)
138-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
137+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, handle)
139138
else
140139
if matrix?(data)
141140
nrow = data.row_count
142141
ncol = data.column_count
143142
flat_data = data.to_a.flatten
144143
elsif daru?(data)
144+
if @feature_name == "auto"
145+
@feature_name = data.vectors.to_a
146+
end
145147
nrow, ncol = data.shape
146148
flat_data = data.map_rows(&:to_a).flatten
147-
elsif numo?(data) || rover?(data)
148-
data = data.to_numo if rover?(data)
149+
elsif numo?(data)
150+
nrow, ncol = data.shape
151+
elsif rover?(data)
152+
if @feature_name == "auto"
153+
@feature_name = data.keys
154+
end
155+
data = data.to_numo
149156
nrow, ncol = data.shape
150157
else
151158
nrow = data.count
@@ -161,14 +168,18 @@ def construct
161168
c_data.write_array_of_double(flat_data)
162169
end
163170

164-
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
171+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, handle)
172+
end
173+
if used_indices
174+
@handle = handle.read_pointer
175+
else
176+
@handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_DatasetFree))
165177
end
166-
ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i)) unless used_indices
167178

168179
self.label = @label if @label
169180
self.weight = @weight if @weight
170181
self.group = @group if @group
171-
self.feature_names = @feature_names if @feature_names
182+
self.feature_name = @feature_name if @feature_name && @feature_name != "auto"
172183
end
173184

174185
def dump_text(filename)

lib/lightgbm/utils.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def params_str(params)
1212
end
1313

1414
def check_param(v)
15-
raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match(v)
15+
raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
1616
v
1717
end
1818

lib/lightgbm/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module LightGBM
2-
VERSION = "0.3.2"
2+
VERSION = "0.3.4"
33
end

0 commit comments

Comments
 (0)