Commit 5972bee

Merge pull request #13 from Eleven1Liu/unify_comments
Unify doc strings
2 parents e028e2d + 611adf1 commit 5972bee

16 files changed: +93 −92 lines changed

libmultilabel/common_utils.py

Lines changed: 9 additions & 8 deletions

@@ -34,10 +34,10 @@ def __setattr__(self, key: str, value: any) -> None:
         self._used.discard(key)

     def used_items(self) -> dict:
-        """Returns the items that have been used at least once after being set.
+        """Return the items that have been used at least once after being set.

         Returns:
-            dict: the used items.
+            dict: The used items.
         """
         return {k: self[k] for k in self._used}

@@ -46,10 +46,10 @@ def dump_log(log_path, metrics=None, split=None, config=None):
     """Write log including the used items of config and the evaluation scores.

     Args:
-        log_path(str): path to log path
-        metrics (dict): metric and scores in dictionary format, defaults to None
-        split (str): val or test, defaults to None
-        config (dict): config to save, defaults to None
+        log_path(str): Path to log path.
+        metrics (dict): Metric and scores in dictionary format, defaults to None.
+        split (str): One of `val` or `test`, defaults to None.
+        config (dict): Config to save, defaults to None.
     """
     os.makedirs(os.path.dirname(log_path), exist_ok=True)
     if os.path.isfile(log_path):

@@ -82,7 +82,8 @@ def argsort_top_k(vals, k, axis=-1):
         k: Consider only the top k elements for each query
         axis: Axis along which to sort. The default is -1 (the last axis).

-    Returns: Array of indices that sort vals along the specified axis.
+    Returns:
+        Array of indices that sort vals along the specified axis.
     """
     unsorted_top_k_idx = np.argpartition(vals, -k, axis=axis)[:, -k:]
     unsorted_top_k_scores = np.take_along_axis(vals, unsorted_top_k_idx, axis=axis)

@@ -130,7 +131,7 @@ def is_multiclass_dataset(dataset, label="label"):


 def timer(func):
-    """Log info-level wall time"""
+    """Log info-level wall time."""

     @wraps(func)
     def wrapper(*args, **kwargs):
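
Note: the two lines of argsort_top_k visible in the hunk above are the O(n + k log k) pattern: np.argpartition isolates the k largest entries of each row in linear time, and only those k entries are fully sorted. A minimal sketch of one plausible completion (the final sort direction is an assumption; this diff only shows the first two lines of the body):

import numpy as np

def argsort_top_k(vals, k, axis=-1):
    # Partition so the k largest values of each row occupy the last k slots (linear time).
    unsorted_top_k_idx = np.argpartition(vals, -k, axis=axis)[:, -k:]
    unsorted_top_k_scores = np.take_along_axis(vals, unsorted_top_k_idx, axis=axis)
    # Fully sort only those k entries, then map the order back to original column indices.
    sorted_order = np.argsort(-unsorted_top_k_scores, axis=axis)  # descending by score (assumed)
    return np.take_along_axis(unsorted_top_k_idx, sorted_order, axis=axis)

scores = np.array([[0.1, 0.9, 0.4, 0.7]])
print(argsort_top_k(scores, k=2))  # [[1 3]]: indices of the two largest scores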

libmultilabel/linear/linear.py

Lines changed: 18 additions & 18 deletions

@@ -39,7 +39,7 @@ def __init__(
         self.multiclass = multiclass

     def predict_values(self, x: sparse.csr_matrix) -> np.ndarray:
-        """Calculates the decision values associated with x.
+        """Calculate the decision values associated with x.

         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.

@@ -79,7 +79,7 @@ def train_1vsrest(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """Trains a linear model for multi-label data using a one-vs-rest strategy.
+    """Train a linear model for multi-label data using a one-vs-rest strategy.

     Args:
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.

@@ -169,9 +169,9 @@ def train_thresholding(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """Trains a linear model for multi-label data using a one-vs-rest strategy
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick decision thresholds optimizing the sum of Macro-F1 and Micro-F1.
-    Outperforms train_1vsrest in most aspects at the cost of higher time complexity
+    Outperform train_1vsrest in most aspects at the cost of higher time complexity
     due to an internal cross-validation.

     This method is the micromacro-freq approach from this CIKM 2023 paper:

@@ -235,7 +235,7 @@ def _micromacro_one_label(
             negatives, and the number of labels processed.

     Returns:
-        tuple[np.ndarray, float, dict]: the weights, threshold, and the updated stats for calculating
+        tuple[np.ndarray, float, dict]: The weights, threshold, and the updated stats for calculating
             Micro-F1.
     """

@@ -319,7 +319,7 @@ def micro_plus_macro(tp, fp, fn):


 def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
-    """Wrapper around liblinear.liblinearutil.train.
+    """Wrap around liblinear.liblinearutil.train.
     Forcibly suppresses all IO regardless of options.

     Args:

@@ -328,7 +328,7 @@ def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
         options (str): The option string passed to liblinear.

     Returns:
-        np.matrix: the weights.
+        np.matrix: The weights.
     """
     if y.shape[0] == 0:
         return np.matrix(np.zeros((x.shape[1], 1)))

@@ -376,11 +376,11 @@ def _fmeasure(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Calculate F1 score.

     Args:
-        y_true (np.ndarray): array of +1/-1.
-        y_pred (np.ndarray): array of +1/-1.
+        y_true (np.ndarray): Array of +1/-1.
+        y_pred (np.ndarray): Array of +1/-1.

     Returns:
-        float: the F1 score.
+        float: The F1 score.
     """
     tp = np.sum(np.logical_and(y_true == 1, y_pred == 1))
     fn = np.sum(np.logical_and(y_true == 1, y_pred == -1))

@@ -399,10 +399,10 @@ def train_cost_sensitive(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """Trains a linear model for multi-label data using a one-vs-rest strategy
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick an optimal asymmetric misclassification cost
     for Macro-F1.
-    Outperforms train_1vsrest in most aspects at the cost of higher
+    Outperform train_1vsrest in most aspects at the cost of higher
     time complexity.
     See user guide for more details.

@@ -416,7 +416,7 @@ def train_cost_sensitive(
     Returns:
         A model which can be used in predict_values.
     """
-    # Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
+    # Follow the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
     x, options, bias = _prepare_options(x, options)

     y = y.tocsc()

@@ -449,7 +449,7 @@ def _cost_sensitive_one_label(y: np.ndarray, x: sparse.csr_matrix, options: str)
         options (str): The option string passed to liblinear.

     Returns:
-        np.ndarray: the weights.
+        np.ndarray: The weights.
     """

     l = y.shape[0]

@@ -503,10 +503,10 @@ def train_cost_sensitive_micro(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """Trains a linear model for multi-label data using a one-vs-rest strategy
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick an optimal asymmetric misclassification cost
     for Micro-F1.
-    Outperforms train_1vsrest in most aspects at the cost of higher
+    Outperform train_1vsrest in most aspects at the cost of higher
     time complexity.
     See user guide for more details.

@@ -574,7 +574,7 @@ def train_binary_and_multiclass(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """Trains a linear model for binary and multi-class data.
+    """Train a linear model for binary and multi-class data.

     Args:
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.

@@ -628,7 +628,7 @@ def train_binary_and_multiclass(


 def predict_values(model, x: sparse.csr_matrix) -> np.ndarray:
-    """Calculates the decision values associated with x, equivalent to model.predict_values(x).
+    """Calculate the decision values associated with x, equivalent to model.predict_values(x).

     Args:
         model: A model returned from a training function.
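
Note: for context on the functions renamed above, a minimal end-to-end sketch of the one-vs-rest API on toy data. The argument names come from the hunks above; the "-s 2" liblinear solver flag is a common choice, and other details may vary by version:

import numpy as np
import scipy.sparse as sparse
import libmultilabel.linear as linear

# Toy multi-label data: 4 instances, 3 features, 2 labels (0/1 indicator matrix).
x = sparse.csr_matrix(np.random.rand(4, 3))
y = sparse.csr_matrix(np.array([[1, 0], [0, 1], [1, 1], [1, 0]], dtype=np.float64))

model = linear.train_1vsrest(y, x, options="-s 2")  # options string is passed through to liblinear
decision_values = linear.predict_values(model, x)   # shape (4, 2): one column per label
predictions = decision_values > 0                   # one-vs-rest: threshold decision values at 0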

libmultilabel/linear/metrics.py

Lines changed: 6 additions & 6 deletions

@@ -8,7 +8,7 @@


 def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:
-    """Sorts the top k indices in O(n + k log k) time.
+    """Sort the top k indices in O(n + k log k) time.
     The sorting order is ascending to be consistent with np.sort.
     This means the last element is the largest, the first element is the kth largest.
     """

@@ -18,7 +18,7 @@ def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:


 def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> np.ndarray:
-    """Computes DCG@k with a sorted preds array and a target array."""
+    """Compute DCG@k with a sorted preds array and a target array."""
     top_k_idx = argsort_preds[:, -top_k:][:, ::-1]
     gains = np.take_along_axis(target, top_k_idx, axis=-1)
     discount = 1 / (np.log2(np.arange(top_k) + 2))

@@ -28,7 +28,7 @@ def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> n


 def _idcg(target: np.ndarray, top_k: int) -> np.ndarray:
-    """Computes IDCG@k for a 0/1 target array. A 0/1 target is a special case that
+    """Compute IDCG@k for a 0/1 target array. A 0/1 target is a special case that
     doesn't require sorting. If IDCG is computed with DCG,
     then target will need to be sorted, which incurs a large overhead.
     """

@@ -247,7 +247,7 @@ def __init__(self, metrics):
         self.max_k = max(getattr(metric, "top_k", 0) for metric in self.metrics.values())

     def update(self, preds: np.ndarray, target: np.ndarray):
-        """Adds a batch of decision values and labels.
+        """Add a batch of decision values and labels.

         Args:
             preds (np.ndarray): A matrix of decision values with dimensions number of instances * number of classes.

@@ -268,7 +268,7 @@ def update(self, preds: np.ndarray, target: np.ndarray):
             metric.update(preds, target)

     def compute(self) -> dict[str, float]:
-        """Computes the metrics from the accumulated batches of decision values and labels.
+        """Compute the metrics from the accumulated batches of decision values and labels.

         Returns:
             dict[str, float]: A dictionary of metric values.

@@ -279,7 +279,7 @@ def compute(self) -> dict[str, float]:
         return ret

     def reset(self):
-        """Clears the accumulated batches of decision values and labels."""
+        """Clear the accumulated batches of decision values and labels."""
         for metric in self.metrics.values():
             metric.reset()
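
Note: update/compute/reset above form a batched evaluation protocol, so large prediction matrices never have to materialize at once. A sketch of that loop; the construction via linear.get_metrics and its keyword names are assumptions (only the three methods appear in this diff), so check your version for the exact factory:

import numpy as np
import libmultilabel.linear as linear

# Hypothetical construction; the factory name and keywords may differ by version.
metrics = linear.get_metrics(monitor_metrics=["Macro-F1", "P@1"], num_classes=3)

rng = np.random.default_rng(0)
for _ in range(2):                             # feed decision values batch by batch
    preds = rng.standard_normal((4, 3))        # decision values: instances x classes
    target = rng.integers(0, 2, size=(4, 3))   # 0/1 labels with matching shape
    metrics.update(preds, target)

print(metrics.compute())  # dict of metric names to floats, e.g. {"Macro-F1": ..., "P@1": ...}
metrics.reset()           # clear accumulated batches before the next evaluation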

libmultilabel/linear/tree.py

Lines changed: 5 additions & 5 deletions

@@ -59,7 +59,7 @@ def predict_values(
         x: sparse.csr_matrix,
         beam_width: int = 10,
     ) -> np.ndarray:
-        """Calculates the probability estimates associated with x.
+        """Calculate the probability estimates associated with x.

         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.

@@ -118,7 +118,7 @@ def train_tree(
     dmax=10,
     verbose: bool = True,
 ) -> TreeModel:
-    """Trains a linear model for multi-label data using a divide-and-conquer strategy.
+    """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.

     Args:

@@ -178,7 +178,7 @@ def visit(node):


 def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
-    """Builds the tree recursively by kmeans clustering.
+    """Build the tree recursively by kmeans clustering.

     Args:
         label_representation (sparse.csr_matrix): A matrix with dimensions number of classes under this node * number of features.

@@ -235,7 +235,7 @@ def collect_stat(node: Node):


 def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node):
-    """If node is internal, computes the metalabels representing each child and trains
+    """If node is internal, compute the metalabels representing each child and train
     on the metalabels. Otherwise, train on y.

     Args:

@@ -258,7 +258,7 @@ def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node:


 def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
-    """Flattens tree weight matrices into a single weight matrix. The flattened weight
+    """Flatten tree weight matrices into a single weight matrix. The flattened weight
     matrix is used to predict all possible values, which is cached for beam search.
     This pessimizes complexity but is faster in practice.
     Consecutive values of the returned map denotes the start and end indices of the
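
Note: train_tree and TreeModel.predict_values, both touched above, pair as in the sketch below. dmax and beam_width appear in the hunks; the K parameter name is an assumption beyond what this diff shows:

import numpy as np
import scipy.sparse as sparse
import libmultilabel.linear as linear

# Toy data with enough labels for the k-means label tree to branch.
x = sparse.random(50, 20, density=0.3, format="csr")
y = sparse.csr_matrix((np.random.rand(50, 8) > 0.7).astype(np.float64))  # 8 labels

# K bounds the fan-out of the label tree, dmax its depth (Bonsai-style divide and conquer).
model = linear.train_tree(y, x, options="-s 2", K=4, dmax=10)
probs = model.predict_values(x, beam_width=10)  # probability estimates via beam search
top1 = np.argmax(probs, axis=1)                 # highest-scoring label per instance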

libmultilabel/linear/utils.py

Lines changed: 2 additions & 2 deletions

@@ -31,7 +31,7 @@


 def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):
-    """Saves preprocessor and model to checkpoint_dir/linear_pipline.pickle.
+    """Save preprocessor and model to checkpoint_dir/linear_pipline.pickle.

     Args:
         checkpoint_dir (str): The directory to save to.

@@ -53,7 +53,7 @@ def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):


 def load_pipeline(checkpoint_path: str) -> tuple[Preprocessor, Any]:
-    """Loads preprocessor and model from checkpoint_path.
+    """Load preprocessor and model from checkpoint_path.

     Args:
         checkpoint_path (str): The path to a previously saved pipeline.
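
Note: the two functions above round-trip a trained pipeline; per the docstring, save_pipeline writes checkpoint_dir/linear_pipline.pickle (filename, typo included, taken from the source). A sketch, assuming preprocessor and model come from earlier preprocessing/training steps such as the train_1vsrest example after the linear.py diff:

import libmultilabel.linear as linear

# preprocessor and model are placeholders for objects produced by earlier steps.
linear.save_pipeline("runs/example", preprocessor, model)  # writes runs/example/linear_pipline.pickle
preprocessor, model = linear.load_pipeline("runs/example/linear_pipline.pickle")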
