
Commit adadb0e

Merge pull request #266 from terrafying/master
dependency updates! mainly for numpy v2
2 parents: 5630513 + 491b711

File tree

5 files changed: +50, -28 lines


hypertools/tools/format_data.py

Lines changed: 6 additions & 5 deletions
@@ -75,7 +75,7 @@ def format_data(x, vectorizer='CountVectorizer',
     from ..datageometry import DataGeometry

     # if x is not a list, make it one
-    if type(x) is not list:
+    if not isinstance(x, list):
         x = [x]

     if all([isinstance(xi, str) for xi in x]):
@@ -98,7 +98,7 @@ def format_data(x, vectorizer='CountVectorizer',
     text_data = []
     for i,j in zip(x, dtypes):
         if j in ['list_str', 'str', 'arr_str']:
-            text_data.append(np.array(i).reshape(-1, 1))
+            text_data.append(np.asarray(i, dtype=object).reshape(-1, 1))
     # convert text to numerical matrices
     text_data = text2mat(text_data, **text_args)

@@ -164,14 +164,15 @@ def format_data(x, vectorizer='CountVectorizer',


 def fill_missing(x):
-
+    """Fill missing values using PPCA"""
     # ppca if missing data
     m = PPCA()
-    m.fit(data=np.vstack(x))
+    x_stacked = np.vstack(x)
+    m.fit(data=x_stacked)
     x_pca = m.transform()

     # if the whole row is missing, return nans
-    all_missing = [idx for idx, a in enumerate(np.vstack(x)) if all([type(b)==np.nan for b in a])]
+    all_missing = [idx for idx, a in enumerate(x_stacked) if np.all(np.isnan(a))]
     if len(all_missing)>0:
         for i in all_missing:
             x_pca[i, :] = np.nan
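
Side note on the fill_missing change (an illustration, not part of the commit): the old all-missing check compared each value's type to np.nan, which is always False because np.nan is a float value, not a type, so fully-missing rows were never flagged. np.isnan performs the elementwise test the code intended:

import numpy as np

row = np.array([np.nan, np.nan, np.nan])

# old check: type(b) == np.nan is always False, so all-missing rows slip through
print(all([type(b) == np.nan for b in row]))   # False
# new check: elementwise NaN test, reduced with np.all
print(np.all(np.isnan(row)))                   # True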

hypertools/tools/normalize.py

Lines changed: 12 additions & 3 deletions
@@ -48,14 +48,23 @@ def normalize(x, normalize='across', internal=False, format_data=True):
     if normalize in [False, None]:
         return x
     else:
-
         if format_data:
             x = formatter(x, ppca=True)

-        zscore = lambda X, y: (y - np.mean(X)) / np.std(X) if len(set(y)) > 1 else np.zeros(y.shape)
+        def zscore(X, y):
+            # Handle empty arrays and single-value arrays
+            if len(y) == 0 or len(set(y.ravel())) <= 1:
+                return np.zeros_like(y, dtype=np.float64)
+
+            mean = np.mean(X)
+            std = np.std(X)
+            # Avoid division by zero
+            if std == 0:
+                return np.zeros_like(y, dtype=np.float64)
+            return (y - mean) / std

         if normalize == 'across':
-            x_stacked=np.vstack(x)
+            x_stacked = np.vstack(x)
             normalized_x = [np.array([zscore(x_stacked[:,j], i[:,j]) for j in range(i.shape[1])]).T for i in x]

         elif normalize == 'within':
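
For context (a standalone sketch, not part of the diff): the replaced lambda divided by np.std(X) unconditionally, producing inf or nan for constant columns. The rewritten zscore returns zeros for empty or constant input instead:

import numpy as np

def zscore(X, y):
    # zeros for empty or constant input instead of dividing by zero
    if len(y) == 0 or len(set(y.ravel())) <= 1:
        return np.zeros_like(y, dtype=np.float64)
    std = np.std(X)
    if std == 0:
        return np.zeros_like(y, dtype=np.float64)
    return (y - np.mean(X)) / std

print(zscore(np.array([1., 1., 1.]), np.array([1., 1.])))  # [0. 0.] -- constant column
print(zscore(np.array([2., 4.]), np.array([2., 4.])))      # [-1.  1.]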

hypertools/tools/reduce.py

Lines changed: 18 additions & 6 deletions
@@ -1,6 +1,7 @@
 #!/usr/bin/env python

 import warnings
+import numpy as np
 from sklearn.decomposition import PCA, FastICA, IncrementalPCA, KernelPCA, FactorAnalysis, TruncatedSVD, SparsePCA, MiniBatchSparsePCA, DictionaryLearning, MiniBatchDictionaryLearning
 from sklearn.manifold import TSNE, MDS, SpectralEmbedding, LocallyLinearEmbedding, Isomap
 from umap import UMAP
@@ -92,7 +93,7 @@ def reduce(x, reduce='IncrementalPCA', ndims=None, normalize=None, align=None,
     if reduce is None:
         return x

-    elif isinstance(reduce, (str, np.string_)):
+    elif isinstance(reduce, str):  # np.string_ was removed in NumPy 2.0
         model_name = reduce
         model_params = {
             'n_components': ndims
@@ -112,7 +113,7 @@ def reduce(x, reduce='IncrementalPCA', ndims=None, normalize=None, align=None,

     try:
         # if the model passed is a string, make sure it's one of the supported options
-        if isinstance(model_name, (str, np.string_)):
+        if isinstance(model_name, str):  # np.string_ was removed in NumPy 2.0
             model = models[model_name]
         # otherwise check any custom object for necessary methods
         else:
@@ -142,16 +143,18 @@ def reduce(x, reduce='IncrementalPCA', ndims=None, normalize=None, align=None,
     if model_params['n_components'] is None or all([i.shape[1] <= model_params['n_components'] for i in x]):
         return x

-    stacked_x = np.vstack(x)
+    # Handle empty arrays and type conversion
+    stacked_x = np.vstack([np.asarray(arr, dtype=np.float64) for arr in x])
+
     if stacked_x.shape[0] == 1:
         warnings.warn('Cannot reduce the dimensionality of a single row of'
                       ' data. Return zeros length of ndims')
-        return [np.zeros((1, model_params['n_components']))]
-
+        return [np.zeros((1, model_params['n_components']), dtype=np.float64)]

     elif stacked_x.shape[0] < model_params['n_components']:
         warnings.warn('The number of rows in your data is less than ndims.'
                       ' The data will be reduced to the number of rows.')
+        model_params['n_components'] = stacked_x.shape[0]

     # deprecation warnings
     if normalize is not None:
@@ -179,8 +182,17 @@ def reduce(x, reduce='IncrementalPCA', ndims=None, normalize=None, align=None,

 # sub functions
 def reduce_list(x, model):
+    """Helper function to reduce a list of arrays"""
+    # Ensure all arrays are float64 for consistent handling
+    x = [np.asarray(arr, dtype=np.float64) for arr in x]
     split = np.cumsum([len(xi) for xi in x])[:-1]
-    x_r = np.vsplit(model.fit_transform(np.vstack(x)), split)
+    stacked = np.vstack(x)
+
+    # Handle potential NaN values
+    if np.any(np.isnan(stacked)):
+        warnings.warn('NaN values detected in input data. These may affect the reduction results.')
+
+    x_r = np.vsplit(model.fit_transform(stacked), split)
     if len(x) > 1:
         return [xi for xi in x_r]
     else:
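
Background on the isinstance changes (my gloss, based on NumPy's 2.0 migration notes): np.string_ was an alias for np.bytes_ that NumPy 2.0 removed outright, so any reference to it now raises AttributeError, and a plain str check covers the string-typed reduce argument. A quick probe:

import numpy as np

print(np.__version__)                      # 2.x in the updated environment
print(hasattr(np, 'string_'))              # False on NumPy >= 2.0; True on 1.x
print(isinstance('IncrementalPCA', str))   # the plain-str check is sufficient

The n_components clamp added a few lines above the warning also makes that warning accurate: the data is now actually reduced to the number of rows rather than left at the requested ndims.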

requirements.txt

Lines changed: 9 additions & 9 deletions
@@ -1,10 +1,10 @@
 PPCA>=0.0.2
-scikit-learn>=0.24
-pandas>=0.18.0
-seaborn>=0.8.1
-matplotlib>=1.5.1
-scipy>=1.0.0
-numpy>=1.10.4
-umap-learn>=0.4.6
-requests
-ipympl
+scikit-learn>=1.4.0
+pandas>=2.2.0
+seaborn>=0.13.0
+matplotlib>=3.8.0
+scipy>=1.13.0
+numpy>=2.0.0
+umap-learn>=0.5.5
+requests>=2.31.0
+ipympl>=0.9.3
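
To see whether an existing environment already meets the new floors, something like this works (a standard-library sketch, not part of the commit; the package list mirrors the file above):

from importlib.metadata import PackageNotFoundError, version

floors = {'numpy': '2.0.0', 'scikit-learn': '1.4.0', 'pandas': '2.2.0',
          'scipy': '1.13.0', 'umap-learn': '0.5.5'}
for pkg, floor in floors.items():
    try:
        # prints installed vs. required; strict comparison would need packaging.version
        print(f'{pkg}: installed {version(pkg)}, required >={floor}')
    except PackageNotFoundError:
        print(f'{pkg}: not installed')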

setup.py

Lines changed: 5 additions & 5 deletions
@@ -6,13 +6,13 @@
 os.environ["MPLCONFIGDIR"] = "."

 NAME = 'hypertools'
-VERSION = '0.8.0'
+VERSION = '0.8.1'
 AUTHOR = 'Contextual Dynamics Lab'
 AUTHOR_EMAIL = '[email protected]'
 URL = 'https://github.com/ContextLab/hypertools'
 DOWNLOAD_URL = URL
 LICENSE = 'MIT'
-REQUIRES_PYTHON = '>=3.6'
+REQUIRES_PYTHON = '>=3.9'
 PACKAGES = find_packages(exclude=('images', 'examples', 'tests'))
 with open('requirements.txt', 'r') as f:
     REQUIREMENTS = f.read().splitlines()
@@ -35,10 +35,10 @@
 """
 CLASSIFIERS = [
     'Intended Audience :: Science/Research',
-    'Programming Language :: Python :: 3.6',
-    'Programming Language :: Python :: 3.7',
-    'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
     'Topic :: Scientific/Engineering :: Visualization',
     'Topic :: Multimedia :: Graphics',
     'Operating System :: POSIX',
