Merge pull request #2248 from recommenders-team/staging

miguelgfierro · web-flow · commit e1a582509b0c · 2025-09-01T15:27:47.000+02:00
Staging to main: Fix the issue with MovieLens by switching the files.grouplens.org URLs from https to http
diff --git a/examples/01_prepare_data/data_split.ipynb b/examples/01_prepare_data/data_split.ipynb
@@ -73,7 +73,7 @@
             "metadata": {},
             "outputs": [],
             "source": [
-                "DATA_URL = \"https://files.grouplens.org/datasets/movielens/ml-100k/u.data\"\n",
+                "DATA_URL = \"http://files.grouplens.org/datasets/movielens/ml-100k/u.data\"\n",
                 "DATA_PATH = \"ml-100k.data\"\n",
                 "\n",
                 "COL_USER = \"UserId\"\n",
@@ -1195,4 +1195,4 @@
     },
     "nbformat": 4,
     "nbformat_minor": 2
-}
+}
diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py
@@ -159,7 +159,7 @@ def load_pandas_df(
 ):
     """Loads the MovieLens dataset as pd.DataFrame.
 
-    Download the dataset from https://files.grouplens.org/datasets/movielens, unzip, and load.
+    Download the dataset from http://files.grouplens.org/datasets/movielens, unzip, and load.
     To load movie information only, you can use load_item_df function.
 
     Args:
@@ -304,7 +304,7 @@ def _load_item_df(size, item_datapath, movie_col, title_col, genres_col, year_co
     genres_header_100k = None
     if genres_col is not None:
         # 100k data's movie genres are encoded as a binary array (the last 19 fields)
-        # For details, see https://files.grouplens.org/datasets/movielens/ml-100k-README.txt
+        # For details, see http://files.grouplens.org/datasets/movielens/ml-100k-README.txt
         if size == "100k":
             genres_header_100k = [*(str(i) for i in range(19))]
             item_header.extend(genres_header_100k)
@@ -366,7 +366,7 @@ def load_spark_df(
 ):
     """Loads the MovieLens dataset as `pyspark.sql.DataFrame`.
 
-    Download the dataset from https://files.grouplens.org/datasets/movielens, unzip, and load as `pyspark.sql.DataFrame`.
+    Download the dataset from http://files.grouplens.org/datasets/movielens, unzip, and load as `pyspark.sql.DataFrame`.
 
     To load movie information only, you can use `load_item_df` function.
 
@@ -552,7 +552,7 @@ def download_movielens(size, dest_path):
     if size not in DATA_FORMAT:
         raise ValueError(f"Size: {size}. " + ERROR_MOVIE_LENS_SIZE)
 
-    url = "https://files.grouplens.org/datasets/movielens/ml-" + size + ".zip"
+    url = "http://files.grouplens.org/datasets/movielens/ml-" + size + ".zip"
     dirs, file = os.path.split(dest_path)
     maybe_download(url, file, work_directory=dirs)
 
@@ -587,7 +587,7 @@ class MockMovielensSchema(pa.DataFrameModel):
     Mock dataset schema to generate fake data for testing purpose.
     This schema is configured to mimic the Movielens dataset
 
-    https://files.grouplens.org/datasets/movielens/ml-100k/
+    http://files.grouplens.org/datasets/movielens/ml-100k/
 
     Dataset schema and generation is configured using pandera.
     Please see https://pandera.readthedocs.io/en/latest/schema_models.html