polis-community · patcon · Mar 17, 2025 · Mar 11, 2025 · Mar 13, 2025 · Mar 13, 2025
diff --git a/debug.py b/debug.py
@@ -1,8 +1,10 @@
+from typing import Any
 from reddwarf.polis import PolisClient
 from reddwarf.data_presenter import DataPresenter
 import pandas as pd
 
 from reddwarf import utils
+import json
 
 
 CONVOS = {
@@ -39,27 +41,41 @@
     client = PolisClient()
     client.load_data(report_id=report_id)
 
+    # This allows us to skip recalculating clustering, and instead rely on
+    # clustering results from polismath's API endpoint. The value of this is
+    # being able to compare comment stat calculations even when clustering isn't
+    # yet reproducing Polis' exact behavior.
+    USE_POLISMATH_CLUSTERING = True
+    if USE_POLISMATH_CLUSTERING:
+        math_data: Any = client.data_loader.math_data # type:ignore
+        all_clustered_participant_ids, cluster_labels = utils.extract_data_from_polismath(math_data)
+        # Force same participant subset (Polis has some edge-cases where it
+        # keeps participants that would otherwise be cut)
+        client.keep_participant_ids = all_clustered_participant_ids
+
     # Generate vote matrix and run clustering
     vote_matrix = client.get_matrix(is_filtered=True)
     client.run_pca()
     client.scale_projected_data()
-    client.find_optimal_k()  # Find optimal number of clusters
-    cluster_labels = client.optimal_cluster_labels
-    group_count = cluster_labels.max()+1
-
-    for group_id in range(group_count):
-        print(f"representativeness for group {group_id}")
-        group_representativeness = utils.calculate_representativeness(
-            vote_matrix=vote_matrix,
-            cluster_labels=cluster_labels,
-            group_id=group_id,
-        )
-        print(group_representativeness)
+
+    if USE_POLISMATH_CLUSTERING:
+        # Fake optimal labels from polismath data.
+        client.optimal_cluster_labels = cluster_labels #type:ignore
+    else:
+        client.find_optimal_k()  # Find optimal number of clusters
+        cluster_labels = client.optimal_cluster_labels
+
+    stats_by_group = utils.calculate_comment_statistics_by_group(
+        vote_matrix=vote_matrix,
+        cluster_labels=cluster_labels, # type:ignore
+    )
+    polis_repness = utils.select_rep_comments(stats_by_group=stats_by_group)
+
+    print(json.dumps(polis_repness, indent=2))
 
     presenter = DataPresenter(client=client)
     presenter.render_optimal_cluster_figure()
 
-
 if False:
     # test agora method
     from reddwarf.agora import run_clustering

diff --git a/reddwarf/types/polis.py b/reddwarf/types/polis.py
@@ -0,0 +1,47 @@
+from typing import TypeAlias, Literal
+from typing_extensions import NotRequired, TypedDict
+
+IncrementingId: TypeAlias = int
+BaseClusterId: TypeAlias = IncrementingId
+GroupId: TypeAlias = IncrementingId
+ParticipantId: TypeAlias = IncrementingId
+
+class PolisGroupCluster(TypedDict):
+    id: GroupId
+    members: list[BaseClusterId]
+    center: list[float]
+
+class PolisGroupClusterExpanded(TypedDict):
+    id: GroupId
+    members: list[ParticipantId]
+    center: list[float]
+
+class PolisBaseClusters(TypedDict):
+    # Each outer list will be the same length, and will be 100 items or less.
+    id: list[BaseClusterId]
+    members: list[list[ParticipantId]]
+    x: list[float]
+    y: list[float]
+    count: list[int]
+
+# Use functional form when attributes have hyphens or are string numbers.
+PolisRepnessStatement = TypedDict("PolisRepnessStatement", {
+    "tid": int,
+    "n-success": int,
+    "n-trials": int,
+    "p-success": float,
+    "p-test": float,
+    "repness": float,
+    "repness-test": float,
+    "repful-for": Literal["agree", "disagree"],
+    "best-agree": NotRequired[bool],
+    "n-agree": NotRequired[int],
+})
+
+PolisRepness = TypedDict("PolisRepness", {
+    "0": list[PolisRepnessStatement],
+    "1": list[PolisRepnessStatement],
+    "2": NotRequired[list[PolisRepnessStatement]],
+    "3": NotRequired[list[PolisRepnessStatement]],
+    "4": NotRequired[list[PolisRepnessStatement]],
+})