Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
92c7a97
Add repness-test calculation and test.
patcon Mar 11, 2025
b2cd311
Calculate all comment stats in calculate_representativeness.
patcon Mar 13, 2025
949597f
Add helper functions for processing representative comments.
patcon Mar 13, 2025
cbb4775
fix: bug in calculation of probability significance test.
patcon Mar 13, 2025
20d7a91
Add rest of comment statistics. Complete unit test against real polis…
patcon Mar 13, 2025
4488e69
Got debug reproducing all repness comments. Needs cleanup.
patcon Mar 13, 2025
750162a
Clean up variable names. Add typings. Remove debug output.
patcon Mar 14, 2025
bd4fb1d
Move types into own file. Move functions from tests into utils.
patcon Mar 14, 2025
d02d467
Rename test.
patcon Mar 14, 2025
e246e75
debug: Ensure we gather all the active participant_ids before trying …
patcon Mar 14, 2025
cafaaa7
debug: Yay! Perform messy stub reproduction of Polis group repful sta…
patcon Mar 14, 2025
a5178a9
Cast N vars in numpy arrays as ints.
patcon Mar 14, 2025
640bd85
Improve var name in debug.
patcon Mar 14, 2025
5f8983c
Ensure agree come before disagree like in polismath output.
patcon Mar 14, 2025
eafd734
Clean up whitespace.
patcon Mar 14, 2025
f581925
Stop redundantly setting types.
patcon Mar 14, 2025
76e1f0a
Improve number typing so that regular ints and floats are in final ou…
patcon Mar 14, 2025
ad6cdf6
Added PolisRepness types for polismath output.
patcon Mar 14, 2025
9e4ba1e
Deal with new typing errors now that using pyright.
patcon Mar 14, 2025
e771d77
Created function to generally extract helpful data from polismath data.
patcon Mar 14, 2025
764b21a
Move first-pass select_rep_comments() into utils. Added test assertion.
patcon Mar 14, 2025
91765fd
Use types consistently.
patcon Mar 14, 2025
5129b45
Clean up select_rep_comments() and filter functions.
patcon Mar 15, 2025
cf423b2
Clean up and document select_rep_comments functions better.
patcon Mar 16, 2025
b14f9b0
More code cleanup.
patcon Mar 16, 2025
4f3d567
Code cleanup. More legible.
patcon Mar 16, 2025
6b3fb32
Make confidence interval configurable instead of hardcoded at 90%.
patcon Mar 16, 2025
f13b4b3
Improve function docs. Rename vars for clarity. Refactor into clearer…
patcon Mar 17, 2025
0bc33c0
Improve docs more. Add pseudo_count into probability functions.
patcon Mar 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions debug.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Any
from reddwarf.polis import PolisClient
from reddwarf.data_presenter import DataPresenter
import pandas as pd

from reddwarf import utils
import json


CONVOS = {
Expand Down Expand Up @@ -39,27 +41,41 @@
client = PolisClient()
client.load_data(report_id=report_id)

# This allows us to skip recalculating clustering, and instead rely on
# clustering results from polismath's API endpoint. The value of this is
# being able to compare comment stat calculations even when clustering isn't
# yet reproducing Polis' exact behavior.
USE_POLISMATH_CLUSTERING = True
if USE_POLISMATH_CLUSTERING:
math_data: Any = client.data_loader.math_data # type:ignore
all_clustered_participant_ids, cluster_labels = utils.extract_data_from_polismath(math_data)
# Force same participant subset (Polis has some edge-cases where it
# keeps participants that would otherwise be cut)
client.keep_participant_ids = all_clustered_participant_ids

# Generate vote matrix and run clustering
vote_matrix = client.get_matrix(is_filtered=True)
client.run_pca()
client.scale_projected_data()
client.find_optimal_k() # Find optimal number of clusters
cluster_labels = client.optimal_cluster_labels
group_count = cluster_labels.max()+1

for group_id in range(group_count):
print(f"representativeness for group {group_id}")
group_representativeness = utils.calculate_representativeness(
vote_matrix=vote_matrix,
cluster_labels=cluster_labels,
group_id=group_id,
)
print(group_representativeness)

if USE_POLISMATH_CLUSTERING:
# Fake optimal labels from polismath data.
client.optimal_cluster_labels = cluster_labels #type:ignore
else:
client.find_optimal_k() # Find optimal number of clusters
cluster_labels = client.optimal_cluster_labels

stats_by_group = utils.calculate_comment_statistics_by_group(
vote_matrix=vote_matrix,
cluster_labels=cluster_labels, # type:ignore
)
polis_repness = utils.select_rep_comments(stats_by_group=stats_by_group)

print(json.dumps(polis_repness, indent=2))

presenter = DataPresenter(client=client)
presenter.render_optimal_cluster_figure()


if False:
# test agora method
from reddwarf.agora import run_clustering
Expand Down
47 changes: 47 additions & 0 deletions reddwarf/types/polis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import TypeAlias, Literal
from typing_extensions import NotRequired, TypedDict

# Identifier aliases. Every alias below resolves to plain `int`; the distinct
# names exist only so annotations state which kind of id is expected.
# NOTE(review): the name "IncrementingId" suggests sequentially assigned ids —
# confirm against the upstream Polis data before relying on that.
IncrementingId: TypeAlias = int
BaseClusterId: TypeAlias = IncrementingId
GroupId: TypeAlias = IncrementingId
ParticipantId: TypeAlias = IncrementingId

class PolisGroupCluster(TypedDict):
    """A group cluster whose members are referenced by base-cluster id.

    Keys:
        id: Id of this group cluster.
        members: Ids of the base clusters that belong to the group.
        center: Center of the group as a list of floats (presumably one
            value per projected dimension -- TODO confirm).
    """

    id: GroupId
    members: list[BaseClusterId]
    center: list[float]

class PolisGroupClusterExpanded(TypedDict):
    """A group cluster whose members are referenced by participant id.

    Same keys as ``PolisGroupCluster``, except that ``members`` holds
    participant ids rather than base-cluster ids.

    Keys:
        id: Id of this group cluster.
        members: Ids of the participants that belong to the group.
        center: Center of the group as a list of floats (presumably one
            value per projected dimension -- TODO confirm).
    """

    id: GroupId
    members: list[ParticipantId]
    center: list[float]

class PolisBaseClusters(TypedDict):
    """Base clusters stored as parallel lists, one list per attribute.

    Presumably index ``i`` of every list describes the same base cluster
    (TODO confirm against the polismath payload).

    Keys:
        id: Base-cluster ids.
        members: For each base cluster, the ids of its participants.
        x: One float per base cluster (presumably its x coordinate).
        y: One float per base cluster (presumably its y coordinate).
        count: One int per base cluster (presumably its member count).
    """

    # Each outer list will be the same length, and will be 100 items or fewer.
    id: list[BaseClusterId]
    members: list[list[ParticipantId]]
    x: list[float]
    y: list[float]
    count: list[int]

# Use the functional TypedDict form when keys contain hyphens or are numeric
# strings: such keys are not valid Python identifiers, so the class syntax
# cannot declare them.
#
# One statement entry of the repness ("representativeness") data.
# NOTE(review): the meaning of the individual statistics is not visible in
# this file; the notes below are limited to what the declared types show.
PolisRepnessStatement = TypedDict("PolisRepnessStatement", {
    # presumably the statement (comment) id -- TODO confirm
    "tid": int,
    "n-success": int,
    "n-trials": int,
    "p-success": float,
    "p-test": float,
    "repness": float,
    "repness-test": float,
    # Only these two string values are allowed.
    "repful-for": Literal["agree", "disagree"],
    # Optional keys: an entry may omit them entirely.
    "best-agree": NotRequired[bool],
    "n-agree": NotRequired[int],
})

# Mapping from a numeric-string key to a list of repness statement entries.
# Keys "0" and "1" are always present; "2" through "4" are optional, so the
# mapping holds between two and five entries.
# NOTE(review): the keys are presumably group ids rendered as strings --
# confirm against the polismath output.
PolisRepness = TypedDict("PolisRepness", {
    "0": list[PolisRepnessStatement],
    "1": list[PolisRepnessStatement],
    "2": NotRequired[list[PolisRepnessStatement]],
    "3": NotRequired[list[PolisRepnessStatement]],
    "4": NotRequired[list[PolisRepnessStatement]],
})
Loading