# Source code for ergo.platforms.metaculus.metaculus

"""
This module lets you get question and prediction information from Metaculus
and submit predictions, via the API (https://www.metaculus.com/api2/)
"""
from datetime import datetime
import json
from typing import Dict, List, Optional, Union

import pandas as pd
import requests
from typing_extensions import Literal

from .question import (
    BinaryQuestion,
    LinearDateQuestion,
    LinearQuestion,
    LogQuestion,
    MetaculusQuestion,
)


class Metaculus:
    """
    The main class for interacting with Metaculus

    Requests are sent through a single ``requests.Session`` (``self.s``).
    Predicting requires authentication, either via
    :meth:`login_via_username_and_password` or :meth:`login_via_api_keys`.

    :param api_domain: A Metaculus subdomain (e.g., www, pandemic, finance)
    :param username: A Metaculus username (deprecated)
    :param password: The password for the given Metaculus username (deprecated)
    """

    # Maps the ``player_status`` filter values accepted by get_questions /
    # get_questions_json to the query parameter names used in the request.
    player_status_to_api_wording = {
        "predicted": "guessed_by",
        "not-predicted": "not_guessed_by",
        "author": "author",
        "interested": "upvoted_by",
    }

    # Number of questions requested per page when listing questions.
    _page_size = 20

    def __init__(
        self,
        api_domain: Optional[str] = "www",
        username: Optional[str] = None,
        password: Optional[str] = None,
    ):
        """
        Set up the API base URL and an (unauthenticated) HTTP session.

        :raises ValueError: if ``username`` or ``password`` is given; these
            parameters only remain so that old call sites fail loudly.
        """
        if username or password:
            raise ValueError(
                "Username and password are no longer accepted on initializaion. Use login_via_username_and_password after initialization instead."
            )
        self.api_domain = api_domain
        self.api_url = f"https://{api_domain}.metaculus.com/api2"
        self.s = requests.Session()

    def login_via_username_and_password(self, username: str, password: str):
        """
        log in to Metaculus using your credentials and store cookies,
        etc. in the session object for future use

        On success the ``user_id`` from the response is stored on the
        instance.

        :param username: Metaculus username
        :param password: password for that username
        :raises requests.exceptions.HTTPError: if the login request fails
        """
        loginURL = f"{self.api_url}/accounts/login/"
        r = self.s.post(
            loginURL,
            headers={"Content-Type": "application/json"},
            data=json.dumps({"username": username, "password": password}),
        )

        r.raise_for_status()

        self.user_id = r.json()["user_id"]

    @property
    def is_logged_in_via_uname_pwd(self):
        """True once :meth:`login_via_username_and_password` has succeeded."""
        return hasattr(self, "user_id")

    def login_via_api_keys(self, user_api_key: str, org_api_key: str):
        """
        Store API keys to be sent with prediction requests.

        No request is made here; the keys are only recorded on the instance.

        :param user_api_key: sent as the ``X-USERKEY`` header
        :param org_api_key: sent as the ``X-APIKEY`` header
        """
        self.user_api_key = user_api_key
        self.org_api_key = org_api_key

    @property
    def has_api_keys(self):
        """True once :meth:`login_via_api_keys` has been called."""
        return hasattr(self, "user_api_key") and hasattr(self, "org_api_key")

    def predict(self, q_id: str, data: Dict) -> "requests.Response":
        """
        Submit a prediction for a question.

        Username/password login takes precedence over API keys when both
        are available.

        :param q_id: id of the question to predict on
        :param data: prediction payload; it is JSON-encoded and sent as the
            request body
        :return: the response of the prediction request
        :raises ValueError: if no authentication method has been set up
        :raises requests.exceptions.HTTPError: if the request fails; the
            exception args are extended with the request body and the
            response content
        """
        url = f"{self.api_url}/questions/{q_id}/predict/"
        headers = {
            "Content-Type": "application/json",
            "Referer": self.api_url,
        }
        if self.is_logged_in_via_uname_pwd:
            headers["X-CSRFToken"] = self.s.cookies.get_dict()["csrftoken"]
        elif self.has_api_keys:
            headers["X-USERKEY"] = self.user_api_key
            headers["X-APIKEY"] = self.org_api_key
        else:
            raise ValueError("Must be authenticated to make a prediction")

        r = self.s.post(url, headers=headers, data=json.dumps(data))

        try:
            r.raise_for_status()

        except requests.exceptions.HTTPError as e:
            # An error body is not guaranteed to be JSON; fall back to the raw
            # text so that decoding problems never mask the HTTP error itself.
            try:
                response_detail = f"response json: {e.response.json()}"
            except ValueError:
                response_detail = f"response text: {e.response.text}"
            e.args = (
                str(e.args),
                f"request body: {e.request.body}",
                response_detail,
            )
            raise

        return r

    def make_question_from_data(self, data: Dict, name=None) -> "MetaculusQuestion":
        """
        Make a MetaculusQuestion given data about the question
        of the sort returned by the Metaculus API.

        :param data: the question data (usually from the Metaculus API)
        :param name: a custom name for the question; defaults to the
            question's ``title`` if there is one
        :return: A MetaculusQuestion from the appropriate subclass
        :raises NotImplementedError: for logarithmic date questions and for
            question types other than binary and continuous
        """
        if not name:
            name = data.get("title")

        possibilities = data["possibilities"]
        question_type = possibilities["type"]

        if question_type == "binary":
            return BinaryQuestion(data["id"], self, data, name)

        if question_type == "continuous":
            is_date = possibilities.get("format") == "date"
            # A deriv_ratio other than 1 marks a logarithmic scale.
            is_log = possibilities["scale"]["deriv_ratio"] != 1
            if is_log and is_date:
                raise NotImplementedError(
                    "Logarithmic date-valued questions are not currently supported"
                )
            if is_log:
                return LogQuestion(data["id"], self, data, name)
            if is_date:
                return LinearDateQuestion(data["id"], self, data, name)
            return LinearQuestion(data["id"], self, data, name)

        raise NotImplementedError(
            "We couldn't determine whether this question was binary, continuous, or something else"
        )

    def get_question(self, id: int, name=None) -> "MetaculusQuestion":
        """
        Load a question from Metaculus

        :param id: Question id (can be read off from URL)
        :param name: Name to assign to this question (used in models)
        :return: A MetaculusQuestion from the appropriate subclass
        :raises ValueError: if the response contains no ``possibilities``
            entry, i.e. no usable question was returned for this id
        """
        r = self.s.get(f"{self.api_url}/questions/{id}/")
        data = r.json()
        if not data.get("possibilities"):
            # Report the id and the response in the exception itself rather
            # than printing them to stdout.
            raise ValueError(
                "Unable to find a question with that id. Are you using the right api_domain?"
                f" (id: {id}, response: {data})"
            )
        return self.make_question_from_data(data, name)

    @staticmethod
    def _is_log_date(data: Dict) -> bool:
        """Return True if the question data describes a logarithmic date question."""
        possibilities = data["possibilities"]
        return (
            possibilities["type"] == "continuous"
            and possibilities["scale"]["deriv_ratio"] != 1
            # "format" is optional, as in make_question_from_data.
            and possibilities.get("format") == "date"
        )

    def get_questions(
        self,
        question_status: Literal[
            "all", "upcoming", "open", "closed", "resolved", "discussion"
        ] = "all",
        player_status: Literal[
            "any", "predicted", "not-predicted", "author", "interested", "private"
        ] = "any",  # 20 results per page
        cat: Union[str, None] = None,
        pages: int = 1,
        fail_silent: bool = False,
        load_detail: bool = True,
    ) -> List["MetaculusQuestion"]:
        """
        Retrieve multiple questions from Metaculus API.

        Logarithmic date questions are skipped, since
        :meth:`make_question_from_data` does not support them.

        :param question_status: Question status
        :param player_status: Player's status on this question
        :param cat: Category slug
        :param pages: Number of pages of questions to retrieve
        :param fail_silent: currently unused; accepted for backward
            compatibility
        :param load_detail: If true, also fetch each question's detail data
        :return: the retrieved questions as MetaculusQuestion objects
        """

        questions_json = self.get_questions_json(
            question_status=question_status,
            player_status=player_status,
            cat=cat,
            pages=pages,
            load_detail=load_detail,
        )

        return [
            self.make_question_from_data(q)
            for q in questions_json
            if not self._is_log_date(q)
        ]

    def _fetch_question_pages(self, query_string: str, pages: int) -> List[Dict]:
        """Fetch up to ``pages`` pages of question list results, newest page first."""
        results: List[Dict] = []
        for page_index in range(pages):
            # The first page starts at offset 0.
            offset = self._page_size * page_index
            r = self.s.get(
                f"{self.api_url}/questions/?{query_string}&limit={self._page_size}&offset={offset}"
            )
            # Check the status before touching the body so that HTTP errors
            # are reported as such rather than as a missing "results" key.
            r.raise_for_status()
            page_results = r.json()["results"]
            if not page_results:
                # Ran past the last available question.
                break
            results.extend(page_results)
        return results

    def get_questions_json(
        self,
        question_status: Literal[
            "all", "upcoming", "open", "closed", "resolved", "discussion"
        ] = "all",
        player_status: Literal[
            "any", "predicted", "not-predicted", "author", "interested", "private"
        ] = "any",  # 20 results per page
        cat: Union[str, None] = None,
        pages: int = 1,
        include_discussion_questions: bool = False,
        load_detail: bool = True,
    ) -> List[Dict]:
        """
        Retrieve JSON for multiple questions from Metaculus API.

        :param question_status: Question status
        :param player_status: Player's status on this question
        :param cat: Category slug
        :param pages: Number of pages of questions to retrieve
        :param include_discussion_questions: If true, data for non-prediction
            questions will be included
        :param load_detail: If true, request each question individually and
            merge the result into the list data (list fields win on conflict)
        :return: one dict per question
        :raises ValueError: if filtering by a player status that needs the
            user's id without a username/password login
        :raises requests.exceptions.HTTPError: if a list request fails
        """
        query_params = [f"status={question_status}", "order_by=-publish_time"]
        if player_status == "private":
            query_params.append("access=private")
        elif player_status != "any":
            # Every other filter is expressed relative to the logged-in user.
            if not self.is_logged_in_via_uname_pwd:
                raise ValueError(
                    f"username_and_password login must be used in order to filter by status {player_status}"
                )
            query_params.append(
                f"{self.player_status_to_api_wording[player_status]}={self.user_id}"
            )

        if cat is not None:
            query_params.append(f"search=cat:{cat}")

        query_string = "&".join(query_params)

        questions = self._fetch_question_pages(query_string, pages)

        # Add fields omitted by previous query
        if load_detail:
            for i, q in enumerate(questions):
                r = self.s.get(f"{self.api_url}/questions/{q['id']}")
                questions[i] = dict(r.json(), **q)

        if not include_discussion_questions:
            questions = [
                q for q in questions if q["possibilities"]["type"] != "discussion"
            ]

        return questions

    def make_questions_df(
        self, questions_json: List[Dict], columns: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """
        Convert JSON returned by Metaculus API to dataframe.

        Timestamp columns are parsed to second precision; missing
        timestamps become ``NaT``. An ``i_created`` column is derived from
        ``author`` (all False when not logged in via username/password) and
        an ``i_predicted`` column from ``my_predictions``.

        :param questions_json: List of questions (as dicts)
        :param columns: Optional list of column names to include
            (if omitted, every column is included)
        :return: dataframe with one row per question
        """
        if columns is not None:
            wanted = frozenset(columns)
            questions_df = pd.DataFrame(
                [{k: v for (k, v) in q.items() if k in wanted} for q in questions_json]
            )
        else:
            questions_df = pd.DataFrame(questions_json)

        def parse_time(value):
            # Null or missing timestamps are not strings and cannot be sliced.
            if not isinstance(value, str):
                return pd.NaT
            # Keep only "YYYY-MM-DDTHH:MM:SS", dropping any trailing
            # fractional seconds or timezone suffix.
            return datetime.strptime(value[:19], "%Y-%m-%dT%H:%M:%S")

        for col in ["created_time", "publish_time", "close_time", "resolve_time"]:
            if col in questions_df.columns:
                questions_df[col] = questions_df[col].apply(parse_time)

        if "author" in questions_df.columns:
            # user_id only exists after a username/password login; without it
            # no question can be attributed to the current user.
            user_id = getattr(self, "user_id", None)
            if user_id is None:
                questions_df["i_created"] = False
            else:
                questions_df["i_created"] = questions_df["author"] == user_id

        if "my_predictions" in questions_df.columns:
            questions_df["i_predicted"] = questions_df["my_predictions"].apply(
                lambda x: x is not None
            )

        return questions_df