Skip to content

Bourdieu One Dimension Visualizer

A class to visualize data distribution along a unique continuum inspired by Bourdieu's theory using an embedding model.

Source code in bunkatopics/bourdieu/bourdieu_one_dimension.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
class BourdieuOneDimensionVisualizer:
    """
    Visualize how documents are distributed along a single continuum.

    The continuum is described by two keyword lists (``left`` and ``right``).
    Documents are positioned on it by ``_get_continuum`` using the supplied
    embedding model, and the resulting distances are drawn as a Plotly box plot.
    """

    def __init__(
        self,
        embedding_model: Embeddings,
        left: t.Optional[t.List[str]] = None,
        right: t.Optional[t.List[str]] = None,
        height: int = 700,
        width: int = 600,
        explainer: bool = False,
        explainer_ngrams: t.Optional[t.List[int]] = None,
    ) -> None:
        """
        Store the configuration of the visualizer.

        Args:
            embedding_model: The embedding model forwarded to ``_get_continuum``.
            left (List[str]): Keywords indicating one end of the continuum.
                Defaults to ["aggressivity"] when omitted or None.
            right (List[str]): Keywords indicating the other end of the continuum.
                Defaults to ["peacefulness"] when omitted or None.
            height (int): Height of the visualization plot. Default is 700.
            width (int): Width of the visualization plot. Default is 600.
            explainer (bool): Stored on the instance; not read by the methods
                of this class. Default is False.
            explainer_ngrams (List[int]): Stored on the instance; not read by
                the methods of this class. Defaults to [1, 2] when omitted or None.
        """
        # The list defaults are created per call: a list literal in the
        # signature would be one shared object, so mutating it on one instance
        # would silently change the defaults of every later instance.
        self.embedding_model = embedding_model
        self.left = left if left is not None else ["aggressivity"]
        self.right = right if right is not None else ["peacefulness"]
        self.height = height
        self.width = width
        self.explainer = explainer
        self.explainer_ngrams = (
            explainer_ngrams if explainer_ngrams is not None else [1, 2]
        )

    def fit_transform(self, docs: t.List[Document]) -> go.Figure:
        """
        Position the documents on the continuum and plot their distribution.

        Args:
            docs (List[Document]): A list of Document objects to be analyzed.

        Returns:
            go.Figure: The Plotly figure produced by ``plot_unique_dimension``,
                showing the distribution of the documents along the continuum.
        """
        # A fresh random identifier names the continuum of this run; it is used
        # later to pick the matching dimension out of each document.
        self.id = str(random.randint(0, 10000))
        self.docs = docs

        self.new_docs = _get_continuum(
            embedding_model=self.embedding_model,
            docs=self.docs,
            cont_name=self.id,
            left_words=self.left,
            right_words=self.right,
            scale=False,
        )

        return self.plot_unique_dimension()

    def plot_unique_dimension(self) -> go.Figure:
        """
        Generates a Plotly figure representing the unique dimension continuum.

        This method is called by fit_transform and relies on ``self.new_docs``
        and ``self.id`` having been set there.

        Returns:
            go.Figure: A Plotly box plot (with all points shown) of the document
                distances along the continuum, with a red horizontal line at 0.
        """
        left = " ".join(self.left)
        right = " ".join(self.right)
        name = f"<{right}-{left}>"

        # Collect id, distance and content together so the three columns stay
        # aligned even if a document has no dimension (or several) for this
        # continuum; building them separately could yield unequal lengths.
        records = [
            (doc.doc_id, dimension.distance, doc.content)
            for doc in self.new_docs
            for dimension in doc.bourdieu_dimensions
            if dimension.continuum.id == self.id
        ]
        df_fig = pd.DataFrame(records, columns=["doc_id", name, "content"])

        # Reformat the hover text through wrap_by_word with a width argument of 10.
        df_fig["content"] = df_fig["content"].apply(lambda x: wrap_by_word(x, 10))

        fig = px.box(
            df_fig,
            y=name,
            points="all",
            hover_data=["content"],
            height=self.height,
            width=self.width,
            template="plotly_white",
        )

        # Zero reference line. The distances live on the y axis, so the x extent
        # is expressed in paper coordinates (0..1) to span the whole plot width
        # instead of being derived from y-axis data values.
        fig.add_shape(
            dict(
                type="line",
                xref="paper",
                x0=0,
                x1=1,
                y0=0,
                y1=0,
                line=dict(color="red", width=4),
            )
        )

        return fig

__init__(embedding_model, left=['aggressivity'], right=['peacefulness'], height=700, width=600, explainer=False, explainer_ngrams=[1, 2])

Constructs all the necessary attributes for the BourdieuOneDimensionVisualizer object.

Parameters:

Name Type Description Default
embedding_model Embeddings

The embedding model used for encoding text.

required
left List[str]

Keywords indicating one end of the continuum. Defaults to ["aggressivity"].

['aggressivity']
right List[str]

Keywords indicating the other end of the continuum. Defaults to ["peacefulness"].

['peacefulness']
height int

Height of the visualization plot. Default is 700.

700
width int

Width of the visualization plot. Default is 600.

600
explainer bool

If True, includes an explanation component in the visualization. Default is False.

False
explainer_ngrams List[int]

N-grams to use for generating explanations. Default is [1, 2].

[1, 2]
Source code in bunkatopics/bourdieu/bourdieu_one_dimension.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    embedding_model: Embeddings,
    left: t.Optional[t.List[str]] = None,
    right: t.Optional[t.List[str]] = None,
    height: int = 700,
    width: int = 600,
    explainer: bool = False,
    explainer_ngrams: t.Optional[t.List[int]] = None,
) -> None:
    """
    Store the configuration of the BourdieuOneDimensionVisualizer object.

    Args:
        embedding_model: The embedding model used for encoding text.
        left (List[str]): Keywords indicating one end of the continuum.
            Defaults to ["aggressivity"] when omitted or None.
        right (List[str]): Keywords indicating the other end of the continuum.
            Defaults to ["peacefulness"] when omitted or None.
        height (int): Height of the visualization plot. Default is 700.
        width (int): Width of the visualization plot. Default is 600.
        explainer (bool): Flag stored on the instance. Default is False.
        explainer_ngrams (List[int]): N-gram sizes stored on the instance.
            Defaults to [1, 2] when omitted or None.
    """
    # The list defaults are created per call: a list literal in the signature
    # would be one shared object, so mutating it on one instance would silently
    # change the defaults of every later instance.
    self.embedding_model = embedding_model
    self.left = left if left is not None else ["aggressivity"]
    self.right = right if right is not None else ["peacefulness"]
    self.height = height
    self.width = width
    self.explainer = explainer
    self.explainer_ngrams = (
        explainer_ngrams if explainer_ngrams is not None else [1, 2]
    )

fit_transform(docs)

Analyzes a list of Document objects and visualizes their distribution along the unique continuum.

Parameters:

Name Type Description Default
docs List[Document]

A list of Document objects to be analyzed.

required

Returns:

Type Description
Figure

go.Figure: A Plotly figure representing the distribution of the documents along the continuum. (The method returns this single figure; no separate matplotlib figure of characteristic terms is returned, even when explainer is True.)

Source code in bunkatopics/bourdieu/bourdieu_one_dimension.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def fit_transform(self, docs: t.List[Document]) -> go.Figure:
    """
    Position the documents on the continuum and plot their distribution.

    Args:
        docs (List[Document]): A list of Document objects to be analyzed.

    Returns:
        go.Figure: The Plotly figure produced by ``plot_unique_dimension``,
            showing the distribution of the documents along the continuum.
    """
    # A fresh random identifier names the continuum of this run; the plotting
    # step uses it to pick the matching dimension out of each document.
    continuum_id = str(random.randint(0, 10000))
    self.id = continuum_id
    self.docs = docs

    # Forward the stored configuration; scaling is explicitly disabled.
    continuum_kwargs = dict(
        embedding_model=self.embedding_model,
        docs=self.docs,
        cont_name=self.id,
        left_words=self.left,
        right_words=self.right,
        scale=False,
    )
    self.new_docs = _get_continuum(**continuum_kwargs)

    return self.plot_unique_dimension()

plot_unique_dimension()

Generates a Plotly figure representing the unique dimension continuum.

This method is used internally by fit_transform to create the visualization.

Returns:

Type Description
Figure

go.Figure: A Plotly figure visualizing the distribution of documents along the unique continuum.

Source code in bunkatopics/bourdieu/bourdieu_one_dimension.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def plot_unique_dimension(self) -> go.Figure:
    """
    Generates a Plotly figure representing the unique dimension continuum.

    This method is called by fit_transform and relies on ``self.new_docs``
    and ``self.id`` having been set there.

    Returns:
        go.Figure: A Plotly box plot (with all points shown) of the document
            distances along the continuum, with a red horizontal line at 0.
    """
    left = " ".join(self.left)
    right = " ".join(self.right)
    name = f"<{right}-{left}>"

    # Collect id, distance and content together so the three columns stay
    # aligned even if a document has no dimension (or several) for this
    # continuum; building them separately could yield unequal lengths.
    records = [
        (doc.doc_id, dimension.distance, doc.content)
        for doc in self.new_docs
        for dimension in doc.bourdieu_dimensions
        if dimension.continuum.id == self.id
    ]
    df_fig = pd.DataFrame(records, columns=["doc_id", name, "content"])

    # Reformat the hover text through wrap_by_word with a width argument of 10.
    df_fig["content"] = df_fig["content"].apply(lambda x: wrap_by_word(x, 10))

    fig = px.box(
        df_fig,
        y=name,
        points="all",
        hover_data=["content"],
        height=self.height,
        width=self.width,
        template="plotly_white",
    )

    # Zero reference line. The distances live on the y axis, so the x extent
    # is expressed in paper coordinates (0..1) to span the whole plot width
    # instead of being derived from y-axis data values.
    fig.add_shape(
        dict(
            type="line",
            xref="paper",
            x0=0,
            x1=1,
            y0=0,
            y1=0,
            line=dict(color="red", width=4),
        )
    )

    return fig