Source code for pdcleaner.plots.freqandcount

"""Plot method for values count and freq detectors"""
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from pdcleaner.utils.utils import add_method
from pdcleaner.detection.values import counts, freq


def _compact_counts(vals, nfirst, nlast):
    """Keep the first ``nfirst`` and last ``nlast`` rows of ``vals``.

    The rows in between are replaced by a single zero-valued placeholder row
    whose label is the number of hidden rows.

    Returns a ``(compacted, hidden)`` tuple. ``hidden`` is 0 when no
    placeholder was inserted, in which case ``compacted`` is ``vals`` itself.
    """
    hidden = len(vals) - nfirst - nlast
    # Nothing to compact when no limit was requested (hidden == len(vals)) or
    # when the requested head/tail already cover every row (hidden <= 0).
    # Slicing in the latter case would duplicate the overlapping rows.
    if hidden <= 0 or hidden == len(vals):
        return vals, 0

    chunks = []
    if nfirst > 0:
        chunks.append(vals.iloc[:nfirst])
    # NOTE(review): the placeholder label may collide with a real value that
    # is the same string (e.g. data containing "3" with 3 hidden rows).
    chunks.append(pd.Series([0], index=[f"{hidden}"]))
    if nlast > 0:
        chunks.append(vals.iloc[-nlast:])
    return pd.concat(chunks), hidden


@add_method(counts, 'plot')
@add_method(freq, 'plot')
def plot(self,
         nfirst=0,
         nlast=0,
         figsize=None,
         color='green',
         errors_color='red',
         not_displayed_color='grey',
         ):
    """Plot a horizontal bar chart of value counts, optionally compacted.

    The non-null values of ``self.obj`` are counted and drawn as one bar per
    distinct value. When ``nfirst`` and/or ``nlast`` are given, only the
    ``nfirst`` most frequent and ``nlast`` least frequent values are drawn;
    the values in between are summarised by a single "+N" badge. If
    ``nfirst + nlast`` covers every distinct value, the full chart is drawn.

    Parameters
    ----------
    nfirst : int (Default: 0)
        Number of top n values to display
    nlast : int (Default: 0)
        Number of n last values to display
    figsize : (float, float) (Default: None)
        width and height of the figure.
    color : color name (Default: "green")
        Color of the bars whose value is in ``self.values``
        (presumably the legitimate values of the detector).
        Should be something that can be interpreted by seaborn.
    errors_color : color name (Default: "red")
        Color of the bars whose value is not in ``self.values``.
        Should be something that can be interpreted by seaborn.
    not_displayed_color : str, color name (Default = "grey")
        Box color for the number of hidden values

    Returns
    -------
    ax : matplotlib.axes.Axes
        matplotlib axes object holding the plot

    Raises
    ------
    ValueError
        if nfirst or nlast is <0
    TypeError
        if nfirst or nlast is not an integer

    Examples
    --------
    >>> series = pd.Series(['cat','cat','dog', 'dog','dog','bird'])
    >>> detector = series.cleaner.detect.freq(freq=.2)
    >>> detector.plot()

    .. image:: ../../_static/plot_freq.png

    >>> detector.plot(nfirst=1, nlast=1)

    .. image:: ../../_static/plot_freq_nfirst_nlast.png

    """
    limits = (('nfirst', nfirst), ('nlast', nlast))
    # Type checks run for both arguments before any range check.
    for name, value in limits:
        if not isinstance(value, int):
            raise TypeError(f'{name} should be an integer')
    for name, value in limits:
        if value < 0:
            raise ValueError(f'{name} should be >=0')

    vals = self.obj.dropna().value_counts()
    compacted, hidden = _compact_counts(vals, nfirst, nlast)

    palette = [color if val in self.values else errors_color
               for val in compacted.index]

    _, ax = plt.subplots(figsize=figsize)
    sns.barplot(y=compacted.index, x=compacted, palette=palette, ax=ax)
    ax.set_ylabel('')

    if hidden:
        # The placeholder row sits right after the nfirst head rows, so its
        # position on the categorical axis is nfirst.
        ax.text(0, float(nfirst), f" +{hidden} ",
                color='white',
                weight='bold',
                ha='center',
                bbox=dict(facecolor=not_displayed_color,
                          edgecolor=not_displayed_color,),
                )

    return ax