Source code for pyensae.mlhelper.joins

# -*- coding: utf-8 -*-
"""
Complex joins with pandas.


:githublink:`%|py|6`
"""
import pandas


[docs]def df_crossjoin(df1, df2, **kwargs): """ Make a cross join (cartesian product) between two dataframes by using a constant temporary key. Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes. Source: `Cross join / cartesian product between pandas DataFrames https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_. :param df1: dataframe 1 :param df2: dataframe 2 :param kwargs: keyword arguments that will be passed to pd.merge() :return: cross join of df1 and df2 .. exref:: :title: Cross join with a pandas dataframe .. runpython:: :showcode: import pandas from pyensae.mlhelper import df_crossjoin df = pandas.DataFrame([{"x":3, "y": 4}, {"x":5, "y": 6}]) jj = df_crossjoin(df, df.copy()) A dataframe cannot be joined on itself, the second one musrt be copied. :githublink:`%|py|33` """ df1['_tmpkey'] = 1 df2['_tmpkey'] = 1 res = pandas.merge(df1, df2, on='_tmpkey', ** kwargs).drop('_tmpkey', axis=1) res.index = pandas.MultiIndex.from_product((df1.index, df2.index)) df1.drop('_tmpkey', axis=1, inplace=True) df2.drop('_tmpkey', axis=1, inplace=True) return res