Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Complex joins with pandas.
5"""
6import pandas
def df_crossjoin(df1, df2, **kwargs):
    """
    Make a cross join (cartesian product) between two dataframes by using a constant temporary key.
    Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes.
    Source: `Cross join / cartesian product between pandas DataFrames
    <https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_.

    @param      df1     dataframe 1
    @param      df2     dataframe 2
    @param      kwargs  keyword arguments that will be passed to pd.merge()
    @return             cross join of df1 and df2

    .. exref::
        :title: Cross join with a pandas dataframe

        .. runpython::
            :showcode:

            import pandas
            from pyensae.mlhelper import df_crossjoin
            df = pandas.DataFrame([{"x":3, "y": 4}, {"x":5, "y": 6}])
            jj = df_crossjoin(df, df)

        The input dataframes are never modified: the temporary key is added
        to copies of them. A dataframe can therefore be joined with itself
        without being copied first. A column named ``_tmpkey`` in an input
        is treated as the temporary key and does not appear in the result.
    """
    tmp_key = '_tmpkey'
    # assign() returns a new dataframe, so the caller's objects are left
    # untouched even when df1 is df2 or when the merge raises.
    left = df1.assign(**{tmp_key: 1})
    right = df2.assign(**{tmp_key: 1})
    res = pandas.merge(left, right, on=tmp_key, **kwargs).drop(tmp_key, axis=1)
    # Every row of df1 is paired with every row of df2, in that order,
    # which is the order produced by MultiIndex.from_product.
    res.index = pandas.MultiIndex.from_product((df1.index, df2.index))
    return res