Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Complex joins with pandas. 

5""" 

6import pandas 

7 

8 

def df_crossjoin(df1, df2, **kwargs):
    """
    Make a cross join (cartesian product) between two dataframes by using a constant temporary key.
    Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes.
    Source: `Cross join / cartesian product between pandas DataFrames
    <https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_.

    @param      df1     dataframe 1
    @param      df2     dataframe 2
    @param      kwargs  keyword arguments that will be passed to pd.merge()
    @return             cross join of df1 and df2

    .. exref::
        :title: Cross join with a pandas dataframe

        .. runpython::
            :showcode:

            import pandas
            from pyensae.mlhelper import df_crossjoin
            df = pandas.DataFrame([{"x":3, "y": 4}, {"x":5, "y": 6}])
            jj = df_crossjoin(df, df.copy())

        The input dataframes are never modified, the temporary key is only
        added to internal copies. The copy in the example is therefore
        optional: a dataframe can be cross joined with itself.
    """
    # Pick a key name which does not collide with an existing column,
    # otherwise a user column called '_tmpkey' would be overwritten
    # and then dropped from the result.
    key = '_tmpkey'
    while key in df1.columns or key in df2.columns:
        key = '_' + key

    # assign() returns new dataframes: the inputs stay untouched even if
    # the merge raises, and df1 and df2 may be the very same object.
    left = df1.assign(**{key: 1})
    right = df2.assign(**{key: 1})

    res = pandas.merge(left, right, on=key, **kwargs).drop(key, axis=1)
    # Every row of df1 is paired with every row of df2, in that order,
    # which is exactly the order produced by the product of both indices.
    res.index = pandas.MultiIndex.from_product((df1.index, df2.index))
    return res