import mermaid from ''; mermaid.initialize({ startOnLoad: true });
Material for the hackathon ENSAE / Red Cross / 2015. Schema of the databases.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from jyquickhelper import add_notebook_menu
add_notebook_menu()
# don't leave these line in the notebook.
# from pyquickhelper.loghelper import get_password
# set_password("CRCREDENTIALS", "blobstorage", "...")
# set_password("CRCREDENTIALS", "password", "...")
from pyquickhelper.loghelper import get_password
hackathon = {}
hackathon["blob_storage"] = get_password("CRCREDENTIALS", "blobstorage")
hackathon["password"] = get_password("CRCREDENTIALS", "password")
blobstorage = hackathon["blob_storage"]
blobpassword = hackathon["password"]
%load_ext pyensae
cl, bs = %blob_open
cl, bs
(<pyensae.remote.azure_connection.AzureClient at 0x9272828>, <azure.storage.blob.blobservice.BlobService at 0x9272860>)
%blob_down croix-rouge/data/ITMMASTER.schema.txt ITMMASTER.schema.txt
%blob_down croix-rouge/data/SINVOICE.schema.txt SINVOICE.schema.txt
%blob_down croix-rouge/data/SINVOICEV.schema.txt SINVOICEV.schema.txt
'SINVOICEV.schema.txt'
%blob_ls croix-rouge
name | last_modified | content_type | content_length | blob_type | |
---|---|---|---|---|---|
0 | build/SINVOICE_M.csv | Wed, 18 Nov 2015 18:56:27 GMT | application/octet-stream | 533771533 | BlockBlob |
1 | build/Test_CRFFOR.GACCTMPD.csv | Sun, 22 Nov 2015 21:53:38 GMT | application/octet-stream | 822231942 | BlockBlob |
2 | data/ITMMASTER.schema.txt | Mon, 16 Nov 2015 23:00:34 GMT | application/octet-stream | 5658 | BlockBlob |
3 | data/ITMMASTER.txt | Mon, 09 Nov 2015 21:41:00 GMT | application/octet-stream | 103096479 | BlockBlob |
4 | data/SINVOICE.schema.txt | Mon, 16 Nov 2015 23:00:35 GMT | application/octet-stream | 10252 | BlockBlob |
5 | data/SINVOICE.txt | Mon, 09 Nov 2015 21:42:32 GMT | application/octet-stream | 1362433753 | BlockBlob |
6 | data/SINVOICEV.schema.txt | Mon, 16 Nov 2015 23:00:35 GMT | application/octet-stream | 7999 | BlockBlob |
7 | data/SINVOICEV.txt | Mon, 09 Nov 2015 21:44:08 GMT | application/octet-stream | 1252461865 | BlockBlob |
8 | data/enseignes_france.csv | Mon, 09 Nov 2015 21:40:54 GMT | application/octet-stream | 6303836 | BlockBlob |
9 | data/stojou.csv | Mon, 09 Nov 2015 21:55:23 GMT | application/octet-stream | 8821375868 | BlockBlob |
10 | readme.txt | Sun, 22 Nov 2015 18:00:34 GMT | application/octet-stream | 45 | BlockBlob |
df = %blob_head croix-rouge/data/ITMMASTER.schema.txt
df.head()
Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Article | NaN | ITMMASTER | NaN | NaN | Suppression | Oui | Non | Non | NaN |
1 | Famille statistique | NaN | ATABDIV | indice+20;TSICOD(indice) | NaN | Bloquant | Oui | Non | Non | NaN |
2 | Catégorie article | NaN | ITMCATEG | ;TCLCOD | NaN | Bloquant | Oui | Oui | Non | NaN |
3 | Clé recherche | NaN | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN |
4 | Code axe | NaN | GDIE | NaN | NaN | Bloquant | Oui | Non | Non | NaN |
from pyquickhelper.pandashelper import df2rst
with open("sch_itmmaster.txt", "w", encoding="utf8") as f:
dfi = df.reset_index(drop=False)
dfi["index"] = dfi["index"]+1
f.write(df2rst(dfi.fillna("")))
from ensae_projects.datainc.croix_rouge import get_meaning
meaning = get_meaning("invoice")
meaning.head()
Champ | Description | |
---|---|---|
0 | NUM | Numéro de pièce |
1 | BPR | Tiers (identifiant du beneficiaire) |
2 | FCY | Site (code U2A) |
3 | ACCDAT | Date comptable |
4 | ACCNUM | Numéro interne de la transaction (ID unique po... |
meaning.columns
Index(['Champ', 'Description'], dtype='object')
df = %blob_head croix-rouge/data/SINVOICE.schema.txt
df.head()
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | SIVTYP | TSV | 0 | NaN | NaN | 1 | Type facture | Type fac | Type facture | NaN | TABSIVTYP | NaN | NaN | Bloquant | Oui | Non | Non | NaN |
1 | INVTYP | M | 645 | 15 | NaN | 1 | Catégorie facture | Catég fac | Catégorie facture | NaN | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN |
2 | NUM | VCR | 0 | NaN | NaN | 1 | Numéro de pièce | Numéro | Numéro de pièce | NaN | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN |
3 | ORIMOD | M | 14 | 10 | NaN | 1 | Module origine | Module | Module origine | NaN | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN |
4 | BPR | BPR | 0 | NaN | NaN | 1 | Tiers | Tiers | Tiers | NaN | BPARTNER | NaN | NaN | Bloquant | Oui | Oui | Non | NaN |
merge = df.merge(meaning, right_on=meaning.columns[0], left_on="Zone", how="outer")
print(merge.shape, df.shape, meaning.shape)
merge = merge[~merge["Zone"].isnull()]
print(merge.shape)
merge.head(n=2)
(146, 20) (124, 18) (80, 2) (124, 20)
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | Champ | Description | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | SIVTYP | TSV | 0 | NaN | NaN | 1 | Type facture | Type fac | Type facture | NaN | TABSIVTYP | NaN | NaN | Bloquant | Oui | Non | Non | NaN | SIVTYP | Type facture vente utilisateur (toujours egal ... |
1 | INVTYP | M | 645 | 15 | NaN | 1 | Catégorie facture | Catég fac | Catégorie facture | NaN | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN | INVTYP | Catégorie facture (toujours egal a 1 ~ inutile) |
from pyquickhelper.pandashelper import df2rst
with open("sch_invoice.txt", "w", encoding="utf8") as f:
dfi = merge.reset_index(drop=False)
dfi["index"] = dfi["index"]+1
f.write(df2rst(dfi.fillna("")))
df = %blob_head croix-rouge/data/SINVOICEV.schema.txt
df.head(n=2)
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NUM | VCR | 0 | NaN | NaN | 1 | No facture | Facture | No facture | NaN | SINVOICE | NaN | NaN | Autre | Oui | Non | Non | NaN |
1 | CPY | CPY | 0 | NaN | NaN | 1 | Société | Société | Société | NaN | COMPANY | NaN | NaN | Bloquant | Oui | Oui | Non | NaN |
mergev = df.merge(meaning, right_on=meaning.columns[0], left_on="Zone", how="outer")
print(mergev.shape, df.shape, meaning.shape)
mergev = mergev[~mergev["Zone"].isnull()]
print(mergev.shape)
mergev.head(n=2)
(140, 20) (91, 18) (80, 2) (91, 20)
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | Champ | Description | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NUM | VCR | 0 | NaN | NaN | 1 | No facture | Facture | No facture | NaN | SINVOICE | NaN | NaN | Autre | Oui | Non | Non | NaN | NUM | Numéro de pièce |
1 | CPY | CPY | 0 | NaN | NaN | 1 | Société | Société | Société | NaN | COMPANY | NaN | NaN | Bloquant | Oui | Oui | Non | NaN | CPY | Société (toujours egal a CRF) |
from pyquickhelper.pandashelper import df2rst
with open("sch_invoice_v.txt", "w", encoding="utf8") as f:
dfi = mergev.reset_index(drop=False)
dfi["index"] = dfi["index"]+1
f.write(df2rst(dfi.fillna("")))
Differences between INVOICE_V
and INVOICE
merge["table"] = "INVOICE"
mergev["table"] = "INVOICE_V"
diff = merge[["Zone", "table"]].merge(mergev[["Zone", "table"]], how="outer", on="Zone", suffixes=("", "_V")).copy()
diff["COMMON"] = True
diff.loc[diff["table"].isnull() | diff["table_V"].isnull(), "COMMON"] = False
diff = diff.sort_values(["COMMON", "Zone"])
diff.shape
(201, 4)
from pyquickhelper.pandashelper import df2rst
with open("sch_invoice_diff.txt", "w", encoding="utf8") as f:
dfi = diff.reset_index(drop=False)
dfi["index"] = dfi["index"]+1
f.write(df2rst(dfi.fillna("")))
df = cl.df_head(bs, "croix-rouge", "data/stojou.csv", as_df=False, stop_at=2000)
res = df.split("\r")[0]
res
'STOFCY_0,UPDCOD_0,ITMREF_0,IPTDAT_0,MVTSEQ_0,MVTIND_0,CSTDAT_0,CSTTIM_0,CSTCOU_0,OWNER_0,LOT_0,SLO_0,BPSLOT_0,LOC_0,WRH_0,SERNUM_0,TRSTYP_0,TRSFAM_0,MVTDES_0,PJT_0,BPRNUM_0,VCRTYP_0,VCRNUM_0,VCRLIN_0,VCRTYPORI_0,VCRNUMORI_0,VCRLINORI_0,VCRSEQORI_0,VCRTYPREG_0,VCRNUMREG_0,VCRLINREG_0,USRFLD1_0,USRFLD2_0,USRFLD3_0,USRFLD4_0,PCU_0,QTYPCU_0,PCUSTUCOE_0,QTYSTU_0,STU_0,PCUORI_0,PCUSTUORI_0,AMTORD_0,AMTVAL_0,VARORD_0,VARVAL_0,PRIORD_0,PRIVAL_0,PRINAT_0,AMTVAL2_0,VARVAL2_0,PRIVAL2_0,PRINAT2_0,PRIREGFLG_0,AMTDEV_0,AMTDEV2_0,SHLDAT_0,STA_0,POT_0,ACT_0,ACTQTY_0,QLYCTLDEM_0,PRNFLG_0,REGFLG_0,LBEFMT_0,LBENBR_0,DIE_0,DIE_1,DIE_2,DIE_3,DIE_4,DIE_5,DIE_6,DIE_7,DIE_8,DIE_9,DIE_10,DIE_11,DIE_12,DIE_13,DIE_14,DIE_15,DIE_16,DIE_17,DIE_18,DIE_19,CCE_0,CCE_1,CCE_2,CCE_3,CCE_4,CCE_5,CCE_6,CCE_7,CCE_8,CCE_9,CCE_10,CCE_11,CCE_12,CCE_13,CCE_14,CCE_15,CCE_16,CCE_17,CCE_18,CCE_19,ENTCOD_0,FINRSPFCY_0,AGGIFAFLG_0,GTE_0,NUMVCR_0,PALNUM_0,CTRNUM_0,BETCPY_0,DLUDAT_0,NEWLTIDAT_0,CREMVTDAT_0,CREMVTTIM_0,CREMVTSEQ_0,EXPNUM_0,PRONUM_0,CREDAT_0,CRETIM_0,CREUSR_0,UPDDAT_0,UPDUSR_0'
import pandas
stojou = pandas.DataFrame(data={ "Zone":[ _.replace("_0", "") for _ in res.split(",")]})
stojou.to_csv("stojou.schema.txt")
stojou["table_SJ"] = "stojou"
stojou.head()
Zone | table_SJ | |
---|---|---|
0 | STOFCY | stojou |
1 | UPDCOD | stojou |
2 | ITMREF | stojou |
3 | IPTDAT | stojou |
4 | MVTSEQ | stojou |
diff.head()
Zone | table | table_V | COMMON | |
---|---|---|---|---|
10 | ACCDAT | INVOICE | NaN | False |
11 | ACCNUM | INVOICE | NaN | False |
44 | AMTATI | INVOICE | NaN | False |
47 | AMTATIL | INVOICE | NaN | False |
45 | AMTNOT | INVOICE | NaN | False |
final = diff[["Zone", "table", "table_V"]].merge(stojou, how="outer", on="Zone", suffixes=("", "_STOJOU")).copy()
final = final.sort_values(["table_SJ", "Zone"])
final.shape
(313, 4)
from pyquickhelper.pandashelper import df2rst
with open("sch_stojou.txt", "w", encoding="utf8") as f:
dfi = final.reset_index(drop=False)
dfi["index"] = dfi["index"]+1
f.write(df2rst(dfi.fillna("")))
df = %blob_head croix-rouge/build/SINVOICE_M.csv --sep=,
df.head()
NUM | STARPT | CREUSR | CREDAT | BPR | FCY | ACCDAT | ACCNUM | BPRPAY | VAC | ... | EECSCH | EECSCHR | EECLOC | DSPTOTQTY | DSPTOTWEI | DSPTOTVOL | DSPWEU | DSPVOU | YCODTR | ZBATCHFLG | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | DIS100101000001 | 2 | REYC | 25/11/10 | 100000249 | U8201 | 01/01/10 | 705 | 100000249 | ... | FR21 | FR26 | 0 | 19 | 10,353 | 0,25 | KG | L | DIS | 2 | |
1 | DIS100101000002 | 2 | REYC | 25/11/10 | 100000174 | U8201 | 01/01/10 | 707 | 100000174 | ... | FR21 | FR26 | 0 | 24 | 16,98 | 0,62 | KG | L | DIS | 2 | |
2 | DIS100101000003 | 2 | REYC | 25/11/10 | 100000105 | U8201 | 01/01/10 | 709 | 100000105 | ... | FR21 | FR26 | 0 | 80 | 58,919 | 1,75 | KG | L | DIS | 2 | |
3 | DIS100101000004 | 2 | REYC | 25/11/10 | 100000244 | U8201 | 01/01/10 | 711 | 100000244 | ... | FR21 | FR26 | 0 | 52 | 34,426 | 1,87 | KG | L | DIS | 2 | |
4 | DIS100101000005 | 2 | REYC | 25/11/10 | 100000390 | U8201 | 01/01/10 | 713 | 100000390 | ... | FR21 | FR26 | 0 | 22 | 10,356 | 0,25 | KG | L | DIS | 2 |
5 rows × 80 columns
df.columns
Index(['NUM', 'STARPT', 'CREUSR', 'CREDAT', 'BPR', 'FCY', 'ACCDAT', 'ACCNUM', 'BPRPAY', 'VAC', 'STA', 'AMTATI', 'VATDAT', 'NBRTAX', 'TAX', 'BPAINV', 'YRESTEAV', 'YNBR', 'YTYPOFAM', 'YCSP', 'YSITPAR', 'YSITTRA', 'YSITLOG', 'YSSITLOG', 'YCRY', 'YT1', 'YT2', 'YT3', 'YT4', 'YT5', 'YT6', 'YT7', 'YT8', 'YSEXMAL', 'YSEXFEM', 'YSOLDINITIAL', 'YSOLDEACTUAL', 'YMTSANSREDUC', 'YMTAVECREDUC', 'YMTREDUCTION', 'YMTVALMER', 'YORIG1', 'YORIG2', 'YORIG3', 'YORIG4', 'YORIG5', 'YORIG6', 'YORIG7', 'YFREQPASS', 'YCSPCHEF', 'YTYPPROV', 'YREVREF', 'YQUOTFAM', 'YCNTTYP', 'CPY', 'BPAADD', 'CNINAM', 'BPDCRYNAM', 'SIVTYP', 'INVTYP', 'CUR', 'STOMVTFLG', 'PRITYP', 'LAN', 'ORIFCY', 'LINNBR', 'INVDTAAMT', 'TRSCOD', 'ENTCOD', 'EECNAT', 'EECSCH', 'EECSCHR', 'EECLOC', 'DSPTOTQTY', 'DSPTOTWEI', 'DSPTOTVOL', 'DSPWEU', 'DSPVOU', 'YCODTR', 'ZBATCHFLG'], dtype='object')
Just to check that suffix _0
is implicit in the documentation.
%blob_ls croix-rouge
name | last_modified | content_type | content_length | blob_type | |
---|---|---|---|---|---|
0 | build/SINVOICE_M.csv | Wed, 18 Nov 2015 18:56:27 GMT | application/octet-stream | 533771533 | BlockBlob |
1 | build/Test_CRFFOR.GACCTMPD.csv | Sun, 22 Nov 2015 21:53:38 GMT | application/octet-stream | 822231942 | BlockBlob |
2 | data/ITMMASTER.schema.txt | Mon, 16 Nov 2015 23:00:34 GMT | application/octet-stream | 5658 | BlockBlob |
3 | data/ITMMASTER.txt | Mon, 09 Nov 2015 21:41:00 GMT | application/octet-stream | 103096479 | BlockBlob |
4 | data/SINVOICE.schema.txt | Mon, 16 Nov 2015 23:00:35 GMT | application/octet-stream | 10252 | BlockBlob |
5 | data/SINVOICE.txt | Mon, 09 Nov 2015 21:42:32 GMT | application/octet-stream | 1362433753 | BlockBlob |
6 | data/SINVOICEV.schema.txt | Mon, 16 Nov 2015 23:00:35 GMT | application/octet-stream | 7999 | BlockBlob |
7 | data/SINVOICEV.txt | Mon, 09 Nov 2015 21:44:08 GMT | application/octet-stream | 1252461865 | BlockBlob |
8 | data/enseignes_france.csv | Mon, 09 Nov 2015 21:40:54 GMT | application/octet-stream | 6303836 | BlockBlob |
9 | data/stojou.csv | Mon, 09 Nov 2015 21:55:23 GMT | application/octet-stream | 8821375868 | BlockBlob |
10 | readme.txt | Sun, 22 Nov 2015 18:00:34 GMT | application/octet-stream | 45 | BlockBlob |
df = cl.df_head(bs, "croix-rouge", "data/stojou.csv", as_df=False, stop_at=2000)
df.split("\r")[0]
'STOFCY_0,UPDCOD_0,ITMREF_0,IPTDAT_0,MVTSEQ_0,MVTIND_0,CSTDAT_0,CSTTIM_0,CSTCOU_0,OWNER_0,LOT_0,SLO_0,BPSLOT_0,LOC_0,WRH_0,SERNUM_0,TRSTYP_0,TRSFAM_0,MVTDES_0,PJT_0,BPRNUM_0,VCRTYP_0,VCRNUM_0,VCRLIN_0,VCRTYPORI_0,VCRNUMORI_0,VCRLINORI_0,VCRSEQORI_0,VCRTYPREG_0,VCRNUMREG_0,VCRLINREG_0,USRFLD1_0,USRFLD2_0,USRFLD3_0,USRFLD4_0,PCU_0,QTYPCU_0,PCUSTUCOE_0,QTYSTU_0,STU_0,PCUORI_0,PCUSTUORI_0,AMTORD_0,AMTVAL_0,VARORD_0,VARVAL_0,PRIORD_0,PRIVAL_0,PRINAT_0,AMTVAL2_0,VARVAL2_0,PRIVAL2_0,PRINAT2_0,PRIREGFLG_0,AMTDEV_0,AMTDEV2_0,SHLDAT_0,STA_0,POT_0,ACT_0,ACTQTY_0,QLYCTLDEM_0,PRNFLG_0,REGFLG_0,LBEFMT_0,LBENBR_0,DIE_0,DIE_1,DIE_2,DIE_3,DIE_4,DIE_5,DIE_6,DIE_7,DIE_8,DIE_9,DIE_10,DIE_11,DIE_12,DIE_13,DIE_14,DIE_15,DIE_16,DIE_17,DIE_18,DIE_19,CCE_0,CCE_1,CCE_2,CCE_3,CCE_4,CCE_5,CCE_6,CCE_7,CCE_8,CCE_9,CCE_10,CCE_11,CCE_12,CCE_13,CCE_14,CCE_15,CCE_16,CCE_17,CCE_18,CCE_19,ENTCOD_0,FINRSPFCY_0,AGGIFAFLG_0,GTE_0,NUMVCR_0,PALNUM_0,CTRNUM_0,BETCPY_0,DLUDAT_0,NEWLTIDAT_0,CREMVTDAT_0,CREMVTTIM_0,CREMVTSEQ_0,EXPNUM_0,PRONUM_0,CREDAT_0,CRETIM_0,CREUSR_0,UPDDAT_0,UPDUSR_0'
df = cl.df_head(bs, "croix-rouge", "data/SINVOICEV.txt", as_df=False, stop_at=2000)
df.split("\r")[0]
'"NUM_0"\t"CPY_0"\t"SALFCY_0"\t"STOFCY_0"\t"BPCINV_0"\t"BPCORD_0"\t"BPCGRU_0"\t"BPAADD_0"\t"BPINAM_0"\t"BPINAM_1"\t"BPIEECNUM_0"\t"CNINAM_0"\t"BPDNAM_0"\t"BPDNAM_1"\t"BPDADDLIG_0"\t"BPDADDLIG_1"\t"BPDADDLIG_2"\t"BPDPOSCOD_0"\t"BPDCTY_0"\t"BPDSAT_0"\t"BPDCRY_0"\t"BPDCRYNAM_0"\t"CNDNAM_0"\t"BPRPAY_0"\t"BPRFCT_0"\t"SIVTYP_0"\t"INVTYP_0"\t"SIHORI_0"\t"SIHORINUM_0"\t"SIHORIDAT_0"\t"INVDAT_0"\t"CUR_0"\t"INVSTA_0"\t"INVCNOSTA_0"\t"STOMVTFLG_0"\t"INVREF_0"\t"PJT_0"\t"PRITYP_0"\t"REP_0"\t"REP_1"\t"CNOREN_0"\t"DEP_0"\t"LAN_0"\t"TSCCOD_0"\t"TSCCOD_1"\t"TSCCOD_2"\t"TSCCOD_3"\t"TSCCOD_4"\t"SIHTEX1_0"\t"SIHTEX2_0"\t"BETFCY_0"\t"BETCPY_0"\t"ORIFCY_0"\t"PIHNUM_0"\t"DISCRGTYP_0"\t"DISCRGTYP_1"\t"DISCRGTYP_2"\t"DISCRGTYP_3"\t"DISCRGTYP_4"\t"DISCRGTYP_5"\t"DISCRGTYP_6"\t"DISCRGTYP_7"\t"DISCRGTYP_8"\t"INVDTALIN_0"\t"INVDTALIN_1"\t"INVDTALIN_2"\t"INVDTALIN_3"\t"INVDTALIN_4"\t"INVDTALIN_5"\t"INVDTALIN_6"\t"INVDTALIN_7"\t"INVDTALIN_8"\t"LINNBR_0"\t"INVDTA_0"\t"INVDTA_1"\t"INVDTA_2"\t"INVDTA_3"\t"INVDTA_4"\t"INVDTA_5"\t"INVDTA_6"\t"INVDTA_7"\t"INVDTA_8"\t"INVDTA_9"\t"INVDTA_10"\t"INVDTA_11"\t"INVDTA_12"\t"INVDTA_13"\t"INVDTA_14"\t"INVDTA_15"\t"INVDTA_16"\t"INVDTA_17"\t"INVDTA_18"\t"INVDTA_19"\t"INVDTA_20"\t"INVDTA_21"\t"INVDTA_22"\t"INVDTA_23"\t"INVDTA_24"\t"INVDTA_25"\t"INVDTA_26"\t"INVDTA_27"\t"INVDTA_28"\t"INVDTA_29"\t"INVDTAAMT_0"\t"INVDTAAMT_1"\t"INVDTAAMT_2"\t"INVDTAAMT_3"\t"INVDTAAMT_4"\t"INVDTAAMT_5"\t"INVDTAAMT_6"\t"INVDTAAMT_7"\t"INVDTAAMT_8"\t"INVDTAAMT_9"\t"INVDTAAMT_10"\t"INVDTAAMT_11"\t"INVDTAAMT_12"\t"INVDTAAMT_13"\t"INVDTAAMT_14"\t"INVDTAAMT_15"\t"INVDTAAMT_16"\t"INVDTAAMT_17"\t"INVDTAAMT_18"\t"INVDTAAMT_19"\t"INVDTAAMT_20"\t"INVDTAAMT_21"\t"INVDTAAMT_22"\t"INVDTAAMT_23"\t"INVDTAAMT_24"\t"INVDTAAMT_25"\t"INVDTAAMT_26"\t"INVDTAAMT_27"\t"INVDTAAMT_28"\t"INVDTAAMT_29"\t"INVDTATYP_0"\t"INVDTATYP_1"\t"INVDTATYP_2"\t"INVDTATYP_3"\t"INVDTATYP_4"\t"INVDTATYP_5"\t"INVDTATYP_6"\t"INVDTATYP_7"\t"INVDTATYP_8"\t"INVDTATYP_9"\t"INVDTATYP_10"\t"INVDTATYP_11"\t"INVDTATYP_12"\t"INVDTATYP_13"\t"INVDTATYP_14"\t"INVDTATYP_15"\t"INVDTATYP_16"\t"INVDTATYP_17"\t"INVDTATYP_18"\t"INVDTATYP_19"\t"INVDTATYP_20"\t"INVDTATYP_21"\t"INVDTATYP_22"\t"INVDTATYP'
df = cl.df_head(bs, "croix-rouge", "data/SINVOICE.txt", as_df=False, stop_at=2000)
df.split("\r")[0]
'"SIVTYP_0"\t"INVTYP_0"\t"NUM_0"\t"ORIMOD_0"\t"BPR_0"\t"BPRSAC_0"\t"CPY_0"\t"FCY_0"\t"GTE_0"\t"JOU_0"\t"ACCDAT_0"\t"ACCNUM_0"\t"BPRDAT_0"\t"BPRVCR_0"\t"CUR_0"\t"CURTYP_0"\t"LED_0"\t"LED_1"\t"LED_2"\t"LED_3"\t"LED_4"\t"LED_5"\t"LED_6"\t"LED_7"\t"LED_8"\t"LED_9"\t"CURLED_0"\t"CURLED_1"\t"CURLED_2"\t"CURLED_3"\t"CURLED_4"\t"CURLED_5"\t"CURLED_6"\t"CURLED_7"\t"CURLED_8"\t"CURLED_9"\t"RATMLT_0"\t"RATMLT_1"\t"RATMLT_2"\t"RATMLT_3"\t"RATMLT_4"\t"RATMLT_5"\t"RATMLT_6"\t"RATMLT_7"\t"RATMLT_8"\t"RATMLT_9"\t"RATDIV_0"\t"RATDIV_1"\t"RATDIV_2"\t"RATDIV_3"\t"RATDIV_4"\t"RATDIV_5"\t"RATDIV_6"\t"RATDIV_7"\t"RATDIV_8"\t"RATDIV_9"\t"RATDAT_0"\t"BPRPAY_0"\t"BPAPAY_0"\t"BPYNAM_0"\t"BPYNAM_1"\t"BPYADDLIG_0"\t"BPYADDLIG_1"\t"BPYADDLIG_2"\t"BPYPOSCOD_0"\t"BPYCTY_0"\t"BPYSAT_0"\t"BPYCRY_0"\t"BPYCRYNAM_0"\t"BPRFCT_0"\t"FCTVCR_0"\t"FCTVCRFLG_0"\t"QTCACCNUM_0"\t"STRDUDDAT_0"\t"PTE_0"\t"DEP_0"\t"VAC_0"\t"DIRINVFLG_0"\t"EECNUMDEB_0"\t"STA_0"\t"DES_0"\t"DES_1"\t"DES_2"\t"DES_3"\t"DES_4"\t"INVNUM_0"\t"SNS_0"\t"AMTATI_0"\t"AMTNOT_0"\t"AMTNOTL_0"\t"AMTATIL_0"\t"VATDAT_0"\t"NBRTAX_0"\t"TAX_0"\t"TAX_1"\t"TAX_2"\t"TAX_3"\t"TAX_4"\t"TAX_5"\t"TAX_6"\t"TAX_7"\t"TAX_8"\t"TAX_9"\t"BASTAX_0"\t"BASTAX_1"\t"BASTAX_2"\t"BASTAX_3"\t"BASTAX_4"\t"BASTAX_5"\t"BASTAX_6"\t"BASTAX_7"\t"BASTAX_8"\t"BASTAX_9"\t"AMTTAX_0"\t"AMTTAX_1"\t"AMTTAX_2"\t"AMTTAX_3"\t"AMTTAX_4"\t"AMTTAX_5"\t"AMTTAX_6"\t"AMTTAX_7"\t"AMTTAX_8"\t"AMTTAX_9"\t"THEAMTTAX_0"\t"EXEAMTTAX_0"\t"EXEAMTTAX_1"\t"EXEAMTTAX_2"\t"EXEAMTTAX_3"\t"EXEAMTTAX_4"\t"EXEAMTTAX_5"\t"EXEAMTTAX_6"\t"EXEAMTTAX_7"\t"EXEAMTTAX_8"\t"EXEAMTTAX_9"\t"DIE_0"\t"DIE_1"\t"DIE_2"\t"DIE_3"\t"DIE_4"\t"DIE_5"\t"DIE_6"\t"DIE_7"\t"DIE_8"\t"DIE_9"\t"DIE_10"\t"DIE_11"\t"DIE_12"\t"DIE_13"\t"DIE_14"\t"DIE_15"\t"DIE_16"\t"DIE_17"\t"DIE_18"\t"DIE_19"\t"CCE_0"\t"CCE_1"\t"CCE_2"\t"CCE_3"\t"CCE_4"\t"CCE_5"\t"CCE_6"\t"CCE_7"\t"CCE_8"\t"CCE_9"\t"CCE_10"\t"CCE_11"\t"CCE_12"\t"CCE_13"\t"CCE_14"\t"CCE_15"\t"CCE_16"\t"CCE_17"\t"CCE_18"\t"CCE_19"\t"BPAINV_0"\t"BPRNAM_0"\t"BPRNAM_1"\t"BPAADDLIG_0"\t"BPAADDLIG_1"\t"BPAADDLIG_2"\t"POSCOD_0"\t"CTY_0"\t"SAT_0"\t"CRY_0"\t"CRYNAM_0"\t"BILVCR_0"\t"TRSFAM_0"\t"FIY_0"\t"PER_0"\t"STRDATSVC_0"\t"ENDDATSVC_0"\t"LASDATSVC_0"\t"AMTTAXUSA_0"\t"CAI_0"\t"DATVLYCA'
df = cl.df_head(bs, "croix-rouge", "data/ITMMASTER.txt", as_df=False, stop_at=2000)
df.split("\r")[0]
'"3166290200616","ALI","FL","LGM","HRB","HRB","PRSEC"," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," "," ","STD","Herbes provence"," "," ",3," ","3BGQMTA*caagbg+",1,0,0,31/12/99,31/12/99," ","UN","UN","UN","UN","UN",1,1,1,1,1,1,"KG",0,001," ",0,02,0,0," "," "," "," ",0,0,0,0,0,0,0,0,0,0,0,0," "," "," "," ",2,1," "," ",1,1," ",0,0,1," ",1," "," ",0," ","EXO"," "," ",1,0,0,1," "," ",0,0,2,1," "," "," "," "," ",0,0,0,0,0,0," "," "," "," "," "," "," "," "," "," ",31/12/99,1," "," "," "," ",0," "," ",0,1,1," "," "," ",0,2,2,2,1,1,1,1,1,2,2,1,0," ",1,1,1,1,22/09/10,"GPE",31/12/99,"HLAM"," "," "," "," ","1285113600000","1361358448615","0e8b8e42-3071-4e2b-a4fe-b467e722b78f",0," ",0,0,0,0,0,0,0,0,0,0'
df = cl.df_head(bs, "croix-rouge", "data/enseignes_france.csv", as_df=True, stop_at=None)
df.head()
ADRESSE_POSTALE_TEXT | TEXT_1 | IMAGE_1/_title | SITE_INTERNET_LINK | shop_name | IMAGE_1 | TEXT_3 | SITE_INTERNET_LINK/_text | brand | TEXT_4 | ... | result_address | result_score | result_type | result_id | result_name | result_street | result_postcode | result_city | result_context | result_citycode | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Le Grand Rivolet | Simply Market MONTCEAUX GUEREINS | Simply-Market | http://www.simplymarket.fr/montceaux-guereins/ | Simply Market MONTCEAUX GUEREINS | http://images.grandes-enseignes.com/Simply-Mar... | 01090 MONTCEAUX GUEREINS | Site Internet | Simply-Market | NaN | ... | Grand Rivolet 01090 Montceaux | 0.64 | locality | 01258_B011_254fe4 | Grand Rivolet | NaN | 1090 | Montceaux | 01, Ain, Rhône-Alpes | 1258 |
1 | 4 Rue Du Marche, | SPAR Montmerle Sur Saone | Spar | http://annuaire.casino-proximite.fr/c/c/magasi... | SPAR Montmerle Sur Saone | http://images.grandes-enseignes.com/Spar.png | 01090 Montmerle Sur Saone | Site Internet | Spar | Tel. 04 74 06 88 04 - | ... | 4 Rue du Marche 01090 Montmerle-sur-Saône | 0.91 | housenumber | ADRNIVX_0000000285677493 | Rue du Marche | NaN | 1090 | Montmerle-sur-Saône | 01, Ain, Rhône-Alpes | 1263 |
2 | Le Pré de la Cloche | INTERMARCHE SUPER Belleville-sur-Saone | Intermarche | http://www.intermarche.com/magasin_accueil/sas... | INTERMARCHE SUPER Belleville-sur-Saone | http://images.grandes-enseignes.com/Intermarch... | 69220 Belleville-sur-Saone | Site Internet | Intermarche | Tel. 04 74 06 45 85 - | ... | Prés de la Cloche 69220 Belleville | 0.61 | locality | 69019_B043_44c71c | Prés de la Cloche | NaN | 69220 | Belleville | 69, Rhône, Rhône-Alpes | 69019 |
3 | 47 Rue De La Republique, | PETIT CASINO Belleville Sur Saone | Petit-Casino | http://annuaire.casino-proximite.fr/c/c/magasi... | PETIT CASINO Belleville Sur Saone | http://images.grandes-enseignes.com/Petit-Casi... | 69220 Belleville Sur Saone | Site Internet | Petit-Casino | Tel. 04 74 66 10 34 - | ... | 47 Rue de la République 69220 Belleville | 0.74 | housenumber | ADRNIVX_0000000259819137 | Rue de la République | NaN | 69220 | Belleville | 69, Rhône, Rhône-Alpes | 69019 |
4 | AVENUE DE VERDUN | Carrefour Market BELLEVILLE SUR SAONE | Carrefour-Market | http://www.carrefour.fr/magasin/market-bellevi... | Carrefour Market BELLEVILLE SUR SAONE | http://images.grandes-enseignes.com/Carrefour-... | 69220 BELLEVILLE SUR SAONE | Site Internet | Carrefour-Market | Tel. 04 74 06 44 10 - | ... | Avenue de Verdun 69220 Belleville | 0.71 | street | 69019_XXXX_9d303b | Avenue de Verdun | NaN | 69220 | Belleville | 69, Rhône, Rhône-Alpes | 69019 |
5 rows × 22 columns
df.columns
Index(['ADRESSE_POSTALE_TEXT', 'TEXT_1', 'IMAGE_1/_title', 'SITE_INTERNET_LINK', 'shop_name', 'IMAGE_1', 'TEXT_3', 'SITE_INTERNET_LINK/_text', 'brand', 'TEXT_4', 'latitude', 'longitude', 'result_address', 'result_score', 'result_type', 'result_id', 'result_name', 'result_street', 'result_postcode', 'result_city', 'result_context', 'result_citycode'], dtype='object')
df = cl.df_head(bs, "croix-rouge", "build/Test_CRFFOR.GACCTMPD.csv", as_df=False, stop_at=2000)
df.split("\n")[0]
'"TYP_0","NUM_0","LIN_0","LEDTYP_0","LED_0","ACCNUM_0","CHRNUM_0","IDTLIN_0","CPY_0","FCYLIN_0","ACCDAT_0","FIY_0","PER_0","COA_0","SAC_0","ACC_0","BPR_0","DSP_0","SNS_0","CUR_0","AMTCUR_0","CURLED_0","AMTLED_0","AMTFLG_0","AMTLED1_0","UOM_0","QTY_0","DES_0","REFINTLIN_0","OFFACC_0","CSLCOD_0","CSLFLO_0","STT1_0","STT2_0","STT3_0","MTC_0","MTCDAT_0","MTCDATMIN_0","MTCDATMAX_0","FLGMTC_0","FREREF_0","CHK_0","CHKDAT_0","MRK_0","TAX_0","TAX2_0","TAX3_0","AMTVAT_0","INDEDVAT_0","ACCNUMORI_0","CODAUTACE_0","CRIMTC_0","OLDLIG_0","ACCNUMDOE_0"'
%blob_close
True
Module ensae_projects includes helpers for this event. Here is how to get the joined schemas for all tables in one Excel file. Some data is not included in the module but it is encrypted. You need a password given to any participant. You can store it in environment variable PWDCROIXROUGE
to avoid typing it each time you need it.
from ensae_projects.datainc.croix_rouge import merge_schema
df = merge_schema()
df.head(n=1)
index | Zone | name_ie | name_IR | name_SE | name_SV | name_su | Intitulé long | Typ | Menu | Long | Act | Dim | Table liée | Expression de lien | Vérification | Obligatoire | RAZ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | ACCDAT | invoice | NaN | SINVOICE | NaN | NaN | Date comptable | D | Non | Non |
df.to_excel("joined_schemas.xlsx")
from ensae_projects.datainc.croix_rouge import get_meaning
df = get_meaning("ITMMASTER")
df.head(n=2)
Intitulé long | Options | Table liée | Expression de lien | Copie législation | Annulation | Vérification | Obligatoire | RAZ | Zone | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Article | NaN | ITMMASTER | NaN | NaN | Suppression | Oui | Non | Non | ITM_001 |
1 | Famille statistique | NaN | ATABDIV | indice+20;TSICOD(indice) | NaN | Bloquant | Oui | Non | Non | ITM_002 |
from ensae_projects.datainc.croix_rouge import get_meaning
df = get_meaning("SINVOICE")
df.head(n=2)
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Options | Table liée | Expression de lien | Annulation | Vérification | Obligatoire | RAZ | Mot-clé d'aide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | SIVTYP | TSV | 0 | NaN | NaN | 1 | Type facture | Type fac | Type facture | NaN | TABSIVTYP | NaN | Bloquant | Oui | Non | Non | NaN |
1 | INVTYP | M | 645 | 15 | NaN | 1 | Catégorie facture | Catég fac | Catégorie facture | NaN | NaN | NaN | NaN | NaN | Non | Non | NaN |
from ensae_projects.datainc.croix_rouge import get_meaning
df = get_meaning("SINVOICE_V")
df.head(n=2)
Zone | Typ | Menu | Long | Act | Dim | Intitulé normal | Intitulé abrégé | Intitulé long | Table liée | Expression de lien | Annulation | Vérification | Obligatoire | RAZ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NUM | VCR | 0 | NaN | NaN | 1 | No facture | Facture | No facture | SINVOICE | NaN | Autre | Oui | Non | Non |
1 | CPY | CPY | 0 | NaN | NaN | 1 | Société | Société | Société | COMPANY | NaN | Bloquant | Oui | Oui | Non |