from functools import wraps, partial

from loguru import logger

from anovos.shared.utils import attributeType_segregation


def check_list_of_columns(
    func=None,
    columns="list_of_cols",
    target_idx: int = 1,
    target: str = "idf_target",
    drop="drop_cols",
):
    """Decorator that normalises and validates the column-selection arguments of ``func``.

    Before ``func`` runs, the keyword argument named by ``columns`` is resolved
    to a de-duplicated list of column names, the columns named by the ``drop``
    keyword are removed from it, and the result is checked against the
    ``columns`` attribute of the target dataframe. The resolved list is written
    back under ``columns`` and the ``drop`` keyword is reset to ``[]``.

    Usable bare (``@check_list_of_columns``) or with options
    (``@check_list_of_columns(target_idx=0)``).

    Parameters
    ----------
    func : callable, optional
        Function to wrap. ``None`` when the decorator is called with options,
        in which case a partially-applied decorator is returned.
    columns : str
        Name of the keyword argument of ``func`` holding the columns to select.
        Its value may be a list, a ``"|"``-separated string, or ``"all"``
        (also used when the keyword is absent).
    target_idx : int
        Index into the positional arguments of ``func`` where the dataframe is
        found when it is not supplied by keyword.
    target : str
        Name of the keyword argument of ``func`` holding the dataframe.
    drop : str
        Name of the keyword argument of ``func`` holding the columns to
        exclude (a list or a ``"|"``-separated string; ``[]`` when absent).

    Returns
    -------
    callable
        The wrapped function, or a partially-applied decorator when ``func``
        is ``None``.

    Raises
    ------
    TypeError
        Raised by the wrapper when the ``columns`` or ``drop`` value is
        neither a string nor a list.
    ValueError
        Raised by the wrapper when no column is left after dropping, or when
        a selected column is not present in the dataframe.

    Notes
    -----
    Only keyword arguments are inspected for ``columns`` and ``drop``. The
    wrapper always injects both as keywords, so passing them positionally
    makes the call to ``func`` fail with a duplicate-argument error.
    """
    if func is None:
        # Forward *every* option, including ``target_idx``; omitting it would
        # silently reset a caller-supplied index to the default.
        return partial(
            check_list_of_columns,
            columns=columns,
            target_idx=target_idx,
            target=target,
            drop=drop,
        )

    @wraps(func)
    def validate(*args, **kwargs):
        logger.debug("check the list of columns")

        # Explicit None check instead of ``or`` so the dataframe object is
        # never coerced to bool.
        idf_target = kwargs.get(target)
        if idf_target is None:
            idf_target = args[target_idx]

        cols_raw = kwargs.get(columns, "all")
        if isinstance(cols_raw, str):
            if cols_raw == "all":
                # Only the first two groups returned by the helper are used.
                num_cols, cat_cols, _ = attributeType_segregation(idf_target)
                cols = num_cols + cat_cols
            else:
                cols = [x.strip() for x in cols_raw.split("|")]
        elif isinstance(cols_raw, list):
            cols = cols_raw
        else:
            raise TypeError(
                f"'{columns}' must be either a string or a list of strings."
                f" Received {type(cols_raw)}."
            )

        drops_raw = kwargs.get(drop, [])
        if isinstance(drops_raw, str):
            drops = [x.strip() for x in drops_raw.split("|")]
        elif isinstance(drops_raw, list):
            drops = drops_raw
        else:
            raise TypeError(
                f"'{drop}' must be either a string or a list of strings. "
                f"Received {type(drops_raw)}."
            )

        # De-duplicate while keeping first-seen order, so the column order
        # handed to ``func`` is deterministic (a plain set is not).
        final_cols = list(dict.fromkeys(e for e in cols if e not in drops))

        if not final_cols:
            raise ValueError(
                f"Empty set of columns is given. Columns to select: {cols}, columns to drop: {drops}."
            )

        missing_cols = set(final_cols) - set(idf_target.columns)
        if missing_cols:
            raise ValueError(
                f"Not all columns are in the input dataframe. "
                f"Missing columns: {missing_cols}"
            )

        kwargs[columns] = final_cols
        # Drops have already been applied; hand ``func`` an empty drop list.
        kwargs[drop] = []

        return func(*args, **kwargs)

    return validate


def check_distance_method(func=None, param="method_type"):
    """Decorator that normalises and validates the distance-method argument of ``func``.

    The keyword argument named by ``param`` is converted to a list of method
    codes and checked against the accepted codes ``"PSI"``, ``"JSD"``,
    ``"HD"`` and ``"KS"``. The list is always written back as a keyword
    argument, so ``func`` receives ``["PSI"]`` when the caller omitted it.

    Usable bare (``@check_distance_method``) or with options
    (``@check_distance_method(param="metric")``).

    Parameters
    ----------
    func : callable, optional
        Function to wrap. ``None`` when the decorator is called with options,
        in which case a partially-applied decorator is returned.
    param : str
        Name of the keyword argument of ``func`` holding the method(s). Its
        value may be ``"all"``, a ``"|"``-separated string, or an iterable of
        method codes (passed through unchanged once validated).

    Returns
    -------
    callable
        The wrapped function, or a partially-applied decorator when ``func``
        is ``None``.

    Raises
    ------
    TypeError
        Raised by the wrapper when the value is not a string or an iterable,
        or when it contains a code outside the accepted set.

    Notes
    -----
    Only keyword arguments are inspected. The wrapper always injects ``param``
    as a keyword, so passing it positionally makes the call to ``func`` fail
    with a duplicate-argument error.
    """
    if func is None:
        return partial(check_distance_method, param=param)

    valid_methods = ("PSI", "JSD", "HD", "KS")

    @wraps(func)
    def validate(*args, **kwargs):
        methods = kwargs.get(param, "PSI")

        if isinstance(methods, str):
            if methods == "all":
                methods = list(valid_methods)
            else:
                methods = [name.strip() for name in methods.split("|")]

        try:
            invalid = [name for name in methods if name not in valid_methods]
        except TypeError as err:
            # Non-iterable input (e.g. None or a number): report it in terms
            # of the argument instead of a bare "not iterable" error.
            raise TypeError(
                f"Invalid input for {param}: expected a '|'-separated string "
                f"or a list of method names. Received {type(methods)}."
            ) from err

        if invalid:
            raise TypeError(
                f"Invalid input for {param}: {invalid}. "
                f"Valid options are {list(valid_methods)} or 'all'."
            )

        kwargs[param] = methods
        return func(*args, **kwargs)

    return validate
# NOTE(review): a function with this name is also defined earlier in the file;
# being later, this definition is the one bound to the name at import time.
def check_distance_method(func=None, param="method_type"):
    """Decorator that normalises and validates the distance-method argument of ``func``.

    The keyword argument named by ``param`` is converted to a list of method
    codes and checked against the accepted codes ``"PSI"``, ``"JSD"``,
    ``"HD"`` and ``"KS"``. The list is always written back as a keyword
    argument, so ``func`` receives ``["PSI"]`` when the caller omitted it.

    Usable bare (``@check_distance_method``) or with options
    (``@check_distance_method(param="metric")``).

    Parameters
    ----------
    func : callable, optional
        Function to wrap. ``None`` when the decorator is called with options,
        in which case a partially-applied decorator is returned.
    param : str
        Name of the keyword argument of ``func`` holding the method(s). Its
        value may be ``"all"``, a ``"|"``-separated string, or an iterable of
        method codes (passed through unchanged once validated).

    Returns
    -------
    callable
        The wrapped function, or a partially-applied decorator when ``func``
        is ``None``.

    Raises
    ------
    TypeError
        Raised by the wrapper when the value is not a string or an iterable,
        or when it contains a code outside the accepted set.

    Notes
    -----
    Only keyword arguments are inspected. The wrapper always injects ``param``
    as a keyword, so passing it positionally makes the call to ``func`` fail
    with a duplicate-argument error.
    """
    if func is None:
        return partial(check_distance_method, param=param)

    valid_methods = ("PSI", "JSD", "HD", "KS")

    @wraps(func)
    def validate(*args, **kwargs):
        methods = kwargs.get(param, "PSI")

        if isinstance(methods, str):
            if methods == "all":
                methods = list(valid_methods)
            else:
                methods = [name.strip() for name in methods.split("|")]

        try:
            invalid = [name for name in methods if name not in valid_methods]
        except TypeError as err:
            # Non-iterable input (e.g. None or a number): report it in terms
            # of the argument instead of a bare "not iterable" error.
            raise TypeError(
                f"Invalid input for {param}: expected a '|'-separated string "
                f"or a list of method names. Received {type(methods)}."
            ) from err

        if invalid:
            raise TypeError(
                f"Invalid input for {param}: {invalid}. "
                f"Valid options are {list(valid_methods)} or 'all'."
            )

        kwargs[param] = methods
        return func(*args, **kwargs)

    return validate
# NOTE(review): a function with this name is also defined earlier in the file;
# being later, this definition is the one bound to the name at import time.
def check_list_of_columns(
    func=None,
    columns="list_of_cols",
    target_idx: int = 1,
    target: str = "idf_target",
    drop="drop_cols",
):
    """Decorator that normalises and validates the column-selection arguments of ``func``.

    Before ``func`` runs, the keyword argument named by ``columns`` is resolved
    to a de-duplicated list of column names, the columns named by the ``drop``
    keyword are removed from it, and the result is checked against the
    ``columns`` attribute of the target dataframe. The resolved list is written
    back under ``columns`` and the ``drop`` keyword is reset to ``[]``.

    Usable bare (``@check_list_of_columns``) or with options
    (``@check_list_of_columns(target_idx=0)``).

    Parameters
    ----------
    func : callable, optional
        Function to wrap. ``None`` when the decorator is called with options,
        in which case a partially-applied decorator is returned.
    columns : str
        Name of the keyword argument of ``func`` holding the columns to select.
        Its value may be a list, a ``"|"``-separated string, or ``"all"``
        (also used when the keyword is absent).
    target_idx : int
        Index into the positional arguments of ``func`` where the dataframe is
        found when it is not supplied by keyword.
    target : str
        Name of the keyword argument of ``func`` holding the dataframe.
    drop : str
        Name of the keyword argument of ``func`` holding the columns to
        exclude (a list or a ``"|"``-separated string; ``[]`` when absent).

    Returns
    -------
    callable
        The wrapped function, or a partially-applied decorator when ``func``
        is ``None``.

    Raises
    ------
    TypeError
        Raised by the wrapper when the ``columns`` or ``drop`` value is
        neither a string nor a list.
    ValueError
        Raised by the wrapper when no column is left after dropping, or when
        a selected column is not present in the dataframe.

    Notes
    -----
    Only keyword arguments are inspected for ``columns`` and ``drop``. The
    wrapper always injects both as keywords, so passing them positionally
    makes the call to ``func`` fail with a duplicate-argument error.
    """
    if func is None:
        # Forward *every* option, including ``target_idx``; omitting it would
        # silently reset a caller-supplied index to the default.
        return partial(
            check_list_of_columns,
            columns=columns,
            target_idx=target_idx,
            target=target,
            drop=drop,
        )

    @wraps(func)
    def validate(*args, **kwargs):
        logger.debug("check the list of columns")

        # Explicit None check instead of ``or`` so the dataframe object is
        # never coerced to bool.
        idf_target = kwargs.get(target)
        if idf_target is None:
            idf_target = args[target_idx]

        cols_raw = kwargs.get(columns, "all")
        if isinstance(cols_raw, str):
            if cols_raw == "all":
                # Only the first two groups returned by the helper are used.
                num_cols, cat_cols, _ = attributeType_segregation(idf_target)
                cols = num_cols + cat_cols
            else:
                cols = [x.strip() for x in cols_raw.split("|")]
        elif isinstance(cols_raw, list):
            cols = cols_raw
        else:
            raise TypeError(
                f"'{columns}' must be either a string or a list of strings."
                f" Received {type(cols_raw)}."
            )

        drops_raw = kwargs.get(drop, [])
        if isinstance(drops_raw, str):
            drops = [x.strip() for x in drops_raw.split("|")]
        elif isinstance(drops_raw, list):
            drops = drops_raw
        else:
            raise TypeError(
                f"'{drop}' must be either a string or a list of strings. "
                f"Received {type(drops_raw)}."
            )

        # De-duplicate while keeping first-seen order, so the column order
        # handed to ``func`` is deterministic (a plain set is not).
        final_cols = list(dict.fromkeys(e for e in cols if e not in drops))

        if not final_cols:
            raise ValueError(
                f"Empty set of columns is given. Columns to select: {cols}, columns to drop: {drops}."
            )

        missing_cols = set(final_cols) - set(idf_target.columns)
        if missing_cols:
            raise ValueError(
                f"Not all columns are in the input dataframe. "
                f"Missing columns: {missing_cols}"
            )

        kwargs[columns] = final_cols
        # Drops have already been applied; hand ``func`` an empty drop list.
        kwargs[drop] = []

        return func(*args, **kwargs)

    return validate