pod_schemas
Config YAML specification classes related to pods/datasource configuration.
Classes
DatasourceConfig
class DatasourceConfig( datasource: str, name: str, data_config: PodDataConfig = PodDataConfig(force_stypes=None, column_descriptions=None, table_descriptions=None, description=None, ignore_cols=None, modifiers=None, datasource_args={}, data_split=None, auto_tidy=False, file_system_filters=None), datasource_details_config: Optional[PodDetailsConfig] = None, schema: Optional[Path] = None,):
Datasource configuration for a multi-datasource Pod.
Variables
- static
data_config : PodDataConfig
- static
datasource : str
- static
datasource_details_config : Optional[PodDetailsConfig]
- static
name : str
- static
schema : Optional[pathlib.Path]
FileSystemFilterConfig
class FileSystemFilterConfig( file_extension: Optional[SingleOrMulti[str]] = None, strict_file_extension: bool = False, file_creation_min_date: Optional[Date] = None, file_modification_min_date: Optional[Date] = None, file_creation_max_date: Optional[Date] = None, file_modification_max_date: Optional[Date] = None, min_file_size: Optional[float] = None, max_file_size: Optional[float] = None,):
Filter files based on various criteria.
Arguments
file_extension
: File extension(s) of the data files. If None, all files will be searched. Can either be a single file extension or a list of file extensions. Case-insensitive. Defaults to None.
strict_file_extension
: Whether file loading should be strictly done on files with the explicit file extension provided. If set to True, it will only load those files in the dataset. Otherwise, it will scan the given path for files of the same type as the provided file extension. Only relevant if `file_extension` is provided. Defaults to False.
file_creation_min_date
: The oldest possible date to consider for file creation. If None, this filter will not be applied. Defaults to None.
file_modification_min_date
: The oldest possible date to consider for file modification. If None, this filter will not be applied. Defaults to None.
file_creation_max_date
: The newest possible date to consider for file creation. If None, this filter will not be applied. Defaults to None.
file_modification_max_date
: The newest possible date to consider for file modification. If None, this filter will not be applied. Defaults to None.
min_file_size
: The minimum file size in megabytes to consider. If None, all files will be considered. Defaults to None.
max_file_size
: The maximum file size in megabytes to consider. If None, all files will be considered. Defaults to None.
Variables
- static
file_creation_max_date : Optional[Date]
- static
file_creation_min_date : Optional[Date]
- static
file_extension : Union[str, collections.abc.Sequence[str], ForwardRef(None)]
- static
file_modification_max_date : Optional[Date]
- static
file_modification_min_date : Optional[Date]
- static
max_file_size : Optional[float]
- static
min_file_size : Optional[float]
- static
strict_file_extension : bool
PodConfig
class PodConfig( name: str, secrets: Optional[Union[APIKeys, JWT]] = None, pod_details_config: Optional[PodDetailsConfig] = None, datasource: Optional[str] = None, data_config: Optional[PodDataConfig] = None, schema: Optional[Path] = None, datasources: Optional[list[DatasourceConfig]] = None, access_manager: AccessManagerConfig = AccessManagerConfig(url='https://am.hub.bitfount.com'), hub: HubConfig = HubConfig(url='https://hub.bitfount.com'), message_service: MessageServiceConfig = MessageServiceConfig(url='messaging.staging.bitfount.com', port=443, tls=True, use_local_storage=False), differential_privacy: Optional[DPPodConfig] = None, approved_pods: Optional[list[str]] = None, username: str = '_default', update_schema: bool = False, pod_db: Union[bool, PodDbConfig] = False, show_datapoints_with_results_in_db: bool = True, version: Optional[str] = None,):
Full configuration for the pod.
Raises
ValueError
: If a username is not provided alongside API keys.
Variables
- static
access_manager : AccessManagerConfig
- static
approved_pods : Optional[list[str]]
- static
data_config : Optional[PodDataConfig]
- static
datasource : Optional[str]
- static
datasources : Optional[list[DatasourceConfig]]
- static
differential_privacy : Optional[DPPodConfig]
- static
hub : HubConfig
- static
message_service : MessageServiceConfig
- static
name : str
- static
pod_db : Union[bool, PodDbConfig]
- static
pod_details_config : Optional[PodDetailsConfig]
- static
schema : Optional[pathlib.Path]
- static
show_datapoints_with_results_in_db : bool
- static
update_schema : bool
- static
username : str
- static
version : Optional[str]
pod_id : str
- The pod ID of the pod specified.
PodDataConfig
class PodDataConfig( force_stypes: Optional[dict] = None, column_descriptions: Optional[Union[Mapping[str, Mapping[str, str]], Mapping[str, str]]] = None, table_descriptions: Optional[Mapping[str, str]] = None, description: Optional[str] = None, ignore_cols: Optional[Union[list[str], Mapping[str, list[str]]]] = None, modifiers: Optional[dict[str, DataPathModifiers]] = None, datasource_args: _JSONDict = {}, data_split: Optional[DataSplitConfig] = None, auto_tidy: bool = False, file_system_filters: Optional[FileSystemFilterConfig] = None,):
Configuration for the Schema, BaseSource and Pod.
Arguments
force_stypes
: The semantic types to force for the data. Can either be:
  - A mapping from pod name to type-to-column mapping (e.g. `{"pod_name": {"categorical": ["col1", "col2"]}}`).
  - A direct mapping from type to column names (e.g. `{"categorical": ["col1", "col2"]}`).
ignore_cols
: The columns to ignore. This is passed to the data source.
modifiers
: The modifiers to apply to the data. This is passed to the `BaseSource`.
datasource_args
: Key-value pairs of arguments to pass to the data source constructor.
data_split
: The data split configuration. This is passed to the data source.
auto_tidy
: Whether to automatically tidy the data. This is used by the `Pod` and will result in removal of NaNs and normalisation of numeric values. Defaults to False.
file_system_filters
: Filter files based on various criteria for datasources that are `FileSystemIterable`. Defaults to None.
Variables
- static
auto_tidy : bool
- static
column_descriptions : Union[Mapping[str, Mapping[str, str]], Mapping[str, str], ForwardRef(None)]
- static
data_split : Optional[DataSplitConfig]
- static
datasource_args : dict[str, typing.Any]
- static
description : Optional[str]
- static
file_system_filters : Optional[FileSystemFilterConfig]
- static
force_stypes : Optional[dict]
- static
ignore_cols : Union[list[str], Mapping[str, list[str]], ForwardRef(None)]
- static
modifiers : Optional[dict[str, DataPathModifiers]]
- static
table_descriptions : Optional[Mapping[str, str]]
PodDbConfig
class PodDbConfig(path: Path):
Configuration of the Pod DB.
Variables
- static
path : pathlib.Path
PodDetailsConfig
class PodDetailsConfig(display_name: str, description: str = ''):
Configuration for the pod details.
Arguments
display_name
: The display name of the pod.
description
: The description of the pod.