paddlets.models.dl.paddlepaddle.adapter.data_adapter 源代码

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import paddle.io

from paddlets.models.dl.paddlepaddle.adapter.paddle_dataset_impl import PaddleDatasetImpl
from paddlets.datasets import TSDataset
from paddlets.logger import Logger

from paddle.io import DataLoader as PaddleDataLoader
from typing import Callable, Tuple, Optional

logger = Logger(__name__)


class DataAdapter(object):
    """
    Data adapter, converts :class:`paddlets.TSDataset` to :class:`paddle.io.Dataset`
    and :class:`paddle.io.DataLoader`.

    The adapter holds no state of its own; both conversion methods simply forward
    their arguments to the underlying dataset / dataloader constructors.
    """

    def __init__(self):
        # Nothing to initialize: the adapter keeps no instance attributes.
        pass

    def to_paddle_dataset(
        self,
        rawdataset: TSDataset,
        in_chunk_len: int = 1,
        out_chunk_len: int = 1,
        skip_chunk_len: int = 0,
        sampling_stride: int = 1,
        time_window: Optional[Tuple] = None
    ) -> PaddleDatasetImpl:
        """
        Converts :class:`paddlets.TSDataset` to :class:`paddle.io.Dataset`.

        Args:
            rawdataset(TSDataset): Raw TSDataset for converting to :class:`paddle.io.Dataset`.
            in_chunk_len(int): The size of the look-back window, i.e., the number of time steps
                fed to the model.
            out_chunk_len(int): The size of the forecasting horizon, i.e., the number of time steps
                output by the model.
            skip_chunk_len(int): Optional, the number of time steps between in_chunk and out_chunk
                for a single sample. The skip chunk is neither used as a feature (i.e. X) nor a
                label (i.e. Y) for a single sample. By default, it will NOT skip any time steps.
            sampling_stride(int, optional): Time steps to stride over the i-th sample and the
                (i+1)-th sample. More precisely, let `t` be the time index of the target time
                series, `t[i]` be the start time of the i-th sample and `t[i+1]` be the start time
                of the (i+1)-th sample, then `sampling_stride` represents the result of
                `t[i+1] - t[i]`.
            time_window(Tuple, optional): A two-element tuple describing the time window within
                which the adapter is allowed to build samples. time_window[0] is the window lower
                bound and time_window[1] is the window upper bound. Each element in this
                left-closed-and-right-closed interval refers to the TAIL index of a sample.

        Returns:
            PaddleDatasetImpl: A built PaddleDatasetImpl.
        """
        # All sample-building logic lives in PaddleDatasetImpl; arguments are passed through as-is.
        return PaddleDatasetImpl(
            rawdataset=rawdataset,
            in_chunk_len=in_chunk_len,
            out_chunk_len=out_chunk_len,
            skip_chunk_len=skip_chunk_len,
            sampling_stride=sampling_stride,
            time_window=time_window
        )

    def to_paddle_dataloader(
        self,
        paddle_dataset: PaddleDatasetImpl,
        batch_size: int,
        collate_fn: Optional[Callable] = None,
        shuffle: bool = True
    ) -> PaddleDataLoader:
        """
        Converts :class:`paddle.io.Dataset` to :class:`paddle.io.DataLoader`.

        Args:
            paddle_dataset(PaddleDatasetImpl): The paddle dataset (e.g. one built by
                :func:`to_paddle_dataset`) used for building :class:`paddle.io.DataLoader`.
            batch_size(int): The number of samples for a single batch.
            collate_fn(Callable, optional): User-defined collate function for each batch.
                Forwarded unchanged (including None) to :class:`paddle.io.DataLoader`.
            shuffle(bool, optional): Whether to shuffle indices order before generating batch
                indices, default True.
                TODO: add this argument to :func:`__init__` construct method to allow the caller
                to set its value.

        Returns:
            PaddleDataLoader: A built paddle DataLoader.

        Examples:
            .. code-block:: python

                # Given:
                batch_size = 4
                in_chunk_len = 3
                out_chunk_len = 2
                known_cov_chunk_len = in_chunk_len + out_chunk_len = 3 + 2 = 5
                observed_cov_chunk_len = in_chunk_len = 3
                target_col_num = 2 (target column number, e.g. ["t0", "t1"])
                known_cov_col_num = 3 (known covariates column number, e.g. ["k0", "k1", "k2"])
                observed_cov_col_num = 1 (observed covariates column number, e.g. ["obs0"])

                # Built DataLoader instance:
                dataloader = [
                    # 1st batch
                    {
                        "past_target": paddle.Tensor(shape=(batch_size, in_chunk_len, target_col_num)),
                        "future_target": paddle.Tensor(shape=(batch_size, out_chunk_len, target_col_num)),
                        "known_cov": paddle.Tensor(shape=(batch_size, known_cov_chunk_len, known_cov_col_num)),
                        "observed_cov": paddle.Tensor(shape=(batch_size, observed_cov_chunk_len, observed_cov_col_num))
                    },

                    # ...

                    # N-th batch
                    {
                        "past_target": paddle.Tensor(shape=(batch_size, in_chunk_len, target_col_num)),
                        "future_target": paddle.Tensor(shape=(batch_size, out_chunk_len, target_col_num)),
                        "known_cov": paddle.Tensor(shape=(batch_size, known_cov_chunk_len, known_cov_col_num)),
                        "observed_cov": paddle.Tensor(shape=(batch_size, observed_cov_chunk_len, observed_cov_col_num))
                    }
                ]
        """
        return PaddleDataLoader(
            dataset=paddle_dataset,
            batch_size=batch_size,
            collate_fn=collate_fn,
            shuffle=shuffle
        )