Fix error info bug and multiprocessing shared memory error
parent bde38a582c · commit 7e26677534
@@ -39,7 +39,7 @@ namespace vision {
 class DvppDecodeResizeJpeg final : public TensorTransform {
  public:
   /// \brief Constructor.
-  /// \param[in] resize A vector of int value for each dimension, with respect to H,W order.
+  /// \param[in] resize Parameter vector of two integers for each dimension, with respect to H,W order.
   explicit DvppDecodeResizeJpeg(std::vector<uint32_t> resize);

   /// \brief Destructor.

@@ -62,8 +62,8 @@ class DvppDecodeResizeJpeg final : public TensorTransform {
 class DvppDecodeResizeCropJpeg final : public TensorTransform {
  public:
   /// \brief Constructor.
-  /// \param[in] crop A vector of int value for each dimension after final cropping, with respect to H,W order.
-  /// \param[in] resize A vector of int value for each dimension after resizing, with respect to H,W order.
+  /// \param[in] crop Parameter vector of two integers for each dimension after final crop, with respect to H,W order.
+  /// \param[in] resize Parameter vector of two integers for each dimension after resize, with respect to H,W order.
   explicit DvppDecodeResizeCropJpeg(std::vector<uint32_t> crop, std::vector<uint32_t> resize);

   /// \brief Destructor.

@@ -18,6 +18,7 @@ General Validators.
 import inspect
 from multiprocessing import cpu_count
 import os
+from pickle import dumps
 import numpy as np

 import mindspore._c_dataengine as cde

@@ -62,6 +63,23 @@ def is_iterable(obj):
     return True


+def is_serializable(obj):
+    """
+    Helper function to check if object is serializable.
+
+    Args:
+        obj (any): object to check if serializable
+
+    Returns:
+        bool, true if object is serializable
+    """
+    try:
+        dumps(obj)
+    except TypeError:
+        return False
+    return True
+
+
 def pad_arg_name(arg_name):
     """
     Appends a space to the arg_name (if not empty)

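A quick illustration of what the new is_serializable helper accepts and rejects. This is a standalone sketch that re-declares the helper from the hunk above; threading.Lock is only an example of an object that pickle rejects with TypeError.

    import threading
    import numpy as np
    from pickle import dumps


    def is_serializable(obj):
        # Same logic as the helper added above: pickling failures surface as TypeError.
        try:
            dumps(obj)
        except TypeError:
            return False
        return True


    print(is_serializable(np.ones((2, 3), dtype=np.float32)))  # True: NumPy arrays pickle fine
    print(is_serializable(threading.Lock()))                   # False: pickle raises TypeError for locks
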
@@ -448,13 +448,9 @@ class Dataset:
                 len(output_columns). The size of this list must match the number of output
                 columns of the last operation. (default=None, output columns will have the same
                 name as the input columns, i.e., the columns will be replaced).
-            column_order (Union[str, list[str]], optional): List of all the desired columns to propagate to
-                the child node. This list must be a permutation of all the columns in the dataset after
-                all operations are applied. The order of the columns in each row propagated to the
-                child node follow the order they appear in this list. The parameter is mandatory
-                if the len(input_columns) != len(output_columns). (default=None, all columns
-                will be propagated to the child node, the order of the columns will remain the
-                same).
+            column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
+                dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
+                is not just the columns specified in parameter input_columns and output_columns.
             pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
                 would pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
             python_multiprocessing (bool, optional): Parallelize Python function per_batch_map with multi-processing.

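As a hedged illustration of the pad_info behavior documented above, the sketch below pads every "col1" tensor up to [224, 224] with fill value 0 while batching; the dataset construction is an assumed example, not part of this patch.

    import numpy as np
    import mindspore.dataset as ds


    def gen():
        # Two samples whose "col1" arrays have different spatial sizes.
        yield (np.zeros((100, 100), dtype=np.float32),)
        yield (np.zeros((150, 120), dtype=np.float32),)


    dataset = ds.GeneratorDataset(gen, column_names=["col1"])
    # Pad each "col1" tensor to [224, 224] and fill the missing cells with 0,
    # matching the pad_info={"col1": ([224, 224], 0)} example from the docstring.
    dataset = dataset.batch(2, pad_info={"col1": ([224, 224], 0)})
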
@@ -645,13 +641,9 @@ class Dataset:
                 len(output_columns). The size of this list must match the number of output
                 columns of the last operation. (default=None, output columns will have the same
                 name as the input columns, i.e., the columns will be replaced).
-            column_order (list[str], optional): List of all the desired columns to propagate to the
-                child node. This list must be a subset of all the columns in the dataset after
-                all operations are applied. The order of the columns in each row propagated to the
-                child node follow the order they appear in this list. The parameter is mandatory
-                if the len(input_columns) != len(output_columns). (default=None, all columns
-                will be propagated to the child node, the order of the columns will remain the
-                same).
+            column_order (list[str], optional): Specifies the list of all the columns you need in the whole
+                dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
+                is not just the columns specified in parameter input_columns and output_columns.
             num_parallel_workers (int, optional): Number of threads used to process the dataset in
                 parallel (default=None, the value from the configuration will be used).
             python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This

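To make the reworded column_order description concrete, here is a small assumed example (column names are made up) where one column is mapped to two, so len(input_columns) != len(output_columns) and column_order must list every column kept in the resulting dataset.

    import numpy as np
    import mindspore.dataset as ds


    def gen():
        yield (np.array([1, 2], dtype=np.int32), np.array(0, dtype=np.int32))
        yield (np.array([3, 4], dtype=np.int32), np.array(1, dtype=np.int32))


    dataset = ds.GeneratorDataset(gen, column_names=["data", "label"])
    # "data" is split into two output columns, so column_order has to name all columns
    # wanted afterwards ("label" included), not only the mapped ones.
    dataset = dataset.map(operations=lambda x: (x[:1], x[1:]),
                          input_columns=["data"],
                          output_columns=["data1", "data2"],
                          column_order=["data1", "data2", "label"])
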
@@ -782,7 +774,7 @@ class Dataset:
     @check_repeat
     def repeat(self, count=None):
         """
-        Repeat this dataset N times where N = count. Repeat stochastically if the count is None or -1.
+        Repeat this dataset N times where N = count. Repeat infinitely if the count is None or -1.

         Note:
             The order of using repeat and batch reflects the number of batches. It is recommended that

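A small sketch (assumed pipeline, not from this patch) of how the repeat/batch order changes the number of batches, which is what the Note above is pointing at:

    import numpy as np
    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset(np.arange(5), column_names=["n"], shuffle=False)

    # batch before repeat: ceil(5 / 2) = 3 batches per pass, times 3 passes = 9 batches.
    print(data.batch(2).repeat(3).get_dataset_size())  # 9

    # repeat before batch: 15 rows batched as one stream, ceil(15 / 2) = 8 batches.
    print(data.repeat(3).batch(2).get_dataset_size())  # 8
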
@@ -2069,13 +2061,9 @@ class BatchDataset(Dataset):
            len(output_columns). The size of this list must match the number of output
            columns of the last operation. (default=None, output columns will have the same
            name as the input columns, i.e., the columns will be replaced).
-        column_order (Union[str, list[str]], optional): List of all the desired columns to propagate to the
-            child node. This list must be a subset of all the columns in the dataset after
-            all operations are applied. The order of the columns in each row propagated to the
-            child node follow the order they appear in this list. The parameter is mandatory
-            if the len(input_columns) != len(output_columns). (default=None, all columns
-            will be propagated to the child node, the order of the columns will remain the
-            same).
+        column_order (Union[str, list[str]], optional): Specifies the list of all the columns you need in the whole
+            dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
+            is not just the columns specified in parameter input_columns and output_columns.
         pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
            will pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
         max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy

@@ -2558,8 +2546,9 @@ class MapDataset(Dataset):
            The size of the list should match the number of outputs of the last operator
            (default=None, output columns will be the input columns, i.e., the columns will
            be replaced).
-        column_order (list[str], optional): List of all the desired columns of the dataset (default=None).
-            The argument is mandatory if len(input_columns) != len(output_columns).
+        column_order (list[str], optional): Specifies the list of all the columns you need in the whole
+            dataset. The parameter is required when len(input_column) != len(output_column). Caution: the list here
+            is not just the columns specified in parameter input_columns and output_columns.
         num_parallel_workers (int, optional): Number of workers to process the dataset
            in parallel (default=None).
        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This

@@ -293,7 +293,7 @@ class GraphData:
            node_list (Union[list, numpy.ndarray]): The given list of nodes.
            neighbor_type (int): Specify the type of neighbor.
            output_format (OutputFormat, optional): Output storage format (default=OutputFormat.NORMAL)
-               It can be any of [OutputFormat.NORMAL, OutputFormat.COO, OutputFormat.CSR].
+               It can be any of [OutputFormat.NORMAL, OutputFormat.COO, OutputFormat.CSR].

        Returns:
            For NORMAL format or COO format

@@ -22,6 +22,7 @@ import multiprocessing.queues
 import multiprocessing
 import numpy as np
 from mindspore import log as logger
+from ..core.validator_helpers import is_serializable
 from ..transforms.py_transforms_util import ExceptionHandler


@@ -75,6 +76,9 @@ class _SharedQueue(multiprocessing.queues.Queue):
         count = 0
         start_bytes = 0
         for r in data:
+            if not is_serializable(obj=r):
+                raise TypeError("Can not pickle {} object, please verify pyfunc return with numpy array"
+                                .format(type(r)))
             if (isinstance(r, np.ndarray) and r.size > self.min_shared_mem
                     and start_bytes + r.nbytes < self.seg_size):
                 # need to convert start_bytes to offset in array

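The effect of the added guard, shown as a standalone sketch: it re-implements the same check outside _SharedQueue, with a generator standing in for any non-picklable pyfunc return.

    import numpy as np
    from pickle import dumps


    def check_row_serializable(row):
        # Same idea as the guard added to _SharedQueue.put: reject non-picklable
        # elements up front with a readable TypeError instead of failing later
        # inside the shared-memory copy.
        for r in row:
            try:
                dumps(r)
            except TypeError:
                raise TypeError("Can not pickle {} object, please verify pyfunc return with numpy array"
                                .format(type(r)))


    check_row_serializable((np.ones(4), np.int32(1)))  # passes silently
    try:
        check_row_serializable((np.ones(4), (x for x in ())))
    except TypeError as err:
        print(err)  # Can not pickle <class 'generator'> object, please verify pyfunc return with numpy array
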
@@ -40,7 +40,7 @@ def serialize(dataset, json_filepath=""):
         dict containing the serialized dataset graph.

     Raises:
-        OSError cannot open a file
+        OSError: Can not open a file

     Examples:
         >>> dataset = ds.MnistDataset(mnist_dataset_dir, 100)

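For context, a brief usage sketch of serialize built on the docstring example above; the MNIST path is a placeholder.

    import mindspore.dataset as ds

    mnist_dataset_dir = "/path/to/mnist"  # placeholder directory
    dataset = ds.MnistDataset(mnist_dataset_dir, 100)

    graph_dict = ds.serialize(dataset)                          # returns the serialized graph as a dict
    ds.serialize(dataset, json_filepath="mnist_pipeline.json")  # writes JSON; OSError if the file can not be opened
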
@@ -682,7 +682,7 @@ def check_repeat(method):
         type_check(count, (int, type(None)), "repeat")
         if isinstance(count, int):
             if (count <= 0 and count != -1) or count > INT32_MAX:
-                raise ValueError("count should be either -1 or positive integer.")
+                raise ValueError("count should be either -1 or positive integer, range[1, INT32_MAX].")
         return method(self, *args, **kwargs)

     return new_method

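The tightened error message corresponds to this acceptance rule; below is a minimal standalone mirror of the check, not the decorator itself.

    INT32_MAX = 2147483647


    def validate_repeat_count(count):
        # count may be None (repeat infinitely), -1 (also infinite), or an int in [1, INT32_MAX].
        if isinstance(count, int):
            if (count <= 0 and count != -1) or count > INT32_MAX:
                raise ValueError("count should be either -1 or positive integer, range[1, INT32_MAX].")


    validate_repeat_count(-1)   # valid: repeat infinitely
    validate_repeat_count(3)    # valid
    try:
        validate_repeat_count(0)
    except ValueError as err:
        print(err)  # count should be either -1 or positive integer, range[1, INT32_MAX].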