// Repository URL to install this package:
// Version: 4.5.4.dev1
syntax = "proto3";
package sarus_data_spec;
// A Scalar represents data that does not fulfill the promise of a Dataset.
//
// A Dataset promises to have a schema and to be iterable as
// pyarrow.RecordBatches; a Scalar gives no such guarantee. As a consequence,
// operations from standard libraries (pandas.mean, numpy.std, ...) are
// allowed, but operations that Sarus implements for Datasets -- such as
// computing marginals or fitting a Keras model -- cannot be performed on a
// Scalar.
//
// Scalars are produced either by transforms that explicitly require a
// specific format (e.g. as_pandas, as_numpy, ...) or as byproducts of
// transforms (model weights, training history, ...).
message Scalar {
  // Identifier, e.g. an RFC 4122 id.
  // NOTE(review): the original note says this id refers to "the dataset" and
  // asks whether it is content linked; presumably it identifies this Scalar.
  // Confirm.
  string uuid = 1;

  // Name of the object.
  string name = 2;

  // Free-form documentation text.
  string doc = 3;

  // How the object is obtained (see Spec below).
  Spec spec = 4;

  // Other properties, stored as string key/value pairs.
  map<string, string> properties = 5;

  // Definitions

  // Describes how to obtain the object. At most one member of the oneof is
  // set at a time.
  message Spec {
    oneof spec {
      Transformed transformed = 1;
      Model model = 2;
      PrivacyParameters privacy_params = 3;
      RandomSeed random_seed = 4;
      SyntheticModel synthetic_model = 5;
    }
  }

  // An object defined by applying a transform to some arguments.
  message Transformed {
    // Transform id.
    string transform = 1;

    // Positional arguments: Dataset or other object ids.
    repeated string arguments = 2;

    // Named arguments, string to string.
    // NOTE(review): presumably argument name -> object id; confirm.
    map<string, string> named_arguments = 3;
  }

  // A model; currently the only variant is a pretrained model.
  message Model {
    oneof type {
      PretrainedModel pretrained_model = 1;
    }

    // String-valued description of a pretrained model.
    message PretrainedModel {
      string foundation_model_name = 1;
      string checkpoint_path = 2;
      string sample_type = 3;
    }
  }

  // A list of (epsilon, delta) points.
  // NOTE(review): presumably differential-privacy parameters; confirm.
  message PrivacyParameters {
    repeated Point points = 1;

    // A single (epsilon, delta) pair.
    message Point {
      double epsilon = 1;
      double delta = 2;
    }
  }

  // A random seed carried as a 32-bit signed integer.
  message RandomSeed {
    int32 value = 1;
  }

  // Marker variant for a synthetic model; it declares no fields.
  message SyntheticModel {}
}