INGOR
ytData.h
1/*
2 util/ytData.{h,c} : Data container
3 Copyright (C) 2018, Yoshinori Tamada <tamada A T ytlab.jp>
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 * Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in
15 the documentation and/or other materials provided with the
16 distribution.
17
18 * Neither the name of The Kyoto University nor the names of its
19 contributors may be used to endorse or promote products derived
20 from this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34*/
35
36#ifndef __YTLIB_DATA_H
37#define __YTLIB_DATA_H
38
39#include <stdlib.h>
40
41#include "lang/ytObject.h"
42#include "ytArray.h"
43#include "ytIntArray.h"
44#include "ytStrArray.h"
45#include "ytKeyValues.h"
46
47#include "math/ytRNG.h"
48
/* Integer IDs for variable value types; hidden from Doxygen by the DOXY guard.
 * NOTE(review): presumably these are the values stored in ytData::types and
 * accepted by ytData_typeName()/returned by ytData_parseType() -- confirm
 * against ytData.c. */
49#ifndef DOXY
50#define ytData_TYPE_REAL 0
51#define ytData_TYPE_ORDINAL 1
52#define ytData_TYPE_CATEGORICAL 2
53#define ytData_TYPE_DISCRETE 3
54#endif /* DOXY */
55
/* General data container. */
73typedef struct {
74 ytObject obj;
75
/* The number of samples. */
77 int n;
78
/* The number of variables. */
80 int p;
81
/* n x p explanatory data matrix. */
91 double * X;
92
/* n x p target data matrix. */
98 double * Y;
99
/* NOTE(review): this listing shows no declarations for original lines
 * 101-124, but the documentation index for this header lists these members:
 *   ytStrArray  * names       (line 101) - names of variables
 *   ytIntArray  * types       (line 107) - type ID of each variable
 *   ytKeyValues * sampleAttrs (line 112) - attributes for samples
 *   ytKeyValues * varAttrs    (line 115) - attributes for variables
 *   ytArray     * dict        (line 121) - per-variable category dictionaries
 *   ytKeyValues * meta        (line 124) - meta data
 * Confirm against the real header before relying on this listing. */
102
108
113
116
122
125
126} ytData;
127
/* Public ytData API.
 * NOTE(review): several prototypes use FILE, but only <stdlib.h> is included
 * directly in the visible part of this header; presumably <stdio.h> arrives
 * through one of the project headers -- confirm. */

/* Deletes the ytData instance. */
129void ytData_delete(ytData * this);
130
/* Returns the string expression of the type value. */
131const char * ytData_typeName(int type);
132void ytData_summary(const ytData * data, FILE * fp);
133void ytData_varSummary(const ytData * data, FILE * fp);
/* Prints or checks data statistics. */
134void ytData_stat(const ytData * this, FILE * fp, int level);
/* Returns the type of the variable. */
135int ytData_getType(const ytData * this, int j);
/* Returns the string expression of the type of the specified variable. */
136const char * ytData_getTypeName(const ytData * this, int j);
/* Returns the type ID of the given type name. */
137int ytData_parseType(const char * name);
/* Returns the name of the variable. */
138const char * ytData_getName(const ytData * this, int j);
/* Returns the index of the variable of the specified name. */
139int ytData_findName(const ytData * this, const char * name);
/* Returns the number of samples (ytData::n). */
140int ytData_numSamples(const ytData * this);
/* Returns the number of variables (ytData::p). */
141int ytData_numVariables(const ytData * this);
/* Performs the bootstrap resampling. */
142ytData * ytData_bootstrap(const ytData * this, ytRNG * rng, ytData * data);
/* Converts data for the dynamic model. */
143void ytData_dynamic(ytData * this);
/* Generates time expanded data. */
144ytData * ytData_dynamic2(const ytData * this, int * T);
/* Prints the contents. */
145void ytData_print(ytData * this, FILE * fp);
/* Performs the pseudo bootstrap resampling for dynamic data. */
146ytData * ytData_pseudoBootstrap(const ytData * this, ytRNG * rng, int blocks, ytData * data);
/* Resampling primary IDs for the bootstrap method. */
147ytData * ytData_pidBootstrap(const ytData * this, ytRNG * rng, int n, int F, ytData * data);
148ytArray * ytData_readPrimaryIDList(const ytData * this, const char * file);
/* Resampling lists of primary IDs for the bootstrap method. */
149ytData * ytData_listBootstrap(const ytData * this, ytRNG * rng, int n, ytArray * listSet, int F, ytData * data);
/* Extracts value ranges. */
150void ytData_extractRange(const ytData * this, ytKeyValues * kv);
/* Checks if the range arrays are valid. */
151void ytData_checkRange(const ytData * this, ytDoubleArray * xlar, ytDoubleArray * xrar);
/* Generates a new ytData instance for static-dynamic hybrid model.
 * NOTE(review): the second parameter is named `n` here but `N` in the
 * documentation index for the definition -- harmless in C, but worth aligning. */
152ytData * ytData_hybrid(ytData * this, int n);
/* De-hybridize time-extended static-dynamic hybrid data. */
153ytData * ytData_dehybrid(ytData * this, int N);
/* Converts data for the time-expanded DBN model. */
154ytData * ytData_dbn(ytData * this, int T);
156void ytData_varInfo(const ytData * this, int j, FILE * fp);
/* Returns the dictionary (categories) of the variable. */
157const ytStrArray * ytData_getCategories(const ytData * this, int j);
/* Converts the data to explanatory/objective variable separated data. */
159void ytData_splitXY(ytData * this);
/* Selects variables by their names. */
160ytData * ytData_selectVars(const ytData * this, const ytStrArray * names);
161void ytData_dump(const ytData * this, FILE * fp);
/* Counts up the number of NaNs. */
162int ytData_countNAN(const ytData * this);
163int ytData_debug(int argc, char * argv[]);
164
/* Only declared when the build defines USE_MPI. */
165#ifdef USE_MPI
/* Broadcasts the ytData instance with MPI. */
166void ytData_MPI_Bcast(ytData ** data, int root, MPI_Comm comm);
167#endif /* USE_MPI */
168
169#endif /* __YTLIB_DATA_H */
Expandable array.
Expandable array.
Expandable array.
key-value pairs.
The basis class.
Expandable array.
General data container.
Definition: ytData.h:73
int ytData_getType(const ytData *this, int j)
Returns the type of the variable.
Definition: ytData.c:436
void ytData_checkRange(const ytData *this, ytDoubleArray *xlar, ytDoubleArray *xrar)
Checks if the range arrays are valid.
Definition: ytData.c:1498
ytData * ytData_selectVars(const ytData *this, const ytStrArray *names)
Selects variables by their names.
Definition: ytData.c:2090
ytData * ytData_dehybrid(ytData *this, int N)
De-hybridize time-extended static-dynamic hybrid data.
Definition: ytData.c:1697
const char * ytData_getName(const ytData *this, int j)
Returns the name of the variable.
Definition: ytData.c:488
void ytData_stat(const ytData *this, FILE *fp, int level)
Prints or checks data statistics.
Definition: ytData.c:208
int ytData_findName(const ytData *this, const char *name)
Returns the index of the variable of the specified name.
Definition: ytData.c:505
int p
The number of variables.
Definition: ytData.h:80
ytArray * ytData_readPrimaryIDList(const ytData *this, const char *file)
Definition: ytData.c:1402
ytData * ytData_dbn(ytData *this, int T)
Converts data for the time-expanded DBN model.
Definition: ytData.c:1767
void ytData_print(ytData *this, FILE *fp)
Prints the contents.
Definition: ytData.c:359
void ytData_convertAllToReal(ytData *this)
Converts all values to real values.
Definition: ytData.c:1994
void ytData_extractRange(const ytData *this, ytKeyValues *kv)
Extracts value ranges.
Definition: ytData.c:1470
ytArray * ytData_collectPrimaryId(const ytData *this)
Collects sample IDs with respect to the primary ID.
Definition: ytData.c:1925
const char * ytData_getTypeName(const ytData *this, int j)
Returns the string expression of the type of the specified variable.
Definition: ytData.c:498
double * X
n x p explanatory data matrix.
Definition: ytData.h:91
int ytData_numVariables(const ytData *this)
Returns the number of variables (ytData::p).
Definition: ytData.c:115
const ytStrArray * ytData_getCategories(const ytData *this, int j)
Returns the dictionary (categories) of the variable.
Definition: ytData.c:1981
ytKeyValues * varAttrs
attributes for variables.
Definition: ytData.h:115
ytIntArray * types
Value types of the variables. The j-th element represents the type ID of the j-th variable....
Definition: ytData.h:107
void ytData_MPI_Bcast(ytData **data, int root, MPI_Comm comm)
Broadcasts the ytData instance with MPI.
Definition: ytData.c:2253
ytData * ytData_pidBootstrap(const ytData *this, ytRNG *rng, int n, int F, ytData *data)
Resampling primary IDs for the bootstrap method.
Definition: ytData.c:1081
ytData * ytData_listBootstrap(const ytData *this, ytRNG *rng, int n, ytArray *listSet, int F, ytData *data)
Resampling lists of primary IDs for the bootstrap method.
Definition: ytData.c:1241
int n
The number of samples.
Definition: ytData.h:77
ytData * ytData_bootstrap(const ytData *this, ytRNG *rng, ytData *data)
Performs the bootstrap resampling.
Definition: ytData.c:889
double * Y
n x p target data matrix.
Definition: ytData.h:98
int ytData_parseType(const char *name)
Returns the type ID of the given type name.
Definition: ytData.c:452
void ytData_splitXY(ytData *this)
Converts the data to explanatory/objective variable separated data.
Definition: ytData.c:2032
ytData * ytData_pseudoBootstrap(const ytData *this, ytRNG *rng, int blocks, ytData *data)
Performs the pseudo bootstrap resampling for dynamic data.
Definition: ytData.c:969
ytStrArray * names
Names of variables.
Definition: ytData.h:101
ytData * ytData_new()
Generates the empty ytData instance.
Definition: ytData.c:68
ytArray * dict
dictionary for categories. The elements are ytStrArray instances, and the j-th element corresponds to...
Definition: ytData.h:121
ytData * ytData_hybrid(ytData *this, int N)
Generates a new ytData instance for static-dynamic hybrid model.
Definition: ytData.c:1540
void ytData_delete(ytData *this)
Deletes the ytData instance.
Definition: ytData.c:86
ytData * ytData_dynamic2(const ytData *this, int *T)
Generates time expanded data.
Definition: ytData.c:688
const char * ytData_typeName(int type)
Returns the string expression of the type value.
Definition: ytData.c:124
void ytData_dynamic(ytData *this)
Converts data for the dynamic model.
Definition: ytData.c:519
int ytData_countNAN(const ytData *this)
Counts up the number of NaNs.
Definition: ytData.c:2228
ytKeyValues * meta
meta data
Definition: ytData.h:124
ytKeyValues * sampleAttrs
attributes for samples. The value associated with the key is an array. The type of the array depends ...
Definition: ytData.h:112
int ytData_numSamples(const ytData *this)
Returns the number of samples (ytData::n).
Definition: ytData.c:108
Structure for encapsulating the random number generator.
Definition: ytRNG.h:60