@@ -40,6 +40,8 @@ public DatasetPass load_data(string path = "imdb.npz",
40
40
int oov_char = 2 ,
41
41
int index_from = 3 )
42
42
{
43
+ if ( maxlen == - 1 ) throw new InvalidArgumentError ( "maxlen must be assigned." ) ;
44
+
43
45
var dst = Download ( ) ;
44
46
45
47
var lines = File . ReadAllLines ( Path . Combine ( dst , "imdb_train.txt" ) ) ;
@@ -51,7 +53,7 @@ public DatasetPass load_data(string path = "imdb.npz",
51
53
x_train_string [ i ] = lines [ i ] . Substring ( 2 ) ;
52
54
}
53
55
54
- var x_train = np . array ( x_train_string ) ;
56
+ var x_train = keras . preprocessing . sequence . pad_sequences ( PraseData ( x_train_string ) , maxlen : maxlen ) ;
55
57
56
58
File . ReadAllLines ( Path . Combine ( dst , "imdb_test.txt" ) ) ;
57
59
var x_test_string = new string [ lines . Length ] ;
@@ -62,7 +64,7 @@ public DatasetPass load_data(string path = "imdb.npz",
62
64
x_test_string [ i ] = lines [ i ] . Substring ( 2 ) ;
63
65
}
64
66
65
- var x_test = np . array ( x_test_string ) ;
67
+ var x_test = keras . preprocessing . sequence . pad_sequences ( PraseData ( x_test_string ) , maxlen : maxlen ) ;
66
68
67
69
return new DatasetPass
68
70
{
@@ -93,5 +95,23 @@ string Download()
93
95
return dst ;
94
96
// return Path.Combine(dst, file_name);
95
97
}
98
+
99
/// <summary>
/// Converts textual integer lists into integer arrays, one array per input string.
/// </summary>
/// <remarks>
/// Each string is stripped of '[' , ']' and space characters and then split on commas,
/// so an entry such as "[1, 14, 22]" becomes { 1, 14, 22 }. Empty tokens (e.g. from "[]",
/// a blank line, or a trailing comma) are ignored, so such an entry yields an empty array
/// instead of failing. (The method name is a historical misspelling of "ParseData"; it is
/// kept so existing callers continue to compile.)
/// </remarks>
/// <param name="x">The strings to parse; must not be null.</param>
/// <returns>A list holding one <c>int[]</c> per element of <paramref name="x"/>, in the same order.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="x"/> is null.</exception>
/// <exception cref="System.FormatException">A non-empty token is not a valid integer.</exception>
/// <exception cref="System.OverflowException">A token does not fit in a 32-bit integer.</exception>
protected IEnumerable<int[]> PraseData(string[] x)
{
    if (x == null)
    {
        throw new System.ArgumentNullException(nameof(x));
    }

    // One output array per input line, so the final size is known up front.
    var data_list = new List<int[]>(x.Length);
    var separators = new[] { ',' };

    foreach (var list_string in x)
    {
        var cleaned_list_string = list_string.Replace("[", "").Replace("]", "").Replace(" ", "");

        // RemoveEmptyEntries keeps "[]" / "" / "1,2," from feeding an empty string to int.Parse.
        string[] number_strings = cleaned_list_string.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);

        int[] numbers = new int[number_strings.Length];
        for (int j = 0; j < number_strings.Length; j++)
        {
            // The data is machine-written, so parse it independently of the current culture.
            numbers[j] = int.Parse(
                number_strings[j],
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);
        }

        data_list.Add(numbers);
    }

    return data_list;
}
96
116
}
97
117
}
0 commit comments