fix: model zoo aclIMDB preprocess format
This commit is contained in:
parent
d5e02cf474
commit
203e864425
|
@@ -84,44 +84,62 @@ Download aclImdb dataset, transfer it to mindrecord, use MindDataset to read min
|
|||
|
||||
2. The output looks like this:
|
||||
```
|
||||
example 24992: {'input_ids': array([ -1, -1, 65, 0, 89, 0, 367, 0, -1,
|
||||
-1, -1, -1, 488, 0, 0, 0, 206, 0,
|
||||
816, 0, -1, -1, 16, 0, -1, -1, 11998,
|
||||
0, 0, 0, 852, 0, 1, 0, 111, 0,
|
||||
-1, -1, -1, -1, 765, 0, 9, 0, 17,
|
||||
0, 35, 0, 72, 0, -1, -1, -1, -1,
|
||||
40, 0, 895, 0, 41, 0, 0, 0, 6952,
|
||||
0, 170, 0, -1, -1, -1, -1, 3, 0,
|
||||
28, 0, -1, -1, 0, 0, 111, 0, 58,
|
||||
0, 110, 0, 569, 0, -1, -1, -1, -1,
|
||||
-1, -1, 0, 0, 24512, 0, 3, 0, 0,
|
||||
0], dtype=int32), 'id': array(8045, dtype=int32), 'input_mask': array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=int32), 'segment_ids': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32), 'score': array(1, dtype=int32), 'label': array(1, dtype=int32)}
|
||||
example 24993: {'input_ids': array([ -1, -1, 11, 0, 7400, 0, 189, 0, 4, 0, 1247,
|
||||
0, 9, 0, 17, 0, 29, 0, 0, 0, -1, -1,
|
||||
-1, -1, -1, -1, 1, 0, -1, -1, 218, 0, 131,
|
||||
0, 10, 0, -1, -1, 52, 0, 72, 0, 488, 0,
|
||||
6, 0, -1, -1, -1, -1, -1, -1, 1749, 0, 0,
|
||||
0, -1, -1, 42, 0, 21, 0, 65, 0, 6895, 0,
|
||||
-1, -1, -1, -1, -1, -1, 11, 0, 52, 0, 72,
|
||||
0, 1498, 0, 10, 0, 21, 0, 65, 0, 19, 0,
|
||||
-1, -1, -1, -1, 36, 0, 130, 0, 88, 0, 210,
|
||||
0], dtype=int32), 'id': array(9903, dtype=int32), 'input_mask': array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=int32), 'segment_ids': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32), 'score': array(7, dtype=int32), 'label': array(0, dtype=int32)}
|
||||
example 24992: {
|
||||
'input_ids': array(
|
||||
[ -1, -1, 65, 0, 89, 0, 367, 0, -1,
|
||||
-1, -1, -1, 488, 0, 0, 0, 206, 0,
|
||||
816, 0, -1, -1, 16, 0, -1, -1, 11998,
|
||||
0, 0, 0, 852, 0, 1, 0, 111, 0,
|
||||
-1, -1, -1, -1, 765, 0, 9, 0, 17,
|
||||
0, 35, 0, 72, 0, -1, -1, -1, -1,
|
||||
40, 0, 895, 0, 41, 0, 0, 0, 6952,
|
||||
0, 170, 0, -1, -1, -1, -1, 3, 0,
|
||||
28, 0, -1, -1, 0, 0, 111, 0, 58,
|
||||
0, 110, 0, 569, 0, -1, -1, -1, -1,
|
||||
-1, -1, 0, 0, 24512, 0, 3, 0, 0,
|
||||
0], dtype=int32),
|
||||
'id': array(8045, dtype=int32),
|
||||
'input_mask': array(
|
||||
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=int32),
|
||||
'segment_ids': array(
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32),
|
||||
'score': array(1, dtype=int32),
|
||||
'label': array(1, dtype=int32)}
|
||||
example 24993: {
|
||||
'input_ids': array(
|
||||
[ -1, -1, 11, 0, 7400, 0, 189, 0, 4, 0, 1247,
|
||||
0, 9, 0, 17, 0, 29, 0, 0, 0, -1, -1,
|
||||
-1, -1, -1, -1, 1, 0, -1, -1, 218, 0, 131,
|
||||
0, 10, 0, -1, -1, 52, 0, 72, 0, 488, 0,
|
||||
6, 0, -1, -1, -1, -1, -1, -1, 1749, 0, 0,
|
||||
0, -1, -1, 42, 0, 21, 0, 65, 0, 6895, 0,
|
||||
-1, -1, -1, -1, -1, -1, 11, 0, 52, 0, 72,
|
||||
0, 1498, 0, 10, 0, 21, 0, 65, 0, 19, 0,
|
||||
-1, -1, -1, -1, 36, 0, 130, 0, 88, 0, 210,
|
||||
0], dtype=int32),
|
||||
'id': array(9903, dtype=int32),
|
||||
'input_mask': array(
|
||||
[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=int32),
|
||||
'segment_ids': array(
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32),
|
||||
'score': array(7, dtype=int32),
|
||||
'label': array(0, dtype=int32)}
|
||||
```
|
||||
- id : the numeric prefix of the review's file name; for example, the id "3219" comes from a review doc named **3219**_10.txt.
|
||||
- label : indicates whether the review is positive or negative: 0 means positive, 1 means negative.
|
||||
|
|
Loading…
Reference in New Issue