1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
//===- File.cpp - Reading/writing sparse tensors from/to files ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements reading and writing sparse tensor files.
//
//===----------------------------------------------------------------------===//
#include "mlir/ExecutionEngine/SparseTensor/File.h"
#include <cctype>
#include <cstring>
using namespace mlir::sparse_tensor;
/// Opens `filename` in read mode and stores the resulting handle in `file`.
/// Prints a diagnostic to stderr and terminates the process with exit code 1
/// if a file is already open on this reader or if `fopen` fails.
void SparseTensorReader::openFile() {
  // Refuse to open twice; the existing handle would otherwise be overwritten.
  if (file != nullptr) {
    fprintf(stderr, "Already opened file %s\n", filename);
    exit(1);
  }
  FILE *handle = fopen(filename, "r");
  if (handle == nullptr) {
    fprintf(stderr, "Cannot find file %s\n", filename);
    exit(1);
  }
  file = handle;
}
/// Closes the currently open file, if any, and clears the handle so that
/// a later call is a no-op.
void SparseTensorReader::closeFile() {
  if (!file)
    return;
  fclose(file);
  file = nullptr;
}
/// Reads the next line of the file into the `line` buffer (at most
/// `kColWidth - 1` characters, as per `fgets`). Prints a diagnostic to
/// stderr and terminates the process with exit code 1 when nothing could
/// be read.
void SparseTensorReader::readLine() {
  const bool gotLine = fgets(line, kColWidth, file) != nullptr;
  if (gotLine)
    return;
  fprintf(stderr, "Cannot read next line of %s\n", filename);
  exit(1);
}
/// Reads and parses the header of the already-opened file. The file format
/// is selected from `filename`: a name containing ".mtx" is parsed as a
/// Matrix Market Exchange file, otherwise a name containing ".tns" is parsed
/// as an "extended" FROSTT file. Any other name is reported on stderr and
/// terminates the process with exit code 1.
void SparseTensorReader::readHeader() {
  assert(file && "Attempt to readHeader() before openFile()");
  // Note that these are substring matches anywhere in the name, not strict
  // suffix checks, and that ".mtx" takes precedence over ".tns".
  const bool isMME = strstr(filename, ".mtx") != nullptr;
  const bool isFROSTT = !isMME && strstr(filename, ".tns") != nullptr;
  if (isMME) {
    readMMEHeader();
  } else if (isFROSTT) {
    readExtFROSTTHeader();
  } else {
    fprintf(stderr, "Unknown format %s\n", filename);
    exit(1);
  }
  assert(isValid() && "Failed to read the header");
}
/// Asserts that the given `shape` subsumes the dimension sizes read from
/// the file: the rank must equal `getRank()`, and every entry of `shape`
/// must either be zero (which matches any size) or equal the corresponding
/// dimension size stored at `idata[2 + r]`. Only meaningful after the
/// header has been parsed; the checks vanish when assertions are disabled.
void SparseTensorReader::assertMatchesShape(uint64_t rank,
                                            const uint64_t *shape) const {
  assert(rank == getRank() && "Rank mismatch");
  uint64_t r = 0;
  while (r < rank) {
    assert((shape[r] == 0 || shape[r] == idata[2 + r]) &&
           "Dimension size mismatch");
    ++r;
  }
}
/// Returns whether the values stored in the file (as described by the
/// `valueKind_` established while parsing the header) may be read into
/// elements of primary type `valTy`.
bool SparseTensorReader::canReadAs(PrimaryType valTy) const {
  switch (valueKind_) {
  case ValueKind::kPattern:
    // Pattern files are accepted for every primary type.
    return true;
  case ValueKind::kInteger:
    // Integer files may still be implicitly converted to floating
    // primary-types, so any real primary-type is accepted.
  case ValueKind::kUndefined:
    // The "extended" FROSTT format doesn't specify a ValueKind, so the
    // stored values may be converted to both integer and floating
    // primary-types.
    return isRealPrimaryType(valTy);
  case ValueKind::kReal:
    // Real/floating files must not be implicitly converted to integer
    // primary-types.
    return isFloatingPrimaryType(valTy);
  case ValueKind::kComplex:
    // Complex files require a complex primary-type.
    return isComplexPrimaryType(valTy);
  case ValueKind::kInvalid:
    assert(false && "Must readHeader() before calling canReadAs()");
    return false; // In case assertions are disabled.
  }
  // Only reachable when `valueKind_` holds a value outside the enumerators
  // handled above.
  fprintf(stderr, "Unknown ValueKind: %d\n", static_cast<uint8_t>(valueKind_));
  return false;
}
/// Helper to convert C-style strings (i.e., '\0' terminated) to lower case,
/// in place. `token` must be non-null and '\0' terminated.
static inline void toLower(char *token) {
  for (char *c = token; *c; c++) {
    // `tolower` requires its argument to be representable as `unsigned char`
    // (or to be EOF). Passing a plain `char` is undefined behaviour for bytes
    // >= 0x80 on platforms where `char` is signed, so convert first.
    *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
  }
}
/// Returns true iff the two '\0' terminated strings have identical contents.
static inline bool streq(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return !ordering;
}
/// Returns true iff the two '\0' terminated strings differ in contents.
static inline bool strne(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering != 0;
}
/// Read the MME header of a general sparse matrix of type real.
void SparseTensorReader::readMMEHeader() {
char header[64];
char object[64];
char format[64];
char field[64];
char symmetry[64];
// Read header line.
if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
symmetry) != 5) {
fprintf(stderr, "Corrupt header in %s\n", filename);
exit(1);
}
// Convert all to lowercase up front (to avoid accidental redundancy).
toLower(header);
toLower(object);
toLower(format);
toLower(field);
toLower(symmetry);
// Process `field`, which specify pattern or the data type of the values.
if (streq(field, "pattern")) {
valueKind_ = ValueKind::kPattern;
} else if (streq(field, "real")) {
valueKind_ = ValueKind::kReal;
} else if (streq(field, "integer")) {
valueKind_ = ValueKind::kInteger;
} else if (streq(field, "complex")) {
valueKind_ = ValueKind::kComplex;
} else {
fprintf(stderr, "Unexpected header field value in %s\n", filename);
exit(1);
}
// Set properties.
isSymmetric_ = streq(symmetry, "symmetric");
// Make sure this is a general sparse matrix.
if (strne(header, "%%matrixmarket") || strne(object, "matrix") ||
strne(format, "coordinate") ||
(strne(symmetry, "general") && !isSymmetric_)) {
fprintf(stderr, "Cannot find a general sparse matrix in %s\n", filename);
exit(1);
}
// Skip comments.
while (true) {
readLine();
if (line[0] != '%')
break;
}
// Next line contains M N NNZ.
idata[0] = 2; // rank
if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
idata + 1) != 3) {
fprintf(stderr, "Cannot find size in %s\n", filename);
exit(1);
}
}
/// Reads the "extended" FROSTT header. Although not part of the documented
/// format, the file is assumed to start with optional '#' comment lines,
/// followed by one line holding the rank and the number of nonzeros, and
/// then the dimension sizes (one per rank). On success `idata[0]` holds the
/// rank, `idata[1]` the number of nonzeros, and `idata[2 + r]` the size of
/// dimension `r`. Every failure prints a diagnostic to stderr and terminates
/// the process with exit code 1.
void SparseTensorReader::readExtFROSTTHeader() {
  // Skip comment lines; on exit `line` holds the first non-comment line.
  do {
    readLine();
  } while (line[0] == '#');
  // That line contains RANK and NNZ.
  const int numFields =
      sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1);
  if (numFields != 2) {
    fprintf(stderr, "Cannot find metadata in %s\n", filename);
    exit(1);
  }
  // Followed by the dimension sizes (one per rank).
  // NOTE(review): the rank comes straight from the file and is not checked
  // against the capacity of `idata` (declared outside this file) -- confirm
  // whether a bound check is needed here.
  const uint64_t rank = idata[0];
  for (uint64_t dim = 0; dim != rank; ++dim) {
    const int numRead = fscanf(file, "%" PRIu64, &idata[2 + dim]);
    if (numRead != 1) {
      fprintf(stderr, "Cannot find dimension size %s\n", filename);
      exit(1);
    }
  }
  readLine(); // end of line
  // The FROSTT format does not define the data type of the nonzero elements.
  valueKind_ = ValueKind::kUndefined;
}
|