28 #ifndef GRAPHCHI_CONVERSIONS_DEF
29 #define GRAPHCHI_CONVERSIONS_DEF
34 #include "graphchi_types.hpp"
43 #define VARIABLE_IS_NOT_USED __attribute__ ((unused))
45 #define VARIABLE_IS_NOT_USED
59 static void FIXLINE(
char * s);
61 static void parse(
int &x,
const char * s) {
/**
 * Parses the leading base-10 digits of s and stores the value in x.
 * The result of strtoul() is narrowed to unsigned int.
 */
static void parse(unsigned int &x, const char * s) {
    const unsigned long parsed = strtoul(s, NULL, 10);
    x = (unsigned int) parsed;
}
69 static void parse(
float &x,
const char * s) {
83 static void parse(
long &x,
const char * s) {
87 static void parse(
char &x,
const char * s) {
91 static void parse(
bool &x,
const char * s) {
95 static void parse(
double &x,
const char * s) {
99 static void parse(
short &x,
const char * s) {
104 template <
typename T>
105 void parse(T &x,
const char * s) {
106 logstream(
LOG_FATAL) <<
"You need to define parse<your-type>(your-type &x, const char *s) function"
107 <<
" to support parsing the edge value." << std::endl;
/**
 * Strips a single trailing '\n' from the NUL-terminated string s, in place.
 * Strings that are empty or do not end in '\n' are left untouched.
 */
void FIXLINE(char * s) {
    size_t len = strlen(s);
    // Guard the empty string: indexing s[len - 1] with len == 0 would touch
    // the byte in front of the buffer.
    if (len > 0 && s[len - 1] == '\n') s[len - 1] = 0;
}
124 template <
typename EdgeDataType>
126 FILE * inf = fopen(inputfile.c_str(),
"r");
129 logstream(
LOG_FATAL) <<
"Could not load :" << inputfile <<
" error: " << strerror(errno) << std::endl;
133 logstream(
LOG_INFO) <<
"Reading in edge list format!" << std::endl;
135 while(fgets(s, 1024, inf) != NULL) {
137 if (s[0] ==
'#')
continue;
138 if (s[0] ==
'%')
continue;
140 char delims[] =
"\t ";
142 t = strtok(s, delims);
143 vid_t from = atoi(t);
144 t = strtok(NULL, delims);
148 t = strtok(NULL, delims);
151 parse(val, (
const char*) t);
153 val = EdgeDataType();
166 template <
typename EdgeDataType>
168 FILE * inf = fopen(inputfile.c_str(),
"r");
170 logstream(
LOG_FATAL) <<
"Could not load :" << inputfile <<
" error: " << strerror(errno) << std::endl;
173 logstream(
LOG_INFO) <<
"Reading in adjacency list format!" << std::endl;
175 int maxlen = 100000000;
176 char * s = (
char*) malloc(maxlen);
178 char delims[] =
" \t";
181 while(fgets(s, maxlen, inf) != NULL) {
184 if (s[0] ==
'#')
continue;
185 if (s[0] ==
'%')
continue;
186 char * t = strtok(s, delims);
187 vid_t from = atoi(t);
188 t = strtok(NULL,delims);
192 while((t = strtok(NULL,delims)) != NULL) {
200 logstream(
LOG_ERROR) <<
"Mismatch when reading adjacency list: " << num <<
" != " << i <<
" s: " << std::string(s)
201 <<
" on line: " << linenum << std::endl;
214 template <
typename EdgeDataType>
218 virtual std::string getSuffix() = 0;
219 virtual void reprocess(std::string preprocFilename, std::string basefileName) = 0;
226 template <
typename EdgeDataType>
228 std::string suffix =
"";
229 if (preprocessor != NULL) {
230 suffix = preprocessor->getSuffix();
235 std::string file_type_str = get_option_string_interactive(
"filetype",
"edgelist, adjlist");
236 if (file_type_str !=
"adjlist" && file_type_str !=
"edgelist") {
237 logstream(
LOG_ERROR) <<
"You need to specify filetype: 'edgelist' or 'adjlist'." << std::endl;
244 if (file_type_str ==
"adjlist") {
245 convert_adjlist<EdgeDataType>(basefilename, sharderobj);
246 }
else if (file_type_str ==
"edgelist") {
247 convert_edgelist<EdgeDataType>(basefilename, sharderobj);
253 if (preprocessor != NULL) {
254 preprocessor->reprocess(sharderobj.preprocessed_name(), basefilename);
260 logstream(
LOG_INFO) <<
"Successfully finished sharding for " << basefilename + suffix << std::endl;
261 logstream(
LOG_INFO) <<
"Created " << nshards <<
" shards." << std::endl;
266 template <
typename EdgeDataType>
267 int convert_if_notexists(std::string basefilename, std::string nshards_string, SharderPreprocessor<EdgeDataType> * preprocessor = NULL) {
269 std::string suffix =
"";
270 if (preprocessor != NULL) {
271 suffix = preprocessor->getSuffix();
275 if ((nshards = find_shards<EdgeDataType>(basefilename + suffix, nshards_string))) {
276 logstream(
LOG_INFO) <<
"Found preprocessed files for " << basefilename <<
", num shards=" << nshards << std::endl;
279 logstream(
LOG_INFO) <<
"Did not find preprocessed shards for " << basefilename + suffix << std::endl;
280 logstream(
LOG_INFO) <<
"Will try create them now..." << std::endl;
281 nshards = convert<EdgeDataType>(basefilename, nshards_string, preprocessor);
300 return a.deg < b.deg || (a.deg == b.deg && a.id < b.id);
307 template <
typename EdgeDataType>
312 vid_t * translate_table;
315 std::string getSuffix() {
319 vid_t translate(vid_t vid) {
320 if (vid >= max_vertex_id)
return vid;
321 return translate_table[vid];
324 void reprocess(std::string preprocessedFile, std::string baseFilename) {
325 size_t blocksize = 32 * 1024 * 1024;
326 while (blocksize %
sizeof(
edge_t)) blocksize++;
328 char * block = (
char*) malloc(blocksize);
329 size_t total_to_process = get_filesize(preprocessedFile);
331 FILE * inf = fopen(preprocessedFile.c_str(),
"r");
333 logstream(
LOG_ERROR) <<
"Could not open: " << preprocessedFile <<
" error: " << strerror(errno) << std::endl;
336 fread(&max_vertex_id,
sizeof(vid_t), 1, inf);;
338 vid_t nverts = max_vertex_id + 1;
339 for(vid_t i=0; i < nverts; i++) {
346 while(len < blocksize) {
347 int a = (int) fread(block + len, 1, blocksize - len, inf);
353 logstream(
LOG_DEBUG) <<
"Degree ordering -- read:" << (totread * 1.0 / total_to_process * 100) <<
"%" << std::endl;
357 for(
int i=0; i<(int)len; i++) {
358 degarray[ptr[i].src].deg++;
359 degarray[ptr[i].dst].deg++;
361 }
while (!feof(inf));
365 quickSort(degarray, nverts, vertex_degree_less);
368 translate_table = (vid_t*) calloc(
sizeof(vid_t), nverts);
369 for(vid_t i=0; i<nverts; i++) {
370 translate_table[degarray[i].id] = i;
375 std::string translate_table_file = baseFilename +
".vertexmap";
376 int df = open(translate_table_file.c_str(), O_RDWR | O_CREAT, S_IROTH | S_IWOTH | S_IWUSR | S_IRUSR);
377 if (df < 0) logstream(
LOG_ERROR) <<
"Could not write vertex map: " << translate_table_file <<
378 " error: " << strerror(errno) << std::endl;
380 pwrite(df, translate_table, nverts, 0);
384 std::string tmpfilename = preprocessedFile +
".old";
385 rename(preprocessedFile.c_str(), tmpfilename.c_str());
387 inf = fopen(tmpfilename.c_str(),
"r");
389 logstream(
LOG_ERROR) <<
"Could not open: " << tmpfilename <<
" error: " << strerror(errno) << std::endl;
392 fread(&max_vertex_id,
sizeof(vid_t), 1, inf);;
394 FILE * outf = fopen(preprocessedFile.c_str(),
"w");
396 logstream(
LOG_ERROR) <<
"Could not open: " << preprocessedFile <<
" error: " << strerror(errno) << std::endl;
398 assert(outf != NULL);
399 fwrite(&max_vertex_id,
sizeof(vid_t), 1, outf);
404 while(len < blocksize) {
405 int a = (int) fread(block + len, 1, blocksize - len, inf);
411 logstream(
LOG_DEBUG) <<
"Degree ordering -- write/read:" << (totread * 1.0 / total_to_process * 100) <<
"%" << std::endl;
416 for(
int i=0; i<(int)len; i++) {
417 ptr[i].src = translate(ptr[i].src);
418 ptr[i].dst = translate(ptr[i].dst);
420 fwrite(&ptr[i],
sizeof(
edge_t), 1, outf);
422 }
while (!feof(inf));
427 delete translate_table;