GraphChi  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Macros
chifilenames.hpp
Go to the documentation of this file.
1 
2 
33 #ifndef GRAPHCHI_FILENAMES_DEF
34 #define GRAPHCHI_FILENAMES_DEF
35 
36 #include <fcntl.h>
37 #include <string>
38 #include <sstream>
39 #include <stdlib.h>
40 #include <unistd.h>
41 
42 #include "logger/logger.hpp"
43 
44 namespace graphchi {
45 
46 #ifdef __GNUC__
47 #define VARIABLE_IS_NOT_USED __attribute__ ((unused))
48 #else
49 #define VARIABLE_IS_NOT_USED
50 #endif
51 
/**
 * Builds the name of the file holding the vertex values of a graph.
 * The size in bytes of VertexDataType is embedded in the name, so vertex
 * data of differently sized types maps to different files.
 *
 * @param basefilename  base name (path prefix) of the graph
 * @return basefilename followed by ".<sizeof(VertexDataType)>B.vout"
 */
template <typename VertexDataType>
static std::string filename_vertex_data(std::string basefilename) {
    std::ostringstream name;
    name << basefilename << "." << sizeof(VertexDataType) << "B.vout";
    return name.str();
}
62 
/**
 * Builds the name of the degree file of a graph.
 *
 * @param basefilename  base name (path prefix) of the graph
 * @return basefilename followed by "_degs.bin"
 */
static std::string filename_degree_data(std::string basefilename) {
    std::string degree_file(basefilename);
    degree_file += "_degs.bin";
    return degree_file;
}
66 
/**
 * Builds the name of the intervals file that belongs to a given sharding
 * of a graph.
 *
 * @param basefilename  base name (path prefix) of the graph
 * @param nshards       number of shards; becomes part of the file name
 * @return basefilename followed by ".<nshards>.intervals"
 */
static std::string filename_intervals(std::string basefilename, int nshards) {
    std::ostringstream name;
    name << basefilename << "." << nshards << ".intervals";
    return name.str();
}
73 
74 
75  static std::string VARIABLE_IS_NOT_USED get_part_str(int p, int nshards) {
76  char partstr[32];
77  sprintf(partstr, ".%d_%d", p, nshards);
78  return std::string(partstr);
79  }
80 
/**
 * Builds the name of the edge-data file of one shard.
 * The size in bytes of EdgeDataType is embedded in the name.
 *
 * @param basefilename  base name (path prefix) of the graph
 * @param p             shard number
 * @param nshards       total number of shards
 * @return basefilename followed by
 *         ".edata_azv.e<sizeof(EdgeDataType)>B.<p>_<nshards>"
 */
template <typename EdgeDataType>
static std::string filename_shard_edata(std::string basefilename, int p, int nshards) {
    std::ostringstream name;
    name << basefilename
         << ".edata_azv."
         << "e" << sizeof(EdgeDataType) << "B."
         << p << "_" << nshards;
    return name.str();
}
90 
/**
 * Builds the name of the adjacency file of one shard.
 *
 * @param basefilename  base name (path prefix) of the graph
 * @param p             shard number
 * @param nshards       total number of shards
 * @return basefilename followed by ".edata_azv.<p>_<nshards>.adj"
 */
static std::string filename_shard_adj(std::string basefilename, int p, int nshards) {
    std::ostringstream name;
    name << basefilename
         << ".edata_azv."
         << p << "_" << nshards << ".adj";
    return name.str();
}
98 
/**
 * Locates the GraphChi configuration file.
 *
 * @return "<GRAPHCHI_ROOT>/conf/graphchi.cnf" when the environment variable
 *         GRAPHCHI_ROOT is set, otherwise the relative path
 *         "conf/graphchi.cnf"
 */
static std::string filename_config();
static std::string filename_config() {
    const char * const root_dir = getenv("GRAPHCHI_ROOT");
    if (root_dir == NULL) {
        // No root configured: fall back to a path relative to the working directory.
        return "conf/graphchi.cnf";
    }
    std::string config_path(root_dir);
    config_path += "/conf/graphchi.cnf";
    return config_path;
}
111 
/**
 * Tells whether a file can be opened for reading.
 *
 * Declared inline because the definition lives in a header: a non-inline
 * definition would be emitted by every translation unit that includes this
 * file and break the link with multiple-definition errors.
 *
 * @param sname  path of the file to probe
 * @return true if open(sname, O_RDONLY) succeeds, false otherwise
 */
inline bool shard_file_exists(std::string sname);
inline bool shard_file_exists(std::string sname) {
    const int fd = open(sname.c_str(), O_RDONLY);
    if (fd < 0) {
        return false;
    }
    // Only existence matters; release the descriptor right away.
    close(fd);
    return true;
}
122 
127  template<typename EdgeDataType>
128  static int find_shards(std::string base_filename, std::string shard_string="auto") {
129  int try_shard_num;
130  int start_num = 0;
131  int last_shard_num = 2400;
132  if (shard_string == "auto") {
133  start_num = 0;
134  } else {
135  start_num = atoi(shard_string.c_str());
136  }
137 
138  if (start_num > 0) {
139  last_shard_num = start_num;
140  }
141 
142  for(try_shard_num=start_num; try_shard_num <= last_shard_num; try_shard_num++) {
143  std::string last_shard_name = filename_shard_edata<EdgeDataType>(base_filename, try_shard_num - 1, try_shard_num);
144 
145  int tryf = open(last_shard_name.c_str(), O_RDONLY);
146  if (tryf >= 0) {
147  // Found!
148  close(tryf);
149 
150  int nshards_candidate = try_shard_num;
151  bool success = true;
152 
153  // Validate all relevant files exists
154  for(int p=0; p < nshards_candidate; p++) {
155  std::string sname = filename_shard_edata<EdgeDataType>(base_filename, p, nshards_candidate);
156  if (!shard_file_exists(sname)) {
157  logstream(LOG_DEBUG) << "Missing shard file: " << sname << std::endl;
158  success = false;
159  break;
160  }
161 
162  sname = filename_shard_adj(base_filename, p, nshards_candidate);
163  if (!shard_file_exists(sname)) {
164  logstream(LOG_DEBUG) << "Missing shard file: " << sname << std::endl;
165  success = false;
166  break;
167  }
168  }
169 
170  // Check degree file
171  std::string degreefname = filename_degree_data(base_filename);
172  if (!shard_file_exists(degreefname)) {
173  logstream(LOG_ERROR) << "Missing degree file: " << degreefname << std::endl;
174  logstream(LOG_ERROR) << "You need to preprocess (sharder) your file again!" << std::endl;
175  return 0;
176  }
177 
178  std::string intervalfname = filename_intervals(base_filename, nshards_candidate);
179  if (!shard_file_exists(intervalfname)) {
180  logstream(LOG_ERROR) << "Missing intervals file: " << intervalfname << std::endl;
181  logstream(LOG_ERROR) << "You need to preprocess (sharder) your file again!" << std::endl;
182  return 0;
183  }
184 
185  if (!success) {
186  continue;
187  }
188 
189  logstream(LOG_INFO) << "Detected number of shards: " << nshards_candidate << std::endl;
190  logstream(LOG_INFO) << "To specify a different number of shards, use command-line parameter 'nshards'" << std::endl;
191  return nshards_candidate;
192  }
193  }
194  if (last_shard_num == start_num) {
195  logstream(LOG_ERROR) << "Could not find shards with nshards = " << start_num << std::endl;
196  logstream(LOG_ERROR) << "Please define 'nshards 0' or 'nshards auto' to automatically detect." << std::endl;
197  }
198  return 0;
199  }
200 };
201 
202 #endif
203