#include #include #include #include #include #include #include #include using namespace relational; typedef double Float; typedef int ID; const Float R = 6371.0; const Float TWOPI = 2.0*3.1415926536; // 1. Define a row of type CityRow, which contains 4 fields. // unique<> places a unique index on the id column (duplicates throw duplicate_key) // indexed<> places an index on the z column. We use this to find nearby cities. RM_DEFINE_ROW_4(CityRow, unique, id, Float, x, Float, y, indexed, z) // 2. Define a class called City, which basically adds behaviour to the original CityRow. // We can have tables of C++ objects. class City : public CityRow { std::string m_name; public: City() { } // 3. Construct City with a given latitude and longtitude. // We actually store the Cartesian coordinates of each City, since that // makes the math easier. City(int id0, const std::string &name, Float lon_deg, Float lat_deg) : m_name(name) { id = id0; Float lat_rad = lat_deg * TWOPI / 360.0; Float lon_rad = lon_deg * TWOPI / 360.0; // Convert polar to Cartesian coordinates x = R * cos(lat_rad) * sin(lon_rad); y = R * cos(lat_rad) * cos(lon_rad); z = R * sin(lat_rad); } const std::string &getName() const { return m_name; } Float euclideanDistance(const City &other) const { return sqrt(euclideanDistance2(other)); } // 4. A function to return the square of the Euclidean (straight-line) distance between two cities. // We don't take square root since that is expensive and the square preserves ordering. // Uses Pythagoras's theorem. Float euclideanDistance2(const City &other) const { Float dx = x - other.x; Float dy = y - other.y; Float dz = z - other.z; return dx*dx + dy*dy + dz*dz; } // 5. A function to return the surface (spherical) distance between two cities. // This comes from the definition of a dot product. The result is slightly larger // than Euclidean distance. Float surfaceDistance(const City &other) const { Float cosTheta = (x*other.x + y*other.y + z*other.z) / (R*R); return R * acos(cosTheta); } }; // 6. A row to store the distance between two cities. // We index on all three columns since we wish to order on distance (squared) // and if we index on the cities we can find which cities are adjacent, and it makes // it efficient to delete cities. RM_DEFINE_ROW_3(Distance, indexed, distance2, // The square of the distance indexed, from, indexed, to); // 7. A class storing a collection of cities, and the distances between them. // It maintains a list of adjacent cities, up to a maximum distance of "cutoff". class WorldMap { table m_cities; // The table of cities table m_distances;// The table of distances between cities Float m_cutoff, // The cut-off distance m_cutoff2; // The square of the cut-off distance // 8. A functor that is called from WorldMap::addCity. // Calling this causes the new city to be added to the data structure, // but only if it is close to an existing city. The problem is we need to cull // the pairs of cities otherwise they would not fit in memory. class AddDistance { WorldMap &m_wm; const City &m_city; public: AddDistance(WorldMap &wm, const City &c) : m_wm(wm), m_city(c) { } // 9. Called by the for_each in WorldMap::addCity. void operator()(const City &other) { Float distance2 = m_city.euclideanDistance2(other); if(distance2<=m_wm.m_cutoff2) { m_wm.m_distances.insert( Distance(distance2, other.id, m_city.id)); } } }; // 10. A functor to print a row of the results. // It uses a stringstream because we don't want the outputting to the terminal to // dominate the results. struct PrintDistance { std::ostream &m_os; public: PrintDistance(std::ostream &os) : m_os(os) { } // 11. Prints a row. Called from Worldmap::listClosest void operator()(const Distance &distance, const City &city1, const City &city2) { m_os << std::setw(10) << sqrt(distance.distance2) << std::setw(10) << city1.surfaceDistance(city2) << std::setw(20) << city1.getName() << std::setw(20) << city2.getName() << std::endl; } }; public: // 12. Constructs a WorldMap. Pass in the desired cut-off distance. WorldMap(Float c) : m_cutoff(c), m_cutoff2(c*c) { } // 13. Adds a city to the world map. // It detects duplicates and does not add them. It performs a query on the cities table // to find cities close to the new city within a certain range. For each of these it calls // AddDistance which adds the pair of cities to m_distances, if the cities are within range. // Finally it adds the city. This function is not exception neutral. void addCity(const City &city) { if(find(select(m_cities, m_cities.x == city.x && m_cities.y == city.y && m_cities.z == city.z))) return; // Don't add duplicates for_each( select(m_cities, m_cities.x>=city.x - m_cutoff && m_cities.x<=city.x + m_cutoff && m_cities.y>=city.y - m_cutoff && m_cities.y<=city.y + m_cutoff && m_cities.z>=city.z - m_cutoff && // Picks up the index here m_cities.z<=city.z + m_cutoff), // Picks up the index here AddDistance(*this, city) ); m_cities.insert(city); } // 14. Deletes a city with a given id. It performs three index-lookups and is very efficient. void deleteCity(ID id) { m_cities.erase_where(m_cities.id == id); m_distances.erase_where(m_distances.from == id); m_distances.erase_where(m_distances.to == id); } // 15. Lists the pairs of closest cities, up to a limit of n results, and // outputs the results to the stream os. The query simply iterates the m_distances table in default order, // and joins on the id column of the two cities. The functor PrintDistance is called for each result. void listClosest(std::ostream &os, int n) { for_each(limit(select( (m_distances, m_cities, m_cities), col<0, Distance::from_column>() == col<1, City::id_column>() && col<0, Distance::to_column>() == col<2, City::id_column>()),n), PrintDistance(os)); os << "There are " << m_cities.size() << " cities and " << m_distances.size() << " distances in total\n"; } }; // 17. Fills a table of data with n randomly-placed cities. void acquireRandomData(int n, std::vector &cities) { for(int i=0; i &cities) { // This is a hack not production quality code std::ifstream file("aslatlong.txt"); while(file) { char line[500]; file.getline(line, 500); if(file.gcount()>40 && line[0]!='#') { char *p = strtok(line, "\t"); // Never use strtok in production code int id = atoi(p); p = strtok(0, "\t"); Float lat = atof(p); p = strtok(0, "\t"); Float lon = atof(p); p = strtok(0, "\t"); cities.push_back(City(id, p, lat, lon)); } } } // 19. Runs a test, and times what happened. We only time the inner loop, i.e. // adding the cities, and listing the closest pairs. We write to a std::stringstream // since that means that our timing tests are not measuring the speed of the terminal. void test(Float cutoff, const std::vector &cities, int count) { clock_t t0, t1; std::ostringstream ss; WorldMap wm(cutoff); try { t0 = clock(); for(std::vector::const_iterator i=cities.begin(); i!=cities.end(); ++i) wm.addCity(*i); wm.listClosest(ss, count); t1 = clock(); } catch(duplicate_key&) { ss << "Test failed because of a duplicate id\n"; } std::cout << ss.str(); std::cout << "Time = " << (1000*(t1-t0))/CLOCKS_PER_SEC << " milliseconds\n"; } int main(int argc, char *argv[]) { std::vector cities; acquireRealData(cities); test(1.2, cities, 500); // Using VC7.1 with full optimization gives 15-32 ms (clock loses accuracy) // Using VC7.1 with Release (no optimization), gives 156 ms //acquireRealData(cities); //test(5, cities, 500); // 46 ms //acquireRandomData(20000, cities); //test(8.0, cities, 500); // 109ms //acquireRandomData(100000, cities); //test(2.7, cities, 2000); // 1750ms //acquireRandomData(1000000, cities); //test(0.07, cities, 500); // 15500 ms return 0; }